[bloomberg] Extract the available formats (closes #2776)

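The Bloomberg extractor now builds its formats with a new helper method, `_extract_f4m_formats`, in the InfoExtractor class.
The f4m downloader then picks the requested format by matching the bitrate (`tbr`) in the info dict, falling back to the highest bitrate when none is requested.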
This commit is contained in:
Jaime Marquínez Ferrándiz 2014-07-28 15:25:56 +02:00
parent 4958ae2058
commit 31bb8d3f51
3 changed files with 28 additions and 5 deletions

View File

@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
man_url = info_dict['url'] man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[download] Downloading f4m manifest') self.to_screen('[download] Downloading f4m manifest')
manifest = self.ydl.urlopen(man_url).read() manifest = self.ydl.urlopen(man_url).read()
self.report_destination(filename) self.report_destination(filename)
@ -233,8 +234,14 @@ def real_download(self, filename, info_dict):
doc = etree.fromstring(manifest) doc = etree.fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
if requested_bitrate is None:
# get the best format
formats = sorted(formats, key=lambda f: f[0]) formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1] rate, media = formats[-1]
else:
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text) bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
metadata = base64.b64decode(media.find(_add_ns('metadata')).text) metadata = base64.b64decode(media.find(_add_ns('metadata')).text)

View File

@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
'md5': '7bf08858ff7c203c870e8a6190e221e5', # The md5 checksum changes
'info_dict': { 'info_dict': {
'id': 'qurhIVlJSB6hzkVi229d8g', 'id': 'qurhIVlJSB6hzkVi229d8g',
'ext': 'flv', 'ext': 'flv',
@ -31,8 +31,7 @@ def _real_extract(self, url):
return { return {
'id': name.split('-')[-1], 'id': name.split('-')[-1],
'title': title, 'title': title,
'url': f4m_url, 'formats': self._extract_f4m_formats(f4m_url, name),
'ext': 'flv',
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
} }

View File

@ -18,6 +18,7 @@
clean_html, clean_html,
compiled_regex_type, compiled_regex_type,
ExtractorError, ExtractorError,
int_or_none,
RegexNotFoundError, RegexNotFoundError,
sanitize_filename, sanitize_filename,
unescapeHTML, unescapeHTML,
@ -590,6 +591,22 @@ def _sleep(self, timeout, video_id, msg_template=None):
self.to_screen(msg) self.to_screen(msg)
time.sleep(timeout) time.sleep(timeout)
def _extract_f4m_formats(self, manifest_url, video_id):
    """Return the formats listed in the f4m manifest at manifest_url.

    The manifest is fetched with self._download_xml(manifest_url, video_id)
    and one format dict is produced per
    '{http://ns.adobe.com/f4m/1.0}media' element found in it.

    Every dict carries the manifest URL itself as 'url' and 'flv' as 'ext';
    the entries differ only in 'tbr', 'width' and 'height', which are read
    from the element's 'bitrate', 'width' and 'height' attributes through
    int_or_none.

    The list is passed through self._sort_formats before being returned.
    """
    f4m_doc = self._download_xml(manifest_url, video_id)
    media_nodes = f4m_doc.findall('{http://ns.adobe.com/f4m/1.0}media')
    formats = [
        {
            # All variants share the manifest URL; only the metadata
            # below tells them apart.
            'url': manifest_url,
            'ext': 'flv',
            'tbr': int_or_none(node.attrib.get('bitrate')),
            'width': int_or_none(node.attrib.get('width')),
            'height': int_or_none(node.attrib.get('height')),
        }
        for node in media_nodes
    ]
    self._sort_formats(formats)
    return formats
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """