[extractor/generic] Extract f4m formats and refactor common info

This commit is contained in:
Sergey M․ 2016-03-13 03:17:25 +06:00
parent 0fdbb3322b
commit f930e0c76e

View File

@@ -1242,28 +1242,34 @@ def _real_extract(self, url):
full_response = self._request_webpage(request, video_id) full_response = self._request_webpage(request, video_id)
head_response = full_response head_response = full_response
info_dict = {
'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
}
# Check for direct link to a video # Check for direct link to a video
content_type = head_response.headers.get('Content-Type', '') content_type = head_response.headers.get('Content-Type', '')
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type) m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
if m: if m:
upload_date = unified_strdate( upload_date = unified_strdate(
head_response.headers.get('Last-Modified')) head_response.headers.get('Last-Modified'))
formats = [] format_id = m.group('format_id')
if m.group('format_id').endswith('mpegurl'): if format_id.endswith('mpegurl'):
formats = self._extract_m3u8_formats(url, video_id, 'mp4') formats = self._extract_m3u8_formats(url, video_id, 'mp4')
elif format_id == 'f4m':
formats = self._extract_f4m_formats(url, video_id)
else: else:
formats = [{ formats = [{
'format_id': m.group('format_id'), 'format_id': m.group('format_id'),
'url': url, 'url': url,
'vcodec': 'none' if m.group('type') == 'audio' else None 'vcodec': 'none' if m.group('type') == 'audio' else None
}] }]
return { info_dict.update({
'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'direct': True, 'direct': True,
'formats': formats, 'formats': formats,
'upload_date': upload_date, 'upload_date': upload_date,
} })
return info_dict
if not self._downloader.params.get('test', False) and not is_intentional: if not self._downloader.params.get('test', False) and not is_intentional:
force = self._downloader.params.get('force_generic_extractor', False) force = self._downloader.params.get('force_generic_extractor', False)
@@ -1291,13 +1297,12 @@ def _real_extract(self, url):
'URL could be a direct video link, returning it as such.') 'URL could be a direct video link, returning it as such.')
upload_date = unified_strdate( upload_date = unified_strdate(
head_response.headers.get('Last-Modified')) head_response.headers.get('Last-Modified'))
return { info_dict.update({
'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'direct': True, 'direct': True,
'url': url, 'url': url,
'upload_date': upload_date, 'upload_date': upload_date,
} })
return info_dict
webpage = self._webpage_read_content( webpage = self._webpage_read_content(
full_response, url, video_id, prefix=first_bytes) full_response, url, video_id, prefix=first_bytes)
@@ -1314,12 +1319,12 @@ def _real_extract(self, url):
elif doc.tag == '{http://xspf.org/ns/0/}playlist': elif doc.tag == '{http://xspf.org/ns/0/}playlist':
return self.playlist_result(self._parse_xspf(doc, video_id), video_id) return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
return { info_dict['formats'] = self._parse_mpd_formats(
'id': video_id, doc, video_id, mpd_base_url=url.rpartition('/')[0])
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), return info_dict
'formats': self._parse_mpd_formats( elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
doc, video_id, mpd_base_url=url.rpartition('/')[0]), info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
} return info_dict
except compat_xml_parse_error: except compat_xml_parse_error:
pass pass