[openload] Fallback on f-page extraction (closes #14665, closes #14879)

This commit is contained in:
Sergey M․ 2017-12-30 05:52:35 +07:00
parent 580f3c79d5
commit d2c5b5a951
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -284,6 +284,11 @@ class OpenloadIE(InfoExtractor):
# for title and ext # for title and ext
'url': 'https://openload.co/embed/Sxz5sADo82g/', 'url': 'https://openload.co/embed/Sxz5sADo82g/',
'only_matching': True, 'only_matching': True,
}, {
# unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
# via https://openload.co/f/e-Ixz9ZR5L0/
'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
'only_matching': True,
}, { }, {
'url': 'https://oload.tv/embed/KnG-kKZdcfY/', 'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
'only_matching': True, 'only_matching': True,
@ -305,18 +310,27 @@ def _extract_urls(webpage):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
url = 'https://openload.co/embed/%s/' % video_id url_pattern = 'https://openload.co/%%s/%s/' % video_id
headers = { headers = {
'User-Agent': self._USER_AGENT, 'User-Agent': self._USER_AGENT,
} }
webpage = self._download_webpage(url, video_id, headers=headers) for path in ('embed', 'f'):
page_url = url_pattern % path
last = path == 'f'
webpage = self._download_webpage(
page_url, video_id, 'Downloading %s webpage' % path,
headers=headers, fatal=last)
if not webpage:
continue
if 'File not found' in webpage or 'deleted by the owner' in webpage: if 'File not found' in webpage or 'deleted by the owner' in webpage:
if not last:
continue
raise ExtractorError('File not found', expected=True, video_id=video_id) raise ExtractorError('File not found', expected=True, video_id=video_id)
break
phantom = PhantomJSwrapper(self, required_version='2.0') phantom = PhantomJSwrapper(self, required_version='2.0')
webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers) webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers)
decoded_id = get_element_by_id('streamurl', webpage) decoded_id = get_element_by_id('streamurl', webpage)
@ -327,7 +341,7 @@ def _real_extract(self, url):
'title', default=None) or self._html_search_meta( 'title', default=None) or self._html_search_meta(
'description', webpage, 'title', fatal=True) 'description', webpage, 'title', fatal=True)
entries = self._parse_html5_media_entries(url, webpage, video_id) entries = self._parse_html5_media_entries(page_url, webpage, video_id)
entry = entries[0] if entries else {} entry = entries[0] if entries else {}
subtitles = entry.get('subtitles') subtitles = entry.get('subtitles')