[vevo] Fix extraction for videos with the new streams/streamsV3 format (closes #11719)

This commit is contained in:
Yen Chi Hsuan 2017-02-25 01:40:12 +08:00
parent e498758b9c
commit 9d0c08a02c
No known key found for this signature in database
GPG Key ID: 7F902A182457CA23
2 changed files with 19 additions and 5 deletions

View File

@ -1,6 +1,8 @@
version <unreleased> version <unreleased>
Extractors Extractors
* [vevo] Fix extraction for videos with the new streams/streamsV3 format
(#11719)
+ [njpwworld] Add new extractor (#11561) + [njpwworld] Add new extractor (#11561)

View File

@ -17,12 +17,12 @@
class VevoBaseIE(InfoExtractor): class VevoBaseIE(InfoExtractor):
def _extract_json(self, webpage, video_id, item): def _extract_json(self, webpage, video_id):
return self._parse_json( return self._parse_json(
self._search_regex( self._search_regex(
r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>', r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
webpage, 'initial store'), webpage, 'initial store'),
video_id)['default'][item] video_id)
class VevoIE(VevoBaseIE): class VevoIE(VevoBaseIE):
@ -139,6 +139,11 @@ class VevoIE(VevoBaseIE):
# no genres available # no genres available
'url': 'http://www.vevo.com/watch/INS171400764', 'url': 'http://www.vevo.com/watch/INS171400764',
'only_matching': True, 'only_matching': True,
}, {
# Another case available only via the webpage; using streams/streamsV3 formats
# Geo-restricted to Netherlands/Germany
'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
'only_matching': True,
}] }]
_VERSIONS = { _VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions 0: 'youtube', # only in AuthenticateVideo videoVersions
@ -193,7 +198,14 @@ def _real_extract(self, url):
# https://github.com/rg3/youtube-dl/issues/9366) # https://github.com/rg3/youtube-dl/issues/9366)
if not video_versions: if not video_versions:
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0] json_data = self._extract_json(webpage, video_id)
if 'streams' in json_data.get('default', {}):
video_versions = json_data['default']['streams'][video_id][0]
else:
video_versions = [
value
for key, value in json_data['apollo']['data'].items()
if key.startswith('%s.streams' % video_id)]
uploader = None uploader = None
artist = None artist = None
@ -207,7 +219,7 @@ def _real_extract(self, url):
formats = [] formats = []
for video_version in video_versions: for video_version in video_versions:
version = self._VERSIONS.get(video_version['version']) version = self._VERSIONS.get(video_version.get('version'), 'generic')
version_url = video_version.get('url') version_url = video_version.get('url')
if not version_url: if not version_url:
continue continue
@ -339,7 +351,7 @@ def _real_extract(self, url):
if video_id: if video_id:
return self.url_result('vevo:%s' % video_id, VevoIE.ie_key()) return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind) playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind]
playlist = (list(playlists.values())[0] playlist = (list(playlists.values())[0]
if playlist_kind == 'playlist' else playlists[playlist_id]) if playlist_kind == 'playlist' else playlists[playlist_id])