[CBS] Add fallback (#579)

Related: https://github.com/ytdl-org/youtube-dl/issues/29564
Authored-by: llacb47, pukkandan
This commit is contained in:
LE 2021-08-01 22:16:12 -04:00 committed by GitHub
parent 605cad0be7
commit 68f5867cf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 13 deletions

View File

@@ -53,6 +53,54 @@ class CBSIE(CBSBaseIE):
'skip_download': True, 'skip_download': True,
}, },
'_skip': 'Blocked outside the US', '_skip': 'Blocked outside the US',
}, {
'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
'info_dict': {
'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
'ext': 'mp4',
'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
'description': 'md5:7ac835000645a69933df226940e3c859',
'duration': 1418,
'timestamp': 920264400,
'upload_date': '19990301',
'uploader': 'CBSI-NEW',
},
'params': {
'skip_download': 'm3u8',
},
'_skip': 'Blocked outside the US',
}, {
'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
'info_dict': {
'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
'ext': 'mp4',
'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
'description': 'md5:f4adcea3e8b106192022e121f1565bae',
'duration': 2506,
'timestamp': 1627063200,
'upload_date': '20210723',
'uploader': 'CBSI-NEW',
},
'params': {
'skip_download': 'm3u8',
},
'_skip': 'Blocked outside the US',
}, {
'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
'info_dict': {
'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
'timestamp': 1624507140,
'description': 'md5:e01af24e95c74d55e8775aef86117b95',
'uploader': 'CBSI-NEW',
'upload_date': '20210624',
},
'params': {
'ignore_no_formats_error': True,
'skip_download': True,
},
'expected_warnings': [
'This content expired on', 'No video formats found', 'Requested format is not available'],
}, { }, {
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
'only_matching': True, 'only_matching': True,
@@ -79,17 +127,26 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
asset_types = [] asset_types = []
subtitles = {} subtitles = {}
formats = [] formats = []
useXMLmetadata = True
last_e = None last_e = None
for item in items_data.findall('.//item'): for item in items_data.findall('.//item'):
asset_type = xpath_text(item, 'assetType') asset_type = xpath_text(item, 'assetType')
if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
continue
asset_types.append(asset_type)
query = { query = {
'mbr': 'true', 'mbr': 'true',
'assetTypes': asset_type, 'assetTypes': asset_type,
} }
if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): if not asset_type:
# fallback for content_ids that videoPlayerService doesn't return anything for
useXMLmetadata = False
asset_type = 'fallback'
query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
del query['assetTypes']
elif asset_type in asset_types:
continue
elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
continue
asset_types.append(asset_type)
if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
query['formats'] = 'MPEG4,M3U' query['formats'] = 'MPEG4,M3U'
elif asset_type in ('RTMP', 'WIFI', '3G'): elif asset_type in ('RTMP', 'WIFI', '3G'):
query['formats'] = 'MPEG4,FLV' query['formats'] = 'MPEG4,FLV'
@@ -97,26 +154,38 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
tp_formats, tp_subtitles = self._extract_theplatform_smil( tp_formats, tp_subtitles = self._extract_theplatform_smil(
update_url_query(tp_release_url, query), content_id, update_url_query(tp_release_url, query), content_id,
'Downloading %s SMIL data' % asset_type) 'Downloading %s SMIL data' % asset_type)
except ExtractorError as e:
last_e = e
if useXMLmetadata:
continue
query['formats'] = '' # blank query to check if expired
try:
tp_formats, tp_subtitles = self._extract_theplatform_smil(
update_url_query(tp_release_url, query), content_id,
'Downloading %s SMIL data, trying again with another format' % asset_type)
except ExtractorError as e: except ExtractorError as e:
last_e = e last_e = e
continue continue
formats.extend(tp_formats) formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles) subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if last_e and not formats: if last_e and not formats:
raise last_e self.raise_no_formats(last_e, True, content_id)
self._sort_formats(formats) self._sort_formats(formats)
info = self._extract_theplatform_metadata(tp_path, content_id) info = self._extract_theplatform_metadata(tp_path, content_id)
info.update({ info.update({
'id': content_id, 'formats': formats,
'subtitles': subtitles,
'id': content_id
})
if useXMLmetadata:
info.update({
'title': title, 'title': title,
'series': xpath_text(video_data, 'seriesTitle'), 'series': xpath_text(video_data, 'seriesTitle'),
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
'thumbnail': xpath_text(video_data, 'previewImageURL'), 'thumbnail': xpath_text(video_data, 'previewImageURL')
'formats': formats,
'subtitles': subtitles,
}) })
return info return info

View File

@@ -1052,6 +1052,8 @@ def raise_geo_restricted(
def raise_no_formats(self, msg, expected=False, video_id=None): def raise_no_formats(self, msg, expected=False, video_id=None):
if expected and self.get_param('ignore_no_formats_error'): if expected and self.get_param('ignore_no_formats_error'):
self.report_warning(msg, video_id) self.report_warning(msg, video_id)
elif isinstance(msg, ExtractorError):
raise msg
else: else:
raise ExtractorError(msg, expected=expected, video_id=video_id) raise ExtractorError(msg, expected=expected, video_id=video_id)