[extractor/youtube] Support changing extraction language (#4470)

Adds `--extractor-args youtube:lang=<supported lang code>` extractor arg to prefer translated fields (e.g. title and description) of that language, if available, for all YouTube extractors. See README or error message for list of supported language codes.

Closes https://github.com/yt-dlp/yt-dlp/issues/387

Authored by: coletdjnz
This commit is contained in:
coletdjnz 2022-09-09 05:16:46 +00:00 committed by GitHub
parent 0c0b78b273
commit c26f9b991a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 286 additions and 75 deletions

View File

@ -1705,6 +1705,8 @@ #### youtube
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests * `innertube_key`: Innertube API key to use for all API requests
* `lang`: Language code (case-sensitive) of a supported content language; translated metadata (e.g. title and description) in this language is preferred when available. By default, the video's primary-language metadata is preferred, with a fallback to the `en` translation.
* See `_SUPPORTED_LANG_CODES` in youtube.py for the list of supported content language codes.
#### youtubetab (YouTube playlists, channels, feeds, etc.) #### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)

View File

@ -2,6 +2,7 @@
import calendar import calendar
import copy import copy
import datetime import datetime
import enum
import hashlib import hashlib
import itertools import itertools
import json import json
@ -275,6 +276,15 @@ def build_innertube_clients():
build_innertube_clients() build_innertube_clients()
class BadgeType(enum.Enum):
    """Kinds of badge that the YouTube extractors distinguish.

    Member values are assigned automatically by `enum.auto()`; code should
    compare members by identity/equality rather than rely on the numbers.
    """

    # Availability of the video/playlist
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live state
    LIVE_NOW = enum.auto()
class YoutubeBaseInfoExtractor(InfoExtractor): class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors""" """Provide base functions for Youtube extractors"""
@ -367,6 +377,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'(?:www\.)?piped\.privacy\.com\.de', r'(?:www\.)?piped\.privacy\.com\.de',
) )
# Language codes accepted for the `lang` extractor argument: `_preferred_lang`
# raises an ExtractorError for any value not listed here (case-sensitive match).
# Extracted from the account/account_menu endpoint.
# XXX: These are the supported YouTube UI and API languages,
# which differ slightly from the languages supported for translation in YouTube Studio
_SUPPORTED_LANG_CODES = [
'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
]
@functools.cached_property
def _preferred_lang(self):
    """
    Language code the user wants translated YouTube metadata in.

    The value is read (case-sensitively) from the `lang` extractor argument
    of the Youtube extractor and is computed once per instance.

    @returns  the requested language code, or None if none was requested
    @raises   ExtractorError (expected) if the code is not in _SUPPORTED_LANG_CODES
    """
    requested = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]

    # Nothing requested: callers fall back to their own default language
    if not requested:
        return None

    if requested not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {requested}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    # No warning is issued for 'en'
    if requested != 'en':
        self.report_warning(
            f'Preferring "{requested}" translated fields. Note that some metadata extraction may fail or be incorrect.')

    return requested
def _initialize_consent(self): def _initialize_consent(self):
cookies = self._get_cookies('https://www.youtube.com/') cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'): if cookies.get('__Secure-3PSID'):
@ -391,7 +431,7 @@ def _initialize_pref(self):
pref = dict(urllib.parse.parse_qsl(pref_cookie.value)) pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
except ValueError: except ValueError:
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message()) self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
pref.update({'hl': 'en', 'tz': 'UTC'}) pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref)) self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self): def _real_initialize(self):
@ -439,7 +479,7 @@ def _extract_context(self, ytcfg=None, default_client='web'):
(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict) (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
# Enforce language and tz for extraction # Enforce language and tz for extraction
client_context = traverse_obj(context, 'client', expected_type=dict, default={}) client_context = traverse_obj(context, 'client', expected_type=dict, default={})
client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}) client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
return context return context
_SAPISID = None _SAPISID = None
@ -678,13 +718,49 @@ def _extract_and_report_alerts(self, data, *args, **kwargs):
return self._report_alerts(self._extract_alerts(data), *args, **kwargs) return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
def _extract_badges(self, renderer: dict):
    """
    Collect the recognised badges attached to a renderer.

    Every `metadataBadgeRenderer` found under `renderer['badges']` is classified
    first by its icon type, then by its style. Only if neither is recognised is
    the badge's label text matched against a set of English keywords.

    @param renderer  renderer dict that may contain a `badges` list
    @returns         list of `{'type': BadgeType}` dicts, at most one per badge;
                     badges that cannot be classified are omitted
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # `badge_type` is always falsy on this path, so the type must
                # come from the label match itself
                badges.append({'type': label_badge_type})
                # One entry per badge: stop at the first matching keyword
                break

    return badges
@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has a 'type' equal to `badge_type`."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
@staticmethod @staticmethod
def _get_text(data, *path_list, max_runs=None): def _get_text(data, *path_list, max_runs=None):
for path in path_list or [None]: for path in path_list or [None]:
@ -755,9 +831,9 @@ def extract_relative_time(relative_time_text):
except ValueError: except ValueError:
return None return None
def _extract_time_text(self, renderer, *path_list): def _parse_time_text(self, text):
"""@returns (timestamp, time_text)""" if not text:
text = self._get_text(renderer, *path_list) or '' return
dt = self.extract_relative_time(text) dt = self.extract_relative_time(text)
timestamp = None timestamp = None
if isinstance(dt, datetime.datetime): if isinstance(dt, datetime.datetime):
@ -770,9 +846,10 @@ def _extract_time_text(self, renderer, *path_list):
(r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'), (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
text.lower(), 'time text', default=None))) text.lower(), 'time text', default=None)))
if text and timestamp is None: if text and timestamp is None and self._preferred_lang in (None, 'en'):
self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True) self.report_warning(
return timestamp, text f'Cannot parse localized time text "{text}"', only_once=True)
return timestamp
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
@ -848,7 +925,7 @@ def _extract_video(self, renderer):
channel_id = traverse_obj( channel_id = traverse_obj(
renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
expected_type=str, get_all=False) expected_type=str, get_all=False)
timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText') time_text = self._get_text(renderer, 'publishedTimeText') or ''
scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
overlay_style = traverse_obj( overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
@ -874,15 +951,21 @@ def _extract_video(self, renderer):
'uploader': uploader, 'uploader': uploader,
'channel_id': channel_id, 'channel_id': channel_id,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'upload_date': (strftime_or_none(timestamp, '%Y%m%d') 'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
if self._configuration_arg('approximate_date', ie_key='youtubetab') if self._configuration_arg('approximate_date', ie_key='youtubetab')
else None), else None),
'live_status': ('is_upcoming' if scheduled_timestamp is not None 'live_status': ('is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower() else 'was_live' if 'streamed' in time_text.lower()
else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
else None), else None),
'release_timestamp': scheduled_timestamp, 'release_timestamp': scheduled_timestamp,
'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges) 'availability':
'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
else self._availability(
is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)
} }
@ -2306,6 +2389,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': [], 'tags': [],
'uploader_url': 'http://www.youtube.com/user/nao20010128nao', 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
} }
}, {
# Prefer primary title+description language metadata by default
# Do not prefer translated description if primary is empty
'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
'info_dict': {
'id': 'el3E4MbxRqQ',
'ext': 'mp4',
'title': 'dlp test video 2 - primary sv no desc',
'description': '',
'channel': 'cole-dlp-test-acc',
'tags': [],
'view_count': int,
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'like_count': int,
'playable_in_embed': True,
'availability': 'unlisted',
'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
'age_limit': 0,
'duration': 5,
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'live_status': 'not_live',
'upload_date': '20220908',
'categories': ['People & Blogs'],
'uploader': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
},
'params': {'skip_download': True}
}, {
# Extractor argument: prefer translated title+description
'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
'info_dict': {
'id': 'gHKT4uU8Zng',
'ext': 'mp4',
'channel': 'cole-dlp-test-acc',
'tags': [],
'duration': 5,
'live_status': 'not_live',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'upload_date': '20220728',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'view_count': int,
'categories': ['People & Blogs'],
'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
'title': 'dlp test video title translated (fr)',
'availability': 'public',
'uploader': 'cole-dlp-test-acc',
'age_limit': 0,
'description': 'dlp test video description translated (fr)',
'playable_in_embed': True,
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],
}, { }, {
'note': '6 channel audio', 'note': '6 channel audio',
'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo', 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
@ -2907,8 +3045,10 @@ def _extract_comment(self, comment_renderer, parent=None):
text = self._get_text(comment_renderer, 'contentText') text = self._get_text(comment_renderer, 'contentText')
# note: timestamp is an estimate calculated from the current time and time_text # Timestamp is an estimate calculated from the current time and time_text
timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText') time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
timestamp = self._parse_time_text(time_text)
author = self._get_text(comment_renderer, 'authorText') author = self._get_text(comment_renderer, 'authorText')
author_id = try_get(comment_renderer, author_id = try_get(comment_renderer,
lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str) lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
@ -3554,11 +3694,19 @@ def _real_extract(self, url):
microformats = traverse_obj( microformats = traverse_obj(
player_responses, (..., 'microformat', 'playerMicroformatRenderer'), player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict, default=[]) expected_type=dict, default=[])
video_title = (
get_first(video_details, 'title') translated_title = self._get_text(microformats, (..., 'title'))
or self._get_text(microformats, (..., 'title')) video_title = (self._preferred_lang and translated_title
or search_meta(['og:title', 'twitter:title', 'title'])) or get_first(video_details, 'title') # primary
video_description = get_first(video_details, 'shortDescription') or translated_title
or search_meta(['og:title', 'twitter:title', 'title']))
translated_description = self._get_text(microformats, (..., 'description'))
original_description = get_first(video_details, 'shortDescription')
video_description = (
self._preferred_lang and translated_description
# If original description is blank, it will be an empty string.
# Do not prefer translated description in this case.
or original_description if original_description is not None else translated_description)
multifeed_metadata_list = get_first( multifeed_metadata_list = get_first(
player_responses, player_responses,
@ -3988,7 +4136,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
and info.get('live_status') != 'is_upcoming' and info.get('live_status') != 'is_upcoming'
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
): ):
upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date upload_date = strftime_or_none(
self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
info['upload_date'] = upload_date info['upload_date'] = upload_date
for to, frm in fallbacks.items(): for to, frm in fallbacks.items():
@ -4000,33 +4149,25 @@ def process_language(container, base_url, lang_code, sub_name, query):
if v: if v:
info[d_k] = v info[d_k] = v
is_private = get_first(video_details, 'isPrivate', expected_type=bool) badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
is_membersonly = None
is_premium = None
if initial_data and is_private is not None:
is_membersonly = False
is_premium = False
contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
badge_labels = set()
for content in contents:
if not isinstance(content, dict):
continue
badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
for badge_label in badge_labels:
if badge_label.lower() == 'members only':
is_membersonly = True
elif badge_label.lower() == 'premium':
is_premium = True
elif badge_label.lower() == 'unlisted':
is_unlisted = True
info['availability'] = self._availability( is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
is_private=is_private, or get_first(video_details, 'isPrivate', expected_type=bool))
needs_premium=is_premium,
needs_subscription=is_membersonly, info['availability'] = (
needs_auth=info['age_limit'] >= 18, 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
is_unlisted=None if is_private is None else is_unlisted) else self._availability(
is_private=is_private,
needs_premium=(
self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
or False if initial_data and is_private is not None else None),
needs_subscription=(
self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
or False if initial_data and is_private is not None else None),
needs_auth=info['age_limit'] >= 18,
is_unlisted=None if is_private is None else (
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
or get_first(microformats, 'isUnlisted', expected_type=bool))))
info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage) info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
@ -4472,7 +4613,7 @@ def _get_uncropped(url):
playlist_id = item_id playlist_id = item_id
playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats') playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
last_updated_unix, _ = self._extract_time_text(playlist_stats, 2) last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
if title is None: if title is None:
title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
title += format_field(selected_tab, 'title', ' - %s') title += format_field(selected_tab, 'title', ' - %s')
@ -4566,31 +4707,37 @@ def _extract_availability(self, data):
Note: Unless YouTube tells us explicitly, we do not assume it is public Note: Unless YouTube tells us explicitly, we do not assume it is public
@param data: response @param data: response
""" """
is_private = is_unlisted = None
renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {} renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
badge_labels = self._extract_badges(renderer)
player_header_privacy = traverse_obj(
data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)
badges = self._extract_badges(renderer)
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
privacy_dropdown_entries = try_get( privacy_setting_icon = traverse_obj(
renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or [] renderer, (
for renderer_dict in privacy_dropdown_entries: 'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
is_selected = try_get( lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False get_all=False, expected_type=str)
if not is_selected:
continue
label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
if label:
badge_labels.add(label.lower())
break
for badge_label in badge_labels: return (
if badge_label == 'unlisted': 'public' if (
is_unlisted = True self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
elif badge_label == 'private': or player_header_privacy == 'PUBLIC'
is_private = True or privacy_setting_icon == 'PRIVACY_PUBLIC')
elif badge_label == 'public': else self._availability(
is_unlisted = is_private = False is_private=(
return self._availability(is_private, False, False, False, is_unlisted) self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
is_unlisted=(
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None else None),
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
needs_auth=False))
@staticmethod @staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict): def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
@ -4866,6 +5013,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'availability': 'public',
}, },
'playlist_count': 1, 'playlist_count': 1,
}, { }, {
@ -4883,6 +5031,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'availability': 'public',
}, },
'playlist_count': 0, 'playlist_count': 0,
}, { }, {
@ -5029,6 +5178,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg', 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008', 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
'channel': 'Christiaan008', 'channel': 'Christiaan008',
'availability': 'public',
}, },
'playlist_count': 96, 'playlist_count': 96,
}, { }, {
@ -5047,6 +5197,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'view_count': int, 'view_count': int,
'description': '', 'description': '',
'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
'availability': 'public',
}, },
'playlist_mincount': 1123, 'playlist_mincount': 1123,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@ -5070,6 +5221,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': 'Interstellar Movie', 'channel': 'Interstellar Movie',
'description': '', 'description': '',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'availability': 'public',
}, },
'playlist_mincount': 21, 'playlist_mincount': 21,
}, { }, {
@ -5088,6 +5240,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'availability': 'public',
}, },
'playlist_mincount': 200, 'playlist_mincount': 200,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@ -5107,6 +5260,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/c/blanktv', 'uploader_url': 'https://www.youtube.com/c/blanktv',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'description': '', 'description': '',
'availability': 'public',
}, },
'playlist_mincount': 1000, 'playlist_mincount': 1000,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@ -5125,6 +5279,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA', 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
'channel_url': 'https://www.youtube.com/user/Computerphile', 'channel_url': 'https://www.youtube.com/user/Computerphile',
'channel': 'Computerphile', 'channel': 'Computerphile',
'availability': 'public',
}, },
'playlist_mincount': 11, 'playlist_mincount': 11,
}, { }, {
@ -5290,6 +5445,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
'tags': [], 'tags': [],
'channel': 'NoCopyrightSounds', 'channel': 'NoCopyrightSounds',
'availability': 'public',
}, },
'playlist_mincount': 166, 'playlist_mincount': 166,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@ -5310,6 +5466,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw', 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
'description': '', 'description': '',
'availability': 'public',
}, },
'expected_warnings': [ 'expected_warnings': [
'The URL does not have a videos tab', 'The URL does not have a videos tab',
@ -5410,6 +5567,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': 'Royalty Free Music - Topic', 'channel': 'Royalty Free Music - Topic',
'view_count': int, 'view_count': int,
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw', 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
'availability': 'public',
}, },
'expected_warnings': [ 'expected_warnings': [
'does not have a videos tab', 'does not have a videos tab',
@ -5443,6 +5601,45 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
}, },
'playlist_mincount': 2 'playlist_mincount': 2
}, {
'note': 'translated tab name',
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
'tags': [],
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'description': '',
'title': 'cole-dlp-test-acc - 再生リスト',
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'uploader': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc',
},
'playlist_mincount': 1,
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
'expected_warnings': ['Preferring "ja"'],
}, {
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
'note': 'preferred lang set with playlist with translated video titles',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
'info_dict': {
'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
'tags': [],
'view_count': int,
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'uploader': 'cole-dlp-test-acc',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'description': 'test',
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'title': 'dlp test playlist',
'availability': 'public',
},
'playlist_mincount': 1,
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
'expected_warnings': ['Preferring "ja"'],
}] }]
@classmethod @classmethod
@ -5527,10 +5724,20 @@ def get_mobj(url):
tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list) tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
if tabs: if tabs:
selected_tab = self._extract_selected_tab(tabs) selected_tab = self._extract_selected_tab(tabs)
selected_tab_name = selected_tab.get('title', '').lower() selected_tab_url = urljoin(
url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
translated_tab_name = selected_tab.get('title', '').lower()
# Prefer tab name from tab url as it is always in en,
# but only when preferred lang is set as it may not extract reliably in all cases.
selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:] # primary
or translated_tab_name)
if selected_tab_name == 'home': if selected_tab_name == 'home':
selected_tab_name = 'featured' selected_tab_name = 'featured'
requested_tab_name = mobj['tab'][1:] requested_tab_name = mobj['tab'][1:]
if 'no-youtube-channel-redirect' not in compat_opts: if 'no-youtube-channel-redirect' not in compat_opts:
if requested_tab_name == 'live': # Live tab should have redirected to the video if requested_tab_name == 'live': # Live tab should have redirected to the video
raise UserNotLive(video_id=mobj['id']) raise UserNotLive(video_id=mobj['id'])
@ -5642,6 +5849,7 @@ class YoutubePlaylistIE(InfoExtractor):
'channel': 'milan', 'channel': 'milan',
'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw', 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw', 'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
'availability': 'public',
}, },
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
@ -5660,6 +5868,7 @@ class YoutubePlaylistIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/c/愛低音的國王', 'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA', 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'availability': 'public',
}, },
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
@ -5848,7 +6057,7 @@ def _extract_notification_renderer(self, notification):
title = self._search_regex( title = self._search_regex(
rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title, rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
'video title', default=None) 'video title', default=None)
upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d') upload_date = (strftime_or_none(self._parse_time_text(self._get_text(notification, 'sentTimeText')), '%Y%m%d')
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()) if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
else None) else None)
return { return {