From 6c73052c0a44c90dad22468d598e1bc6aba7534c Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Fri, 21 Jan 2022 06:04:36 +0000 Subject: [PATCH] [youtube] Extract channel subscriber count (#2399) Closes #2350 * Adds `channel_follower_count` field Authored-by: coletdjnz --- README.md | 1 + yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/youtube.py | 55 ++++++++++++++++++++++++++++++++----- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 71c4b24b39..358f563c16 100644 --- a/README.md +++ b/README.md @@ -1138,6 +1138,7 @@ # OUTPUT TEMPLATE - `uploader_id` (string): Nickname or id of the video uploader - `channel` (string): Full name of the channel the video is uploaded on - `channel_id` (string): Id of the channel + - `channel_follower_count` (numeric): Number of followers of the channel - `location` (string): Physical location where the video was filmed - `duration` (numeric): Length of the video in seconds - `duration_string` (string): Length of the video (HH:mm:ss) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3f5e15103f..e289a4ef82 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -261,6 +261,7 @@ class InfoExtractor(object): fields. This depends on a particular extractor. channel_id: Id of the channel. channel_url: Full URL to a channel webpage. + channel_follower_count: Number of followers of the channel. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format {tag: subformats}. "tag" is usually a language code, and diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 988135516b..09a0b22799 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -515,7 +515,7 @@ def _extract_visitor_data(*args): Appears to be used to track session state """ return get_first( - args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), + args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))], expected_type=str) @property @@ -1034,6 +1034,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 0, 'start_time': 1, 'end_time': 9, + 'channel_follower_count': int } }, { @@ -1077,6 +1078,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg', 'live_status': 'not_live', 'age_limit': 0, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1129,6 +1131,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Music'], 'age_limit': 0, 'alt_title': 'The Spark', + 'channel_follower_count': int }, 'params': { 'youtube_include_dash_manifest': True, @@ -1161,6 +1164,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg', 'playable_in_embed': True, 'view_count': int, + 'channel_follower_count': int }, }, { @@ -1188,6 +1192,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'duration': 177, 'playable_in_embed': True, + 'channel_follower_count': int }, }, { @@ -1215,6 +1220,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Entertainment'], 'duration': 106, 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'channel_follower_count': int }, }, { @@ -1246,6 +1252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA', 'live_status': 'not_live', 'artist': 'OOMPH!', + 'channel_follower_count': int }, }, { @@ -1284,6 +1291,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ', 'categories': ['Music'], 'album': 'Some Chords', + 'channel_follower_count': int }, 'expected_warnings': [ 'DASH manifest missing', @@ -1316,6 +1324,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'live_status': 'was_live', 'view_count': int, 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q', + 'channel_follower_count': int }, 'params': { 'skip_download': 'requires avconv', @@ -1347,6 +1356,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'availability': 'unlisted', + 'channel_follower_count': int }, }, # url_encoded_fmt_stream_map is empty string @@ -1515,6 +1525,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'like_count': int, 'age_limit': 0, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1573,6 +1584,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp', 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1604,6 +1616,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'live_status': 'not_live', 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1667,6 +1680,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'playable_in_embed': True, 'live_status': 'not_live', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1776,6 +1790,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', 'tags': 'count:11', 'live_status': 'not_live', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1831,6 +1846,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'live_status': 'not_live', 'channel': 'ElevageOrVert', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1864,6 +1880,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'duration': 522, 'channel': 'kudvenkat', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1908,6 +1925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg', 'categories': ['Music'], 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1943,6 +1961,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int } }, { @@ -1969,6 +1988,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -2010,6 +2030,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 'format': '17', # 3gp format available on android @@ -2053,6 +2074,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 248, 'categories': ['Education'], 'age_limit': 0, + 'channel_follower_count': int }, 'params': {'format': 'mhtml', 'skip_download': True} } ] @@ -3489,7 +3511,11 @@ def process_language(container, base_url, lang_code, sub_name, query): }) vsir = content.get('videoSecondaryInfoRenderer') if vsir: - info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title')) + vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer')) + info.update({ + 'channel': self._get_text(vor, 'title'), + 'channel_follower_count': self._get_count(vor, 'subscriberCountText')}) + rows = try_get( vsir, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'], @@ -3969,7 +3995,8 @@ def _get_uncropped(url): 'view_count': self._get_count(playlist_stats, 1), 'availability': self._extract_availability(data), 'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'), - 'playlist_count': self._get_count(playlist_stats, 0) + 'playlist_count': self._get_count(playlist_stats, 0), + 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), } if not channel_id: metadata.update(self._extract_uploader(data)) @@ -4265,6 +4292,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'], 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', + 'channel_follower_count': int }, }, { 'note': 'playlists, multipage, different order', @@ -4281,6 +4309,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel': 'Igor Kleiner', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', + 'channel_follower_count': int }, }, { 'note': 'playlists, series', @@ -4297,6 +4326,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': '3Blue1Brown', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', 'tags': ['Mathematics'], + 'channel_follower_count': int }, }, { 'note': 'playlists, singlepage', @@ -4313,6 +4343,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 'tags': 'count:13', 'channel': 'ThirstForScience', + 'channel_follower_count': int } }, { 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', @@ -4366,6 +4397,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'channel_follower_count': int }, 'playlist_mincount': 2, }, { @@ -4382,6 +4414,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel': 'lex will', + 'channel_follower_count': int }, 'playlist_mincount': 975, }, { @@ -4398,6 +4431,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': 'lex will', 'tags': ['bible', 'history', 'prophesy'], 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', + 'channel_follower_count': int }, 'playlist_mincount': 199, }, { @@ -4414,6 +4448,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], + 'channel_follower_count': int }, 'playlist_mincount': 17, }, { @@ -4430,6 +4465,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], + 'channel_follower_count': int }, 'playlist_mincount': 18, }, { @@ -4446,6 +4482,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], + 'channel_follower_count': int }, 'playlist_mincount': 12, }, { @@ -4463,6 +4500,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': ['Mathematics'], 'channel': '3Blue1Brown', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'channel_follower_count': int }, }, { 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', @@ -4622,7 +4660,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'info_dict': { - 'id': 'zpsbVPFwsqk', # This will keep changing + 'id': 'GgL890LIznQ', # This will keep changing 'ext': 'mp4', 'title': str, 'uploader': 'Sky News', @@ -4633,17 +4671,18 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'categories': ['News & Politics'], 'tags': list, 'like_count': int, - 'release_timestamp': 1640164857, + 'release_timestamp': 1642502819, 'channel': 'Sky News', 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ', 'age_limit': 0, 'view_count': int, - 'thumbnail': 'https://i.ytimg.com/vi/zpsbVPFwsqk/maxresdefault_live.jpg', + 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg', 'playable_in_embed': True, - 'release_date': '20211222', + 'release_date': '20220118', 'availability': 'public', 'live_status': 'is_live', 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -4825,6 +4864,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'recommended', 'title': 'recommended', + 'tags': [], }, 'playlist_mincount': 50, 'params': { @@ -4845,6 +4885,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': [], 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw', 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw', + 'channel_follower_count': int }, 'playlist_mincount': 650, 'params': {