From a39a7ba8d6efccf8d2fc8029ecebcb10e6c11d59 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 17 Jun 2022 11:21:53 +0530 Subject: [PATCH] [extractor/tiktok] Extract `SIGI_STATE` Based on #3624, https://github.com/ytdl-org/youtube-dl/pull/30479 Closes #3551 Authored by dirkf, sulyi, pukkandan --- yt_dlp/extractor/tiktok.py | 60 ++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 4926096c0..680358d5e 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -12,6 +12,7 @@ HEADRequest, LazyList, UnsupportedError, + get_element_by_id, get_first, int_or_none, join_nonempty, @@ -33,11 +34,22 @@ class TikTokBaseIE(InfoExtractor): _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _WEBPAGE_HOST = 'https://www.tiktok.com/' QUALITIES = ('360p', '540p', '720p', '1080p') + _session_initialized = False @staticmethod def _create_url(user_id, video_id): return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}' + def _get_sigi_state(self, webpage, display_id): + return self._parse_json(get_element_by_id( + 'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id) + + def _real_initialize(self): + if self._session_initialized: + return + self._request_webpage(HEADRequest('https://www.tiktok.com'), None, note='Setting up session', fatal=False) + TikTokBaseIE._session_initialized = True + def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) @@ -263,6 +275,9 @@ def extract_addr(addr, add_meta={}): return { 'id': aweme_id, + 'extractor_key': TikTokIE.ie_key(), + 'extractor': TikTokIE.IE_NAME, + 'webpage_url': self._create_url(author_info.get('uid'), aweme_id), 'title': aweme_detail.get('desc'), 'description': aweme_detail.get('desc'), 'view_count': int_or_none(stats_info.get('play_count')), @@ -461,7 +476,7 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'expected_warnings': ['Video not available'] + 'expected_warnings': ['trying with webpage', 'Unable to find video in feed'] }, { # Video without title and description 'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694', @@ -485,7 +500,29 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'expected_warnings': ['Video not available', 'Creating a generic title'] + }, { + # hydration JSON is sent in a