From d984a98deff23f5fef6a18bd2062f7cbe94ffed4 Mon Sep 17 00:00:00 2001
From: The Hatsune Daishi
Date: Wed, 15 Dec 2021 03:09:57 +0900
Subject: [PATCH] [ok.ru] add mobile fallback (#1972)

Authored by: nao20010128nao
---
Review notes (commentary only; nothing in this section is applied):

What the patch does
  * _real_extract() becomes a dispatcher: it calls _extract_desktop() (the
    previous _real_extract body, renamed) and, if that raises
    ExtractorError, calls the new _extract_mobile(). If the mobile attempt
    also raises ExtractorError, the desktop error is the one re-raised.
  * _extract_mobile() downloads http://m.ok.ru/video/<id>, raises an
    expected ExtractorError when the error pattern matches, and otherwise
    reads the JSON held in the page's data-video attribute (HTML-unescaped
    first). It returns a single 'mobile' format with ext 'mp4', plus
    title, duration and thumbnail taken from that JSON.
  * 'duration' is videoDuration passed through float_or_none(scale=1000);
    presumably milliseconds to seconds, which is consistent with the
    3038.181 expected by the added test. Confirm against the helper.
  * A test case for https://m.ok.ru/video/2361249957145 is added and
    float_or_none is added to the ..utils import list.

Things to confirm before applying
  * This copy was restored from text in which the whole patch had been
    flattened onto two lines. Line breaks, indentation and blank lines were
    re-derived from the hunk headers (1 + 8 + 12 + 29 = 50 insertions,
    1 deletion); check context whitespace against the target file.
  * The From: header carries no address in this copy.
  * The mobile error pattern ends in a lazy group with nothing after it, so
    as written it can capture only one character and fires on any page that
    contains the leading word followed by any character. It looks like part
    of the pattern is missing from this copy; verify it.
  * The desktop pattern in the last context line of the third hunk starts
    directly with a negated class. If its beginning is also missing from
    this copy, that context line will not match the target file.
  * The mobile format 'url' comes from json_data.get('videoSrc') and 'title'
    from json_data.get('videoName'); either can be None when the key is
    absent. Consider failing explicitly instead of returning them.
  * unescapeHTML is used by _extract_mobile() but is not part of the import
    hunk; presumably the file already imports it. Verify.
  * The mobile path does not read the fromTime query parameter, so a result
    produced by the fallback has no start_time.

 yt_dlp/extractor/odnoklassniki.py | 51 ++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py
index 9cacd3815..eeb57f2fe 100644
--- a/yt_dlp/extractor/odnoklassniki.py
+++ b/yt_dlp/extractor/odnoklassniki.py
@@ -12,6 +12,7 @@
 )
 from ..utils import (
     ExtractorError,
+    float_or_none,
     unified_strdate,
     int_or_none,
     qualities,
@@ -96,6 +97,14 @@ class OdnoklassnikiIE(InfoExtractor):
             'skip_download': True,
         },
         'skip': 'Video has not been found',
+    }, {
+        'note': 'Only available in mobile webpage',
+        'url': 'https://m.ok.ru/video/2361249957145',
+        'info_dict': {
+            'id': '2361249957145',
+            'title': 'Быковское крещение',
+            'duration': 3038.181,
+        },
     }, {
         'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
         'only_matching': True,
@@ -131,13 +140,24 @@ def _extract_url(webpage):
             return mobj.group('url')
 
     def _real_extract(self, url):
+        try:
+            return self._extract_desktop(url)
+        except ExtractorError as e:
+            try:
+                return self._extract_mobile(url)
+            except ExtractorError:
+                # error message of desktop webpage is in English
+                raise e
+
+    def _extract_desktop(self, url):
         start_time = int_or_none(compat_parse_qs(
             compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
 
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(
-            'http://ok.ru/video/%s' % video_id, video_id)
+            'http://ok.ru/video/%s' % video_id, video_id,
+            note='Downloading desktop webpage')
 
         error = self._search_regex(
             r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
@@ -265,3 +285,32 @@ def _real_extract(self, url):
 
         info['formats'] = formats
         return info
+
+    def _extract_mobile(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://m.ok.ru/video/%s' % video_id, video_id,
+            note='Downloading mobile webpage')
+
+        error = self._search_regex(
+            r'видео\s*(.+?)',
+            webpage, 'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
+        json_data = self._search_regex(
+            r'data-video="(.+?)"', webpage, 'json data')
+        json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
+
+        return {
+            'id': video_id,
+            'title': json_data.get('videoName'),
+            'duration': float_or_none(json_data.get('videoDuration'), scale=1000),
+            'thumbnail': json_data.get('videoPosterSrc'),
+            'formats': [{
+                'format_id': 'mobile',
+                'url': json_data.get('videoSrc'),
+                'ext': 'mp4',
+            }]
+        }