From 5625e6073fc9bd0ca944eaa18b1038ca8eddabd7 Mon Sep 17 00:00:00 2001 From: Bricio <216170+Bricio@users.noreply.github.com> Date: Fri, 18 Feb 2022 13:02:14 -0300 Subject: [PATCH] [Biqle] Fix extractor (#2731) Closes #193 Authored by: Bricio --- yt_dlp/extractor/biqle.py | 93 +++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py index 17ebbb2576..2b57bade3d 100644 --- a/yt_dlp/extractor/biqle.py +++ b/yt_dlp/extractor/biqle.py @@ -3,27 +3,28 @@ from .common import InfoExtractor from .vk import VKIE -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, +from ..compat import compat_b64decode +from ..utils import ( + int_or_none, + js_to_json, + traverse_obj, + unified_timestamp, ) -from ..utils import int_or_none class BIQLEIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P-?\d+_\d+)' _TESTS = [{ - # Youtube embed - 'url': 'https://biqle.ru/watch/-115995369_456239081', - 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06', + 'url': 'https://biqle.ru/watch/-2000421746_85421746', + 'md5': 'ae6ef4f04d19ac84e4658046d02c151c', 'info_dict': { - 'id': '8v4f-avW-VI', + 'id': '-2000421746_85421746', 'ext': 'mp4', - 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer", - 'description': 'Passe-Partout', - 'uploader_id': 'mrsimpsonstef3', - 'uploader': 'Phanolito', - 'upload_date': '20120822', + 'title': 'Forsaken By Hope Studio Clip', + 'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн', + 'upload_date': '19700101', + 'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb', + 'timestamp': 0, }, }, { 'url': 'http://biqle.org/watch/-44781847_168547604', @@ -32,53 +33,62 @@ class BIQLEIE(InfoExtractor): 'id': '-44781847_168547604', 'ext': 'mp4', 'title': 'Ребенок в шоке от автоматической мойки', + 'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн', 'timestamp': 1396633454, - 'uploader': 'Dmitry Kotov', 'upload_date': '20140404', - 'uploader_id': '47850140', + 'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg', }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - embed_url = self._proto_relative_url(self._search_regex( - r'', - webpage, 'embed url')) + + title = self._html_search_meta('name', webpage, 'Title', fatal=False) + timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None)) + description = self._html_search_meta('description', webpage, 'Description', default=None) + + global_embed_url = self._search_regex( + r'', webpage, 'Hash') + + embed_url = global_embed_url + hash + if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) embed_page = self._download_webpage( - embed_url, video_id, headers={'Referer': url}) - video_ext = self._get_cookies(embed_url).get('video_ext') - if video_ext: - video_ext = compat_urllib_parse_unquote(video_ext.value) - if not video_ext: - video_ext = compat_b64decode(self._search_regex( - r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)', - embed_page, 'video_ext')).decode() - video_id, sig, _, access_token = video_ext.split(':') + embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url}) + + glob_params = self._parse_json(self._search_regex( + r'', + embed_page, 'Global Parameters'), video_id, transform_source=js_to_json) + host_name = compat_b64decode(glob_params['server'][::-1]).decode() + item = self._download_json( - 'https://api.vk.com/method/video.get', video_id, - headers={'User-Agent': 'okhttp/3.4.1'}, query={ - 'access_token': access_token, - 'sig': sig, - 'v': 5.44, + f'https://{host_name}/method/video.get/{video_id}', video_id, + headers={'Referer': url}, query={ + 'token': glob_params['video']['access_token'], 'videos': video_id, + 'ckey': glob_params['c_key'], + 'credentials': glob_params['video']['credentials'], })['response']['items'][0] - title = item['title'] formats = [] for f_id, f_url in item.get('files', {}).items(): if f_id == 'external': return self.url_result(f_url) ext, height = f_id.split('_') - formats.append({ - 'format_id': height + 'p', - 'url': f_url, - 'height': int_or_none(height), - 'ext': ext, - }) + height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height)) + if height_extra_key: + formats.append({ + 'format_id': f'{height}p', + 'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}', + 'height': int_or_none(height), + 'ext': ext, + }) self._sort_formats(formats) thumbnails = [] @@ -96,10 +106,9 @@ def _real_extract(self, url): 'title': title, 'formats': formats, 'comment_count': int_or_none(item.get('comments')), - 'description': item.get('description'), + 'description': description, 'duration': int_or_none(item.get('duration')), 'thumbnails': thumbnails, - 'timestamp': int_or_none(item.get('date')), - 'uploader': item.get('owner_id'), + 'timestamp': timestamp, 'view_count': int_or_none(item.get('views')), }