From 89f383c4ee7b0b7674acc5a584fc754df6e5f118 Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Sat, 7 May 2022 15:44:41 +0700 Subject: [PATCH] [gronkh] Add playlist extractors (#3337) Closes #3300 Authored by: hatienl0i261299 --- yt_dlp/extractor/extractors.py | 6 +++- yt_dlp/extractor/gronkh.py | 59 +++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6f6862915a..0ba129f96f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -601,7 +601,11 @@ from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE -from .gronkh import GronkhIE +from .gronkh import ( + GronkhIE, + GronkhFeedIE, + GronkhVodsIE +) from .groupon import GrouponIE from .hbo import HBOIE from .hearthisat import HearThisAtIE diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index 52bbf3bc73..c112c78577 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -1,5 +1,11 @@ +import functools + from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + OnDemandPagedList, + traverse_obj, + unified_strdate, +) class GronkhIE(InfoExtractor): @@ -41,3 +47,54 @@ def _real_extract(self, url): 'formats': formats, 'subtitles': subtitles, } + + +class GronkhFeedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv(?:/feed)?/?(?:#|$)' + IE_NAME = 'gronkh:feed' + + _TESTS = [{ + 'url': 'https://gronkh.tv/feed', + 'info_dict': { + 'id': 'feed', + }, + 'playlist_count': 16, + }, { + 'url': 'https://gronkh.tv', + 'only_matching': True, + }] + + def _entries(self): + for type_ in ('recent', 'views'): + info = self._download_json( + f'https://api.gronkh.tv/v1/video/discovery/{type_}', 'feed', note=f'Downloading {type_} API JSON') + for item in traverse_obj(info, ('discovery', ...)) or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item.get('title')) + + def _real_extract(self, url): + return self.playlist_result(self._entries(), 'feed') + + +class GronkhVodsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/vods/streams/?(?:#|$)' + IE_NAME = 'gronkh:vods' + + _TESTS = [{ + 'url': 'https://gronkh.tv/vods/streams', + 'info_dict': { + 'id': 'vods', + }, + 'playlist_mincount': 150, + }] + _PER_PAGE = 25 + + def _fetch_page(self, page): + items = traverse_obj(self._download_json( + 'https://api.gronkh.tv/v1/search', 'vods', query={'offset': self._PER_PAGE * page, 'first': self._PER_PAGE}, + note=f'Downloading stream video page {page + 1}'), ('results', 'videos', ...)) + for item in items or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item['episode'], item.get('title')) + + def _real_extract(self, url): + entries = OnDemandPagedList(functools.partial(self._fetch_page), self._PER_PAGE) + return self.playlist_result(entries, 'vods')