From 143a2ccab39a4e6477521f0d563f940a97fa9dc6 Mon Sep 17 00:00:00 2001 From: columndeeply <106948293+columndeeply@users.noreply.github.com> Date: Tue, 4 Oct 2022 05:33:46 +0200 Subject: [PATCH] [extractor/prankcast] Add extractor (#4774) Authored by: columndeeply, HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/prankcast.py | 49 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 yt_dlp/extractor/prankcast.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 2804886cda..3ecd7748bf 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1382,6 +1382,7 @@ PuhuTVIE, PuhuTVSerieIE, ) +from .prankcast import PrankCastIE from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE diff --git a/yt_dlp/extractor/prankcast.py b/yt_dlp/extractor/prankcast.py new file mode 100644 index 0000000000..7446caf3c0 --- /dev/null +++ b/yt_dlp/extractor/prankcast.py @@ -0,0 +1,49 @@ +from .common import InfoExtractor +from ..utils import parse_iso8601, traverse_obj, try_call + + +class PrankCastIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/showreel/(?P\d+)-(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://prankcast.com/Devonanustart/showreel/1561-Beverly-is-back-like-a-heart-attack-', + 'info_dict': { + 'id': '1561', + 'ext': 'mp3', + 'title': 'Beverly is back like a heart attack!', + 'display_id': 'Beverly-is-back-like-a-heart-attack-', + 'timestamp': 1661391575, + 'uploader': 'Devonanustart', + 'channel_id': 4, + 'duration': 7918, + 'cast': ['Devonanustart', 'Phonelosers'], + 'description': '', + 'categories': ['prank'], + 'tags': ['prank call', 'prank'], + 'upload_date': '20220825' + } + }] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + + webpage = self._download_webpage(url, video_id) + json_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_showreel'] + + uploader = json_info.get('user_name') + guests_json = self._parse_json(json_info.get('guests_json') or '{}', video_id) + start_date = parse_iso8601(json_info.get('start_date')) + + return { + 'id': video_id, + 'title': json_info.get('broadcast_title') or self._og_search_title(webpage), + 'display_id': display_id, + 'url': f'{json_info["broadcast_url"]}{json_info["recording_hash"]}.mp3', + 'timestamp': start_date, + 'uploader': uploader, + 'channel_id': json_info.get('user_id'), + 'duration': try_call(lambda: parse_iso8601(json_info['end_date']) - start_date), + 'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))), + 'description': json_info.get('broadcast_description'), + 'categories': [json_info.get('broadcast_category')], + 'tags': self._parse_json(json_info.get('broadcast_tags') or '{}', video_id) + }