# Provenance: yt-dlp commit 91d54e9b99dacae74b3e55bb429365e9fbbac50f,
# "[extractor/volejtv] Add extractor (#5943)", authored by HobbyistDev
# (Wed, 4 Jan 2023 16:50:23 +0900); closes #5883.
#
# The same commit registers this extractor in
# yt_dlp/extractor/_extractors.py by adding
#     from .volejtv import VolejTVIE
# between the Voicy and Voot imports.
#
# File: yt_dlp/extractor/volejtv.py
from .common import InfoExtractor


class VolejTVIE(InfoExtractor):
    """Extractor for single video pages at ``https://volej.tv/video/<id>/``."""

    # The numeric path component is captured as the named group `id`; the
    # group name is required because `_real_extract` obtains the video id via
    # `self._match_id(url)`. (A bare `(?P\d+)` is not valid regex syntax.)
    _VALID_URL = r'https?://volej\.tv/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://volej.tv/video/725742/',
        'info_dict': {
            'id': '725742',
            'ext': 'mp4',
            'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV',
            'thumbnail': 'https://volej.tv/images/og/16/17186/og.png',
            'title': 'VK Královo Pole vs VK Prostějov',
        }
    }, {
        'url': 'https://volej.tv/video/725605/',
        'info_dict': {
            'id': '725605',
            'ext': 'mp4',
            'thumbnail': 'https://volej.tv/images/og/15/17185/og.png',
            'title': 'VK Lvi Praha vs VK Euro Sitex Příbram',
            'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, parses the JSON object that follows the first
        ``<![CDATA[ ... =`` assignment in it, and expands the HLS manifest
        found at ``urls.hls`` of that object into formats and subtitles.
        Title, thumbnail and description are read from the page's meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player configuration is embedded as a JS assignment inside a
        # CDATA section; the pattern consumes everything up to and including
        # the `=` so that JSON parsing starts at the assigned value.
        json_data = self._search_json(
            r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id)

        formats, subtitle = self._extract_m3u8_formats_and_subtitles(
            json_data['urls']['hls'], video_id)

        return {
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
            'formats': formats,
            'subtitles': subtitle,
        }