yt-dlp/yt_dlp/extractor/epoch.py
Tejas Arlimatti f8c7ba9984
[extractor/epoch] Add extractor (#4772)
Closes #4714
Authored by: tejasa97
2022-08-31 22:16:26 +05:30

47 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from .common import InfoExtractor
class EpochIE(InfoExtractor):
_VALID_URL = r'https?://www.theepochtimes\.com/[\w-]+_(?P<id>\d+).html'
_TESTS = [
{
'url': 'https://www.theepochtimes.com/they-can-do-audio-video-physical-surveillance-on-you-24h-365d-a-year-rex-lee-on-intrusive-apps_4661688.html',
'info_dict': {
'id': 'a3dd732c-4750-4bc8-8156-69180668bda1',
'ext': 'mp4',
'title': 'They Can Do Audio, Video, Physical Surveillance on You 24H/365D a Year: Rex Lee on Intrusive Apps',
}
},
{
'url': 'https://www.theepochtimes.com/the-communist-partys-cyberattacks-on-america-explained-rex-lee-talks-tech-hybrid-warfare_4342413.html',
'info_dict': {
'id': '276c7f46-3bbf-475d-9934-b9bbe827cf0a',
'ext': 'mp4',
'title': 'The Communist Partys Cyberattacks on America Explained; Rex Lee Talks Tech Hybrid Warfare',
}
},
{
'url': 'https://www.theepochtimes.com/kash-patel-a-6-year-saga-of-government-corruption-from-russiagate-to-mar-a-lago_4690250.html',
'info_dict': {
'id': 'aa9ceecd-a127-453d-a2de-7153d6fd69b6',
'ext': 'mp4',
'title': 'Kash Patel: A 6-Year-Saga of Government Corruption, From Russiagate to Mar-a-Lago',
}
},
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
youmaker_video_id = self._search_regex(r'data-trailer="[\w-]+" data-id="([\w-]+)"', webpage, 'url')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
f'http://vs1.youmaker.com/assets/{youmaker_video_id}/playlist.m3u8', video_id, 'mp4', m3u8_id='hls')
return {
'id': youmaker_video_id,
'formats': formats,
'subtitles': subtitles,
'title': self._html_extract_title(webpage)
}