diff --git a/.gitignore b/.gitignore index 232096916c..ff00620f57 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,10 @@ cookies *.frag.urls *.info.json *.live_chat.json +*.meta *.part* +*.tmp +*.temp *.unknown_video *.ytdl .cache/ diff --git a/Makefile b/Makefile index 56ec509c06..32aabfbe67 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com clean-test: rm -rf test/testdata/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ - *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.part* *.unknown_video *.ytdl \ + *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ *.3gp *.ape *.avi *.desktop *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \ *.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp clean-dist: diff --git a/test/helper.py b/test/helper.py index c9293f81ae..ce751462e5 100644 --- a/test/helper.py +++ b/test/helper.py @@ -235,7 +235,7 @@ def sanitize(key, value): } # display_id may be generated from id - if test_info_dict.get('display_id') == test_info_dict['id']: + if test_info_dict.get('display_id') == test_info_dict.get('id'): test_info_dict.pop('display_id') return test_info_dict diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 11708774e2..baf54cd2e4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2682,7 +2682,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions): def _forceprint(self, tmpl, info_dict): mobj = re.match(r'\w+(=?)$', tmpl) if mobj and mobj.group(1): - tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s' + tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})r' elif mobj: tmpl = '%({})s'.format(tmpl) @@ -3486,7 +3486,7 @@ def render_thumbnails_table(self, info_dict): return None return render_table( self._list_format_headers('ID', 'Width', 'Height', 'URL'), - [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]) + [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]) def render_subtitles_table(self, video_id, subtitles): def _row(lang, formats): diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 51d30a3213..0365cb2f69 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -78,11 +78,11 @@ def _real_extract(self, url): 'height': try_get(video, lambda x: x['res']['height'], expected_type=int), } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')] if manifests.get('hls'): - m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], id) + m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id) formats.extend(m3u8_frmts) subtitles = self._merge_subtitles(subtitles, m3u8_subs) if manifests.get('dash'): - dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'], id) + dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash']) formats.extend(dash_frmts) subtitles = self._merge_subtitles(subtitles, dash_subs) self._sort_formats(formats) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e289a4ef82..74114e3558 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3504,8 +3504,6 @@ def _live_title(self, name): def _int(self, v, name, fatal=False, **kwargs): res = int_or_none(v, **kwargs) - if 'get_attr' in kwargs: - print(getattr(v, kwargs['get_attr'])) if res is None: msg = 'Failed to extract %s: Could not parse value %r' % (name, v) if fatal: diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py index be7ad1202b..3d651f3abc 100644 --- a/yt_dlp/extractor/dispeak.py +++ b/yt_dlp/extractor/dispeak.py @@ -74,13 +74,11 @@ def _parse_mp4(self, metadata): tbr = int_or_none(bitrate) vbr = int_or_none(self._search_regex( r'-(\d+)\.mp4', video_path, 'vbr', default=None)) - abr = tbr - vbr if tbr and vbr else None video_formats.append({ 'format_id': bitrate, 'url': url, 'tbr': tbr, 'vbr': vbr, - 'abr': abr, }) return video_formats @@ -121,6 +119,7 @@ def _real_extract(self, url): video_formats = self._parse_mp4(metadata) if video_formats is None: video_formats = self._parse_flv(metadata) + self._sort_formats(video_formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 542902cafc..934b354a99 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -3804,6 +3804,7 @@ def _real_extract(self, url): json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles( json_ld['url'], video_id, 'mp4') json_ld.pop('url') + self._sort_formats(json_ld['formats']) return merge_dicts(json_ld, info_dict) def check_video(vurl): @@ -3858,7 +3859,7 @@ def filter_video(urls): protocol, _, _ = url.partition('/') thumbnail = protocol + thumbnail - url_keys = list(filter(re.compile(r'video_url|video_alt_url\d+').fullmatch, flashvars.keys())) + url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys())) formats = [] for key in url_keys: if '/get_file/' not in flashvars[key]: diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index ec3c35c6f5..1d387bdfd9 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -177,9 +177,6 @@ def build_player_url(cls, video_id, integration, origin_url=None): @classmethod def _extract_urls(cls, webpage, origin_url): - # in comparison with _VALID_URL: - # * make the scheme optional - # * simplify the query string part; after extracting iframe src, the URL will be matched again VALID_SRC = rf'(?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+' # https://docs.glomex.com/publisher/video-player-integration/javascript-api/ diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d8a63a3d2d..0d42c2a895 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -257,7 +257,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _RESERVED_NAMES = ( r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|' - r'shorts|movies|results|shared|hashtag|trending|feed|feeds|' + r'shorts|movies|results|shared|hashtag|trending|explore|feed|feeds|' r'browse|oembed|get_video_info|iframe_api|s/player|' r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout') diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index e411cc145c..e47631eb66 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -7,6 +7,7 @@ from .exec import ExecPP, ExecAfterDownloadPP from .ffmpeg import ( FFmpegPostProcessor, + FFmpegCopyStreamPP, FFmpegConcatPP, FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 213de0ecf3..5b98c7d976 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -568,7 +568,7 @@ def run(self, info): else f'already is in target format {source_ext}' if source_ext == target_ext else None) if _skip_msg: - self.to_screen(f'Not {self._ACTION} media file {filename!r}; {_skip_msg}') + self.to_screen(f'Not {self._ACTION} media file "{filename}"; {_skip_msg}') return [], info outpath = replace_extension(filename, target_ext, source_ext) @@ -917,7 +917,7 @@ def run(self, info): return [], info -class FFmpegCopyStreamPostProcessor(FFmpegFixupPostProcessor): +class FFmpegCopyStreamPP(FFmpegFixupPostProcessor): MESSAGE = 'Copying stream' @PostProcessor._restrict_to(images=False) @@ -926,11 +926,11 @@ def run(self, info): return [], info -class FFmpegFixupDurationPP(FFmpegCopyStreamPostProcessor): +class FFmpegFixupDurationPP(FFmpegCopyStreamPP): MESSAGE = 'Fixing video duration' -class FFmpegFixupDuplicateMoovPP(FFmpegCopyStreamPostProcessor): +class FFmpegFixupDuplicateMoovPP(FFmpegCopyStreamPP): MESSAGE = 'Fixing duplicate MOOV atoms' @@ -1132,15 +1132,20 @@ def __init__(self, downloader, only_multi_video=False): def concat_files(self, in_files, out_file): if len(in_files) == 1: + if os.path.realpath(in_files[0]) != os.path.realpath(out_file): + self.to_screen(f'Moving "{in_files[0]}" to "{out_file}"') os.replace(in_files[0], out_file) - return + return [] codecs = [traverse_obj(self.get_metadata_object(file), ('streams', ..., 'codec_name')) for file in in_files] if len(set(map(tuple, codecs))) > 1: raise PostProcessingError( 'The files have different streams/codecs and cannot be concatenated. ' 'Either select different formats or --recode-video them to a common format') + + self.to_screen(f'Concatenating {len(in_files)} files; Destination: {out_file}') super().concat_files(in_files, out_file) + return in_files @PostProcessor._restrict_to(images=False) def run(self, info): @@ -1161,10 +1166,10 @@ def run(self, info): ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv' out_file = self._downloader.prepare_filename(ie_copy, 'pl_video') - self.concat_files(in_files, out_file) + files_to_delete = self.concat_files(in_files, out_file) info['requested_downloads'] = [{ 'filepath': out_file, 'ext': ie_copy['ext'], }] - return in_files, info + return files_to_delete, info