From f8d4ad9ab00bca71808cd769c04806f51c3578f0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 10 Apr 2021 20:10:30 +0530 Subject: [PATCH] Format selector `mergeall` to download and merge all formats --- README.md | 16 ++++-- yt_dlp/YoutubeDL.py | 130 +++++++++++++++++++++++--------------------- 2 files changed, 81 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 5b9e4edd5e..67c3c4923f 100644 --- a/README.md +++ b/README.md @@ -979,8 +979,9 @@ # FORMAT SELECTION You can also use special names to select particular edge case formats: - `all`: Select all formats - - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio. - - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio. + - `mergeall`: Select and merge all formats (Must be used with `--audio-multistreams`, `--video-multistreams` or both) + - `b*`, `best*`: Select the best quality format irrespective of whether it contains video or audio + - `w*`, `worst*`: Select the worst quality format irrespective of whether it contains video or audio - `b`, `best`: Select the best quality format that contains both video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]` - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]` - `bv`, `bestvideo`: Select the best quality video-only format. Equivalent to `best*[acodec=none]` @@ -1094,10 +1095,17 @@ # For this case, an output template should be used since # by default, bestvideo and bestaudio will have the same file name. $ yt-dlp -f 'bv,ba' -o '%(title)s.f%(format_id)s.%(ext)s' +# Download and merge the best format that has a video stream, +# and all audio-only formats into one file +$ yt-dlp -f 'bv*+mergeall[vcodec=none]' --audio-multistreams + +# Download and merge the best format that has a video stream, +# and the best 2 audio-only formats into one file +$ yt-dlp -f 'bv*+ba+ba.2' --audio-multistreams # The following examples show the old method (without -S) of format selection -# and how to use -S to achieve a similar but better result +# and how to use -S to achieve a similar but (generally) better result # Download the worst video available (old method) $ yt-dlp -f 'wv*+wa/w' @@ -1178,7 +1186,7 @@ # or the worst video (still prefering framerate greater than 30) if there is no $ yt-dlp -f '((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)' # Download the video with the largest resolution no better than 720p, -# or the video with the smallest resolution available if there is no such video, +# or the video with the smallest resolution available if there is no such video, # prefering larger framerate for formats with the same resolution $ yt-dlp -S 'res:720,fps' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a77e1fe5e2..249274fb60 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1541,6 +1541,66 @@ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, ins selectors.append(current_selector) return selectors + def _merge(formats_pair): + format_1, format_2 = formats_pair + + formats_info = [] + formats_info.extend(format_1.get('requested_formats', (format_1,))) + formats_info.extend(format_2.get('requested_formats', (format_2,))) + + if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']: + get_no_more = {"video": False, "audio": False} + for (i, fmt_info) in enumerate(formats_info): + for aud_vid in ["audio", "video"]: + if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': + if get_no_more[aud_vid]: + formats_info.pop(i) + get_no_more[aud_vid] = True + + if len(formats_info) == 1: + return formats_info[0] + + video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] + audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] + + the_only_video = video_fmts[0] if len(video_fmts) == 1 else None + the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None + + output_ext = self.params.get('merge_output_format') + if not output_ext: + if the_only_video: + output_ext = the_only_video['ext'] + elif the_only_audio and not video_fmts: + output_ext = the_only_audio['ext'] + else: + output_ext = 'mkv' + + new_dict = { + 'requested_formats': formats_info, + 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info), + 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info), + 'ext': output_ext, + } + + if the_only_video: + new_dict.update({ + 'width': the_only_video.get('width'), + 'height': the_only_video.get('height'), + 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), + 'fps': the_only_video.get('fps'), + 'vcodec': the_only_video.get('vcodec'), + 'vbr': the_only_video.get('vbr'), + 'stretched_ratio': the_only_video.get('stretched_ratio'), + }) + + if the_only_audio: + new_dict.update({ + 'acodec': the_only_audio.get('acodec'), + 'abr': the_only_audio.get('abr'), + }) + + return new_dict + def _build_selector_function(selector): if isinstance(selector, list): # , fs = [_build_selector_function(s) for s in selector] @@ -1565,14 +1625,22 @@ def selector_function(ctx): return [] elif selector.type == SINGLE: # atom - format_spec = selector.selector if selector.selector is not None else 'best' + format_spec = (selector.selector if selector.selector is not None else 'best').lower() + # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector if format_spec == 'all': def selector_function(ctx): formats = list(ctx['formats']) if formats: for f in formats: yield f + elif format_spec == 'mergeall': + def selector_function(ctx): + formats = list(ctx['formats']) + merged_format = formats[0] + for f in formats[1:]: + merged_format = _merge((merged_format, f)) + yield merged_format else: format_fallback = False @@ -1618,66 +1686,6 @@ def selector_function(ctx): yield formats[format_idx] elif selector.type == MERGE: # + - def _merge(formats_pair): - format_1, format_2 = formats_pair - - formats_info = [] - formats_info.extend(format_1.get('requested_formats', (format_1,))) - formats_info.extend(format_2.get('requested_formats', (format_2,))) - - if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']: - get_no_more = {"video": False, "audio": False} - for (i, fmt_info) in enumerate(formats_info): - for aud_vid in ["audio", "video"]: - if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': - if get_no_more[aud_vid]: - formats_info.pop(i) - get_no_more[aud_vid] = True - - if len(formats_info) == 1: - return formats_info[0] - - video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] - audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] - - the_only_video = video_fmts[0] if len(video_fmts) == 1 else None - the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None - - output_ext = self.params.get('merge_output_format') - if not output_ext: - if the_only_video: - output_ext = the_only_video['ext'] - elif the_only_audio and not video_fmts: - output_ext = the_only_audio['ext'] - else: - output_ext = 'mkv' - - new_dict = { - 'requested_formats': formats_info, - 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info), - 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info), - 'ext': output_ext, - } - - if the_only_video: - new_dict.update({ - 'width': the_only_video.get('width'), - 'height': the_only_video.get('height'), - 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), - 'fps': the_only_video.get('fps'), - 'vcodec': the_only_video.get('vcodec'), - 'vbr': the_only_video.get('vbr'), - 'stretched_ratio': the_only_video.get('stretched_ratio'), - }) - - if the_only_audio: - new_dict.update({ - 'acodec': the_only_audio.get('acodec'), - 'abr': the_only_audio.get('abr'), - }) - - return new_dict - selector_1, selector_2 = map(_build_selector_function, selector.selector) def selector_function(ctx):