From 7d1eb38af154db77341b28b14776b23172a234e0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 29 Jul 2021 08:26:17 +0530 Subject: [PATCH] Add format types `j`, `l`, `q` for outtmpl Closes #345 --- README.md | 8 +++++--- test/test_YoutubeDL.py | 28 ++++++++++++++++++++++------ yt_dlp/YoutubeDL.py | 24 ++++++++++++++++++------ yt_dlp/options.py | 4 ++-- yt_dlp/utils.py | 2 ++ 5 files changed, 49 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 4a8364e57..7322c2a0a 100644 --- a/README.md +++ b/README.md @@ -789,10 +789,11 @@ ## Post-Processing Options: command. An additional field "filepath" that contains the final path of the downloaded file is also available. If no - fields are passed, "%(filepath)s" is - appended to the end of the command + fields are passed, %(filepath)q is appended + to the end of the command --exec-before-download CMD Execute a command before the actual download. The syntax is the same as --exec + but "filepath" is not available --convert-subs FORMAT Convert the subtitles to another format (currently supported: srt|vtt|ass|lrc) (Alias: --convert-subtitles) @@ -917,10 +918,11 @@ # OUTPUT TEMPLATE It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: -1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields +1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` 1. **Default**: A default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` +1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `j`, `l`, `q` can be used for converting to **j**son, a comma seperated **l**ist and a string **q**uoted for the terminal respectively To summarize, the general syntax for a field is: ``` diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e1287f222..9a0b286e2 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import copy +import json from test.helper import FakeYDL, assertRegexpMatches from yt_dlp import YoutubeDL @@ -647,6 +648,7 @@ def test_add_extra_info(self): 'title1': '$PATH', 'title2': '%PATH%', 'title3': 'foo/bar\\test', + 'title4': 'foo "bar" test', 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, @@ -669,10 +671,12 @@ def test(tmpl, expected, *, info=None, **params): if callable(expected): self.assertTrue(expected(out)) self.assertTrue(expected(fname)) - elif isinstance(expected, compat_str): - self.assertEqual((out, fname), (expected, expected)) + elif isinstance(expected, str): + self.assertEqual(out, expected) + self.assertEqual(fname, expected) else: - self.assertEqual((out, fname), expected) + self.assertEqual(out, expected[0]) + self.assertEqual(fname, expected[1]) # Auto-generated fields test('%(id)s.%(ext)s', '1234.mp4') @@ -741,14 +745,26 @@ def test(tmpl, expected, *, info=None, **params): test('%(width|0)04d', '0000') test('a%(width|)d', 'a', outtmpl_na_placeholder='none') - # Internal formatting FORMATS = self.outtmpl_info['formats'] + sanitize = lambda x: x.replace(':', ' -').replace('"', "'") + + # Custom type casting + test('%(formats.:.id)l', 'id1, id2, id3') + test('%(ext)l', 'mp4') + test('%(formats.:.id) 15l', ' id1, id2, id3') + test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) + if compat_os_name == 'nt': + test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'")) + else: + test('%(title4)q', ('\'foo "bar" test\'', "'foo 'bar' test'")) + + # Internal formatting test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') test('%(title|%)s %(title|%%)s', '% %%') test('%(id+1-height+3)05d', '00158') test('%(width+100)05d', 'NA') - test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % str(FORMATS[0]).replace(':', ' -'))) - test('%(formats.0)r', (repr(FORMATS[0]), repr(FORMATS[0]).replace(':', ' -'))) + test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0])))) + test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0])))) test('%(height.0)03d', '001') test('%(-height.0)04d', '-001') test('%(formats.-1.id)s', FORMATS[-1]['id']) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3350042c9..6ce0d19c3 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -35,6 +35,7 @@ compat_kwargs, compat_numeric_types, compat_os_name, + compat_shlex_quote, compat_str, compat_tokenize_tokenize, compat_urllib_error, @@ -108,6 +109,7 @@ try_get, UnavailableVideoError, url_basename, + variadic, version_tuple, write_json_file, write_string, @@ -871,9 +873,12 @@ def escape_outtmpl(outtmpl): @classmethod def validate_outtmpl(cls, outtmpl): ''' @return None or Exception object ''' - outtmpl = cls.escape_outtmpl(cls._outtmpl_expandpath(outtmpl)) + outtmpl = re.sub( + STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'), + lambda mobj: f'{mobj.group(0)[:-1]}s', + cls._outtmpl_expandpath(outtmpl)) try: - outtmpl % collections.defaultdict(int) + cls.escape_outtmpl(outtmpl) % collections.defaultdict(int) return None except ValueError as err: return err @@ -900,7 +905,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): } TMPL_DICT = {} - EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}]')) + EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, @@ -977,8 +982,15 @@ def create_key(outer_mobj): value = default if value is None else value - if fmt == 'c': - value = compat_str(value) + str_fmt = f'{fmt[:-1]}s' + if fmt[-1] == 'l': + value, fmt = ', '.join(variadic(value)), str_fmt + elif fmt[-1] == 'j': + value, fmt = json.dumps(value), str_fmt + elif fmt[-1] == 'q': + value, fmt = compat_shlex_quote(str(value)), str_fmt + elif fmt[-1] == 'c': + value = str(value) if value is None: value, fmt = default, 's' else: @@ -992,7 +1004,7 @@ def create_key(outer_mobj): if fmt[-1] == 'r': # If value is an object, sanitize might convert it to a string # So we convert it to repr first - value, fmt = repr(value), '%ss' % fmt[:-1] + value, fmt = repr(value), str_fmt if fmt[-1] in 'csr': value = sanitize(mobj['fields'].split('.')[-1], value) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 5c3ac0dcd..9b71427d1 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1286,11 +1286,11 @@ def _dict_from_options_callback( 'Execute a command on the file after downloading and post-processing. ' 'Similar syntax to the output template can be used to pass any field as arguments to the command. ' 'An additional field "filepath" that contains the final path of the downloaded file is also available. ' - 'If no fields are passed, "%(filepath)s" is appended to the end of the command')) + 'If no fields are passed, %(filepath)q is appended to the end of the command')) postproc.add_option( '--exec-before-download', metavar='CMD', dest='exec_before_dl_cmd', - help='Execute a command before the actual download. The syntax is the same as --exec') + help='Execute a command before the actual download. The syntax is the same as --exec but "filepath" is not available') postproc.add_option( '--convert-subs', '--convert-sub', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2bd0925b6..998689efe 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4451,8 +4451,10 @@ def q(qid): ) ''' + STR_FORMAT_TYPES = 'diouxXeEfFgGcrs' + def limit_length(s, length): """ Add ellipses to overly long strings """ if s is None: