Add all format filtering operators also to --match-filter

PR: https://github.com/ytdl-org/youtube-dl/pull/27361

Authored by: max-te
This commit is contained in:
Max Teegen 2021-06-13 16:25:19 +02:00 committed by pukkandan
parent 678da2f21b
commit 77b87f0519
4 changed files with 49 additions and 42 deletions

View File

@ -338,25 +338,21 @@ ## Video Selection:
COUNT views COUNT views
--max-views COUNT Do not download any videos with more than --max-views COUNT Do not download any videos with more than
COUNT views COUNT views
--match-filter FILTER Generic video filter. Specify any key (see --match-filter FILTER Generic video filter. Any field (see
"OUTPUT TEMPLATE" for a list of available "OUTPUT TEMPLATE") can be compared with a
keys) to match if the key is present, !key number or a quoted string using the
to check if the key is not present, operators defined in "Filtering formats".
key>NUMBER (like "view_count > 12", also You can also simply specify a field to
works with >=, <, <=, !=, =) to compare match if the field is present and "!field"
against a number, key = 'LITERAL' (like to check if the field is not present.
"uploader = 'Mike Smith'", also works with Multiple filters can be checked using "&".
!=) to match against a string literal and & For example, to only match videos that are
to require multiple matches. Values which not live, have a like count more than 100, a
are not known are excluded unless you put a dislike count less than 50 (or the dislike
question mark (?) after the operator. For field is not available), and also have a
example, to only match videos that have description that contains "python", use
been liked more than 100 times and disliked --match-filter "!is_live & like_count>100 &
less than 50 times (or the dislike dislike_count<?50 & description*='python'"
functionality is not available at the given
service), but who also have a description,
use --match-filter "like_count > 100 &
dislike_count <? 50 & description"
--no-match-filter Do not use generic video filter (default) --no-match-filter Do not use generic video filter (default)
--no-playlist Download only the video, if the URL refers --no-playlist Download only the video, if the URL refers
to a video and a playlist to a video and a playlist

View File

@ -1207,7 +1207,6 @@ def test_render_table(self):
'9999 51') '9999 51')
def test_match_str(self): def test_match_str(self):
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
self.assertFalse(match_str('xy', {'x': 1200})) self.assertFalse(match_str('xy', {'x': 1200}))
self.assertTrue(match_str('!xy', {'x': 1200})) self.assertTrue(match_str('!xy', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 1200})) self.assertTrue(match_str('x', {'x': 1200}))
@ -1224,6 +1223,17 @@ def test_match_str(self):
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
self.assertFalse(match_str( self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description', 'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 90, 'description': 'foo'})) {'like_count': 90, 'description': 'foo'}))

View File

@ -375,22 +375,16 @@ def _dict_from_options_callback(
'--match-filter', '--match-filter',
metavar='FILTER', dest='match_filter', default=None, metavar='FILTER', dest='match_filter', default=None,
help=( help=(
'Generic video filter. ' 'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to ' 'number or a string using the operators defined in "Filtering formats". '
'match if the key is present, ' 'You can also simply specify a field to match if the field is present '
'!key to check if the key is not present, ' 'and "!field" to check if the field is not present. '
'key>NUMBER (like "view_count > 12", also works with ' 'Multiple filters can be checked using "&". '
'>=, <, <=, !=, =) to compare against a number, ' 'For example, to only match videos that are not live, '
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) ' 'has a like count more than 100, a dislike count less than 50 '
'to match against a string literal ' '(or the dislike field is not available), and also has a description '
'and & to require multiple matches. ' 'that contains "python", use --match-filter "!is_live & '
'Values which are not known are excluded unless you ' 'like_count>100 & dislike_count<?50 & description*=\'python\'"'))
'put a question mark (?) after the operator. '
'For example, to only match videos that have been liked more than '
'100 times and disliked less than 50 times (or the dislike '
'functionality is not available at the given service), but who '
'also have a description, use --match-filter '
'"like_count > 100 & dislike_count <? 50 & description"'))
selection.add_option( selection.add_option(
'--no-match-filter', '--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None, metavar='FILTER', dest='match_filter', action='store_const', const=None,

View File

@ -4663,17 +4663,20 @@ def filter_using_list(row, filterArray):
def _match_one(filter_part, dct): def _match_one(filter_part, dct):
# TODO: Generalize code with YoutubeDL._build_format_filter
COMPARISON_OPERATORS = { COMPARISON_OPERATORS = {
'<': operator.lt, '<': operator.lt,
'<=': operator.le, '<=': operator.le,
'>': operator.gt, '>': operator.gt,
'>=': operator.ge, '>=': operator.ge,
'=': operator.eq, '=': operator.eq,
'!=': operator.ne, '*=': operator.contains,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
} }
operator_rex = re.compile(r'''(?x)\s* operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+) (?P<key>[a-z_]+)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?: (?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
@ -4683,7 +4686,11 @@ def _match_one(filter_part, dct):
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
m = operator_rex.search(filter_part) m = operator_rex.search(filter_part)
if m: if m:
op = COMPARISON_OPERATORS[m.group('op')] unnegated_op = COMPARISON_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not unnegated_op(attr, value)
else:
op = unnegated_op
actual_value = dct.get(m.group('key')) actual_value = dct.get(m.group('key'))
if (m.group('quotedstrval') is not None if (m.group('quotedstrval') is not None
or m.group('strval') is not None or m.group('strval') is not None
@ -4693,14 +4700,14 @@ def _match_one(filter_part, dct):
# https://github.com/ytdl-org/youtube-dl/issues/11082). # https://github.com/ytdl-org/youtube-dl/issues/11082).
or actual_value is not None and m.group('intval') is not None or actual_value is not None and m.group('intval') is not None
and isinstance(actual_value, compat_str)): and isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
quote = m.group('quote') quote = m.group('quote')
if quote is not None: if quote is not None:
comparison_value = comparison_value.replace(r'\%s' % quote, quote) comparison_value = comparison_value.replace(r'\%s' % quote, quote)
else: else:
if m.group('op') in ('*=', '^=', '$='):
raise ValueError(
'Operator %s only supports string values!' % m.group('op'))
try: try:
comparison_value = int(m.group('intval')) comparison_value = int(m.group('intval'))
except ValueError: except ValueError: