Some minor fixes and refactoring (see desc)

* [utils] Fix issues with reversal
* check_formats should catch `DownloadError`, not `ExtractorError`
* Simplify format selectors with `LazyList` and `yield from`
This commit is contained in:
pukkandan 2021-06-27 07:35:58 +05:30
parent b1e60d1806
commit 981052c9c6
3 changed files with 38 additions and 44 deletions

View File

@ -1545,8 +1545,8 @@ def test_LazyList(self):
self.assertEqual(repr(LazyList(it)), repr(it)) self.assertEqual(repr(LazyList(it)), repr(it))
self.assertEqual(str(LazyList(it)), str(it)) self.assertEqual(str(LazyList(it)), str(it))
self.assertEqual(list(reversed(LazyList(it))), it[::-1]) self.assertEqual(list(LazyList(it).reverse()), it[::-1])
self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7])
def test_LazyList_laziness(self): def test_LazyList_laziness(self):
@ -1559,13 +1559,13 @@ def test(ll, idx, val, cache):
test(ll, 5, 5, range(6)) test(ll, 5, 5, range(6))
test(ll, -3, 7, range(10)) test(ll, -3, 7, range(10))
ll = reversed(LazyList(range(10))) ll = LazyList(range(10)).reverse()
test(ll, -1, 0, range(1)) test(ll, -1, 0, range(1))
test(ll, 3, 6, range(10)) test(ll, 3, 6, range(10))
ll = LazyList(itertools.count()) ll = LazyList(itertools.count())
test(ll, 10, 10, range(11)) test(ll, 10, 10, range(11))
reversed(ll) ll.reverse()
test(ll, -15, 14, range(15)) test(ll, -15, 14, range(15))

View File

@ -1758,6 +1758,8 @@ def _merge(formats_pair):
return new_dict return new_dict
def _check_formats(formats): def _check_formats(formats):
if not check_formats:
yield from formats
for f in formats: for f in formats:
self.to_screen('[info] Testing format %s' % f['format_id']) self.to_screen('[info] Testing format %s' % f['format_id'])
temp_file = tempfile.NamedTemporaryFile( temp_file = tempfile.NamedTemporaryFile(
@ -1765,16 +1767,16 @@ def _check_formats(formats):
dir=self.get_output_path('temp') or None) dir=self.get_output_path('temp') or None)
temp_file.close() temp_file.close()
try: try:
dl, _ = self.dl(temp_file.name, f, test=True) success, _ = self.dl(temp_file.name, f, test=True)
except (ExtractorError, IOError, OSError, ValueError) + network_exceptions: except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
dl = False success = False
finally: finally:
if os.path.exists(temp_file.name): if os.path.exists(temp_file.name):
try: try:
os.remove(temp_file.name) os.remove(temp_file.name)
except OSError: except OSError:
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
if dl: if success:
yield f yield f
else: else:
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
@ -1785,8 +1787,7 @@ def _build_selector_function(selector):
def selector_function(ctx): def selector_function(ctx):
for f in fs: for f in fs:
for format in f(ctx): yield from f(ctx)
yield format
return selector_function return selector_function
elif selector.type == GROUP: # () elif selector.type == GROUP: # ()
@ -1802,22 +1803,24 @@ def selector_function(ctx):
return picked_formats return picked_formats
return [] return []
elif selector.type == MERGE: # +
selector_1, selector_2 = map(_build_selector_function, selector.selector)
def selector_function(ctx):
for pair in itertools.product(
selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
yield _merge(pair)
elif selector.type == SINGLE: # atom elif selector.type == SINGLE: # atom
format_spec = selector.selector or 'best' format_spec = selector.selector or 'best'
# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
if format_spec == 'all': if format_spec == 'all':
def selector_function(ctx): def selector_function(ctx):
formats = list(ctx['formats']) yield from _check_formats(ctx['formats'])
if check_formats:
formats = _check_formats(formats)
for f in formats:
yield f
elif format_spec == 'mergeall': elif format_spec == 'mergeall':
def selector_function(ctx): def selector_function(ctx):
formats = ctx['formats'] formats = list(_check_formats(ctx['formats']))
if check_formats:
formats = list(_check_formats(formats))
if not formats: if not formats:
return return
merged_format = formats[-1] merged_format = formats[-1]
@ -1855,29 +1858,17 @@ def selector_function(ctx):
def selector_function(ctx): def selector_function(ctx):
formats = list(ctx['formats']) formats = list(ctx['formats'])
if not formats:
return
matches = list(filter(filter_f, formats)) if filter_f is not None else formats matches = list(filter(filter_f, formats)) if filter_f is not None else formats
if format_fallback and ctx['incomplete_formats'] and not matches: if format_fallback and ctx['incomplete_formats'] and not matches:
# for extractors with incomplete formats (audio only (soundcloud) # for extractors with incomplete formats (audio only (soundcloud)
# or video only (imgur)) best/worst will fallback to # or video only (imgur)) best/worst will fallback to
# best/worst {video,audio}-only format # best/worst {video,audio}-only format
matches = formats matches = formats
if format_reverse: matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
matches = matches[::-1] try:
if check_formats:
matches = list(itertools.islice(_check_formats(matches), format_idx))
n = len(matches)
if -n <= format_idx - 1 < n:
yield matches[format_idx - 1] yield matches[format_idx - 1]
except IndexError:
elif selector.type == MERGE: # + return
selector_1, selector_2 = map(_build_selector_function, selector.selector)
def selector_function(ctx):
for pair in itertools.product(
selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
yield _merge(pair)
filters = [self._build_format_filter(f) for f in selector.filters] filters = [self._build_format_filter(f) for f in selector.filters]
@ -1971,7 +1962,7 @@ def test_thumbnail(t):
t['resolution'] = '%dx%d' % (t['width'], t['height']) t['resolution'] = '%dx%d' % (t['width'], t['height'])
t['url'] = sanitize_url(t['url']) t['url'] = sanitize_url(t['url'])
if self.params.get('check_formats'): if self.params.get('check_formats'):
info_dict['thumbnails'] = reversed(LazyList(filter(test_thumbnail, thumbnails[::-1]))) info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse()
def process_video_result(self, info_dict, download=True): def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'
@ -3267,7 +3258,7 @@ def _write_thumbnails(self, info_dict, filename): # return the extensions
multiple = write_all and len(thumbnails) > 1 multiple = write_all and len(thumbnails) > 1
ret = [] ret = []
for t in thumbnails[::1 if write_all else -1]: for t in thumbnails[::-1]:
thumb_ext = determine_ext(t['url'], 'jpg') thumb_ext = determine_ext(t['url'], 'jpg')
suffix = '%s.' % t['id'] if multiple else '' suffix = '%s.' % t['id'] if multiple else ''
thumb_display_id = '%s ' % t['id'] if multiple else '' thumb_display_id = '%s ' % t['id'] if multiple else ''

View File

@ -3976,20 +3976,23 @@ def __init__(self, iterable):
def __iter__(self): def __iter__(self):
if self.__reversed: if self.__reversed:
# We need to consume the entire iterable to iterate in reverse # We need to consume the entire iterable to iterate in reverse
yield from self.exhaust()[::-1] yield from self.exhaust()
return return
yield from self.__cache yield from self.__cache
for item in self.__iterable: for item in self.__iterable:
self.__cache.append(item) self.__cache.append(item)
yield item yield item
def exhaust(self): def __exhaust(self):
''' Evaluate the entire iterable '''
self.__cache.extend(self.__iterable) self.__cache.extend(self.__iterable)
return self.__cache return self.__cache
def exhaust(self):
''' Evaluate the entire iterable '''
return self.__exhaust()[::-1 if self.__reversed else 1]
@staticmethod @staticmethod
def _reverse_index(x): def __reverse_index(x):
return -(x + 1) return -(x + 1)
def __getitem__(self, idx): def __getitem__(self, idx):
@ -3998,18 +4001,18 @@ def __getitem__(self, idx):
start = idx.start if idx.start is not None else 0 if step > 0 else -1 start = idx.start if idx.start is not None else 0 if step > 0 else -1
stop = idx.stop if idx.stop is not None else -1 if step > 0 else 0 stop = idx.stop if idx.stop is not None else -1 if step > 0 else 0
if self.__reversed: if self.__reversed:
start, stop, step = map(self._reverse_index, (start, stop, step)) (start, stop), step = map(self.__reverse_index, (start, stop)), -step
idx = slice(start, stop, step) idx = slice(start, stop, step)
elif isinstance(idx, int): elif isinstance(idx, int):
if self.__reversed: if self.__reversed:
idx = self._reverse_index(idx) idx = self.__reverse_index(idx)
start = stop = idx start = stop = idx
else: else:
raise TypeError('indices must be integers or slices') raise TypeError('indices must be integers or slices')
if start < 0 or stop < 0: if start < 0 or stop < 0:
# We need to consume the entire iterable to be able to slice from the end # We need to consume the entire iterable to be able to slice from the end
# Obviously, never use this with infinite iterables # Obviously, never use this with infinite iterables
return self.exhaust()[idx] return self.__exhaust()[idx]
n = max(start, stop) - len(self.__cache) + 1 n = max(start, stop) - len(self.__cache) + 1
if n > 0: if n > 0:
@ -4027,7 +4030,7 @@ def __len__(self):
self.exhaust() self.exhaust()
return len(self.__cache) return len(self.__cache)
def __reversed__(self): def reverse(self):
self.__reversed = not self.__reversed self.__reversed = not self.__reversed
return self return self