[lazy_extractor] Create instance only after pre-checking archive

pukkandan 2021-08-23 04:45:30 +05:30
parent 5bc4a65eea
commit 251ae04e6a
4 changed files with 14 additions and 12 deletions

devscripts/lazy_load_template.py

@@ -9,6 +9,7 @@ def __getattr__(cls, name):

 class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
     _module = None
+    _WORKING = True

     @classmethod
     def _get_real_class(cls):
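
Note: the following is a minimal sketch of how the lazy stubs defer the import,
assuming a simplified version of the real template (details differ from the
actual lazy_load_template.py):

    from importlib import import_module

    class LazyLoadMetaClass(type):
        def __getattr__(cls, name):
            # Unknown class attributes fall through to the real class,
            # importing it on first use
            return getattr(cls._get_real_class(), name)

    class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
        _module = None
        _WORKING = True  # default added by this commit

        @classmethod
        def _get_real_class(cls):
            # Import and cache the real extractor class on first access
            if '_real_class' not in cls.__dict__:
                mod = import_module(cls._module)
                cls._real_class = getattr(mod, cls.__name__)
            return cls._real_class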

devscripts/make_lazy_extractors.py

@@ -30,16 +30,14 @@
 with open('devscripts/lazy_load_template.py', 'rt') as f:
     module_template = f.read()

+CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
 module_contents = [
     module_template,
-    getsource(InfoExtractor.ie_key),
-    getsource(InfoExtractor._match_valid_url),
-    getsource(InfoExtractor.suitable),
+    *[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
     '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']

 ie_template = '''
 class {name}({bases}):
-    _VALID_URL = {valid_url!r}
     _module = '{module}'
 '''
@@ -60,14 +58,17 @@ def get_base_name(base):

 def build_lazy_ie(ie, name):
-    valid_url = getattr(ie, '_VALID_URL', None)
     s = ie_template.format(
         name=name,
         bases=', '.join(map(get_base_name, ie.__bases__)),
-        valid_url=valid_url,
         module=ie.__module__)
+    valid_url = getattr(ie, '_VALID_URL', None)
+    if valid_url:
+        s += f'    _VALID_URL = {valid_url!r}\n'
+    if not ie._WORKING:
+        s += f'    _WORKING = False\n'
     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
-        s += '\n' + getsource(ie.suitable)
+        s += f'\n{getsource(ie.suitable)}'
     if hasattr(ie, '_make_valid_url'):
         # search extractors
         s += make_valid_template.format(valid_url=ie._make_valid_url())
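
For illustration, a stub that build_lazy_ie might now emit; the extractor name,
module path, and URL pattern below are invented. _VALID_URL and _WORKING are
appended per class only when applicable, instead of _VALID_URL being baked into
ie_template for every stub:

    class FooIE(LazyLoadExtractor):
        _module = 'yt_dlp.extractor.foo'
        _VALID_URL = r'https?://(?:www\.)?foo\.example/watch/(?P<id>[0-9]+)'
        _WORKING = False  # emitted only when the real extractor is marked broken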

yt_dlp/YoutubeDL.py

@@ -1179,7 +1179,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
             ie_key = 'Generic'

         if ie_key:
-            ies = [self.get_info_extractor(ie_key)]
+            ies = [get_info_extractor(ie_key)]
         else:
             ies = self._ies
@@ -1188,7 +1188,6 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 continue

             ie_key = ie.ie_key()
-            ie = self.get_info_extractor(ie_key)
             if not ie.working():
                 self.report_warning('The program functionality for this site has been marked as broken, '
                                     'and will probably not work.')
@@ -1198,7 +1197,8 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     self.to_screen("[%s] %s: has already been recorded in archive" % (
                         ie_key, temp_id))
                     break
-            return self.__extract_info(url, ie, download, extra_info, process)
+            return self.__extract_info(url, self.get_info_extractor(ie.ie_key()),
+                                       download, extra_info, process)
         else:
             self.report_error('no suitable InfoExtractor for URL %s' % url)
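
The net effect in YoutubeDL.py: suitable(), working(), and the download-archive
pre-check now all run against the lazy extractor class, and
self.get_info_extractor() (which creates the instance and pulls in the real
module) runs only for the URL that survives those checks. A self-contained toy
of the pattern, with all names invented:

    class StubIE:
        _WORKING = True

        @classmethod
        def ie_key(cls):
            return cls.__name__[:-2]

        @classmethod
        def working(cls):
            return cls._WORKING

        @classmethod
        def suitable(cls, url):
            return url.startswith('https://example.com/')

        def extract(self, url):  # stands in for the expensive part
            return {'id': url.rsplit('/', 1)[-1], 'extractor': self.ie_key()}

    archive = {'Stub 1'}  # "ie_key id" entries already downloaded

    def extract_info(url, ies=(StubIE,)):
        for ie in ies:  # classes, not instances
            if not ie.suitable(url):
                continue
            temp_id = url.rsplit('/', 1)[-1]
            if f'{ie.ie_key()} {temp_id}' in archive:
                print(f'[{ie.ie_key()}] {temp_id}: has already been recorded in archive')
                return None
            return ie().extract(url)  # instance created only after the pre-checks

    print(extract_info('https://example.com/2'))  # {'id': '2', 'extractor': 'Stub'}
    print(extract_info('https://example.com/1'))  # already archived -> None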

yt_dlp/__init__.py

@@ -110,14 +110,14 @@ def _real_main(argv=None):
     if opts.list_extractors:
         for ie in list_extractors(opts.age_limit):
-            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
+            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout)
             matchedUrls = [url for url in all_urls if ie.suitable(url)]
             for mu in matchedUrls:
                 write_string('  ' + mu + '\n', out=sys.stdout)
         sys.exit(0)
     if opts.list_extractor_descriptions:
         for ie in list_extractors(opts.age_limit):
-            if not ie._WORKING:
+            if not ie.working():
                 continue
             desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
             if desc is False:
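
The switch from ie._WORKING to ie.working() matters because working is one of
the CLASS_PROPERTIES copied into the generated lazy module, so it answers on an
uninstantiated stub. Roughly (shape assumed, not quoted from this commit):

    class InfoExtractorSketch:
        _WORKING = True

        @classmethod
        def working(cls):
            """Getter method for _WORKING."""
            return cls._WORKING

    assert InfoExtractorSketch.working()  # callable on the class; no instance needed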