mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-21 20:41:30 +01:00
[extractor/youtube] Fix continuation loop with no comments (#7148)
Deep check the response for incomplete data.
Authored by: coletdjnz
This commit is contained in:
parent
c2502cfed9
commit
18f8fba7c8
@ -3314,7 +3314,7 @@ def extract_header(contents):
|
|||||||
expected_comment_count = self._get_count(
|
expected_comment_count = self._get_count(
|
||||||
comments_header_renderer, 'countText', 'commentsCount')
|
comments_header_renderer, 'countText', 'commentsCount')
|
||||||
|
|
||||||
if expected_comment_count:
|
if expected_comment_count is not None:
|
||||||
tracker['est_total'] = expected_comment_count
|
tracker['est_total'] = expected_comment_count
|
||||||
self.to_screen(f'Downloading ~{expected_comment_count} comments')
|
self.to_screen(f'Downloading ~{expected_comment_count} comments')
|
||||||
comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
|
comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
|
||||||
@ -3385,7 +3385,7 @@ def extract_thread(contents):
|
|||||||
if not tracker:
|
if not tracker:
|
||||||
tracker = dict(
|
tracker = dict(
|
||||||
running_total=0,
|
running_total=0,
|
||||||
est_total=0,
|
est_total=None,
|
||||||
current_page_thread=0,
|
current_page_thread=0,
|
||||||
total_parent_comments=0,
|
total_parent_comments=0,
|
||||||
total_reply_comments=0,
|
total_reply_comments=0,
|
||||||
@ -3418,11 +3418,13 @@ def extract_thread(contents):
|
|||||||
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
|
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
|
||||||
is_forced_continuation = True
|
is_forced_continuation = True
|
||||||
|
|
||||||
|
continuation_items_path = (
|
||||||
|
'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
|
||||||
for page_num in itertools.count(0):
|
for page_num in itertools.count(0):
|
||||||
if not continuation:
|
if not continuation:
|
||||||
break
|
break
|
||||||
headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
|
headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
|
||||||
comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
|
comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
|
||||||
if page_num == 0:
|
if page_num == 0:
|
||||||
if is_first_continuation:
|
if is_first_continuation:
|
||||||
note_prefix = 'Downloading comment section API JSON'
|
note_prefix = 'Downloading comment section API JSON'
|
||||||
@ -3433,11 +3435,18 @@ def extract_thread(contents):
|
|||||||
note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
|
note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
|
||||||
' ' if parent else '', ' replies' if parent else '',
|
' ' if parent else '', ' replies' if parent else '',
|
||||||
page_num, comment_prog_str)
|
page_num, comment_prog_str)
|
||||||
|
|
||||||
|
# Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
|
||||||
|
# Ignore check if YouTube says the comment count is 0.
|
||||||
|
check_get_keys = None
|
||||||
|
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
|
||||||
|
check_get_keys = [[*continuation_items_path, ..., (
|
||||||
|
'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
|
||||||
try:
|
try:
|
||||||
response = self._extract_response(
|
response = self._extract_response(
|
||||||
item_id=None, query=continuation,
|
item_id=None, query=continuation,
|
||||||
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
||||||
check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
|
check_get_keys=check_get_keys)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
# Ignore incomplete data error for replies if retries didn't work.
|
# Ignore incomplete data error for replies if retries didn't work.
|
||||||
# This is to allow any other parent comments and comment threads to be downloaded.
|
# This is to allow any other parent comments and comment threads to be downloaded.
|
||||||
@ -3449,15 +3458,8 @@ def extract_thread(contents):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
is_forced_continuation = False
|
is_forced_continuation = False
|
||||||
continuation_contents = traverse_obj(
|
|
||||||
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
|
|
||||||
|
|
||||||
continuation = None
|
continuation = None
|
||||||
for continuation_section in continuation_contents:
|
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
|
||||||
continuation_items = traverse_obj(
|
|
||||||
continuation_section,
|
|
||||||
(('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
|
|
||||||
get_all=False, expected_type=list) or []
|
|
||||||
if is_first_continuation:
|
if is_first_continuation:
|
||||||
continuation = extract_header(continuation_items)
|
continuation = extract_header(continuation_items)
|
||||||
is_first_continuation = False
|
is_first_continuation = False
|
||||||
|
Loading…
Reference in New Issue
Block a user