Check load_more_widget_html for feed paging

This commit is contained in:
Tim Sogard 2014-07-27 17:14:29 -07:00
parent 65bc504db8
commit 1a9b9649fb

View File

@@ -1325,6 +1325,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
u'%s feed' % self._FEED_NAME, u'%s feed' % self._FEED_NAME,
u'Downloading page %s' % i) u'Downloading page %s' % i)
feed_html = info.get('feed_html') or info.get('content_html') feed_html = info.get('feed_html') or info.get('content_html')
load_more_widget_html = info.get('load_more_widget_html') or feed_html
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html) m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids) ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend( feed_entries.extend(
@@ -1332,7 +1333,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
for video_id in ids) for video_id in ids)
mobj = re.search( mobj = re.search(
r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)', r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
feed_html) load_more_widget_html)
if mobj is None: if mobj is None:
break break
paging = mobj.group('paging') paging = mobj.group('paging')