Allow iterators for playlist result entries

This commit is contained in:
Philipp Hagemeister 2014-12-06 14:02:19 +01:00
parent 158f8cadc0
commit b82f815f37
3 changed files with 26 additions and 16 deletions

View File

@ -7,6 +7,7 @@
import datetime import datetime
import errno import errno
import io import io
import itertools
import json import json
import locale import locale
import os import os
@ -654,21 +655,28 @@ def make_result(embedded_info):
if playlistend == -1: if playlistend == -1:
playlistend = None playlistend = None
if isinstance(ie_result['entries'], list): ie_entries = ie_result['entries']
n_all_entries = len(ie_result['entries']) if isinstance(ie_entries, list):
entries = ie_result['entries'][playliststart:playlistend] n_all_entries = len(ie_entries)
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries) n_entries = len(entries)
self.to_screen( self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" % "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries)) (ie_result['extractor'], playlist, n_all_entries, n_entries))
else: elif isinstance(ie_entries, PagedList):
assert isinstance(ie_result['entries'], PagedList) entries = ie_entries.getslice(
entries = ie_result['entries'].getslice(
playliststart, playlistend) playliststart, playlistend)
n_entries = len(entries) n_entries = len(entries)
self.to_screen( self.to_screen(
"[%s] playlist %s: Downloading %d videos" % "[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries)) (ie_result['extractor'], playlist, n_entries))
else: # iterable
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
for i, entry in enumerate(entries, 1): for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))

View File

@ -158,8 +158,8 @@ class InfoExtractor(object):
 _type "playlist" indicates multiple videos.
-There must be a key "entries", which is a list or a PagedList object, each
-element of which is a valid dictionary under this specfication.
+There must be a key "entries", which is a list, an iterable, or a PagedList
+object, each element of which is a valid dictionary by this specification.
 Additionally, playlists can have "title" and "id" attributes with the same
 semantics as videos (see above).

View File

@ -1262,8 +1262,12 @@ def _real_extract(self, url):
# The videos are contained in a single page # The videos are contained in a single page
# the ajax pages can't be used, they are empty # the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page) video_ids = self.extract_videos_from_page(channel_page)
else: entries = [
# Download all channel pages using the json-based channel_ajax query self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(entries, channel_id)
def _entries():
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id) url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_json( page = self._download_json(
@ -1271,16 +1275,14 @@ def _real_extract(self, url):
transform_source=uppercase_escape) transform_source=uppercase_escape)
ids_in_page = self.extract_videos_from_page(page['content_html']) ids_in_page = self.extract_videos_from_page(page['content_html'])
video_ids.extend(ids_in_page) for video_id in ids_in_page:
yield self.url_result(
video_id, 'Youtube', video_id=video_id)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break break
self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) return self.playlist_result(_entries(), channel_id)
url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
for video_id in video_ids]
return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor): class YoutubeUserIE(InfoExtractor):