From b82f815f373818ba99ee43660e9255e8f4ecac62 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 6 Dec 2014 14:02:19 +0100 Subject: [PATCH] Allow iterators for playlist result entries --- youtube_dl/YoutubeDL.py | 20 ++++++++++++++------ youtube_dl/extractor/common.py | 4 ++-- youtube_dl/extractor/youtube.py | 18 ++++++++++-------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f89ac4e1de..56dc3d4617 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -7,6 +7,7 @@ import datetime import errno import io +import itertools import json import locale import os @@ -654,21 +655,28 @@ def make_result(embedded_info): if playlistend == -1: playlistend = None - if isinstance(ie_result['entries'], list): - n_all_entries = len(ie_result['entries']) - entries = ie_result['entries'][playliststart:playlistend] + ie_entries = ie_result['entries'] + if isinstance(ie_entries, list): + n_all_entries = len(ie_entries) + entries = ie_entries[playliststart:playlistend] n_entries = len(entries) self.to_screen( "[%s] playlist %s: Collected %d video ids (downloading %d of them)" % (ie_result['extractor'], playlist, n_all_entries, n_entries)) - else: - assert isinstance(ie_result['entries'], PagedList) - entries = ie_result['entries'].getslice( + elif isinstance(ie_entries, PagedList): + entries = ie_entries.getslice( playliststart, playlistend) n_entries = len(entries) self.to_screen( "[%s] playlist %s: Downloading %d videos" % (ie_result['extractor'], playlist, n_entries)) + else: # iterable + entries = list(itertools.islice( + ie_entries, playliststart, playlistend)) + n_entries = len(entries) + self.to_screen( + "[%s] playlist %s: Downloading %d videos" % + (ie_result['extractor'], playlist, n_entries)) for i, entry in enumerate(entries, 1): self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries)) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5b5e33cea8..cb6081dd08 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -158,8 +158,8 @@ class InfoExtractor(object): _type "playlist" indicates multiple videos. - There must be a key "entries", which is a list or a PagedList object, each - element of which is a valid dictionary under this specfication. + There must be a key "entries", which is a list, an iterable, or a PagedList + object, each element of which is a valid dictionary by this specification. Additionally, playlists can have "title" and "id" attributes with the same semantics as videos (see above). diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index addef9594c..8b6e591a43 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1262,8 +1262,12 @@ def _real_extract(self, url): # The videos are contained in a single page # the ajax pages can't be used, they are empty video_ids = self.extract_videos_from_page(channel_page) - else: - # Download all channel pages using the json-based channel_ajax query + entries = [ + self.url_result(video_id, 'Youtube', video_id=video_id) + for video_id in video_ids] + return self.playlist_result(entries, channel_id) + + def _entries(): for pagenum in itertools.count(1): url = self._MORE_PAGES_URL % (pagenum, channel_id) page = self._download_json( @@ -1271,16 +1275,14 @@ def _real_extract(self, url): transform_source=uppercase_escape) ids_in_page = self.extract_videos_from_page(page['content_html']) - video_ids.extend(ids_in_page) + for video_id in ids_in_page: + yield self.url_result( + video_id, 'Youtube', video_id=video_id) if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: break - self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) - - url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) - for video_id in video_ids] - return self.playlist_result(url_entries, channel_id) + return self.playlist_result(_entries(), channel_id) class YoutubeUserIE(InfoExtractor):