From 3442b30ab2a5f168caa45a7371aca0f4103fdd86 Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Sun, 18 May 2014 23:15:09 +0930 Subject: [PATCH 1/4] [generic] Support data-video-url for YouTube embeds (Fixes #2862) --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0e5cf0efb..69381f777 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -473,7 +473,7 @@ def _real_extract(self, url): # Look for embedded YouTube player matches = re.findall(r'''(?x) - (?:<iframe[^>]+?src=|embedSWF\(\s*) + (?:<iframe[^>]+?src=|data-video-url=|embedSWF\(\s*) (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ (?:embed|v)/.+?) \1''', webpage) From 212a5e28bae61f764e8e802e403a15cbe62f0dc6 Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Wed, 21 May 2014 19:04:55 +0930 Subject: [PATCH 2/4] Add a duplicate check to /extractor/common.py playlist_result function --- youtube_dl/extractor/common.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index db472aace..26dd9882f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -343,6 +343,16 @@ def url_result(url, ie=None, video_id=None): @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None): """Returns a playlist""" + # Ensure we don't have any duplicates in the playlist + seen = set() + new_list = [] + for url in entries: + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + entries = new_list + video_info = {'_type': 'playlist', 'entries': entries} if playlist_id: From 610134730abfdaaa226de2092d8ad5d731d5b54b Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Wed, 21 May 2014 19:25:37 +0930 Subject: [PATCH 3/4] Add a _TEST_ --- youtube_dl/extractor/generic.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff 
--git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 69381f777..c1e533821 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -260,6 +260,20 @@ class GenericIE(InfoExtractor): 'uploader': 'Spi0n', }, 'add_ie': ['Dailymotion'], + }, + # YouTube embed via <data-video-url=""> + { + 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM', + 'md5': 'c267b1ab6d736057d64babaa37e07a66', + 'info_dict': { + 'id': 'Ybd-qmqYYpA', + 'ext': 'mp4', + 'title': 'Asphalt 8: Airborne - Chinese Great Wall - Android Game Trailer', + 'uploader': 'gameloftandroid', + 'uploader_id': 'gameloftandroid', + 'upload_date': '20140321', + 'description': 'md5:9c6dca5dd75b7131ce482ccf080749d6' + } } ] From 37e3cbe22e0bfa6b98a6343be88e1c8c2c7ac41f Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Sun, 1 Jun 2014 01:16:35 +0930 Subject: [PATCH 4/4] Move duplicate check to generic.py --- youtube_dl/extractor/common.py | 10 ---------- youtube_dl/extractor/generic.py | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 26dd9882f..db472aace 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -343,16 +343,6 @@ def url_result(url, ie=None, video_id=None): @staticmethod def playlist_result(entries, playlist_id=None, playlist_title=None): """Returns a playlist""" - # Ensure we don't have any duplicates in the playlist - seen = set() - new_list = [] - for url in entries: - theurl = tuple(url.items()) - if theurl not in seen: - seen.add(theurl) - new_list.append(url) - entries = new_list - video_info = {'_type': 'playlist', 'entries': entries} if playlist_id: diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c1e533821..dfa8d6153 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -494,6 +494,14 @@ def _real_extract(self, 
url): if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube') for tuppl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -503,6 +511,14 @@ def _real_extract(self, url): if matches: urlrs = [self.url_result(unescapeHTML(tuppl[1])) for tuppl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) @@ -615,6 +631,14 @@ def _real_extract(self, url): if matches: urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie') for eurl in matches] + # First, ensure we have a duplicate free list of entries + seen = set() + new_list = [] + theurl = tuple(url.items()) + if theurl not in seen: + seen.add(theurl) + new_list.append(url) + urlrs = new_list return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title)