From 55b3e45bbab3af5132d45c8f3f8f19fae5f5f1d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 23 Oct 2013 14:38:03 +0200 Subject: [PATCH] [vimeo] Fix pro videos and player.vimeo.com urls The old process can still be used for those videos. Added RegexNotFoundError, which is raised by _search_regex if it can't extract the info. --- youtube_dl/extractor/common.py | 5 +++-- youtube_dl/extractor/vimeo.py | 6 ++++++ youtube_dl/utils.py | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7d7ce5d98..aaa5c24c8 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ clean_html, compiled_regex_type, ExtractorError, + RegexNotFoundError, unescapeHTML, ) @@ -231,7 +232,7 @@ def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0 Perform a regex search on the given string, using a single or a list of patterns returning the first matching group. In case of failure return a default value or raise a WARNING or a - ExtractorError, depending on fatal, specifying the field name. + RegexNotFoundError, depending on fatal, specifying the field name. """ if isinstance(pattern, (str, compat_str, compiled_regex_type)): mobj = re.search(pattern, string, flags) @@ -251,7 +252,7 @@ def _search_regex(self, pattern, string, name, default=None, fatal=True, flags=0 elif default is not None: return default elif fatal: - raise ExtractorError(u'Unable to extract %s' % _name) + raise RegexNotFoundError(u'Unable to extract %s' % _name) else: self._downloader.report_warning(u'unable to extract %s; ' u'please report this issue on http://yt-dl.org/bug' % _name) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index bf48671b3..ad2f75d6b 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -10,6 +10,7 @@ clean_html, get_element_by_attribute, ExtractorError, + RegexNotFoundError, std_headers, unsmuggle_url, ) @@ -133,6 +134,11 @@ def _real_extract(self, url, new_video=True): r' data-config-url="(.+?)"', webpage, u'config URL') config_json = self._download_webpage(config_url, video_id) config = json.loads(config_json) + except RegexNotFoundError: + # For pro videos or player.vimeo.com urls + config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], + webpage, u'info section', flags=re.DOTALL) + config = json.loads(config) except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bfb8f6bcd..1d9785341 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -572,6 +572,11 @@ def format_traceback(self): return u''.join(traceback.format_tb(self.traceback)) +class RegexNotFoundError(ExtractorError): + """Error when a regex didn't match""" + pass + + class DownloadError(Exception): """Download Error exception.