From b30b8698ea11e85079cc9e392cdf26f4e61671c4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 30 Apr 2014 02:23:51 +0200 Subject: [PATCH] [generic] Allow multiple matches for generic hits (Fixes #2818) --- youtube_dl/extractor/generic.py | 85 ++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index cfb009d795..58092da38e 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -637,70 +637,77 @@ def _real_extract(self, url): return self.url_result(smotri_url, 'Smotri') # Start with something easy: JW Player in SWFObject - mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) - if mobj is None: + found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) + if not found: # Look for gorilla-vid style embedding - mobj = re.search(r'''(?sx) + found = re.findall(r'''(?sx) (?: jw_plugins| JWPlayerOptions| jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup ) .*?file\s*:\s*["\'](.*?)["\']''', webpage) - if mobj is None: + if not found: # Broaden the search a little bit - mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) - if mobj is None: - # Broaden the search a little bit: JWPlayer JS loader - mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) - - if mobj is None: + found = re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) + if not found: + # Broaden the findall a little bit: JWPlayer JS loader + found = re.findall(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage) + if not found: # Try to find twitter cards info - mobj = re.search(r'.*?.*?