FunnyOrDie IE (Fixes #599)

This commit is contained in:
Philipp Hagemeister 2012-12-20 21:28:27 +01:00
parent 69a3883199
commit 21a9c6aaac
3 changed files with 56 additions and 0 deletions

View File

@ -87,5 +87,11 @@
"name": "GooglePlus",
"url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH",
"file": "ZButuJc6CtH.flv"
},
{
"name": "FunnyOrDie",
"url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",
"file": "0732f586d7.mp4",
"md5": "f647e9e90064b53b6e046e75d0241fbd"
}
]

View File

@ -3630,3 +3630,52 @@ def _real_extract(self, url):
break
offset += limit
return info
class FunnyOrDieIE(InfoExtractor):
    """Information extractor for funnyordie.com video pages."""

    # The hexadecimal path component after /videos/ is captured as the video id.
    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
    IE_NAME = u'FunnyOrDie'

    def report_extraction(self, video_id):
        """Report that information extraction for video_id has started."""
        self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))

    def _real_extract(self, url):
        """Extract the video information from a FunnyOrDie video page.

        Downloads the page at url and scrapes the media URL, the title and
        the optional og:description from its HTML.

        Returns a one-element list holding a dict with the keys 'id', 'url',
        'ext', 'title' and 'description' ('description' is None when the page
        has no og:description meta tag).  On any failure the problem is
        reported through self._downloader.trouble() and None is returned.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group('id')
        self.report_extraction(video_id)

        try:
            urlh = compat_urllib_request.urlopen(url)
            try:
                webpage_bytes = urlh.read()
            finally:
                # Release the connection even if reading fails.
                urlh.close()
            webpage = webpage_bytes.decode('utf-8', 'ignore')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
            return

        # The pattern skips the first <source> element inside <video> and
        # takes the src attribute of the second one.
        m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
        if not m:
            self._downloader.trouble(u'ERROR: unable to find video information')
            # Stop here: without a match there is nothing to read the URL from.
            return
        video_url = unescapeHTML(m.group('url'))

        m = re.search(r"class='player_page_h1'>\s+<a.*?>(?P<title>.*?)</a>", webpage)
        if not m:
            self._downloader.trouble(u'Cannot find video title')
            # Stop here for the same reason as above.
            return
        title = unescapeHTML(m.group('title'))

        # The description is optional; a missing meta tag is not an error.
        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
        if m:
            desc = unescapeHTML(m.group('desc'))
        else:
            desc = None

        info = {
            'id': video_id,
            'url': video_url,
            'ext': 'mp4',
            'title': title,
            'description': desc,
        }
        return [info]

View File

@ -400,6 +400,7 @@ def gen_extractors():
ArteTvIE(),
NBAIE(),
JustinTVIE(),
FunnyOrDieIE(),
GenericIE()
]