Merge remote-tracking branch 'yasoob/master'

This commit is contained in:
Philipp Hagemeister 2013-06-25 21:09:15 +02:00
commit d746cd88c2
3 changed files with 43 additions and 0 deletions

View File

@ -686,5 +686,14 @@
"upload_date": "20130624", "upload_date": "20130624",
"uploader": "Hurts" "uploader": "Hurts"
} }
},
{
"name": "Tudou",
"url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html",
"file": "159447792.f4v",
"md5": "ad7c358a01541e926a1e413612c6b10a",
"info_dict": {
"title": "卡马乔国足开大脚长传冲吊集锦"
}
} }
] ]

View File

@ -58,6 +58,7 @@
from .youporn import YouPornIE from .youporn import YouPornIE
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
from .zdf import ZDFIE from .zdf import ZDFIE
from .tudou import TudouIE
def gen_extractors(): def gen_extractors():
""" Return a list of an instance of every supported extractor. """ Return a list of an instance of every supported extractor.
@ -129,6 +130,7 @@ def gen_extractors():
BreakIE(), BreakIE(),
VevoIE(), VevoIE(),
JukeboxIE(), JukeboxIE(),
TudouIE(),
GenericIE() GenericIE()
] ]

View File

@ -0,0 +1,32 @@
import re
from .common import InfoExtractor
class TudouIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(2).replace('.html','')
webpage = self._download_webpage(url, video_id)
video_id = re.search('"k":(.+?),',webpage).group(1)
title = re.search(",kw:\"(.+)\"",webpage)
if title is None:
title = re.search(",kw: \'(.+)\'",webpage)
title = title.group(1)
thumbnail_url = re.search(",pic: \'(.+?)\'",webpage)
if thumbnail_url is None:
thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
thumbnail_url = thumbnail_url.group(1)
info_url = "http://v2.tudou.com/f?id="+str(video_id)
webpage = self._download_webpage(info_url, video_id, "Opening the info webpage")
final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1)
ext = (final_url.split('?')[0]).split('.')[-1]
return [{
'id': video_id,
'url': final_url,
'ext': ext,
'title': title,
'thumbnail': thumbnail_url,
}]