[9gag] Add extractor

This commit is contained in:
Philipp Hagemeister 2013-12-05 14:29:08 +01:00
parent 29030c0a4c
commit 7fc3fa0545
3 changed files with 73 additions and 3 deletions

View File

@ -405,7 +405,8 @@ def add_extra_info(info_dict, extra_info):
for key, value in extra_info.items(): for key, value in extra_info.items():
info_dict.setdefault(key, value) info_dict.setdefault(key, value)
def extract_info(self, url, download=True, ie_key=None, extra_info={}): def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True):
''' '''
Returns a list with a dictionary for each video we find. Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos. If 'download', also downloads the videos.
@ -441,7 +442,10 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={}):
'webpage_url': url, 'webpage_url': url,
'extractor_key': ie.ie_key(), 'extractor_key': ie.ie_key(),
}) })
if process:
return self.process_ie_result(ie_result, download, extra_info) return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except ExtractorError as de: # An error we somewhat expected except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback()) self.report_error(compat_str(de), de.format_traceback())
break break
@ -474,8 +478,32 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
download, download,
ie_key=ie_result.get('ie_key'), ie_key=ie_result.get('ie_key'),
extra_info=extra_info) extra_info=extra_info)
elif result_type == 'playlist': elif result_type == 'url_transparent':
# Use the information from the embedding page
info = self.extract_info(
ie_result['url'], ie_key=ie_result.get('ie_key'),
extra_info=extra_info, download=False, process=False)
def make_result(embedded_info):
new_result = ie_result.copy()
for f in ('_type', 'url', 'ext', 'player_url', 'formats',
'entries', 'urlhandle', 'ie_key', 'duration',
'subtitles', 'annotations', 'format'):
if f in new_result:
del new_result[f]
if f in embedded_info:
new_result[f] = embedded_info[f]
return new_result
new_result = make_result(info)
assert new_result.get('_type') != 'url_transparent'
if new_result.get('_type') == 'compat_list':
new_result['entries'] = [
make_result(e) for e in new_result['entries']]
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type == 'playlist':
# We process each entry in the playlist # We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None) playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist) self.to_screen(u'[download] Downloading playlist: %s' % playlist)

View File

@ -102,6 +102,7 @@
from .newgrounds import NewgroundsIE from .newgrounds import NewgroundsIE
from .nhl import NHLIE, NHLVideocenterIE from .nhl import NHLIE, NHLVideocenterIE
from .niconico import NiconicoIE from .niconico import NiconicoIE
from .ninegag import NineGagIE
from .nowvideo import NowVideoIE from .nowvideo import NowVideoIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .orf import ORFIE from .orf import ORFIE

View File

@ -0,0 +1,41 @@
import json
import re
from .common import InfoExtractor
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
_VALID_URL = r'^https?://(?:www\.)?9gag\.tv/v/(?P<id>[0-9]+)'
_TEST = {
u"url": u"http://9gag.tv/v/1912",
u"file": u"1912.mp4",
u"info_dict": {
u"description": u"This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
u"title": u"\"People Are Awesome 2013\" Is Absolutely Awesome"
},
u'add_ie': [u'Youtube']
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
data_json = self._html_search_regex(r'''(?x)
<div\s*id="tv-video"\s*data-video-source="youtube"\s*
data-video-meta="([^"]+)"''', webpage, u'video metadata')
data = json.loads(data_json)
return {
'_type': 'url_transparent',
'url': data['youtubeVideoId'],
'ie_key': 'Youtube',
'id': video_id,
'title': data['title'],
'description': data['description'],
'view_count': int(data['view_count']),
'thumbnail': data['thumbnail_url'],
}