Merge branch 'master' of github.com:rg3/youtube-dl

This commit is contained in:
Sergey M․ 2015-10-14 20:32:11 +06:00
commit 4aa353673b
6 changed files with 121 additions and 23 deletions

View File

@ -167,6 +167,7 @@
from .facebook import FacebookIE from .facebook import FacebookIE
from .faz import FazIE from .faz import FazIE
from .fc2 import FC2IE from .fc2 import FC2IE
from .fczenit import FczenitIE
from .firstpost import FirstpostIE from .firstpost import FirstpostIE
from .firsttv import FirstTVIE from .firsttv import FirstTVIE
from .fivemin import FiveMinIE from .fivemin import FiveMinIE

View File

@ -158,7 +158,7 @@ def _extract_session_code(self, html):
def _extract_session_day(self, html): def _extract_session_day(self, html):
m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html) m = re.search(r'<li class="day">\s*<a href="/Events/[^"]+">(?P<day>[^<]+)</a>\s*</li>', html)
return m.group('day') if m is not None else None return m.group('day').strip() if m is not None else None
def _extract_session_room(self, html): def _extract_session_room(self, html):
m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html) m = re.search(r'<li class="room">\s*(?P<room>.+?)\s*</li>', html)
@ -224,12 +224,12 @@ def _extract_entry_item(self, html, content_path):
if contents is None: if contents is None:
return contents return contents
authors = self._extract_authors(html) if len(contents) > 1:
raise ExtractorError('Got more than one entry')
result = contents[0]
result['authors'] = self._extract_authors(html)
for content in contents: return result
content['authors'] = authors
return contents
def _extract_session(self, html, content_path): def _extract_session(self, html, content_path):
contents = self._extract_content(html, content_path) contents = self._extract_content(html, content_path)

View File

@ -27,9 +27,7 @@ def _real_extract(self, url):
final_url = self._search_regex( final_url = self._search_regex(
r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
description = self._html_search_regex( description = self._html_search_meta('description', webpage)
r'<meta name="description" content="(.+?)" />',
webpage, 'video description')
thumbnail = self._search_regex( thumbnail = self._search_regex(
r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
webpage, 'thumbnail url') webpage, 'thumbnail url')

View File

@ -0,0 +1,41 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class FczenitIE(InfoExtractor):
    """Information extractor for video pages on fc-zenit.ru.

    The page embeds a ``bitrates: [...]`` player configuration block whose
    entries each carry a media ``url`` and a numeric ``bitrate``; every
    entry becomes one format.
    """

    _VALID_URL = r'https?://(?:www\.)?fc-zenit\.ru/video/gl(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://fc-zenit.ru/video/gl6785/',
        'md5': '458bacc24549173fe5a5aa29174a5606',
        'info_dict': {
            'id': '6785',
            'ext': 'mp4',
            'title': '«Зенит-ТВ»: как Олег Шатов играл против «Урала»',
        },
    }

    def _real_extract(self, url):
        """Download the video page and build the info dict.

        Returns a dict with the video ``id``, ``title`` and the list of
        ``formats`` (one per bitrate entry found in the page).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<div class=\"photoalbum__title\">([^<]+)', webpage, 'title')

        # The line following "bitrates:" holds all url/bitrate pairs.
        bitrates_raw = self._html_search_regex(
            r'bitrates:.*\n(.*)\]', webpage, 'video URL')
        # Capture the whole run of digits: the previous lazy "{3}?" was
        # equivalent to "{3}" and truncated bitrates of four or more digits.
        bitrates = re.findall(
            r'url:.?\'(.+?)\'.*?bitrate:.?([0-9]+)', bitrates_raw)

        formats = [{
            'url': furl,
            # tbr must be numeric so formats are ordered by value rather
            # than compared as strings.
            'tbr': int(tbr),
        } for furl, tbr in bitrates]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
        }

View File

@ -2,11 +2,15 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str,
compat_urllib_parse, compat_urllib_parse,
compat_parse_qs,
compat_urllib_parse_urlparse,
compat_urlparse,
) )
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
parse_duration,
replace_extension,
) )
@ -28,6 +32,7 @@ class FiveMinIE(InfoExtractor):
'id': '518013791', 'id': '518013791',
'ext': 'mp4', 'ext': 'mp4',
'title': 'iPad Mini with Retina Display Review', 'title': 'iPad Mini with Retina Display Review',
'duration': 177,
}, },
}, },
{ {
@ -38,9 +43,52 @@ class FiveMinIE(InfoExtractor):
'id': '518086247', 'id': '518086247',
'ext': 'mp4', 'ext': 'mp4',
'title': 'How to Make a Next-Level Fruit Salad', 'title': 'How to Make a Next-Level Fruit Salad',
'duration': 184,
}, },
}, },
] ]
# Maps the error code found in response['errorMessage'] to a human-readable
# explanation. _real_extract looks the code up here when the response reports
# failure and falls back to the raw code if it is not listed.
_ERRORS = {
'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
}
# Maps a rendition ID (rendition['ID'] in _real_extract) to the frame
# dimensions attached to the resulting format. IDs missing from this table
# yield an empty dict at the lookup site, so width/height become None.
# NOTE(review): IDs 16-128 repeat the dimensions of IDs 1-4; presumably they
# identify the same sizes in a different rendition type - confirm.
_QUALITIES = {
1: {
'width': 640,
'height': 360,
},
2: {
'width': 854,
'height': 480,
},
4: {
'width': 1280,
'height': 720,
},
8: {
'width': 1920,
'height': 1080,
},
16: {
'width': 640,
'height': 360,
},
32: {
'width': 854,
'height': 480,
},
64: {
'width': 1280,
'height': 720,
},
128: {
'width': 640,
'height': 360,
},
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -59,26 +107,36 @@ def _real_extract(self, url):
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query, 'https://syn.5min.com/handlers/SenseHandler.ashx?' + query,
video_id) video_id)
if not response['success']: if not response['success']:
err_msg = response['errorMessage'] raise ExtractorError(
if err_msg == 'ErrorVideoUserNotGeo': '%s said: %s' % (
msg = 'Video not available from your location' self.IE_NAME,
else: self._ERRORS.get(response['errorMessage'], response['errorMessage'])),
msg = 'Aol said: %s' % err_msg expected=True)
raise ExtractorError(msg, expected=True, video_id=video_id)
info = response['binding'][0] info = response['binding'][0]
second_id = compat_str(int(video_id[:-2]) + 1)
formats = [] formats = []
for quality, height in [(1, 320), (2, 480), (4, 720), (8, 1080)]: parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
if any(r['ID'] == quality for r in info['Renditions']): compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
for rendition in info['Renditions']:
if rendition['RenditionType'] == 'm3u8':
formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
elif rendition['RenditionType'] == 'aac':
continue
else:
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
quality = self._QUALITIES.get(rendition['ID'], {})
formats.append({ formats.append({
'format_id': compat_str(quality), 'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']),
'url': 'http://avideos.5min.com/%s/%s/%s_%s.mp4' % (second_id[-3:], second_id, video_id, quality), 'url': rendition_url,
'height': height, 'width': quality.get('width'),
'height': quality.get('height'),
}) })
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': info['Title'], 'title': info['Title'],
'thumbnail': info.get('ThumbURL'),
'duration': parse_duration(info.get('Duration')),
'formats': formats, 'formats': formats,
} }

View File

@ -212,7 +212,7 @@ def _verify_video_password(self, url, video_id, webpage):
url = url.replace('http://', 'https://') url = url.replace('http://', 'https://')
password_request = compat_urllib_request.Request(url + '/password', data) password_request = compat_urllib_request.Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'clip_v=1; vuid=%s' % vuid) password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
password_request.add_header('Referer', url) password_request.add_header('Referer', url)
return self._download_webpage( return self._download_webpage(
password_request, video_id, password_request, video_id,