mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-12 14:26:49 +01:00
PEP8: applied even more rules
This commit is contained in:
parent
2514d2635e
commit
9e1a5b8455
@ -142,7 +142,7 @@ def win_service_set_status(handle, status_code):
|
|||||||
|
|
||||||
def win_service_main(service_name, real_main, argc, argv_raw):
|
def win_service_main(service_name, real_main, argc, argv_raw):
|
||||||
try:
|
try:
|
||||||
#args = [argv_raw[i].value for i in range(argc)]
|
# args = [argv_raw[i].value for i in range(argc)]
|
||||||
stop_event = threading.Event()
|
stop_event = threading.Event()
|
||||||
handler = HandlerEx(functools.partial(stop_event, win_service_handler))
|
handler = HandlerEx(functools.partial(stop_event, win_service_handler))
|
||||||
h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
|
h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
|
||||||
|
@ -30,7 +30,7 @@ def build_completion(opt_parser):
|
|||||||
for group in opt_parser.option_groups:
|
for group in opt_parser.option_groups:
|
||||||
for option in group.option_list:
|
for option in group.option_list:
|
||||||
long_option = option.get_opt_string().strip('-')
|
long_option = option.get_opt_string().strip('-')
|
||||||
help_msg = shell_quote([option.help])
|
shell_quote([option.help])
|
||||||
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
|
complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
|
||||||
if option._short_opts:
|
if option._short_opts:
|
||||||
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
|
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
|
||||||
|
1
setup.py
1
setup.py
@ -4,7 +4,6 @@
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
import os.path
|
import os.path
|
||||||
import pkg_resources
|
|
||||||
import warnings
|
import warnings
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -116,14 +116,14 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
elif isinstance(expected, type):
|
elif isinstance(expected, type):
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertTrue(isinstance(got, expected),
|
self.assertTrue(isinstance(got, expected),
|
||||||
'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
'Expected type %r for field %s, but got value %r of type %r' % (expected, info_field, got, type(got)))
|
||||||
else:
|
else:
|
||||||
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
if isinstance(expected, compat_str) and expected.startswith('md5:'):
|
||||||
got = 'md5:' + md5(got_dict.get(info_field))
|
got = 'md5:' + md5(got_dict.get(info_field))
|
||||||
else:
|
else:
|
||||||
got = got_dict.get(info_field)
|
got = got_dict.get(info_field)
|
||||||
self.assertEqual(expected, got,
|
self.assertEqual(expected, got,
|
||||||
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||||
|
|
||||||
# Check for the presence of mandatory fields
|
# Check for the presence of mandatory fields
|
||||||
if got_dict.get('_type') != 'playlist':
|
if got_dict.get('_type') != 'playlist':
|
||||||
@ -135,8 +135,8 @@ def expect_info_dict(self, expected_dict, got_dict):
|
|||||||
|
|
||||||
# Are checkable fields missing from the test case definition?
|
# Are checkable fields missing from the test case definition?
|
||||||
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
|
||||||
for key, value in got_dict.items()
|
for key, value in got_dict.items()
|
||||||
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
if value and key in ('title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location'))
|
||||||
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
|
||||||
if missing_keys:
|
if missing_keys:
|
||||||
def _repr(v):
|
def _repr(v):
|
||||||
|
@ -314,7 +314,7 @@ def _bidi_workaround(self, message):
|
|||||||
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
self._output_process.stdin.write((message + '\n').encode('utf-8'))
|
||||||
self._output_process.stdin.flush()
|
self._output_process.stdin.flush()
|
||||||
res = ''.join(self._output_channel.readline().decode('utf-8')
|
res = ''.join(self._output_channel.readline().decode('utf-8')
|
||||||
for _ in range(line_count))
|
for _ in range(line_count))
|
||||||
return res[:-len('\n')]
|
return res[:-len('\n')]
|
||||||
|
|
||||||
def to_screen(self, message, skip_eol=False):
|
def to_screen(self, message, skip_eol=False):
|
||||||
@ -701,13 +701,15 @@ def make_result(embedded_info):
|
|||||||
'It needs to be updated.' % ie_result.get('extractor'))
|
'It needs to be updated.' % ie_result.get('extractor'))
|
||||||
|
|
||||||
def _fixup(r):
|
def _fixup(r):
|
||||||
self.add_extra_info(r,
|
self.add_extra_info(
|
||||||
|
r,
|
||||||
{
|
{
|
||||||
'extractor': ie_result['extractor'],
|
'extractor': ie_result['extractor'],
|
||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
'extractor_key': ie_result['extractor_key'],
|
'extractor_key': ie_result['extractor_key'],
|
||||||
})
|
}
|
||||||
|
)
|
||||||
return r
|
return r
|
||||||
ie_result['entries'] = [
|
ie_result['entries'] = [
|
||||||
self.process_ie_result(_fixup(r), download, extra_info)
|
self.process_ie_result(_fixup(r), download, extra_info)
|
||||||
@ -857,14 +859,14 @@ def process_video_result(self, info_dict, download=True):
|
|||||||
# Two formats have been requested like '137+139'
|
# Two formats have been requested like '137+139'
|
||||||
format_1, format_2 = rf.split('+')
|
format_1, format_2 = rf.split('+')
|
||||||
formats_info = (self.select_format(format_1, formats),
|
formats_info = (self.select_format(format_1, formats),
|
||||||
self.select_format(format_2, formats))
|
self.select_format(format_2, formats))
|
||||||
if all(formats_info):
|
if all(formats_info):
|
||||||
# The first format must contain the video and the
|
# The first format must contain the video and the
|
||||||
# second the audio
|
# second the audio
|
||||||
if formats_info[0].get('vcodec') == 'none':
|
if formats_info[0].get('vcodec') == 'none':
|
||||||
self.report_error('The first format must '
|
self.report_error('The first format must '
|
||||||
'contain the video, try using '
|
'contain the video, try using '
|
||||||
'"-f %s+%s"' % (format_2, format_1))
|
'"-f %s+%s"' % (format_2, format_1))
|
||||||
return
|
return
|
||||||
selected_format = {
|
selected_format = {
|
||||||
'requested_formats': formats_info,
|
'requested_formats': formats_info,
|
||||||
@ -1042,10 +1044,10 @@ def process_info(self, info_dict):
|
|||||||
with open(thumb_filename, 'wb') as thumbf:
|
with open(thumb_filename, 'wb') as thumbf:
|
||||||
shutil.copyfileobj(uf, thumbf)
|
shutil.copyfileobj(uf, thumbf)
|
||||||
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
self.to_screen('[%s] %s: Writing thumbnail to: %s' %
|
||||||
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
(info_dict['extractor'], info_dict['id'], thumb_filename))
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
self.report_warning('Unable to download thumbnail "%s": %s' %
|
self.report_warning('Unable to download thumbnail "%s": %s' %
|
||||||
(info_dict['thumbnail'], compat_str(err)))
|
(info_dict['thumbnail'], compat_str(err)))
|
||||||
|
|
||||||
if not self.params.get('skip_download', False):
|
if not self.params.get('skip_download', False):
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
|
||||||
@ -1066,8 +1068,8 @@ def dl(name, info):
|
|||||||
if not merger._executable:
|
if not merger._executable:
|
||||||
postprocessors = []
|
postprocessors = []
|
||||||
self.report_warning('You have requested multiple '
|
self.report_warning('You have requested multiple '
|
||||||
'formats but ffmpeg or avconv are not installed.'
|
'formats but ffmpeg or avconv are not installed.'
|
||||||
' The formats won\'t be merged')
|
' The formats won\'t be merged')
|
||||||
else:
|
else:
|
||||||
postprocessors = [merger]
|
postprocessors = [merger]
|
||||||
for f in info_dict['requested_formats']:
|
for f in info_dict['requested_formats']:
|
||||||
|
@ -116,7 +116,7 @@ def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
|
|||||||
# Python 2's version is apparently totally broken
|
# Python 2's version is apparently totally broken
|
||||||
|
|
||||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||||
encoding='utf-8', errors='replace'):
|
encoding='utf-8', errors='replace'):
|
||||||
qs, _coerce_result = qs, unicode
|
qs, _coerce_result = qs, unicode
|
||||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||||
r = []
|
r = []
|
||||||
@ -145,10 +145,10 @@ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
|||||||
return r
|
return r
|
||||||
|
|
||||||
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
||||||
encoding='utf-8', errors='replace'):
|
encoding='utf-8', errors='replace'):
|
||||||
parsed_result = {}
|
parsed_result = {}
|
||||||
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
|
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
|
||||||
encoding=encoding, errors=errors)
|
encoding=encoding, errors=errors)
|
||||||
for name, value in pairs:
|
for name, value in pairs:
|
||||||
if name in parsed_result:
|
if name in parsed_result:
|
||||||
parsed_result[name].append(value)
|
parsed_result[name].append(value)
|
||||||
|
@ -225,13 +225,15 @@ def real_download(self, filename, info_dict):
|
|||||||
self.to_screen('[download] Downloading f4m manifest')
|
self.to_screen('[download] Downloading f4m manifest')
|
||||||
manifest = self.ydl.urlopen(man_url).read()
|
manifest = self.ydl.urlopen(man_url).read()
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
http_dl = HttpQuietDownloader(self.ydl,
|
http_dl = HttpQuietDownloader(
|
||||||
|
self.ydl,
|
||||||
{
|
{
|
||||||
'continuedl': True,
|
'continuedl': True,
|
||||||
'quiet': True,
|
'quiet': True,
|
||||||
'noprogress': True,
|
'noprogress': True,
|
||||||
'test': self.params.get('test', False),
|
'test': self.params.get('test', False),
|
||||||
})
|
}
|
||||||
|
)
|
||||||
|
|
||||||
doc = etree.fromstring(manifest)
|
doc = etree.fromstring(manifest)
|
||||||
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
|
||||||
@ -277,7 +279,7 @@ def real_download(self, filename, info_dict):
|
|||||||
def frag_progress_hook(status):
|
def frag_progress_hook(status):
|
||||||
frag_total_bytes = status.get('total_bytes', 0)
|
frag_total_bytes = status.get('total_bytes', 0)
|
||||||
estimated_size = (state['downloaded_bytes'] +
|
estimated_size = (state['downloaded_bytes'] +
|
||||||
(total_frags - state['frag_counter']) * frag_total_bytes)
|
(total_frags - state['frag_counter']) * frag_total_bytes)
|
||||||
if status['status'] == 'finished':
|
if status['status'] == 'finished':
|
||||||
state['downloaded_bytes'] += frag_total_bytes
|
state['downloaded_bytes'] += frag_total_bytes
|
||||||
state['frag_counter'] += 1
|
state['frag_counter'] += 1
|
||||||
@ -287,13 +289,13 @@ def frag_progress_hook(status):
|
|||||||
frag_downloaded_bytes = status['downloaded_bytes']
|
frag_downloaded_bytes = status['downloaded_bytes']
|
||||||
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
|
byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
|
||||||
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
frag_progress = self.calc_percent(frag_downloaded_bytes,
|
||||||
frag_total_bytes)
|
frag_total_bytes)
|
||||||
progress = self.calc_percent(state['frag_counter'], total_frags)
|
progress = self.calc_percent(state['frag_counter'], total_frags)
|
||||||
progress += frag_progress / float(total_frags)
|
progress += frag_progress / float(total_frags)
|
||||||
|
|
||||||
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
|
eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
|
||||||
self.report_progress(progress, format_bytes(estimated_size),
|
self.report_progress(progress, format_bytes(estimated_size),
|
||||||
status.get('speed'), eta)
|
status.get('speed'), eta)
|
||||||
http_dl.add_progress_hook(frag_progress_hook)
|
http_dl.add_progress_hook(frag_progress_hook)
|
||||||
|
|
||||||
frags_filenames = []
|
frags_filenames = []
|
||||||
|
@ -88,7 +88,7 @@ def _clean_json(m):
|
|||||||
for li in doc.findall('./div/ul/li'):
|
for li in doc.findall('./div/ul/li'):
|
||||||
on_click = li.find('.//a').attrib['onClick']
|
on_click = li.find('.//a').attrib['onClick']
|
||||||
trailer_info_json = self._search_regex(self._JSON_RE,
|
trailer_info_json = self._search_regex(self._JSON_RE,
|
||||||
on_click, 'trailer info')
|
on_click, 'trailer info')
|
||||||
trailer_info = json.loads(trailer_info_json)
|
trailer_info = json.loads(trailer_info_json)
|
||||||
title = trailer_info['title']
|
title = trailer_info['title']
|
||||||
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
|
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
|
||||||
|
@ -38,7 +38,7 @@ def _real_extract(self, url):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
|
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
|
||||||
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
|
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
|
||||||
info_json = self._download_webpage(info_url, video_id)
|
info_json = self._download_webpage(info_url, video_id)
|
||||||
info = json.loads(info_json)['result']
|
info = json.loads(info_json)['result']
|
||||||
|
|
||||||
@ -74,8 +74,8 @@ def _real_extract(self, url):
|
|||||||
last_id = ''
|
last_id = ''
|
||||||
for i in itertools.count(1):
|
for i in itertools.count(1):
|
||||||
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
|
req_url = ('http://bambuser.com/xhr-api/index.php?username={user}'
|
||||||
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
'&sort=created&access_mode=0%2C1%2C2&limit={count}'
|
||||||
'&method=broadcast&format=json&vid_older_than={last}'
|
'&method=broadcast&format=json&vid_older_than={last}'
|
||||||
).format(user=user, count=self._STEP, last=last_id)
|
).format(user=user, count=self._STEP, last=last_id)
|
||||||
req = compat_urllib_request.Request(req_url)
|
req = compat_urllib_request.Request(req_url)
|
||||||
# Without setting this header, we wouldn't get any result
|
# Without setting this header, we wouldn't get any result
|
||||||
|
@ -165,10 +165,10 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||||
if re.search(r'id="emp-error" class="notinuk">', webpage):
|
if re.search(r'id="emp-error" class="notinuk">', webpage):
|
||||||
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
|
raise ExtractorError('Currently BBC iPlayer TV programmes are available to play in the UK only',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
playlist = self._download_xml('http://www.bbc.co.uk/iplayer/playlist/%s' % group_id, group_id,
|
||||||
'Downloading playlist XML')
|
'Downloading playlist XML')
|
||||||
|
|
||||||
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems')
|
||||||
if no_items is not None:
|
if no_items is not None:
|
||||||
|
@ -25,8 +25,7 @@ class CNNIE(InfoExtractor):
|
|||||||
'duration': 135,
|
'duration': 135,
|
||||||
'upload_date': '20130609',
|
'upload_date': '20130609',
|
||||||
},
|
},
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
|
"url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
|
||||||
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
|
"md5": "b5cc60c60a3477d185af8f19a2a26f4e",
|
||||||
"info_dict": {
|
"info_dict": {
|
||||||
|
@ -10,47 +10,46 @@
|
|||||||
class CollegeHumorIE(InfoExtractor):
|
class CollegeHumorIE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
{
|
||||||
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
|
||||||
'info_dict': {
|
'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd',
|
||||||
'id': '6902724',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': '6902724',
|
||||||
'title': 'Comic-Con Cosplay Catastrophe',
|
'ext': 'mp4',
|
||||||
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
|
'title': 'Comic-Con Cosplay Catastrophe',
|
||||||
'age_limit': 13,
|
'description': "Fans get creative this year at San Diego. Too creative. And yes, that's really Joss Whedon.",
|
||||||
'duration': 187,
|
'age_limit': 13,
|
||||||
|
'duration': 187,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
||||||
|
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3505939',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Font Conference',
|
||||||
|
'description': "This video wasn't long enough, so we made it double-spaced.",
|
||||||
|
'age_limit': 10,
|
||||||
|
'duration': 179,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# embedded youtube video
|
||||||
|
'url': 'http://www.collegehumor.com/embed/6950306',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Z-bao9fg6Yc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
|
||||||
|
'uploader': 'Mark Dice',
|
||||||
|
'uploader_id': 'MarkDice',
|
||||||
|
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
|
||||||
|
'upload_date': '20140127',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
},
|
},
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://www.collegehumor.com/video/3505939/font-conference',
|
|
||||||
'md5': '72fa701d8ef38664a4dbb9e2ab721816',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '3505939',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Font Conference',
|
|
||||||
'description': "This video wasn't long enough, so we made it double-spaced.",
|
|
||||||
'age_limit': 10,
|
|
||||||
'duration': 179,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# embedded youtube video
|
|
||||||
{
|
|
||||||
'url': 'http://www.collegehumor.com/embed/6950306',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'Z-bao9fg6Yc',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Young Americans Think President John F. Kennedy Died THIS MORNING IN A CAR ACCIDENT!!!',
|
|
||||||
'uploader': 'Mark Dice',
|
|
||||||
'uploader_id': 'MarkDice',
|
|
||||||
'description': 'md5:62c3dab9351fac7bb44b53b69511d87f',
|
|
||||||
'upload_date': '20140127',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -478,7 +478,7 @@ def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True,
|
|||||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning('unable to extract %s; '
|
self._downloader.report_warning('unable to extract %s; '
|
||||||
'please report this issue on http://yt-dl.org/bug' % _name)
|
'please report this issue on http://yt-dl.org/bug' % _name)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
@ -612,7 +612,7 @@ def _media_rating_search(self, html):
|
|||||||
|
|
||||||
def _twitter_search_player(self, html):
|
def _twitter_search_player(self, html):
|
||||||
return self._html_search_meta('twitter:player', html,
|
return self._html_search_meta('twitter:player', html,
|
||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _sort_formats(self, formats):
|
def _sort_formats(self, formats):
|
||||||
if not formats:
|
if not formats:
|
||||||
|
@ -114,7 +114,7 @@ def _real_extract(self, url):
|
|||||||
embed_page = self._download_webpage(embed_url, video_id,
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
'Downloading embed page')
|
'Downloading embed page')
|
||||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||||
'video info', flags=re.MULTILINE)
|
'video info', flags=re.MULTILINE)
|
||||||
info = json.loads(info)
|
info = json.loads(info)
|
||||||
if info.get('error') is not None:
|
if info.get('error') is not None:
|
||||||
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
|
||||||
@ -208,7 +208,7 @@ def _extract_entries(self, id):
|
|||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||||
break
|
break
|
||||||
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
|
||||||
for video_id in orderedSet(video_ids)]
|
for video_id in orderedSet(video_ids)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
class DefenseGouvFrIE(InfoExtractor):
|
class DefenseGouvFrIE(InfoExtractor):
|
||||||
IE_NAME = 'defense.gouv.fr'
|
IE_NAME = 'defense.gouv.fr'
|
||||||
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
|
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
|
||||||
r'ligthboxvideo/base-de-medias/webtv/(.*)')
|
r'ligthboxvideo/base-de-medias/webtv/(.*)')
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
|
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',
|
||||||
@ -28,9 +28,9 @@ def _real_extract(self, url):
|
|||||||
webpage, 'ID')
|
webpage, 'ID')
|
||||||
|
|
||||||
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
|
||||||
+ video_id)
|
+ video_id)
|
||||||
info = self._download_webpage(json_url, title,
|
info = self._download_webpage(json_url, title,
|
||||||
'Downloading JSON config')
|
'Downloading JSON config')
|
||||||
video_url = json.loads(info)['renditions'][0]['url']
|
video_url = json.loads(info)['renditions'][0]['url']
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
|
@ -16,9 +16,9 @@ class DiscoveryIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'MythBusters: Mission Impossible Outtakes',
|
'title': 'MythBusters: Mission Impossible Outtakes',
|
||||||
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
'description': ('Watch Jamie Hyneman and Adam Savage practice being'
|
||||||
' each other -- to the point of confusing Jamie\'s dog -- and '
|
' each other -- to the point of confusing Jamie\'s dog -- and '
|
||||||
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
|
||||||
' back.'),
|
' back.'),
|
||||||
'duration': 156,
|
'duration': 156,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -29,7 +29,7 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
|
video_list_json = self._search_regex(r'var videoListJSON = ({.*?});',
|
||||||
webpage, 'video list', flags=re.DOTALL)
|
webpage, 'video list', flags=re.DOTALL)
|
||||||
video_list = json.loads(video_list_json)
|
video_list = json.loads(video_list_json)
|
||||||
info = video_list['clips'][0]
|
info = video_list['clips'][0]
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -11,18 +11,18 @@
|
|||||||
|
|
||||||
class DropboxIE(InfoExtractor):
|
class DropboxIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
{
|
||||||
'info_dict': {
|
'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
|
||||||
'id': 'nelirfsxnmcfbfh',
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'id': 'nelirfsxnmcfbfh',
|
||||||
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
'ext': 'mp4',
|
||||||
}
|
'title': 'youtube-dl test video \'ä"BaW_jenozKc'
|
||||||
},
|
}
|
||||||
{
|
}, {
|
||||||
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -28,7 +28,7 @@ def _real_extract(self, url):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
video_url = self._search_regex(r'(?:file|source)=(http[^\'"&]*)',
|
||||||
webpage, 'video URL')
|
webpage, 'video URL')
|
||||||
final_url = compat_urllib_parse.unquote(video_url)
|
final_url = compat_urllib_parse.unquote(video_url)
|
||||||
uploader = self._html_search_meta('uploader', webpage)
|
uploader = self._html_search_meta('uploader', webpage)
|
||||||
title = self._og_search_title(webpage).replace(' | eHow', '')
|
title = self._og_search_title(webpage).replace(' | eHow', '')
|
||||||
|
@ -60,8 +60,8 @@ def _login(self):
|
|||||||
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
login_page_req = compat_urllib_request.Request(self._LOGIN_URL)
|
||||||
login_page_req.add_header('Cookie', 'locale=en_US')
|
login_page_req.add_header('Cookie', 'locale=en_US')
|
||||||
login_page = self._download_webpage(login_page_req, None,
|
login_page = self._download_webpage(login_page_req, None,
|
||||||
note='Downloading login page',
|
note='Downloading login page',
|
||||||
errnote='Unable to download login page')
|
errnote='Unable to download login page')
|
||||||
lsd = self._search_regex(
|
lsd = self._search_regex(
|
||||||
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
r'<input type="hidden" name="lsd" value="([^"]*)"',
|
||||||
login_page, 'lsd')
|
login_page, 'lsd')
|
||||||
@ -82,7 +82,7 @@ def _login(self):
|
|||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
try:
|
try:
|
||||||
login_results = self._download_webpage(request, None,
|
login_results = self._download_webpage(request, None,
|
||||||
note='Logging in', errnote='unable to fetch login page')
|
note='Logging in', errnote='unable to fetch login page')
|
||||||
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
|
||||||
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
self._downloader.report_warning('unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
|
||||||
return
|
return
|
||||||
@ -96,7 +96,7 @@ def _login(self):
|
|||||||
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
check_req = compat_urllib_request.Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
|
||||||
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
check_response = self._download_webpage(check_req, None,
|
check_response = self._download_webpage(check_req, None,
|
||||||
note='Confirming login')
|
note='Confirming login')
|
||||||
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
|
||||||
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
self._downloader.report_warning('Unable to confirm login, you have to login in your brower and authorize the login.')
|
||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
|
@ -44,9 +44,9 @@ def _real_extract(self, url):
|
|||||||
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
|
duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False)
|
||||||
|
|
||||||
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
|
like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]',
|
||||||
webpage, 'like count', fatal=False)
|
webpage, 'like count', fatal=False)
|
||||||
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]',
|
||||||
webpage, 'dislike count', fatal=False)
|
webpage, 'dislike count', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -50,7 +50,7 @@ def _real_extract(self, url):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
||||||
embed_page = self._download_webpage(embed_url, video_id,
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
'Downloading embed page')
|
'Downloading embed page')
|
||||||
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
||||||
query = compat_urllib_parse.urlencode({
|
query = compat_urllib_parse.urlencode({
|
||||||
'func': 'GetResults',
|
'func': 'GetResults',
|
||||||
|
@ -32,9 +32,9 @@ def _real_extract(self, url):
|
|||||||
server = random.randint(2, 4)
|
server = random.randint(2, 4)
|
||||||
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
|
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
|
||||||
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
|
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
|
||||||
episode)
|
episode)
|
||||||
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
|
||||||
'playlist', flags=re.DOTALL)
|
'playlist', flags=re.DOTALL)
|
||||||
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
|
||||||
# TODO: return a single multipart video
|
# TODO: return a single multipart video
|
||||||
videos = []
|
videos = []
|
||||||
|
@ -37,7 +37,7 @@ def _real_extract(self, url):
|
|||||||
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
|
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
|
||||||
|
|
||||||
node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
|
node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
|
||||||
first_xml, 'node_id')
|
first_xml, 'node_id')
|
||||||
|
|
||||||
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
|
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
|
||||||
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
|
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
|
||||||
|
@ -55,7 +55,7 @@ def _real_extract(self, url):
|
|||||||
description = self._html_search_meta('description', webpage, 'description')
|
description = self._html_search_meta('description', webpage, 'description')
|
||||||
if description:
|
if description:
|
||||||
upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
|
upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
|
||||||
fatal=False)
|
fatal=False)
|
||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
|
view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
|
||||||
|
@ -234,7 +234,7 @@ def _real_extract(self, url):
|
|||||||
info_json = self._download_webpage(info_url, name)
|
info_json = self._download_webpage(info_url, name)
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
|
return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
|
||||||
ie='Dailymotion')
|
ie='Dailymotion')
|
||||||
|
|
||||||
|
|
||||||
class CultureboxIE(FranceTVBaseInfoExtractor):
|
class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||||
|
@ -784,7 +784,7 @@ def _playlist_from_matches(matches, getter, ie=None):
|
|||||||
|
|
||||||
# Look for Ooyala videos
|
# Look for Ooyala videos
|
||||||
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||||
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||||
|
|
||||||
|
@ -27,10 +27,10 @@ def _real_extract(self, url):
|
|||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
|
||||||
webpage, 'video URL')
|
webpage, 'video URL')
|
||||||
|
|
||||||
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
|
||||||
webpage, 'description', fatal=False)
|
webpage, 'description', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -99,7 +99,7 @@ def _real_extract(self, url):
|
|||||||
video_id = self._find_video_id(webpage)
|
video_id = self._find_video_id(webpage)
|
||||||
result = self._get_video_info(video_id)
|
result = self._get_video_info(video_id)
|
||||||
description = self._html_search_regex(self._DESCRIPTION_RE,
|
description = self._html_search_regex(self._DESCRIPTION_RE,
|
||||||
webpage, 'video description', flags=re.DOTALL)
|
webpage, 'video description', flags=re.DOTALL)
|
||||||
result['description'] = description
|
result['description'] = description
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@ -27,9 +27,9 @@ def _real_extract(self, url):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||||
webpage, 'uploader id', fatal=False)
|
webpage, 'uploader id', fatal=False)
|
||||||
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
desc = self._search_regex(r'"caption":"(.*?)"', webpage, 'description',
|
||||||
fatal=False)
|
fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -45,22 +45,26 @@ def _real_extract(self, url):
|
|||||||
url = self._build_url(query)
|
url = self._build_url(query)
|
||||||
|
|
||||||
flashconfiguration = self._download_xml(url, video_id,
|
flashconfiguration = self._download_xml(url, video_id,
|
||||||
'Downloading flash configuration')
|
'Downloading flash configuration')
|
||||||
file_url = flashconfiguration.find('file').text
|
file_url = flashconfiguration.find('file').text
|
||||||
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
|
||||||
# Replace some of the parameters in the query to get the best quality
|
# Replace some of the parameters in the query to get the best quality
|
||||||
# and http links (no m3u8 manifests)
|
# and http links (no m3u8 manifests)
|
||||||
file_url = re.sub(r'(?<=\?)(.+)$',
|
file_url = re.sub(r'(?<=\?)(.+)$',
|
||||||
lambda m: self._clean_query(m.group()),
|
lambda m: self._clean_query(m.group()),
|
||||||
file_url)
|
file_url)
|
||||||
info = self._download_xml(file_url, video_id,
|
info = self._download_xml(file_url, video_id,
|
||||||
'Downloading video info')
|
'Downloading video info')
|
||||||
item = info.find('channel/item')
|
item = info.find('channel/item')
|
||||||
|
|
||||||
def _bp(p):
|
def _bp(p):
|
||||||
return xpath_with_ns(p,
|
return xpath_with_ns(
|
||||||
{'media': 'http://search.yahoo.com/mrss/',
|
p,
|
||||||
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})
|
{
|
||||||
|
'media': 'http://search.yahoo.com/mrss/',
|
||||||
|
'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
|
||||||
|
}
|
||||||
|
)
|
||||||
formats = []
|
formats = []
|
||||||
for content in item.findall(_bp('media:group/media:content')):
|
for content in item.findall(_bp('media:group/media:content')):
|
||||||
attr = content.attrib
|
attr = content.attrib
|
||||||
|
@ -36,7 +36,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
|
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
|
||||||
iframe_html, 'video url')
|
iframe_html, 'video url')
|
||||||
video_url = unescapeHTML(video_url).replace('\/', '/')
|
video_url = unescapeHTML(video_url).replace('\/', '/')
|
||||||
except RegexNotFoundError:
|
except RegexNotFoundError:
|
||||||
youtube_url = self._search_regex(
|
youtube_url = self._search_regex(
|
||||||
@ -47,9 +47,9 @@ def _real_extract(self, url):
|
|||||||
return self.url_result(youtube_url, ie='Youtube')
|
return self.url_result(youtube_url, ie='Youtube')
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
|
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
|
||||||
html, 'title')
|
html, 'title')
|
||||||
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
|
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
|
||||||
html, 'artist')
|
html, 'artist')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -13,8 +13,10 @@ class KickStarterIE(InfoExtractor):
|
|||||||
'id': '1404461844',
|
'id': '1404461844',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
|
'title': 'Intersection: The Story of Josh Grant by Kyle Cowling',
|
||||||
'description': 'A unique motocross documentary that examines the '
|
'description': (
|
||||||
'life and mind of one of sports most elite athletes: Josh Grant.',
|
'A unique motocross documentary that examines the '
|
||||||
|
'life and mind of one of sports most elite athletes: Josh Grant.'
|
||||||
|
),
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'Embedded video (not using the native kickstarter video service)',
|
'note': 'Embedded video (not using the native kickstarter video service)',
|
||||||
|
@ -45,7 +45,7 @@ def _real_extract(self, url):
|
|||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
|
|
||||||
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
|
page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
|
||||||
'Downloading video JSON')
|
'Downloading video JSON')
|
||||||
video_json = json.loads(page)
|
video_json = json.loads(page)
|
||||||
|
|
||||||
if 'Status' in video_json:
|
if 'Status' in video_json:
|
||||||
|
@ -27,7 +27,7 @@ def _real_extract(self, url):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
|
rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id,
|
||||||
'Downloading video RSS')
|
'Downloading video RSS')
|
||||||
|
|
||||||
title = rss.find('./channel/item/title').text
|
title = rss.find('./channel/item/title').text
|
||||||
description = rss.find('./channel/item/description').text
|
description = rss.find('./channel/item/description').text
|
||||||
|
@ -219,8 +219,8 @@ def _real_extract(self, url):
|
|||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
|
||||||
webpage, 'uploader nickname', fatal=False)
|
webpage, 'uploader nickname', fatal=False)
|
||||||
duration = int_or_none(
|
duration = int_or_none(
|
||||||
self._html_search_meta('video:duration', webpage))
|
self._html_search_meta('video:duration', webpage))
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
# The xml is not well formatted, there are raw '&'
|
# The xml is not well formatted, there are raw '&'
|
||||||
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
|
info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id,
|
||||||
video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
|
video_id, 'Downloading info xml', transform_source=fix_xml_ampersands)
|
||||||
|
|
||||||
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
|
clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
|
||||||
formats = []
|
formats = []
|
||||||
@ -44,7 +44,7 @@ def _real_extract(self, url):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
|
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
|
||||||
webpage, 'description', flags=re.DOTALL)
|
webpage, 'description', flags=re.DOTALL)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -53,7 +53,7 @@ def _extract_mobile_video_formats(self, mtvn_id):
|
|||||||
# Otherwise we get a webpage that would execute some javascript
|
# Otherwise we get a webpage that would execute some javascript
|
||||||
req.add_header('Youtubedl-user-agent', 'curl/7')
|
req.add_header('Youtubedl-user-agent', 'curl/7')
|
||||||
webpage = self._download_webpage(req, mtvn_id,
|
webpage = self._download_webpage(req, mtvn_id,
|
||||||
'Downloading mobile page')
|
'Downloading mobile page')
|
||||||
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
|
||||||
req = HEADRequest(metrics_url)
|
req = HEADRequest(metrics_url)
|
||||||
response = self._request_webpage(req, mtvn_id, 'Resolving url')
|
response = self._request_webpage(req, mtvn_id, 'Resolving url')
|
||||||
@ -66,10 +66,10 @@ def _extract_video_formats(self, mdoc, mtvn_id):
|
|||||||
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
|
||||||
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
|
||||||
self.to_screen('The normal version is not available from your '
|
self.to_screen('The normal version is not available from your '
|
||||||
'country, trying with the mobile version')
|
'country, trying with the mobile version')
|
||||||
return self._extract_mobile_video_formats(mtvn_id)
|
return self._extract_mobile_video_formats(mtvn_id)
|
||||||
raise ExtractorError('This video is not available from your country.',
|
raise ExtractorError('This video is not available from your country.',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for rendition in mdoc.findall('.//rendition'):
|
for rendition in mdoc.findall('.//rendition'):
|
||||||
@ -98,7 +98,7 @@ def _get_video_info(self, itemdoc):
|
|||||||
mediagen_url += '&acceptMethods=fms'
|
mediagen_url += '&acceptMethods=fms'
|
||||||
|
|
||||||
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
||||||
'Downloading video urls')
|
'Downloading video urls')
|
||||||
|
|
||||||
description_node = itemdoc.find('description')
|
description_node = itemdoc.find('description')
|
||||||
if description_node is not None:
|
if description_node is not None:
|
||||||
@ -126,7 +126,7 @@ def _get_video_info(self, itemdoc):
|
|||||||
# This a short id that's used in the webpage urls
|
# This a short id that's used in the webpage urls
|
||||||
mtvn_id = None
|
mtvn_id = None
|
||||||
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||||
'scheme', 'urn:mtvn:id')
|
'scheme', 'urn:mtvn:id')
|
||||||
if mtvn_id_node is not None:
|
if mtvn_id_node is not None:
|
||||||
mtvn_id = mtvn_id_node.text
|
mtvn_id = mtvn_id_node.text
|
||||||
|
|
||||||
@ -188,7 +188,7 @@ def _get_feed_url(self, uri):
|
|||||||
video_id = self._id_from_uri(uri)
|
video_id = self._id_from_uri(uri)
|
||||||
site_id = uri.replace(video_id, '')
|
site_id = uri.replace(video_id, '')
|
||||||
config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
|
config_url = ('http://media.mtvnservices.com/pmt/e1/players/{0}/'
|
||||||
'context4/context5/config.xml'.format(site_id))
|
'context4/context5/config.xml'.format(site_id))
|
||||||
config_doc = self._download_xml(config_url, video_id)
|
config_doc = self._download_xml(config_url, video_id)
|
||||||
feed_node = config_doc.find('.//feed')
|
feed_node = config_doc.find('.//feed')
|
||||||
feed_url = feed_node.text.strip().split('?')[0]
|
feed_url = feed_node.text.strip().split('?')[0]
|
||||||
|
@ -53,7 +53,7 @@ def _real_extract(self, url):
|
|||||||
# songs don't store any useful info in the 'context' variable
|
# songs don't store any useful info in the 'context' variable
|
||||||
def search_data(name):
|
def search_data(name):
|
||||||
return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
|
return self._search_regex(r'data-%s="(.*?)"' % name, webpage,
|
||||||
name)
|
name)
|
||||||
streamUrl = search_data('stream-url')
|
streamUrl = search_data('stream-url')
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -63,7 +63,7 @@ def search_data(name):
|
|||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
|
context = json.loads(self._search_regex(r'context = ({.*?});', webpage,
|
||||||
u'context'))
|
u'context'))
|
||||||
video = context['video']
|
video = context['video']
|
||||||
streamUrl = video['streamUrl']
|
streamUrl = video['streamUrl']
|
||||||
info = {
|
info = {
|
||||||
|
@ -72,7 +72,7 @@ def _real_extract(self, url):
|
|||||||
video_url = mobj.group(1) + '.flv'
|
video_url = mobj.group(1) + '.flv'
|
||||||
|
|
||||||
video_title = self._html_search_regex('<title>([^<]+)</title>',
|
video_title = self._html_search_regex('<title>([^<]+)</title>',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
@ -162,7 +162,7 @@ def _real_extract(self, url):
|
|||||||
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
video_swfobj = compat_urllib_parse.unquote(video_swfobj)
|
||||||
|
|
||||||
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -30,7 +30,7 @@ def _real_extract(self, url):
|
|||||||
video_id = mobj.group(1)
|
video_id = mobj.group(1)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
|
||||||
webpage)
|
webpage)
|
||||||
if m_id is None:
|
if m_id is None:
|
||||||
m_error = re.search(
|
m_error = re.search(
|
||||||
r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
r'(?s)<div class="nation_error">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
||||||
|
@ -38,12 +38,12 @@ def _real_extract(self, url):
|
|||||||
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
|
page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page')
|
||||||
|
|
||||||
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
|
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
|
||||||
page, 'director id', fatal=False)
|
page, 'director id', fatal=False)
|
||||||
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||||
page, 'director name', fatal=False)
|
page, 'director name', fatal=False)
|
||||||
|
|
||||||
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
|
request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||||
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
|
||||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||||
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||||
|
|
||||||
|
@ -125,7 +125,7 @@ def _real_extract(self, url):
|
|||||||
self._downloader.report_warning(u'Got an empty reponse, trying '
|
self._downloader.report_warning(u'Got an empty reponse, trying '
|
||||||
'adding the "newvideos" parameter')
|
'adding the "newvideos" parameter')
|
||||||
response = self._download_webpage(request_url + '&newvideos=true',
|
response = self._download_webpage(request_url + '&newvideos=true',
|
||||||
playlist_title)
|
playlist_title)
|
||||||
response = self._fix_json(response)
|
response = self._fix_json(response)
|
||||||
videos = json.loads(response)
|
videos = json.loads(response)
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
if 'deleted=' in flv_info_webpage:
|
if 'deleted=' in flv_info_webpage:
|
||||||
raise ExtractorError('The video has been deleted.',
|
raise ExtractorError('The video has been deleted.',
|
||||||
expected=True)
|
expected=True)
|
||||||
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
@ -170,13 +170,13 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, list_id)
|
webpage = self._download_webpage(url, list_id)
|
||||||
|
|
||||||
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
||||||
webpage, 'entries')
|
webpage, 'entries')
|
||||||
entries = json.loads(entries_json)
|
entries = json.loads(entries_json)
|
||||||
entries = [{
|
entries = [{
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'ie_key': NiconicoIE.ie_key(),
|
'ie_key': NiconicoIE.ie_key(),
|
||||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
'url': ('http://www.nicovideo.jp/watch/%s' %
|
||||||
entry['item_data']['video_id']),
|
entry['item_data']['video_id']),
|
||||||
} for entry in entries]
|
} for entry in entries]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -27,8 +27,7 @@ class NineGagIE(InfoExtractor):
|
|||||||
"thumbnail": "re:^https?://",
|
"thumbnail": "re:^https?://",
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube']
|
'add_ie': ['Youtube']
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'KklwM',
|
'id': 'KklwM',
|
||||||
|
@ -31,9 +31,9 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
video_uploader = self._html_search_regex(r'Posted\sby\s<a\shref="[A-Za-z0-9/]*">(?P<uploader>[A-Za-z]*)\s</a>',
|
||||||
webpage, 'uploader')
|
webpage, 'uploader')
|
||||||
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
raw_upload_date = self._html_search_regex('<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
|
||||||
webpage, 'date')
|
webpage, 'date')
|
||||||
video_upload_date = unified_strdate(raw_upload_date)
|
video_upload_date = unified_strdate(raw_upload_date)
|
||||||
|
|
||||||
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
player_url = self._html_search_regex(r'<iframe\swidth="[0-9]+"\sheight="[0-9]+"\ssrc="(?P<url>[\S]+)"', webpage, 'url')
|
||||||
|
@ -43,7 +43,7 @@ def _url_for_embed_code(embed_code):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def _build_url_result(cls, embed_code):
|
def _build_url_result(cls, embed_code):
|
||||||
return cls.url_result(cls._url_for_embed_code(embed_code),
|
return cls.url_result(cls._url_for_embed_code(embed_code),
|
||||||
ie=cls.ie_key())
|
ie=cls.ie_key())
|
||||||
|
|
||||||
def _extract_result(self, info, more_info):
|
def _extract_result(self, info, more_info):
|
||||||
return {
|
return {
|
||||||
|
@ -31,7 +31,7 @@ def _real_extract(self, url):
|
|||||||
# Extract URL, uploader, and title from webpage
|
# Extract URL, uploader, and title from webpage
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
|
||||||
webpage, 'info json')
|
webpage, 'info json')
|
||||||
info = json.loads(info_json)
|
info = json.loads(info_json)
|
||||||
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
|
url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
|
||||||
return {
|
return {
|
||||||
|
@ -33,7 +33,7 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
|
json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
|
||||||
webpage, 'json data', flags=re.MULTILINE)
|
webpage, 'json data', flags=re.MULTILINE)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = json.loads(json_data)
|
data = json.loads(json_data)
|
||||||
|
@ -27,8 +27,7 @@ class SBSIE(InfoExtractor):
|
|||||||
'thumbnail': 're:http://.*\.jpg',
|
'thumbnail': 're:http://.*\.jpg',
|
||||||
},
|
},
|
||||||
'add_ies': ['generic'],
|
'add_ies': ['generic'],
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
@ -96,7 +96,7 @@ def _real_extract(self, url):
|
|||||||
if title is None:
|
if title is None:
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
[r'<b>Title:</b> ([^<]*)</div>',
|
[r'<b>Title:</b> ([^<]*)</div>',
|
||||||
r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
|
r'class="tabSeperator">></span><span class="tabText">(.*?)<'],
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
description = self._og_search_description(webpage, default=None)
|
description = self._og_search_description(webpage, default=None)
|
||||||
|
@ -46,7 +46,7 @@ def suitable(cls, url):
|
|||||||
def _extract_video(self, video_id):
|
def _extract_video(self, video_id):
|
||||||
data = compat_urllib_parse.urlencode({'vid': video_id})
|
data = compat_urllib_parse.urlencode({'vid': video_id})
|
||||||
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
|
||||||
video_id, 'Downloading video url')
|
video_id, 'Downloading video url')
|
||||||
image_page = self._download_webpage(
|
image_page = self._download_webpage(
|
||||||
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
|
||||||
video_id, 'Downloading thumbnail info')
|
video_id, 'Downloading thumbnail info')
|
||||||
|
@ -26,7 +26,7 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
||||||
webpage, 'title').strip()
|
webpage, 'title').strip()
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
video_url = self._html_search_regex(
|
||||||
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
|
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
|
||||||
|
@ -282,7 +282,7 @@ def _real_extract(self, url):
|
|||||||
(username, password) = self._get_login_info()
|
(username, password) = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
raise ExtractorError('Erotic broadcasts allowed only for registered users, '
|
raise ExtractorError('Erotic broadcasts allowed only for registered users, '
|
||||||
'use --username and --password options to provide account credentials.', expected=True)
|
'use --username and --password options to provide account credentials.', expected=True)
|
||||||
|
|
||||||
login_form = {
|
login_form = {
|
||||||
'login-hint53': '1',
|
'login-hint53': '1',
|
||||||
|
@ -159,7 +159,7 @@ def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=No
|
|||||||
|
|
||||||
# We have to retrieve the url
|
# We have to retrieve the url
|
||||||
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
streams_url = ('http://api.soundcloud.com/i1/tracks/{0}/streams?'
|
||||||
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
'client_id={1}&secret_token={2}'.format(track_id, self._IPHONE_CLIENT_ID, secret_token))
|
||||||
format_dict = self._download_json(
|
format_dict = self._download_json(
|
||||||
streams_url,
|
streams_url,
|
||||||
track_id, 'Downloading track url')
|
track_id, 'Downloading track url')
|
||||||
|
@ -82,7 +82,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
|
||||||
rootpage = self._download_webpage(rootURL, info['id'],
|
rootpage = self._download_webpage(rootURL, info['id'],
|
||||||
errnote='Unable to download course info page')
|
errnote='Unable to download course info page')
|
||||||
|
|
||||||
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
|
links = orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
|
||||||
info['entries'] = [self.url_result(
|
info['entries'] = [self.url_result(
|
||||||
|
@ -8,24 +8,23 @@
|
|||||||
class TeamcocoIE(InfoExtractor):
|
class TeamcocoIE(InfoExtractor):
|
||||||
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
|
_VALID_URL = r'http://teamcoco\.com/video/(?P<video_id>[0-9]+)?/?(?P<display_id>.*)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant',
|
||||||
'file': '80187.mp4',
|
'file': '80187.mp4',
|
||||||
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
'md5': '3f7746aa0dc86de18df7539903d399ea',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
'title': 'Conan Becomes A Mary Kay Beauty Consultant',
|
||||||
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
||||||
|
'file': '19705.mp4',
|
||||||
|
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
||||||
|
'info_dict': {
|
||||||
|
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
||||||
|
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
|
|
||||||
'file': '19705.mp4',
|
|
||||||
'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
|
|
||||||
'info_dict': {
|
|
||||||
"description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",
|
|
||||||
"title": "Louis C.K. Interview Pt. 1 11/3/11"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -33,9 +33,9 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The illusion of consciousness',
|
'title': 'The illusion of consciousness',
|
||||||
'description': ('Philosopher Dan Dennett makes a compelling '
|
'description': ('Philosopher Dan Dennett makes a compelling '
|
||||||
'argument that not only don\'t we understand our own '
|
'argument that not only don\'t we understand our own '
|
||||||
'consciousness, but that half the time our brains are '
|
'consciousness, but that half the time our brains are '
|
||||||
'actively fooling us.'),
|
'actively fooling us.'),
|
||||||
'uploader': 'Dan Dennett',
|
'uploader': 'Dan Dennett',
|
||||||
'width': 854,
|
'width': 854,
|
||||||
'duration': 1308,
|
'duration': 1308,
|
||||||
@ -93,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor):
|
|||||||
|
|
||||||
def _extract_info(self, webpage):
|
def _extract_info(self, webpage):
|
||||||
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
||||||
webpage, 'info json')
|
webpage, 'info json')
|
||||||
return json.loads(info_json)
|
return json.loads(info_json)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -113,7 +113,7 @@ def _playlist_videos_info(self, url, name):
|
|||||||
'''Returns the videos of the playlist'''
|
'''Returns the videos of the playlist'''
|
||||||
|
|
||||||
webpage = self._download_webpage(url, name,
|
webpage = self._download_webpage(url, name,
|
||||||
'Downloading playlist webpage')
|
'Downloading playlist webpage')
|
||||||
info = self._extract_info(webpage)
|
info = self._extract_info(webpage)
|
||||||
playlist_info = info['playlist']
|
playlist_info = info['playlist']
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ def _real_extract(self, url):
|
|||||||
embed_url = self._html_search_regex(
|
embed_url = self._html_search_regex(
|
||||||
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
|
r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url')
|
||||||
embed_page = self._download_webpage(embed_url, video_id,
|
embed_page = self._download_webpage(embed_url, video_id,
|
||||||
'Downloading embed player page')
|
'Downloading embed player page')
|
||||||
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id')
|
||||||
wat_info = self._download_json(
|
wat_info = self._download_json(
|
||||||
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
|
'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id)
|
||||||
|
@ -47,7 +47,7 @@ def _real_extract(self, url):
|
|||||||
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'
|
||||||
else:
|
else:
|
||||||
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'
|
||||||
'format=smil&mbr=true'.format(video_id))
|
'format=smil&mbr=true'.format(video_id))
|
||||||
|
|
||||||
meta = self._download_xml(smil_url, video_id)
|
meta = self._download_xml(smil_url, video_id)
|
||||||
try:
|
try:
|
||||||
|
@ -28,7 +28,7 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
webpage = self._download_webpage(url, video_id, 'Downloading page')
|
||||||
|
|
||||||
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
|
mobj = re.search(r'(?m)fo\.addVariable\("file",\s"(?P<fileid>[\da-z]+)"\);\n'
|
||||||
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
|
'\s+fo\.addVariable\("s",\s"(?P<serverid>\d+)"\);', webpage)
|
||||||
if mobj is None:
|
if mobj is None:
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, name)
|
webpage = self._download_webpage(url, name)
|
||||||
|
|
||||||
title = self._search_regex(r'<title>(.+?)</title>',
|
title = self._search_regex(r'<title>(.+?)</title>',
|
||||||
webpage, 'video title').replace(' - Trailer Addict', '')
|
webpage, 'video title').replace(' - Trailer Addict', '')
|
||||||
view_count_str = self._search_regex(
|
view_count_str = self._search_regex(
|
||||||
r'<span class="views_n">([0-9,.]+)</span>',
|
r'<span class="views_n">([0-9,.]+)</span>',
|
||||||
webpage, 'view count', fatal=False)
|
webpage, 'view count', fatal=False)
|
||||||
@ -46,9 +46,9 @@ def _real_extract(self, url):
|
|||||||
info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage")
|
info_webpage = self._download_webpage(info_url, video_id, "Downloading the info webpage")
|
||||||
|
|
||||||
final_url = self._search_regex(r'&fileurl=(.+)',
|
final_url = self._search_regex(r'&fileurl=(.+)',
|
||||||
info_webpage, 'Download url').replace('%3F', '?')
|
info_webpage, 'Download url').replace('%3F', '?')
|
||||||
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
thumbnail_url = self._search_regex(r'&image=(.+?)&',
|
||||||
info_webpage, 'thumbnail url')
|
info_webpage, 'thumbnail url')
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
|
r'(?s)<div class="synopsis">.*?<div class="movie_label_info"[^>]*>(.*?)</div>',
|
||||||
|
@ -43,7 +43,7 @@ def _real_extract(self, url):
|
|||||||
webpage, 'iframe url')
|
webpage, 'iframe url')
|
||||||
iframe = self._download_webpage(iframe_url, video_id)
|
iframe = self._download_webpage(iframe_url, video_id)
|
||||||
video_url = self._search_regex(r'<source src="([^"]+)"',
|
video_url = self._search_regex(r'<source src="([^"]+)"',
|
||||||
iframe, 'video url')
|
iframe, 'video url')
|
||||||
|
|
||||||
# The only place where you can get a title, it's not complete,
|
# The only place where you can get a title, it's not complete,
|
||||||
# but searching in other places doesn't work for all videos
|
# but searching in other places doesn't work for all videos
|
||||||
|
@ -154,7 +154,7 @@ def _real_extract(self, url):
|
|||||||
self.to_screen('%s: Already enrolled in' % course_id)
|
self.to_screen('%s: Already enrolled in' % course_id)
|
||||||
|
|
||||||
response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
response = self._download_json('https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,
|
||||||
course_id, 'Downloading course curriculum')
|
course_id, 'Downloading course curriculum')
|
||||||
|
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
|
self.url_result('https://www.udemy.com/%s/#/lecture/%s' % (course_path, asset['id']), 'Udemy')
|
||||||
|
@ -45,13 +45,13 @@ def _real_extract(self, url):
|
|||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
video_title = self._html_search_regex(r'data-title="(?P<title>.+)"',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
|
|
||||||
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
uploader = self._html_search_regex(r'data-content-type="channel".*?>(?P<uploader>.*?)</a>',
|
||||||
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
webpage, 'uploader', fatal=False, flags=re.DOTALL)
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
thumbnail = self._html_search_regex(r'<link rel="image_src" href="(?P<thumb>.*?)"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -30,13 +30,13 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
redirect_page, urlh = self._download_webpage_handle(url, video_id)
|
redirect_page, urlh = self._download_webpage_handle(url, video_id)
|
||||||
new_location = self._search_regex(r'window\.location = \'(.*)\';',
|
new_location = self._search_regex(r'window\.location = \'(.*)\';',
|
||||||
redirect_page, 'redirect location')
|
redirect_page, 'redirect location')
|
||||||
redirect_url = urlh.geturl() + new_location
|
redirect_url = urlh.geturl() + new_location
|
||||||
webpage = self._download_webpage(redirect_url, video_id,
|
webpage = self._download_webpage(redirect_url, video_id,
|
||||||
'Downloading redirect page')
|
'Downloading redirect page')
|
||||||
|
|
||||||
title = self._html_search_regex(r'<title>(.*)</title>',
|
title = self._html_search_regex(r'<title>(.*)</title>',
|
||||||
webpage, 'title').split('/')[0].strip()
|
webpage, 'title').split('/')[0].strip()
|
||||||
|
|
||||||
info_url = "http://vbox7.com/play/magare.do"
|
info_url = "http://vbox7.com/play/magare.do"
|
||||||
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
|
data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
|
||||||
|
@ -48,11 +48,11 @@ def _real_extract(self, url):
|
|||||||
video_url = compat_urlparse.unquote(config['clip']['url'])
|
video_url = compat_urlparse.unquote(config['clip']['url'])
|
||||||
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
|
title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
|
||||||
uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
|
uploader_id = self._html_search_regex(r'<a href="/profile/\d+">(.+?)</a>',
|
||||||
webpage, 'uploader')
|
webpage, 'uploader')
|
||||||
thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
|
thumbnail = self._search_regex(r'<img id="veehdpreview" src="(.+?)"',
|
||||||
webpage, 'thumbnail')
|
webpage, 'thumbnail')
|
||||||
description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
|
description = self._html_search_regex(r'<td class="infodropdown".*?<div>(.*?)<ul',
|
||||||
webpage, 'description', flags=re.DOTALL)
|
webpage, 'description', flags=re.DOTALL)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
|
@ -112,7 +112,7 @@ def _real_extract(self, url):
|
|||||||
if mobj:
|
if mobj:
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
|
page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id,
|
||||||
'Downloading video page')
|
'Downloading video page')
|
||||||
|
|
||||||
rutv_url = RUTVIE._extract_url(page)
|
rutv_url = RUTVIE._extract_url(page)
|
||||||
if rutv_url:
|
if rutv_url:
|
||||||
|
@ -28,11 +28,11 @@ def _real_extract(self, url):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
|
||||||
video_id)
|
video_id)
|
||||||
video = config.find('video')
|
video = config.find('video')
|
||||||
sources = video.find('sources')
|
sources = video.find('sources')
|
||||||
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key)
|
||||||
for key in ['on', 'av', 'off']] if node is not None)
|
for key in ['on', 'av', 'off']] if node is not None)
|
||||||
video_url = url_node.find('url').text
|
video_url = url_node.find('url').text
|
||||||
|
|
||||||
return {'id': video_id,
|
return {'id': video_id,
|
||||||
|
@ -260,7 +260,7 @@ def _real_extract(self, url):
|
|||||||
else:
|
else:
|
||||||
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
||||||
config = self._search_regex(config_re, webpage, 'info section',
|
config = self._search_regex(config_re, webpage, 'info section',
|
||||||
flags=re.DOTALL)
|
flags=re.DOTALL)
|
||||||
config = json.loads(config)
|
config = json.loads(config)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
|
||||||
|
@ -121,7 +121,7 @@ def _login(self):
|
|||||||
}
|
}
|
||||||
|
|
||||||
request = compat_urllib_request.Request('https://login.vk.com/?act=login',
|
request = compat_urllib_request.Request('https://login.vk.com/?act=login',
|
||||||
compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||||
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
|
login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
|
||||||
|
|
||||||
if re.search(r'onLoginFailed', login_page):
|
if re.search(r'onLoginFailed', login_page):
|
||||||
@ -175,7 +175,7 @@ def _real_extract(self, url):
|
|||||||
upload_date = None
|
upload_date = None
|
||||||
mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
|
mobj = re.search(r'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
x = mobj.group(1) + ' ' + mobj.group(2)
|
mobj.group(1) + ' ' + mobj.group(2)
|
||||||
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
|
upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
|
@ -41,7 +41,7 @@ def _real_extract(self, url):
|
|||||||
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
|
videos_urls = sorted(videos_urls, key=lambda u: 'video.sina.com' in u)
|
||||||
player_url = videos_urls[-1]
|
player_url = videos_urls[-1]
|
||||||
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
|
m_sina = re.match(r'https?://video\.sina\.com\.cn/v/b/(\d+)-\d+\.html',
|
||||||
player_url)
|
player_url)
|
||||||
if m_sina is not None:
|
if m_sina is not None:
|
||||||
self.to_screen('Sina video detected')
|
self.to_screen('Sina video detected')
|
||||||
sina_id = m_sina.group(1)
|
sina_id = m_sina.group(1)
|
||||||
|
@ -67,17 +67,17 @@ def is_hd(webpage):
|
|||||||
description = mobj.group(1) if mobj else None
|
description = mobj.group(1) if mobj else None
|
||||||
|
|
||||||
upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
|
upload_date = self._html_search_regex(r'hint=\'(\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}\'',
|
||||||
webpage, 'upload date', fatal=False)
|
webpage, 'upload date', fatal=False)
|
||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
|
uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',
|
||||||
webpage, 'uploader id', default='anonymous')
|
webpage, 'uploader id', default='anonymous')
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
|
thumbnail = self._html_search_regex(r'<video\s+.*?poster="([^"]+)".*?>', webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
|
duration = parse_duration(self._html_search_regex(r'<span>Runtime:</span> (\d+:\d+)</div>',
|
||||||
webpage, 'duration', fatal=False))
|
webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
|
view_count = self._html_search_regex(r'<span>Views:</span> ([^<]+)</div>', webpage, 'view count', fatal=False)
|
||||||
if view_count:
|
if view_count:
|
||||||
|
@ -30,14 +30,14 @@ def _real_extract(self, url):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(r'flv_url=(.*?)&',
|
video_url = self._search_regex(r'flv_url=(.*?)&',
|
||||||
webpage, 'video URL')
|
webpage, 'video URL')
|
||||||
video_url = compat_urllib_parse.unquote(video_url)
|
video_url = compat_urllib_parse.unquote(video_url)
|
||||||
|
|
||||||
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
|
video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM',
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
|
|
||||||
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
|
video_thumbnail = self._search_regex(r'url_bigthumb=(.*?)&',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', fatal=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -229,7 +229,7 @@ def _get_n_results(self, query, n):
|
|||||||
for pagenum in itertools.count(0):
|
for pagenum in itertools.count(0):
|
||||||
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
|
||||||
info = self._download_json(result_url, query,
|
info = self._download_json(result_url, query,
|
||||||
note='Downloading results page ' + str(pagenum + 1))
|
note='Downloading results page ' + str(pagenum + 1))
|
||||||
m = info['m']
|
m = info['m']
|
||||||
results = info['results']
|
results = info['results']
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ def _real_extract(self, url):
|
|||||||
# -8 means blocked outside China.
|
# -8 means blocked outside China.
|
||||||
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
error = config['data'][0].get('error') # Chinese and English, separated by newline.
|
||||||
raise ExtractorError(error or 'Server reported error %i' % error_code,
|
raise ExtractorError(error or 'Server reported error %i' % error_code,
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
video_title = config['data'][0]['title']
|
video_title = config['data'][0]['title']
|
||||||
seed = config['data'][0]['seed']
|
seed = config['data'][0]['seed']
|
||||||
|
@ -64,7 +64,7 @@ def _real_extract(self, url):
|
|||||||
# Get all of the links from the page
|
# Get all of the links from the page
|
||||||
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
|
||||||
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
|
download_list_html = self._search_regex(DOWNLOAD_LIST_RE,
|
||||||
webpage, 'download list').strip()
|
webpage, 'download list').strip()
|
||||||
LINK_RE = r'<a href="([^"]+)">'
|
LINK_RE = r'<a href="([^"]+)">'
|
||||||
links = re.findall(LINK_RE, download_list_html)
|
links = re.findall(LINK_RE, download_list_html)
|
||||||
|
|
||||||
|
@ -950,7 +950,7 @@ def _map_to_format_list(urlmap):
|
|||||||
|
|
||||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||||
self.to_screen('{%s} signature length %s, %s' %
|
self.to_screen('{%s} signature length %s, %s' %
|
||||||
(format_id, parts_sizes, player_desc))
|
(format_id, parts_sizes, player_desc))
|
||||||
|
|
||||||
signature = self._decrypt_signature(
|
signature = self._decrypt_signature(
|
||||||
encrypted_sig, video_id, player_url, age_gate)
|
encrypted_sig, video_id, player_url, age_gate)
|
||||||
@ -1214,7 +1214,7 @@ def _real_extract(self, url):
|
|||||||
class YoutubeTopListIE(YoutubePlaylistIE):
|
class YoutubeTopListIE(YoutubePlaylistIE):
|
||||||
IE_NAME = 'youtube:toplist'
|
IE_NAME = 'youtube:toplist'
|
||||||
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
|
||||||
' (Example: "yttoplist:music:Top Tracks")')
|
' (Example: "yttoplist:music:Top Tracks")')
|
||||||
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'yttoplist:music:Trending',
|
'url': 'yttoplist:music:Trending',
|
||||||
|
Loading…
Reference in New Issue
Block a user