Merge remote-tracking branch 'origin/master' into IE_cleanup

Conflicts:
	youtube_dl/FileDownloader.py
This commit is contained in:
Filippo Valsorda 2012-11-27 23:20:32 +01:00
commit c63cc10ffa
12 changed files with 346 additions and 102 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@
*~ *~
wine-py2exe/ wine-py2exe/
py2exe.log py2exe.log
*.kate-swp

View File

@ -1 +1 @@
2012.11.28 2012.11.29

View File

@ -1,4 +1,4 @@
% youtube-dl(1) % YOUTUBE-DL(1)
# NAME # NAME
youtube-dl youtube-dl
@ -20,6 +20,11 @@ # OPTIONS
-i, --ignore-errors continue on download errors -i, --ignore-errors continue on download errors
-r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m) -r, --rate-limit LIMIT download rate limit (e.g. 50k or 44.6m)
-R, --retries RETRIES number of retries (default is 10) -R, --retries RETRIES number of retries (default is 10)
--buffer-size SIZE size of download buffer (e.g. 1024 or 16k) (default
is 1024)
--no-resize-buffer do not automatically adjust the buffer size. By
default, the buffer size is automatically resized
from an initial value of SIZE.
--dump-user-agent display the current browser identification --dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent --user-agent UA specify a custom user agent
--list-extractors List all supported extractors and the URLs they --list-extractors List all supported extractors and the URLs they
@ -108,6 +113,28 @@ # CONFIGURATION
You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`. You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
# OUTPUT TEMPLATE
The `-o` option allows users to indicate a template for the output file names. The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "http://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences have the format `%(NAME)s`. To clarify, that is a percent symbol followed by a name in parenthesis, followed by a lowercase S. Allowed names are:
- `id`: The sequence will be replaced by the video identifier.
- `url`: The sequence will be replaced by the video URL.
- `uploader`: The sequence will be replaced by the nickname of the person who uploaded the video.
- `upload_date`: The sequence will be replaced by the upload date in YYYYMMDD format.
- `title`: The sequence will be replaced by the video title.
- `ext`: The sequence will be replaced by the appropriate extension (like flv or mp4).
- `epoch`: The sequence will be replaced by the Unix epoch when creating the file.
- `autonumber`: The sequence will be replaced by a five-digit number that will be increased with each download, starting at zero.
The current default template is `%(id)s.%(ext)s`, but that will be switchted to `%(title)s-%(id)s.%(ext)s` (which can be requested with `-t` at the moment).
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title:
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
$ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
youtube-dl_test_video_.mp4 # A simple file name
# FAQ # FAQ
### Can you please put the -b option back? ### Can you please put the -b option back?

View File

@ -56,7 +56,7 @@ def test_sanitize_filename_restricted(self):
self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c') self.assertEqual(sanitize_filename(u'aäb中国的c', restricted=True), u'a_b_c')
self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
forbidden = u'"\0\\/&: \'\t\n' forbidden = u'"\0\\/&!: \'\t\n'
for fc in forbidden: for fc in forbidden:
for fbc in forbidden: for fbc in forbidden:
self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))

Binary file not shown.

View File

@ -1,4 +1,4 @@
.TH youtube-dl 1 "" .TH YOUTUBE-DL 1 ""
.SH NAME .SH NAME
.PP .PP
youtube-dl youtube-dl
@ -24,6 +24,11 @@ redistribute it or use it however you like.
-i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors -i,\ --ignore-errors\ \ \ \ \ \ continue\ on\ download\ errors
-r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m) -r,\ --rate-limit\ LIMIT\ \ \ download\ rate\ limit\ (e.g.\ 50k\ or\ 44.6m)
-R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10) -R,\ --retries\ RETRIES\ \ \ \ number\ of\ retries\ (default\ is\ 10)
--buffer-size\ SIZE\ \ \ \ \ \ \ size\ of\ download\ buffer\ (e.g.\ 1024\ or\ 16k)\ (default
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ is\ 1024)
--no-resize-buffer\ \ \ \ \ \ \ do\ not\ automatically\ adjust\ the\ buffer\ size.\ By
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ default,\ the\ buffer\ size\ is\ automatically\ resized
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ from\ an\ initial\ value\ of\ SIZE.
--dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification --dump-user-agent\ \ \ \ \ \ \ \ display\ the\ current\ browser\ identification
--user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent --user-agent\ UA\ \ \ \ \ \ \ \ \ \ specify\ a\ custom\ user\ agent
--list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they --list-extractors\ \ \ \ \ \ \ \ List\ all\ supported\ extractors\ and\ the\ URLs\ they
@ -139,6 +144,59 @@ You can configure youtube-dl by placing default arguments (such as
\f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not \f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not
copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or
\f[C]~/.local/config/youtube-dl.conf\f[]. \f[C]~/.local/config/youtube-dl.conf\f[].
.SH OUTPUT TEMPLATE
.PP
The \f[C]-o\f[] option allows users to indicate a template for the
output file names.
The basic usage is not to set any template arguments when downloading a
single file, like in
\f[C]youtube-dl\ -o\ funny_video.flv\ "http://some/video"\f[].
However, it may contain special sequences that will be replaced when
downloading each video.
The special sequences have the format \f[C]%(NAME)s\f[].
To clarify, that is a percent symbol followed by a name in parenthesis,
followed by a lowercase S.
Allowed names are:
.IP \[bu] 2
\f[C]id\f[]: The sequence will be replaced by the video identifier.
.IP \[bu] 2
\f[C]url\f[]: The sequence will be replaced by the video URL.
.IP \[bu] 2
\f[C]uploader\f[]: The sequence will be replaced by the nickname of the
person who uploaded the video.
.IP \[bu] 2
\f[C]upload_date\f[]: The sequence will be replaced by the upload date
in YYYYMMDD format.
.IP \[bu] 2
\f[C]title\f[]: The sequence will be replaced by the video title.
.IP \[bu] 2
\f[C]ext\f[]: The sequence will be replaced by the appropriate extension
(like flv or mp4).
.IP \[bu] 2
\f[C]epoch\f[]: The sequence will be replaced by the Unix epoch when
creating the file.
.IP \[bu] 2
\f[C]autonumber\f[]: The sequence will be replaced by a five-digit
number that will be increased with each download, starting at zero.
.PP
The current default template is \f[C]%(id)s.%(ext)s\f[], but that will
be switchted to \f[C]%(title)s-%(id)s.%(ext)s\f[] (which can be
requested with \f[C]-t\f[] at the moment).
.PP
In some cases, you don\[aq]t want special characters such as 中, spaces,
or &, such as when transferring the downloaded filename to a Windows
system or the filename through an 8bit-unsafe channel.
In these cases, add the \f[C]--restrict-filenames\f[] flag to get a
shorter title:
.IP
.nf
\f[C]
$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc
youtube-dl\ test\ video\ \[aq]\[aq]_ä↭𝕐.mp4\ \ \ \ #\ All\ kinds\ of\ weird\ characters
$\ youtube-dl\ --get-filename\ -o\ "%(title)s.%(ext)s"\ BaW_jenozKc\ --restrict-filenames
youtube-dl_test_video_.mp4\ \ \ \ \ \ \ \ \ \ #\ A\ simple\ file\ name
\f[]
.fi
.SH FAQ .SH FAQ
.SS Can you please put the -b option back? .SS Can you please put the -b option back?
.PP .PP

View File

@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts local cur prev opts
COMPREPLY=() COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}" cur="${COMP_WORDS[COMP_CWORD]}"
opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --buffer-size --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --no-resize-buffer --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )

BIN
youtube-dl.exe Executable file → Normal file

Binary file not shown.

View File

@ -62,6 +62,8 @@ class FileDownloader(object):
ratelimit: Download speed limit, in bytes/sec. ratelimit: Download speed limit, in bytes/sec.
nooverwrites: Prevent overwriting files. nooverwrites: Prevent overwriting files.
retries: Number of times to retry for HTTP error 5xx retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible. continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar. noprogress: Do not print the progress bar.
playliststart: Playlist item to start at. playliststart: Playlist item to start at.
@ -106,7 +108,7 @@ def format_bytes(bytes):
if bytes == 0.0: if bytes == 0.0:
exponent = 0 exponent = 0
else: else:
exponent = long(math.log(bytes, 1024.0)) exponent = int(math.log(bytes, 1024.0))
suffix = 'bkMGTPEZY'[exponent] suffix = 'bkMGTPEZY'[exponent]
converted = float(bytes) / float(1024 ** exponent) converted = float(bytes) / float(1024 ** exponent)
return '%.2f%s' % (converted, suffix) return '%.2f%s' % (converted, suffix)
@ -125,7 +127,7 @@ def calc_eta(start, now, total, current):
if current == 0 or dif < 0.001: # One millisecond if current == 0 or dif < 0.001: # One millisecond
return '--:--' return '--:--'
rate = float(current) / dif rate = float(current) / dif
eta = long((float(total) - float(current)) / rate) eta = int((float(total) - float(current)) / rate)
(eta_mins, eta_secs) = divmod(eta, 60) (eta_mins, eta_secs) = divmod(eta, 60)
if eta_mins > 99: if eta_mins > 99:
return '--:--' return '--:--'
@ -177,7 +179,7 @@ def to_screen(self, message, skip_eol=False):
if not self.params.get('quiet', False): if not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol] terminator = [u'\n', u''][skip_eol]
output = message + terminator output = message + terminator
if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
output = output.encode(preferredencoding(), 'ignore') output = output.encode(preferredencoding(), 'ignore')
self._screen_file.write(output) self._screen_file.write(output)
self._screen_file.flush() self._screen_file.flush()
@ -325,9 +327,13 @@ def prepare_filename(self, info_dict):
"""Generate the output filename.""" """Generate the output filename."""
try: try:
template_dict = dict(info_dict) template_dict = dict(info_dict)
template_dict['epoch'] = unicode(int(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads) template_dict['epoch'] = int(time.time())
template_dict['autonumber'] = u'%05d' % self._num_downloads
template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items()) template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
template_dict = dict((k, sanitize_filename(u(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
filename = self.params['outtmpl'] % template_dict filename = self.params['outtmpl'] % template_dict
return filename return filename
except (ValueError, KeyError), err: except (ValueError, KeyError), err:
@ -370,7 +376,6 @@ def process_info(self, info_dict):
raise MaxDownloadsReached() raise MaxDownloadsReached()
filename = self.prepare_filename(info_dict) filename = self.prepare_filename(info_dict)
filename = sanitize_filename(filename, self.params.get('restrictfilenames'))
# Forced printings # Forced printings
if self.params.get('forcetitle', False): if self.params.get('forcetitle', False):
@ -398,7 +403,7 @@ def process_info(self, info_dict):
if dn != '' and not os.path.exists(dn): # dn is already encoded if dn != '' and not os.path.exists(dn): # dn is already encoded
os.makedirs(dn) os.makedirs(dn)
except (OSError, IOError), err: except (OSError, IOError), err:
self.trouble(u'ERROR: unable to create directory ' + unicode(err)) self.trouble(u'ERROR: unable to create directory ' + u(err))
return return
if self.params.get('writedescription', False): if self.params.get('writedescription', False):
@ -623,7 +628,7 @@ def _do_download(self, filename, info_dict):
else: else:
# Examine the reported length # Examine the reported length
if (content_length is not None and if (content_length is not None and
(resume_len - 100 < long(content_length) < resume_len + 100)): (resume_len - 100 < int(content_length) < resume_len + 100)):
# The file had already been fully downloaded. # The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that # Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file, # YouTube sometimes adds or removes a few bytes from the end of the file,
@ -650,10 +655,10 @@ def _do_download(self, filename, info_dict):
data_len = data.info().get('Content-length', None) data_len = data.info().get('Content-length', None)
if data_len is not None: if data_len is not None:
data_len = long(data_len) + resume_len data_len = int(data_len) + resume_len
data_len_str = self.format_bytes(data_len) data_len_str = self.format_bytes(data_len)
byte_counter = 0 + resume_len byte_counter = 0 + resume_len
block_size = 1024 block_size = self.params.get('buffersize', 1024)
start = time.time() start = time.time()
while True: while True:
# Download and write # Download and write
@ -679,6 +684,7 @@ def _do_download(self, filename, info_dict):
except (IOError, OSError), err: except (IOError, OSError), err:
self.trouble(u'\nERROR: unable to write data: %s' % str(err)) self.trouble(u'\nERROR: unable to write data: %s' % str(err))
return False return False
if not self.params.get('noresizebuffer', False):
block_size = self.best_block_size(after - before, len(data_block)) block_size = self.best_block_size(after - before, len(data_block))
# Progress message # Progress message
@ -699,7 +705,7 @@ def _do_download(self, filename, info_dict):
stream.close() stream.close()
self.report_finish() self.report_finish()
if data_len is not None and byte_counter != data_len: if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, long(data_len)) raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
# Update file modification time # Update file modification time

View File

@ -253,7 +253,7 @@ def _real_initialize(self):
else: else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err: except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % u(err))
return return
# Set language # Set language
@ -262,7 +262,7 @@ def _real_initialize(self):
self.report_lang() self.report_lang()
urllib2.urlopen(request).read() urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: unable to set language: %s' % u(err))
return return
# No authentication to be performed # No authentication to be performed
@ -285,7 +285,7 @@ def _real_initialize(self):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return return
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: unable to log in: %s' % u(err))
return return
# Confirm age # Confirm age
@ -298,7 +298,7 @@ def _real_initialize(self):
self.report_age_confirmation() self.report_age_confirmation()
age_results = urllib2.urlopen(request).read() age_results = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
return return
def _real_extract(self, url): def _real_extract(self, url):
@ -320,7 +320,7 @@ def _real_extract(self, url):
try: try:
video_webpage = urllib2.urlopen(request).read() video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
# Attempt to extract SWF player URL # Attempt to extract SWF player URL
@ -342,7 +342,7 @@ def _real_extract(self, url):
if 'token' in video_info: if 'token' in video_info:
break break
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % u(err))
return return
if 'token' not in video_info: if 'token' not in video_info:
if 'reason' in video_info: if 'reason' in video_info:
@ -405,7 +405,7 @@ def _real_extract(self, url):
try: try:
srt_list = urllib2.urlopen(request).read() srt_list = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
if not srt_lang_list: if not srt_lang_list:
@ -422,7 +422,7 @@ def _real_extract(self, url):
try: try:
srt_xml = urllib2.urlopen(request).read() srt_xml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) raise Trouble(u'WARNING: unable to download video subtitles: %s' % u(err))
if not srt_xml: if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles') raise Trouble(u'WARNING: unable to download video subtitles')
video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8')) video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
@ -544,7 +544,7 @@ def _real_initialize(self):
self.report_disclaimer() self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read() disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % u(err))
return return
# Confirm age # Confirm age
@ -557,7 +557,7 @@ def _real_initialize(self):
self.report_age_confirmation() self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read() disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to confirm age: %s' % u(err))
return return
def _real_extract(self, url): def _real_extract(self, url):
@ -581,7 +581,7 @@ def _real_extract(self, url):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader and title from webpage # Extract URL, uploader and title from webpage
@ -672,7 +672,7 @@ def _real_extract(self, url):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader and title from webpage # Extract URL, uploader and title from webpage
@ -768,7 +768,7 @@ def _real_extract(self, url):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader, and title from webpage # Extract URL, uploader, and title from webpage
@ -807,7 +807,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage) mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
if mobj is None: if mobj is None:
@ -861,7 +861,7 @@ def _real_extract(self, url):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract URL, uploader, and title from webpage # Extract URL, uploader, and title from webpage
@ -929,7 +929,7 @@ def _real_extract(self, url, new_video=True):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@ -953,7 +953,7 @@ def _real_extract(self, url, new_video=True):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract uploader and title from webpage # Extract uploader and title from webpage
@ -1011,7 +1011,7 @@ def _real_extract(self, url, new_video=True):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Extract media URL from playlist XML # Extract media URL from playlist XML
@ -1067,7 +1067,7 @@ def _real_extract(self, url, new_video=True):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
# Now we begin extracting as much information as we can from what we # Now we begin extracting as much information as we can from what we
@ -1147,6 +1147,143 @@ def _real_extract(self, url, new_video=True):
}] }]
class ArteTvIE(InfoExtractor):
"""arte.tv information extractor."""
_VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*'
_LIVE_URL = r'index-[0-9]+\.html$'
IE_NAME = u'arte.tv'
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
def report_download_webpage(self, video_id):
"""Report webpage download."""
self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
def report_extraction(self, video_id):
"""Report information extraction."""
self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
def fetch_webpage(self, url):
self._downloader.increment_downloads()
request = urllib2.Request(url)
try:
self.report_download_webpage(url)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return
except ValueError, err:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
return webpage
def grep_webpage(self, url, regex, regexFlags, matchTuples):
page = self.fetch_webpage(url)
mobj = re.search(regex, page, regexFlags)
info = {}
if mobj is None:
self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
return
for (i, key, err) in matchTuples:
if mobj.group(i) is None:
self._downloader.trouble(err)
return
else:
info[key] = mobj.group(i)
return info
def extractLiveStream(self, url):
video_lang = url.split('/')[-4]
info = self.grep_webpage(
url,
r'src="(.*?/videothek_js.*?\.js)',
0,
[
(1, 'url', u'ERROR: Invalid URL: %s' % url)
]
)
http_host = url.split('/')[2]
next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url')))
info = self.grep_webpage(
next_url,
r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' +
'(http://.*?\.swf).*?' +
'(rtmp://.*?)\'',
re.DOTALL,
[
(1, 'path', u'ERROR: could not extract video path: %s' % url),
(2, 'player', u'ERROR: could not extract video player: %s' % url),
(3, 'url', u'ERROR: could not extract video url: %s' % url)
]
)
video_url = u'%s/%s' % (info.get('url'), info.get('path'))
def extractPlus7Stream(self, url):
video_lang = url.split('/')[-3]
info = self.grep_webpage(
url,
r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
0,
[
(1, 'url', u'ERROR: Invalid URL: %s' % url)
]
)
next_url = urllib.unquote(info.get('url'))
info = self.grep_webpage(
next_url,
r'<video lang="%s" ref="(http[^\'"&]*)' % video_lang,
0,
[
(1, 'url', u'ERROR: Could not find <video> tag: %s' % url)
]
)
next_url = urllib.unquote(info.get('url'))
info = self.grep_webpage(
next_url,
r'<video id="(.*?)".*?>.*?' +
'<name>(.*?)</name>.*?' +
'<dateVideo>(.*?)</dateVideo>.*?' +
'<url quality="hd">(.*?)</url>',
re.DOTALL,
[
(1, 'id', u'ERROR: could not extract video id: %s' % url),
(2, 'title', u'ERROR: could not extract video title: %s' % url),
(3, 'date', u'ERROR: could not extract video date: %s' % url),
(4, 'url', u'ERROR: could not extract video url: %s' % url)
]
)
return {
'id': info.get('id'),
'url': urllib.unquote(info.get('url')),
'uploader': u'arte.tv',
'upload_date': info.get('date'),
'title': info.get('title'),
'ext': u'mp4',
'format': u'NA',
'player_url': None,
}
def _real_extract(self, url):
video_id = url.split('/')[-1]
self.report_extraction(video_id)
if re.search(self._LIVE_URL, video_id) is not None:
self.extractLiveStream(url)
return
else:
info = self.extractPlus7Stream(url)
return [info]
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
"""Generic last-resort information extractor.""" """Generic last-resort information extractor."""
@ -1232,7 +1369,7 @@ def _real_extract(self, url):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
except ValueError, err: except ValueError, err:
# since this is the last-resort InfoExtractor, if # since this is the last-resort InfoExtractor, if
@ -1324,7 +1461,7 @@ def _real_extract(self, query):
return return
else: else:
try: try:
n = long(prefix) n = int(prefix)
if n <= 0: if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
@ -1351,7 +1488,7 @@ def _download_n_results(self, query, n):
try: try:
data = urllib2.urlopen(request).read() data = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download API page: %s' % u(err))
return return
api_response = json.loads(data)['data'] api_response = json.loads(data)['data']
@ -1402,7 +1539,7 @@ def _real_extract(self, query):
return return
else: else:
try: try:
n = long(prefix) n = int(prefix)
if n <= 0: if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
@ -1428,7 +1565,7 @@ def _download_n_results(self, query, n):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1484,7 +1621,7 @@ def _real_extract(self, query):
return return
else: else:
try: try:
n = long(prefix) n = int(prefix)
if n <= 0: if n <= 0:
self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
return return
@ -1511,7 +1648,7 @@ def _download_n_results(self, query, n):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1581,7 +1718,7 @@ def _real_extract(self, url):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1638,7 +1775,7 @@ def _real_extract(self, url):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1701,7 +1838,7 @@ def _real_extract(self, url):
try: try:
page = urllib2.urlopen(request).read() page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
# Extract video identifiers # Extract video identifiers
@ -1773,7 +1910,7 @@ def _real_extract(self, url):
mobj = re.search(r'data-users-id="([^"]+)"', page) mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1) page_base = page_base % mobj.group(1)
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
@ -1861,7 +1998,7 @@ def _real_extract(self, url):
self.report_download_webpage(file_id) self.report_download_webpage(file_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % u(err))
return return
# Search for the real file URL # Search for the real file URL
@ -1977,7 +2114,7 @@ def _real_initialize(self):
else: else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err: except (IOError, netrc.NetrcParseError), err:
self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % u(err))
return return
if useremail is None: if useremail is None:
@ -1997,7 +2134,7 @@ def _real_initialize(self):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return return
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) self._downloader.to_stderr(u'WARNING: unable to log in: %s' % u(err))
return return
def _real_extract(self, url): def _real_extract(self, url):
@ -2014,7 +2151,7 @@ def _real_extract(self, url):
page = urllib2.urlopen(request) page = urllib2.urlopen(request)
video_webpage = page.read() video_webpage = page.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
# Start extracting information # Start extracting information
@ -2149,13 +2286,13 @@ def _real_extract(self, url):
'urlhandle': urlh 'urlhandle': urlh
} }
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % u(err))
return return
if info is None: # Regular URL if info is None: # Regular URL
try: try:
json_code = urlh.read() json_code = urlh.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % u(err))
return return
try: try:
@ -2223,7 +2360,7 @@ def _real_extract(self,url):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
self.report_extraction(video_id) self.report_extraction(video_id)
@ -2320,7 +2457,7 @@ def _real_extract(self, url):
htmlHandle = urllib2.urlopen(req) htmlHandle = urllib2.urlopen(req)
html = htmlHandle.read() html = htmlHandle.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
if dlNewest: if dlNewest:
url = htmlHandle.geturl() url = htmlHandle.geturl()
@ -2353,7 +2490,7 @@ def _real_extract(self, url):
urlHandle = urllib2.urlopen(playerUrl_raw) urlHandle = urllib2.urlopen(playerUrl_raw)
playerUrl = urlHandle.geturl() playerUrl = urlHandle.geturl()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to find out player URL: ' + u(err))
return return
uri = mMovieParams[0][1] uri = mMovieParams[0][1]
@ -2362,7 +2499,7 @@ def _real_extract(self, url):
try: try:
indexXml = urllib2.urlopen(indexUrl).read() indexXml = urllib2.urlopen(indexUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download episode index: ' + u(err))
return return
results = [] results = []
@ -2383,7 +2520,7 @@ def _real_extract(self, url):
try: try:
configXml = urllib2.urlopen(configReq).read() configXml = urllib2.urlopen(configReq).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) self._downloader.trouble(u'ERROR: unable to download webpage: %s' % u(err))
return return
cdoc = xml.etree.ElementTree.fromstring(configXml) cdoc = xml.etree.ElementTree.fromstring(configXml)
@ -2466,7 +2603,7 @@ def _real_extract(self, url):
m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type']) m = re.match(r'text/html; charset="?([^"]+)"?', webPage.headers['Content-Type'])
webPage = webPageBytes.decode(m.group(1) if m else 'utf-8') webPage = webPageBytes.decode(m.group(1) if m else 'utf-8')
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download webpage: ' + u(err))
return return
descMatch = re.search('<meta name="description" content="([^"]*)"', webPage) descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
@ -2482,7 +2619,7 @@ def _real_extract(self, url):
try: try:
configJSON = urllib2.urlopen(configUrl).read() configJSON = urllib2.urlopen(configUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download configuration: ' + u(err))
return return
# Technically, it's JavaScript, not JSON # Technically, it's JavaScript, not JSON
@ -2491,7 +2628,7 @@ def _real_extract(self, url):
try: try:
config = json.loads(configJSON) config = json.loads(configJSON)
except (ValueError,), err: except (ValueError,), err:
self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err)) self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + u(err))
return return
playlist = config['playlist'] playlist = config['playlist']
@ -2538,7 +2675,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage) m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@ -2559,7 +2696,7 @@ def _real_extract(self, url):
try: try:
metaXml = urllib2.urlopen(xmlUrl).read() metaXml = urllib2.urlopen(xmlUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % u(err))
return return
mdoc = xml.etree.ElementTree.fromstring(metaXml) mdoc = xml.etree.ElementTree.fromstring(metaXml)
@ -2604,7 +2741,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
self.report_extraction(video_id) self.report_extraction(video_id)
@ -2688,7 +2825,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
self.report_extraction('%s/%s' % (uploader, slug_title)) self.report_extraction('%s/%s' % (uploader, slug_title))
@ -2723,7 +2860,7 @@ def _real_extract(self, url):
try: try:
upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
except Exception, e: except Exception, e:
self._downloader.to_stderr(compat_str(e)) self._downloader.to_stderr(u(e))
# for soundcloud, a request to a cross domain is required for cookies # for soundcloud, a request to a cross domain is required for cookies
request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@ -2765,7 +2902,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
self.report_extraction(url) self.report_extraction(url)
@ -2877,7 +3014,7 @@ def _real_extract(self, url):
self.report_download_json(file_url) self.report_download_json(file_url)
jsonData = urllib2.urlopen(request).read() jsonData = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % u(err))
return return
# parse JSON # parse JSON
@ -2956,7 +3093,7 @@ def _real_extract(self, url):
try: try:
metaXml = urllib2.urlopen(xmlUrl).read() metaXml = urllib2.urlopen(xmlUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err)) self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % u(err))
return return
mdoc = xml.etree.ElementTree.fromstring(metaXml) mdoc = xml.etree.ElementTree.fromstring(metaXml)
try: try:
@ -2980,7 +3117,7 @@ def _real_extract(self, url):
try: try:
coursepage = urllib2.urlopen(url).read() coursepage = urllib2.urlopen(url).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download course info page: ' + u(err))
return return
m = re.search('<h1>([^<]+)</h1>', coursepage) m = re.search('<h1>([^<]+)</h1>', coursepage)
@ -3019,7 +3156,7 @@ def _real_extract(self, url):
try: try:
rootpage = urllib2.urlopen(rootURL).read() rootpage = urllib2.urlopen(rootURL).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) self._downloader.trouble(u'ERROR: unable to download course info page: ' + u(err))
return return
info['title'] = info['id'] info['title'] = info['id']
@ -3066,7 +3203,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % u(err))
return return
mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage) mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@ -3099,7 +3236,7 @@ def _real_extract(self, url):
try: try:
metadataXml = urllib2.urlopen(request).read() metadataXml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % u(err))
return return
mdoc = xml.etree.ElementTree.fromstring(metadataXml) mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@ -3187,7 +3324,7 @@ def _real_extract(self, url):
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
jsondata = urllib2.urlopen(request).read() jsondata = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error) as err: except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
self.report_extraction(video_id) self.report_extraction(video_id)
@ -3361,7 +3498,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % u(err))
return return
# Extract update date # Extract update date
@ -3403,7 +3540,7 @@ def _real_extract(self, url):
try: try:
webpage = urllib2.urlopen(request).read() webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % u(err))
return return
self.report_extract_vid_page(video_page) self.report_extract_vid_page(video_page)

View File

@ -18,10 +18,11 @@
'Ori Avtalion', 'Ori Avtalion',
'shizeeg', 'shizeeg',
'Filippo Valsorda', 'Filippo Valsorda',
'Christian Albrecht',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
__version__ = '2012.11.28' __version__ = '2012.11.29'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION' UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@ -126,9 +127,12 @@ def _format_option_string(option):
opts = [] opts = []
if option._short_opts: opts.append(option._short_opts[0]) if option._short_opts:
if option._long_opts: opts.append(option._long_opts[0]) opts.append(option._short_opts[0])
if len(opts) > 1: opts.insert(1, ', ') if option._long_opts:
opts.append(option._long_opts[0])
if len(opts) > 1:
opts.insert(1, ', ')
if option.takes_value(): opts.append(' %s' % option.metavar) if option.takes_value(): opts.append(' %s' % option.metavar)
@ -187,6 +191,11 @@ def _find_term_columns():
dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
general.add_option('-R', '--retries', general.add_option('-R', '--retries',
dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
general.add_option('--buffer-size',
dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
general.add_option('--no-resize-buffer',
action='store_true', dest='noresizebuffer',
help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
general.add_option('--dump-user-agent', general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent', action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False) help='display the current browser identification', default=False)
@ -362,7 +371,7 @@ def gen_extractors():
YoukuIE(), YoukuIE(),
XNXXIE(), XNXXIE(),
GooglePlusIE(), GooglePlusIE(),
ArteTvIE(),
GenericIE() GenericIE()
] ]
@ -440,9 +449,14 @@ def _real_main():
opts.ratelimit = numeric_limit opts.ratelimit = numeric_limit
if opts.retries is not None: if opts.retries is not None:
try: try:
opts.retries = long(opts.retries) opts.retries = int(opts.retries)
except (TypeError, ValueError), err: except (TypeError, ValueError), err:
parser.error(u'invalid retry count specified') parser.error(u'invalid retry count specified')
if opts.buffersize is not None:
numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
if numeric_buffersize is None:
parser.error(u'invalid buffer size specified')
opts.buffersize = numeric_buffersize
try: try:
opts.playliststart = int(opts.playliststart) opts.playliststart = int(opts.playliststart)
if opts.playliststart <= 0: if opts.playliststart <= 0:
@ -493,6 +507,8 @@ def _real_main():
'ratelimit': opts.ratelimit, 'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites, 'nooverwrites': opts.nooverwrites,
'retries': opts.retries, 'retries': opts.retries,
'buffersize': opts.buffersize,
'noresizebuffer': opts.noresizebuffer,
'continuedl': opts.continue_dl, 'continuedl': opts.continue_dl,
'noprogress': opts.noprogress, 'noprogress': opts.noprogress,
'playliststart': opts.playliststart, 'playliststart': opts.playliststart,

View File

@ -27,9 +27,9 @@
} }
try: try:
compat_str = unicode # Python 2 u = unicode # Python 2
except NameError: except NameError:
compat_str = str u = str
def preferredencoding(): def preferredencoding():
"""Get preferred encoding. """Get preferred encoding.
@ -37,19 +37,17 @@ def preferredencoding():
Returns the best encoding scheme for the system, based on Returns the best encoding scheme for the system, based on
locale.getpreferredencoding() and some further tweaks. locale.getpreferredencoding() and some further tweaks.
""" """
def yield_preferredencoding():
try: try:
pref = locale.getpreferredencoding() pref = locale.getpreferredencoding()
u'TEST'.encode(pref) u'TEST'.encode(pref)
except: except:
pref = 'UTF-8' pref = 'UTF-8'
while True:
yield pref return pref
return yield_preferredencoding().next()
def htmlentity_transform(matchobj): def htmlentity_transform(matchobj):
"""Transforms an HTML entity to a Unicode character. """Transforms an HTML entity to a character.
This function receives a match object and is intended to be used with This function receives a match object and is intended to be used with
the re.sub() function. the re.sub() function.
@ -60,7 +58,6 @@ def htmlentity_transform(matchobj):
if entity in htmlentitydefs.name2codepoint: if entity in htmlentitydefs.name2codepoint:
return unichr(htmlentitydefs.name2codepoint[entity]) return unichr(htmlentitydefs.name2codepoint[entity])
# Unicode character
mobj = re.match(ur'(?u)#(x?\d+)', entity) mobj = re.match(ur'(?u)#(x?\d+)', entity)
if mobj is not None: if mobj is not None:
numstr = mobj.group(1) numstr = mobj.group(1)
@ -69,7 +66,7 @@ def htmlentity_transform(matchobj):
numstr = u'0%s' % numstr numstr = u'0%s' % numstr
else: else:
base = 10 base = 10
return unichr(long(numstr, base)) return unichr(int(numstr, base))
# Unknown entity in name, return its literal representation # Unknown entity in name, return its literal representation
return (u'&%s;' % entity) return (u'&%s;' % entity)
@ -128,8 +125,10 @@ def find_startpos(self, x):
handle_decl = handle_pi = unknown_decl = find_startpos handle_decl = handle_pi = unknown_decl = find_startpos
def get_result(self): def get_result(self):
if self.result == None: return None if self.result is None:
if len(self.result) != 3: return None return None
if len(self.result) != 3:
return None
lines = self.html.split('\n') lines = self.html.split('\n')
lines = lines[self.result[1][0]-1:self.result[2][0]] lines = lines[self.result[1][0]-1:self.result[2][0]]
lines[0] = lines[0][self.result[1][1]:] lines[0] = lines[0][self.result[1][1]:]
@ -208,7 +207,7 @@ def replace_insane(char):
return '_-' if restricted else ' -' return '_-' if restricted else ' -'
elif char in '\\/|*<>': elif char in '\\/|*<>':
return '_' return '_'
if restricted and (char in '&\'' or char.isspace()): if restricted and (char in '!&\'' or char.isspace()):
return '_' return '_'
if restricted and ord(char) > 127: if restricted and ord(char) > 127:
return '_' return '_'
@ -235,7 +234,7 @@ def orderedSet(iterable):
def unescapeHTML(s): def unescapeHTML(s):
""" """
@param s a string (of type unicode) @param s a string
""" """
assert type(s) == type(u'') assert type(s) == type(u'')
@ -244,7 +243,7 @@ def unescapeHTML(s):
def encodeFilename(s): def encodeFilename(s):
""" """
@param s The name of the file (of type unicode) @param s The name of the file
""" """
assert type(s) == type(u'') assert type(s) == type(u'')