Add webpage_url_basename info_dict field (Fixes #1938)

This commit is contained in:
Philipp Hagemeister 2013-12-17 04:13:36 +01:00
parent 44c471c3b8
commit 29eb517403
3 changed files with 27 additions and 9 deletions

View File

@ -13,20 +13,21 @@
#from youtube_dl.utils import htmlentity_transform #from youtube_dl.utils import htmlentity_transform
from youtube_dl.utils import ( from youtube_dl.utils import (
timeconvert,
sanitize_filename,
unescapeHTML,
orderedSet,
DateRange, DateRange,
unified_strdate, encodeFilename,
find_xpath_attr, find_xpath_attr,
get_meta_content, get_meta_content,
xpath_with_ns, orderedSet,
smuggle_url, sanitize_filename,
unsmuggle_url,
shell_quote, shell_quote,
encodeFilename, smuggle_url,
str_to_int, str_to_int,
timeconvert,
unescapeHTML,
unified_strdate,
unsmuggle_url,
url_basename,
xpath_with_ns,
) )
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
@ -181,6 +182,12 @@ def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int('123.456'), 123456)
def test_url_basename(self):
    """url_basename yields the last path segment, ignoring query, fragment and trailing slash."""
    # (input URL, expected basename) pairs, checked in order.
    expectations = (
        (u'http://foo.de/', u''),
        (u'http://foo.de/bar/baz', u'baz'),
        (u'http://foo.de/bar/baz?x=y', u'baz'),
        (u'http://foo.de/bar/baz#x=y', u'baz'),
        (u'http://foo.de/bar/baz/', u'baz'),
    )
    for url, expected in expectations:
        self.assertEqual(url_basename(url), expected)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -47,6 +47,7 @@
subtitles_filename, subtitles_filename,
takewhile_inclusive, takewhile_inclusive,
UnavailableVideoError, UnavailableVideoError,
url_basename,
write_json_file, write_json_file,
write_string, write_string,
YoutubeDLHandler, YoutubeDLHandler,
@ -484,6 +485,7 @@ def extract_info(self, url, download=True, ie_key=None, extra_info={},
{ {
'extractor': ie.IE_NAME, 'extractor': ie.IE_NAME,
'webpage_url': url, 'webpage_url': url,
'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(), 'extractor_key': ie.ie_key(),
}) })
if process: if process:
@ -576,6 +578,7 @@ def make_result(embedded_info):
'playlist_index': i + playliststart, 'playlist_index': i + playliststart,
'extractor': ie_result['extractor'], 'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
} }
@ -596,6 +599,7 @@ def _fixup(r):
{ {
'extractor': ie_result['extractor'], 'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'], 'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'], 'extractor_key': ie_result['extractor_key'],
}) })
return r return r

View File

@ -1084,3 +1084,10 @@ def remove_start(s, start):
if s.startswith(start): if s.startswith(start):
return s[len(start):] return s[len(start):]
return s return s
def url_basename(url):
    """Return the last path segment of an http(s) or scheme-relative URL.

    The query string, the fragment and a single trailing slash are ignored.
    Returns u'' when the URL has no path segment (e.g. ``http://foo.de/``)
    or does not look like ``http://``, ``https://`` or ``//host/...``.
    """
    # ``(?:[^/?#]+/)*`` skips any number of leading directory segments; the
    # previous ``?`` quantifier allowed at most one, so URLs with deeper
    # paths (``/a/b/c``) failed to match and produced an empty basename.
    m = re.match(
        r'(?:https?:|)//[^/]+/(?:[^/?#]+/)*([^/?#]+)/?(?:[?#]|$)', url)
    if not m:
        return u''
    return m.group(1)