Merge remote-tracking branch 'upstream/master' into MLB

Conflicts:
	youtube_dl/extractor/mlb.py
This commit is contained in:
Charles Chen 2014-07-22 14:44:38 -07:00
commit c4f731262d
54 changed files with 1639 additions and 604 deletions

View File

@ -12,7 +12,7 @@ # INSTALLATION
If you do not have curl, you can alternatively use a recent wget: If you do not have curl, you can alternatively use a recent wget:
sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+x /usr/local/bin/youtube-dl sudo chmod a+x /usr/local/bin/youtube-dl
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).

View File

@ -148,3 +148,10 @@ def assertRegexpMatches(self, text, regexp, msg=None):
else: else:
msg = note + ', ' + msg msg = note + ', ' + msg
self.assertTrue(m, msg) self.assertTrue(m, msg)
def assertGreaterEqual(self, got, expected, msg=None):
if not (got >= expected):
if msg is None:
msg = '%r not greater than or equal to %r' % (got, expected)
self.assertTrue(got >= expected, msg)

1
test/swftests/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.swf

View File

@ -0,0 +1,19 @@
// input: [["a", "b", "c", "d"]]
// output: ["c", "b", "a", "d"]
package {
public class ArrayAccess {
public static function main(ar:Array):Array {
var aa:ArrayAccess = new ArrayAccess();
return aa.f(ar, 2);
}
private function f(ar:Array, num:Number):Array{
var x:String = ar[0];
var y:String = ar[num % ar.length];
ar[0] = y;
ar[num] = x;
return ar;
}
}
}

View File

@ -0,0 +1,17 @@
// input: []
// output: 121
package {
public class ClassCall {
public static function main():int{
var f:OtherClass = new OtherClass();
return f.func(100,20);
}
}
}
class OtherClass {
public function func(x: int, y: int):int {
return x+y+1;
}
}

View File

@ -0,0 +1,15 @@
// input: []
// output: 0
package {
public class ClassConstruction {
public static function main():int{
var f:Foo = new Foo();
return 0;
}
}
}
class Foo {
}

View File

@ -0,0 +1,13 @@
// input: [1, 2]
// output: 3
package {
public class LocalVars {
public static function main(a:int, b:int):int{
var c:int = a + b + b;
var d:int = c - b;
var e:int = d;
return e;
}
}
}

View File

@ -0,0 +1,21 @@
// input: []
// output: 9
package {
public class PrivateCall {
public static function main():int{
var f:OtherClass = new OtherClass();
return f.func();
}
}
}
class OtherClass {
private function pf():int {
return 9;
}
public function func():int {
return this.pf();
}
}

View File

@ -0,0 +1,13 @@
// input: [1]
// output: 1
package {
public class StaticAssignment {
public static var v:int;
public static function main(a:int):int{
v = a;
return v;
}
}
}

View File

@ -0,0 +1,16 @@
// input: []
// output: 1
package {
public class StaticRetrieval {
public static var v:int;
public static function main():int{
if (v) {
return 0;
} else {
return 1;
}
}
}
}

View File

@ -11,6 +11,7 @@
from test.helper import ( from test.helper import (
assertRegexpMatches, assertRegexpMatches,
assertGreaterEqual,
expect_info_dict, expect_info_dict,
FakeYDL, FakeYDL,
) )
@ -71,8 +72,8 @@ def test_dailymotion_user(self):
ie = DailymotionUserIE(dl) ie = DailymotionUserIE(dl)
result = ie.extract('https://www.dailymotion.com/user/nqtv') result = ie.extract('https://www.dailymotion.com/user/nqtv')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
assertGreaterEqual(self, len(result['entries']), 100)
self.assertEqual(result['title'], 'Rémi Gaillard') self.assertEqual(result['title'], 'Rémi Gaillard')
self.assertTrue(len(result['entries']) >= 100)
def test_vimeo_channel(self): def test_vimeo_channel(self):
dl = FakeYDL() dl = FakeYDL()
@ -111,7 +112,7 @@ def test_vine_user(self):
ie = VineUserIE(dl) ie = VineUserIE(dl)
result = ie.extract('https://vine.co/Visa') result = ie.extract('https://vine.co/Visa')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertTrue(len(result['entries']) >= 47) assertGreaterEqual(self, len(result['entries']), 47)
def test_ustream_channel(self): def test_ustream_channel(self):
dl = FakeYDL() dl = FakeYDL()
@ -119,7 +120,7 @@ def test_ustream_channel(self):
result = ie.extract('http://www.ustream.tv/channel/channeljapan') result = ie.extract('http://www.ustream.tv/channel/channeljapan')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10874166') self.assertEqual(result['id'], '10874166')
self.assertTrue(len(result['entries']) >= 54) assertGreaterEqual(self, len(result['entries']), 54)
def test_soundcloud_set(self): def test_soundcloud_set(self):
dl = FakeYDL() dl = FakeYDL()
@ -127,7 +128,7 @@ def test_soundcloud_set(self):
result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'The Royal Concept EP') self.assertEqual(result['title'], 'The Royal Concept EP')
self.assertTrue(len(result['entries']) >= 6) assertGreaterEqual(self, len(result['entries']), 6)
def test_soundcloud_user(self): def test_soundcloud_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -135,7 +136,7 @@ def test_soundcloud_user(self):
result = ie.extract('https://soundcloud.com/the-concept-band') result = ie.extract('https://soundcloud.com/the-concept-band')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865') self.assertEqual(result['id'], '9615865')
self.assertTrue(len(result['entries']) >= 12) assertGreaterEqual(self, len(result['entries']), 12)
def test_soundcloud_likes(self): def test_soundcloud_likes(self):
dl = FakeYDL() dl = FakeYDL()
@ -143,7 +144,7 @@ def test_soundcloud_likes(self):
result = ie.extract('https://soundcloud.com/the-concept-band/likes') result = ie.extract('https://soundcloud.com/the-concept-band/likes')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '9615865') self.assertEqual(result['id'], '9615865')
self.assertTrue(len(result['entries']) >= 1) assertGreaterEqual(self, len(result['entries']), 1)
def test_soundcloud_playlist(self): def test_soundcloud_playlist(self):
dl = FakeYDL() dl = FakeYDL()
@ -162,7 +163,7 @@ def test_livestream_event(self):
result = ie.extract('http://new.livestream.com/tedx/cityenglish') result = ie.extract('http://new.livestream.com/tedx/cityenglish')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertEqual(result['title'], 'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4) assertGreaterEqual(self, len(result['entries']), 4)
def test_livestreamoriginal_folder(self): def test_livestreamoriginal_folder(self):
dl = FakeYDL() dl = FakeYDL()
@ -170,7 +171,7 @@ def test_livestreamoriginal_folder(self):
result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertTrue(len(result['entries']) >= 28) assertGreaterEqual(self, len(result['entries']), 28)
def test_nhl_videocenter(self): def test_nhl_videocenter(self):
dl = FakeYDL() dl = FakeYDL()
@ -187,7 +188,7 @@ def test_bambuser_channel(self):
result = ie.extract('http://bambuser.com/channel/pixelversity') result = ie.extract('http://bambuser.com/channel/pixelversity')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'pixelversity') self.assertEqual(result['title'], 'pixelversity')
self.assertTrue(len(result['entries']) >= 60) assertGreaterEqual(self, len(result['entries']), 60)
def test_bandcamp_album(self): def test_bandcamp_album(self):
dl = FakeYDL() dl = FakeYDL()
@ -195,7 +196,7 @@ def test_bandcamp_album(self):
result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'Nightmare Night EP') self.assertEqual(result['title'], 'Nightmare Night EP')
self.assertTrue(len(result['entries']) >= 4) assertGreaterEqual(self, len(result['entries']), 4)
def test_smotri_community(self): def test_smotri_community(self):
dl = FakeYDL() dl = FakeYDL()
@ -204,7 +205,7 @@ def test_smotri_community(self):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'kommuna') self.assertEqual(result['id'], 'kommuna')
self.assertEqual(result['title'], 'КПРФ') self.assertEqual(result['title'], 'КПРФ')
self.assertTrue(len(result['entries']) >= 4) assertGreaterEqual(self, len(result['entries']), 4)
def test_smotri_user(self): def test_smotri_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -213,7 +214,7 @@ def test_smotri_user(self):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'inspector') self.assertEqual(result['id'], 'inspector')
self.assertEqual(result['title'], 'Inspector') self.assertEqual(result['title'], 'Inspector')
self.assertTrue(len(result['entries']) >= 9) assertGreaterEqual(self, len(result['entries']), 9)
def test_AcademicEarthCourse(self): def test_AcademicEarthCourse(self):
dl = FakeYDL() dl = FakeYDL()
@ -232,7 +233,7 @@ def test_ivi_compilation(self):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa') self.assertEqual(result['id'], 'dvoe_iz_lartsa')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)') self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
self.assertTrue(len(result['entries']) >= 24) assertGreaterEqual(self, len(result['entries']), 24)
def test_ivi_compilation_season(self): def test_ivi_compilation_season(self):
dl = FakeYDL() dl = FakeYDL()
@ -241,7 +242,7 @@ def test_ivi_compilation_season(self):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1') self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон') self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
self.assertTrue(len(result['entries']) >= 12) assertGreaterEqual(self, len(result['entries']), 12)
def test_imdb_list(self): def test_imdb_list(self):
dl = FakeYDL() dl = FakeYDL()
@ -260,7 +261,7 @@ def test_khanacademy_topic(self):
self.assertEqual(result['id'], 'cryptography') self.assertEqual(result['id'], 'cryptography')
self.assertEqual(result['title'], 'Journey into cryptography') self.assertEqual(result['title'], 'Journey into cryptography')
self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?') self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
self.assertTrue(len(result['entries']) >= 3) assertGreaterEqual(self, len(result['entries']), 3)
def test_EveryonesMixtape(self): def test_EveryonesMixtape(self):
dl = FakeYDL() dl = FakeYDL()
@ -277,7 +278,7 @@ def test_rutube_channel(self):
result = ie.extract('http://rutube.ru/tags/video/1800/') result = ie.extract('http://rutube.ru/tags/video/1800/')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '1800') self.assertEqual(result['id'], '1800')
self.assertTrue(len(result['entries']) >= 68) assertGreaterEqual(self, len(result['entries']), 68)
def test_rutube_person(self): def test_rutube_person(self):
dl = FakeYDL() dl = FakeYDL()
@ -285,7 +286,7 @@ def test_rutube_person(self):
result = ie.extract('http://rutube.ru/video/person/313878/') result = ie.extract('http://rutube.ru/video/person/313878/')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '313878') self.assertEqual(result['id'], '313878')
self.assertTrue(len(result['entries']) >= 37) assertGreaterEqual(self, len(result['entries']), 37)
def test_multiple_brightcove_videos(self): def test_multiple_brightcove_videos(self):
# https://github.com/rg3/youtube-dl/issues/2283 # https://github.com/rg3/youtube-dl/issues/2283
@ -322,7 +323,7 @@ def test_ted_playlist(self):
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], '10') self.assertEqual(result['id'], '10')
self.assertEqual(result['title'], 'Who are the hackers?') self.assertEqual(result['title'], 'Who are the hackers?')
self.assertTrue(len(result['entries']) >= 6) assertGreaterEqual(self, len(result['entries']), 6)
def test_toypics_user(self): def test_toypics_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -330,7 +331,7 @@ def test_toypics_user(self):
result = ie.extract('http://videos.toypics.net/Mikey') result = ie.extract('http://videos.toypics.net/Mikey')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'Mikey') self.assertEqual(result['id'], 'Mikey')
self.assertTrue(len(result['entries']) >= 17) assertGreaterEqual(self, len(result['entries']), 17)
def test_xtube_user(self): def test_xtube_user(self):
dl = FakeYDL() dl = FakeYDL()
@ -338,7 +339,7 @@ def test_xtube_user(self):
result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers') result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'greenshowers') self.assertEqual(result['id'], 'greenshowers')
self.assertTrue(len(result['entries']) >= 155) assertGreaterEqual(self, len(result['entries']), 155)
def test_InstagramUser(self): def test_InstagramUser(self):
dl = FakeYDL() dl = FakeYDL()
@ -346,7 +347,7 @@ def test_InstagramUser(self):
result = ie.extract('http://instagram.com/porsche') result = ie.extract('http://instagram.com/porsche')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'porsche') self.assertEqual(result['id'], 'porsche')
self.assertTrue(len(result['entries']) >= 2) assertGreaterEqual(self, len(result['entries']), 2)
test_video = next( test_video = next(
e for e in result['entries'] e for e in result['entries']
if e['id'] == '614605558512799803_462752227') if e['id'] == '614605558512799803_462752227')
@ -385,7 +386,7 @@ def test_aol_playlist(self):
self.assertEqual(result['id'], '152147') self.assertEqual(result['id'], '152147')
self.assertEqual( self.assertEqual(
result['title'], 'Brace Yourself - Today\'s Weirdest News') result['title'], 'Brace Yourself - Today\'s Weirdest News')
self.assertTrue(len(result['entries']) >= 10) assertGreaterEqual(self, len(result['entries']), 10)
def test_TeacherTubeUser(self): def test_TeacherTubeUser(self):
dl = FakeYDL() dl = FakeYDL()
@ -393,7 +394,7 @@ def test_TeacherTubeUser(self):
result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
self.assertIsPlaylist(result) self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'rbhagwati2') self.assertEqual(result['id'], 'rbhagwati2')
self.assertTrue(len(result['entries']) >= 179) assertGreaterEqual(self, len(result['entries']), 179)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

77
test/test_swfinterp.py Normal file
View File

@ -0,0 +1,77 @@
#!/usr/bin/env python
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import errno
import io
import json
import re
import subprocess
from youtube_dl.swfinterp import SWFInterpreter
TEST_DIR = os.path.join(
os.path.dirname(os.path.abspath(__file__)), 'swftests')
class TestSWFInterpreter(unittest.TestCase):
pass
def _make_testfunc(testfile):
m = re.match(r'^(.*)\.(as)$', testfile)
if not m:
return
test_id = m.group(1)
def test_func(self):
as_file = os.path.join(TEST_DIR, testfile)
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
if ((not os.path.exists(swf_file))
or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
subprocess.check_call(['mxmlc', '-output', swf_file, as_file])
except OSError as ose:
if ose.errno == errno.ENOENT:
print('mxmlc not found! Skipping test.')
return
raise
with open(swf_file, 'rb') as swf_f:
swf_content = swf_f.read()
swfi = SWFInterpreter(swf_content)
with io.open(as_file, 'r', encoding='utf-8') as as_f:
as_content = as_f.read()
def _find_spec(key):
m = re.search(
r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key), as_content)
if not m:
raise ValueError('Cannot find %s in %s' % (key, testfile))
return json.loads(m.group(1))
input_args = _find_spec('input')
output = _find_spec('output')
swf_class = swfi.extract_class(test_id)
func = swfi.extract_function(swf_class, 'main')
res = func(input_args)
self.assertEqual(res, output)
test_func.__name__ = str('test_swf_' + test_id)
setattr(TestSWFInterpreter, test_func.__name__, test_func)
for testfile in os.listdir(TEST_DIR):
_make_testfunc(testfile)
if __name__ == '__main__':
unittest.main()

View File

@ -45,6 +45,18 @@
u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
), ),
(
u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
u'swf',
86,
u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
),
(
u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
u'swf',
u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
),
] ]
@ -57,12 +69,12 @@ def setUp(self):
def make_tfunc(url, stype, sig_input, expected_sig): def make_tfunc(url, stype, sig_input, expected_sig):
basename = url.rpartition('/')[2] m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) assert m, '%r should follow URL format' % url
assert m, '%r should follow URL format' % basename
test_id = m.group(1) test_id = m.group(1)
def test_func(self): def test_func(self):
basename = 'player-%s.%s' % (test_id, stype)
fn = os.path.join(self.TESTDATA_DIR, basename) fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn): if not os.path.exists(fn):

View File

@ -1197,6 +1197,10 @@ def _format_note(self, fdict):
if res: if res:
res += ', ' res += ', '
res += format_bytes(fdict['filesize']) res += format_bytes(fdict['filesize'])
elif fdict.get('filesize_approx') is not None:
if res:
res += ', '
res += '~' + format_bytes(fdict['filesize_approx'])
return res return res
def list_formats(self, info_dict): def list_formats(self, info_dict):

View File

@ -65,17 +65,16 @@
'Tobias Bell', 'Tobias Bell',
'Naglis Jonaitis', 'Naglis Jonaitis',
'Charles Chen', 'Charles Chen',
'Hassaan Ali',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
import codecs import codecs
import io import io
import locale
import optparse import optparse
import os import os
import random import random
import re
import shlex import shlex
import sys import sys
@ -634,7 +633,7 @@ def _real_main(argv=None):
if desc is False: if desc is False:
continue continue
if hasattr(ie, 'SEARCH_KEY'): if hasattr(ie, 'SEARCH_KEY'):
_SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny')
_COUNTS = (u'', u'5', u'10', u'all') _COUNTS = (u'', u'5', u'10', u'all')
desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
compat_print(desc) compat_print(desc)

View File

@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE from .addanime import AddAnimeIE
from .adultswim import AdultSwimIE
from .aftonbladet import AftonbladetIE from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE from .anitube import AnitubeIE
from .aol import AolIE from .aol import AolIE
@ -52,6 +53,7 @@
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE from .cspan import CSpanIE
@ -62,6 +64,7 @@
DailymotionUserIE, DailymotionUserIE,
) )
from .daum import DaumIE from .daum import DaumIE
from .dfb import DFBIE
from .dotsub import DotsubIE from .dotsub import DotsubIE
from .dreisat import DreiSatIE from .dreisat import DreiSatIE
from .drtv import DRTVIE from .drtv import DRTVIE
@ -250,6 +253,7 @@
RutubePersonIE, RutubePersonIE,
) )
from .rutv import RUTVIE from .rutv import RUTVIE
from .sapo import SapoIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
@ -263,6 +267,8 @@
SmotriUserIE, SmotriUserIE,
SmotriBroadcastIE, SmotriBroadcastIE,
) )
from .snotr import SnotrIE
from .sockshare import SockshareIE
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import ( from .soundcloud import (
SoundcloudIE, SoundcloudIE,
@ -397,6 +403,7 @@
YoutubeUserIE, YoutubeUserIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zdf import ZDFIE from .zdf import ZDFIE

View File

@ -0,0 +1,139 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class AdultSwimIE(InfoExtractor):
_VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
_TEST = {
'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
'playlist': [
{
'md5': '4da359ec73b58df4575cd01a610ba5dc',
'info_dict': {
'id': '8a250ba1450996e901453d7f02ca02f5',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
},
{
'md5': 'ffbdf55af9331c509d95350bd0cc1819',
'info_dict': {
'id': '8a250ba1450996e901453d7f4bd102f6',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
},
{
'md5': 'b92409635540304280b4b6c36bd14a0a',
'info_dict': {
'id': '8a250ba1450996e901453d7fa73c02f7',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
},
{
'md5': 'e8818891d60e47b29cd89d7b0278156d',
'info_dict': {
'id': '8a250ba1450996e901453d7fc8ba02f8',
'ext': 'flv',
'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
'uploader': 'Rick and Morty',
'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
}
}
]
}
_video_extensions = {
'3500': 'flv',
'640': 'mp4',
'150': 'mp4',
'ipad': 'm3u8',
'iphone': 'm3u8'
}
_video_dimensions = {
'3500': (1280, 720),
'640': (480, 270),
'150': (320, 180)
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_path = mobj.group('path')
webpage = self._download_webpage(url, video_path)
episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
title = self._og_search_title(webpage)
index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
episode_el = idoc.find('.//episode')
show_title = episode_el.attrib.get('collectionTitle')
episode_title = episode_el.attrib.get('title')
thumbnail = episode_el.attrib.get('thumbnailUrl')
description = episode_el.find('./description').text.strip()
entries = []
segment_els = episode_el.findall('./segments/segment')
for part_num, segment_el in enumerate(segment_els):
segment_id = segment_el.attrib.get('id')
segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
thumbnail = segment_el.attrib.get('thumbnailUrl')
duration = segment_el.attrib.get('duration')
segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')
formats = []
file_els = idoc.findall('.//files/file')
for file_el in file_els:
bitrate = file_el.attrib.get('bitrate')
type = file_el.attrib.get('type')
width, height = self._video_dimensions.get(bitrate, (None, None))
formats.append({
'format_id': '%s-%s' % (bitrate, type),
'url': file_el.text,
'ext': self._video_extensions.get(bitrate, 'mp4'),
# The bitrate may not be a number (for example: 'iphone')
'tbr': int(bitrate) if bitrate.isdigit() else None,
'height': height,
'width': width
})
self._sort_formats(formats)
entries.append({
'id': segment_id,
'title': segment_title,
'formats': formats,
'uploader': show_title,
'thumbnail': thumbnail,
'duration': duration,
'description': description
})
return {
'_type': 'playlist',
'id': episode_id,
'display_id': video_path,
'entries': entries,
'title': '%s %s' % (show_title, episode_title),
'description': description,
'thumbnail': thumbnail
}

View File

@ -32,7 +32,7 @@ class AllocineIE(InfoExtractor):
'id': '19540403', 'id': '19540403',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF', 'title': 'Planes 2 Bande-annonce VF',
'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', 'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
}, { }, {
@ -42,7 +42,7 @@ class AllocineIE(InfoExtractor):
'id': '19544709', 'id': '19544709',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF', 'title': 'Dragons 2 - Bande annonce finale VF',
'description': 'md5:e74a4dc750894bac300ece46c7036490', 'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
'thumbnail': 're:http://.*\.jpg', 'thumbnail': 're:http://.*\.jpg',
}, },
}] }]

View File

@ -7,23 +7,32 @@
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
qualities,
) )
class ARDIE(InfoExtractor): class ARDIE(InfoExtractor):
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?' _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TEST = { _TESTS = [{
'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786', 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
'file': '19288786.mp4', 'file': '22429276.mp4',
'md5': '515bf47ce209fb3f5a61b7aad364634c', 'md5': '469751912f1de0816a9fc9df8336476c',
'info_dict': { 'info_dict': {
'title': 'Edward Snowden im Interview - Held oder Verräter?', 'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.', 'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
}, },
'skip': 'Blocked outside of Germany', 'skip': 'Blocked outside of Germany',
} }, {
'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
'info_dict': {
'id': '22490580',
'ext': 'mp4',
'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
},
'skip': 'Blocked outside of Germany',
}]
def _real_extract(self, url): def _real_extract(self, url):
# determine video id from url # determine video id from url
@ -43,40 +52,64 @@ def _real_extract(self, url):
r'<h4 class="headline">(.*?)</h4>'], r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title') webpage, 'title')
description = self._html_search_meta( description = self._html_search_meta(
'dcterms.abstract', webpage, 'description') 'dcterms.abstract', webpage, 'description', default=None)
thumbnail = self._og_search_thumbnail(webpage) if description is None:
description = self._html_search_meta(
'description', webpage, 'meta description')
# Thumbnail is sometimes not present.
# It is in the mobile version, but that seems to use a different URL
# structure altogether.
thumbnail = self._og_search_thumbnail(webpage, default=None)
media_info = self._download_json( media_streams = re.findall(r'''(?x)
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
# The second element of the _mediaArray contains the standard http urls "([^"]+)"''', webpage)
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
if not streams:
if '"fsk"' in webpage:
raise ExtractorError('This video is only available after 20:00')
formats = [] if media_streams:
QUALITIES = qualities(['lo', 'hi', 'hq'])
formats = []
for furl in set(media_streams):
if furl.endswith('.f4m'):
fid = 'f4m'
else:
fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
fid = fid_m.group(1) if fid_m else None
formats.append({
'quality': QUALITIES(fid),
'format_id': fid,
'url': furl,
})
else: # request JSON file
media_info = self._download_json(
'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
# The second element of the _mediaArray contains the standard http urls
streams = media_info['_mediaArray'][1]['_mediaStreamArray']
if not streams:
if '"fsk"' in webpage:
raise ExtractorError('This video is only available after 20:00')
for s in streams: formats = []
if type(s['_stream']) == list: for s in streams:
for index, url in enumerate(s['_stream'][::-1]): if type(s['_stream']) == list:
quality = s['_quality'] + index for index, url in enumerate(s['_stream'][::-1]):
formats.append({ quality = s['_quality'] + index
'quality': quality, formats.append({
'url': url, 'quality': quality,
'format_id': '%s-%s' % (determine_ext(url), quality) 'url': url,
'format_id': '%s-%s' % (determine_ext(url), quality)
}) })
continue continue
format = { format = {
'quality': s['_quality'], 'quality': s['_quality'],
'url': s['_stream'], 'url': s['_stream'],
} }
format['format_id'] = '%s-%s' % ( format['format_id'] = '%s-%s' % (
determine_ext(format['url']), format['quality']) determine_ext(format['url']), format['quality'])
formats.append(format) formats.append(format)
self._sort_formats(formats) self._sort_formats(formats)

View File

@ -12,7 +12,7 @@
class BRIE(InfoExtractor): class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek' IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html' _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_BASE_URL = 'http://www.br.de' _BASE_URL = 'http://www.br.de'
_TESTS = [ _TESTS = [

View File

@ -1,24 +1,42 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
class CBSIE(InfoExtractor): class CBSIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P<id>[^/]+)/.*' _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P<id>[^/]+)/.*'
_TEST = { _TESTS = [{
u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
u'file': u'4JUVEwq3wUT7.flv', 'info_dict': {
u'info_dict': { 'id': '4JUVEwq3wUT7',
u'title': u'Connect Chat feat. Garth Brooks', 'ext': 'flv',
u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', 'title': 'Connect Chat feat. Garth Brooks',
u'duration': 1495, 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!',
'duration': 1495,
}, },
u'params': { 'params': {
# rtmp download # rtmp download
u'skip_download': True, 'skip_download': True,
}, },
} '_skip': 'Blocked outside the US',
}, {
'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/',
'info_dict': {
'id': 'P9gjWjelt6iP',
'ext': 'flv',
'title': 'Live on Letterman - St. Vincent',
'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.',
'duration': 3221,
},
'params': {
# rtmp download
'skip_download': True,
},
'_skip': 'Blocked outside the US',
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -26,5 +44,5 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
real_id = self._search_regex( real_id = self._search_regex(
r"video\.settings\.pid\s*=\s*'([^']+)';", r"video\.settings\.pid\s*=\s*'([^']+)';",
webpage, u'real video ID') webpage, 'real video ID')
return self.url_result(u'theplatform:%s' % real_id) return self.url_result(u'theplatform:%s' % real_id)

View File

@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor):
'id': '85523671', 'id': '85523671',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Sunday Times - Icons', 'title': 'The Sunday Times - Icons',
'description': 'md5:3e1c0dc6047498d6728dcdaad0891762', 'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84',
'uploader': 'Us', 'uploader': 'Us',
'uploader_id': 'usfilms', 'uploader_id': 'usfilms',
'upload_date': '20140131' 'upload_date': '20140131'

View File

@ -43,7 +43,11 @@ def _real_extract(self, url):
raise ExtractorError('Cannot find video data') raise ExtractorError('Cannot find video data')
video_id = vdata['id'] video_id = vdata['id']
title = vdata['headline'] title = vdata.get('headline')
if title is None:
title = vdata.get('title')
if title is None:
raise ExtractorError('Cannot find title!')
description = vdata.get('dek') description = vdata.get('dek')
thumbnail = vdata.get('image', {}).get('path') thumbnail = vdata.get('image', {}).get('path')
author = vdata.get('author') author = vdata.get('author')

View File

@ -14,13 +14,13 @@
class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/ _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(video-clips|episodes|cc-studios|video-collections) (video-clips|episodes|cc-studios|video-collections|full-episodes)
/(?P<title>.*)''' /(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/' _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TEST = { _TEST = {
'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8', 'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': { 'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',

View File

@ -69,6 +69,7 @@ class InfoExtractor(object):
* vcodec Name of the video codec in use * vcodec Name of the video codec in use
* container Name of the container format * container Name of the container format
* filesize The number of bytes, if known in advance * filesize The number of bytes, if known in advance
* filesize_approx An estimate for the number of bytes
* player_url SWF Player URL (used for rtmpdump). * player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual * protocol The protocol that will be used for the actual
download, lower-case. download, lower-case.
@ -468,7 +469,7 @@ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs
display_name = name display_name = name
return self._html_search_regex( return self._html_search_regex(
r'''(?ix)<meta r'''(?ix)<meta
(?=[^>]+(?:itemprop|name|property)=["\']%s["\']) (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
[^>]+content=["\']([^"\']+)["\']''' % re.escape(name), [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
html, display_name, fatal=fatal, **kwargs) html, display_name, fatal=fatal, **kwargs)
@ -555,6 +556,7 @@ def _formats_key(f):
f.get('abr') if f.get('abr') is not None else -1, f.get('abr') if f.get('abr') is not None else -1,
audio_ext_preference, audio_ext_preference,
f.get('filesize') if f.get('filesize') is not None else -1, f.get('filesize') if f.get('filesize') is not None else -1,
f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
f.get('format_id'), f.get('format_id'),
) )
formats.sort(key=_formats_key) formats.sort(key=_formats_key)

View File

@ -0,0 +1,65 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
str_to_int,
)
class CrackedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
_TEST = {
'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
'md5': '4b29a5eeec292cd5eca6388c7558db9e',
'info_dict': {
'id': '19006',
'ext': 'mp4',
'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
'description': 'md5:3b909e752661db86007d10e5ec2df769',
'timestamp': 1405659600,
'upload_date': '20140718',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
video_url = self._html_search_regex(
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
if timestamp:
timestamp = parse_iso8601(timestamp[:-6])
view_count = str_to_int(self._html_search_regex(
r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False))
m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
if m:
width = int(m.group('width'))
height = int(m.group('height'))
else:
width = height = None
return {
'id': video_id,
'url':video_url,
'title': title,
'description': description,
'timestamp': timestamp,
'view_count': view_count,
'comment_count': comment_count,
'height': height,
'width': width,
}

View File

@ -0,0 +1,44 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class DFBIE(InfoExtractor):
IE_NAME = 'tv.dfb.de'
_VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
# The md5 is different each time
'info_dict': {
'id': '9070',
'ext': 'flv',
'title': 'Highlights des Empfangs in Berlin',
'upload_date': '20140716',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
player_info = self._download_xml(
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
video_id)
video_info = player_info.find('video')
f4m_info = self._download_xml(video_info.find('url').text, video_id)
token_el = f4m_info.find('token')
manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
return {
'id': video_id,
'title': video_info.find('title').text,
'url': manifest_url,
'ext': 'flv',
'thumbnail': self._og_search_thumbnail(webpage),
'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
}

View File

@ -5,24 +5,26 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import compat_urllib_parse_unquote
class DropboxIE(InfoExtractor): class DropboxIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
_TEST = { _TEST = {
'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4', 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
'md5': '8ae17c51172fb7f93bdd6a214cc8c896', 'md5': '8a3d905427a6951ccb9eb292f154530b',
'info_dict': { 'info_dict': {
'id': '0qr9sai2veej4f8', 'id': 'nelirfsxnmcfbfh',
'ext': 'mp4', 'ext': 'mp4',
'title': 'THE_DOCTOR_GAMES' 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
} }
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id') video_id = mobj.group('id')
title = os.path.splitext(mobj.group('title'))[0] fn = compat_urllib_parse_unquote(mobj.group('title'))
title = os.path.splitext(fn)[0]
video_url = url + '?dl=1' video_url = url + '?dl=1'
return { return {

View File

@ -8,7 +8,6 @@
ExtractorError, ExtractorError,
compat_urllib_parse, compat_urllib_parse,
compat_urllib_request, compat_urllib_request,
determine_ext,
) )

View File

@ -48,7 +48,7 @@ def _real_extract(self, url):
class FranceTvInfoIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr' IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html' _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@ -211,7 +211,7 @@ def _real_extract(self, url):
class CultureboxIE(FranceTVBaseInfoExtractor): class CultureboxIE(FranceTVBaseInfoExtractor):
IE_NAME = 'culturebox.francetvinfo.fr' IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = { _TEST = {
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',

View File

@ -26,7 +26,7 @@ class FunnyOrDieIE(InfoExtractor):
'id': 'e402820827', 'id': 'e402820827',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Please Use This Song (Jon Lajoie)', 'title': 'Please Use This Song (Jon Lajoie)',
'description': 'md5:2ed27d364f5a805a6dba199faaf6681d', 'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
}, },
}] }]

View File

@ -402,7 +402,7 @@ def _real_extract(self, url):
elif default_search == 'error': elif default_search == 'error':
raise ExtractorError( raise ExtractorError(
('%r is not a valid URL. ' ('%r is not a valid URL. '
'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True) ) % (url, url), expected=True)
else: else:
assert ':' in default_search assert ':' in default_search

View File

@ -8,7 +8,7 @@
class KickStarterIE(InfoExtractor): class KickStarterIE(InfoExtractor):
_VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*' _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*'
_TEST = { _TESTS = [{
'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location', 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location',
'md5': 'c81addca81327ffa66c642b5d8b08cab', 'md5': 'c81addca81327ffa66c642b5d8b08cab',
'info_dict': { 'info_dict': {
@ -18,22 +18,45 @@ class KickStarterIE(InfoExtractor):
'description': 'A unique motocross documentary that examines the ' 'description': 'A unique motocross documentary that examines the '
'life and mind of one of sports most elite athletes: Josh Grant.', 'life and mind of one of sports most elite athletes: Josh Grant.',
}, },
} }, {
'note': 'Embedded video (not using the native kickstarter video service)',
'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178',
'playlist': [
{
'info_dict': {
'id': '78704821',
'ext': 'mp4',
'uploader_id': 'pebble',
'uploader': 'Pebble Technology',
'title': 'Pebble iOS Notifications',
}
}
],
}]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = re.match(self._VALID_URL, url)
video_id = m.group('id') video_id = m.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(r'data-video-url="(.*?)"', title = self._html_search_regex(
webpage, 'video URL') r'<title>\s*(.*?)(?:\s*&mdash; Kickstarter)?\s*</title>',
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
webpage, 'title').rpartition('— Kickstarter')[0].strip() video_url = self._search_regex(
r'data-video-url="(.*?)"',
webpage, 'video URL', default=None)
if video_url is None: # No native kickstarter, look for embedded videos
return {
'_type': 'url_transparent',
'ie_key': 'Generic',
'url': url,
'title': title,
}
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': video_title, 'title': title,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
} }

View File

@ -28,11 +28,13 @@ class LivestreamIE(InfoExtractor):
} }
def _extract_video_info(self, video_data): def _extract_video_info(self, video_data):
video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url') video_url = (
video_data.get('progressive_url_hd') or
video_data.get('progressive_url')
)
return { return {
'id': compat_str(video_data['id']), 'id': compat_str(video_data['id']),
'url': video_url, 'url': video_url,
'ext': 'mp4',
'title': video_data['caption'], 'title': video_data['caption'],
'thumbnail': video_data['thumbnail_url'], 'thumbnail': video_data['thumbnail_url'],
'upload_date': video_data['updated_at'].replace('-', '')[:8], 'upload_date': video_data['updated_at'].replace('-', '')[:8],
@ -50,7 +52,8 @@ def _real_extract(self, url):
r'window.config = ({.*?});', webpage, 'window config') r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event'] info = json.loads(config_json)['event']
videos = [self._extract_video_info(video_data['data']) videos = [self._extract_video_info(video_data['data'])
for video_data in info['feed']['data'] if video_data['type'] == 'video'] for video_data in info['feed']['data']
if video_data['type'] == 'video']
return self.playlist_result(videos, info['id'], info['full_name']) return self.playlist_result(videos, info['id'], info['full_name'])
else: else:
og_video = self._og_search_video_url(webpage, 'player url') og_video = self._og_search_video_url(webpage, 'player url')

View File

@ -85,11 +85,25 @@ def _real_extract(self, url):
flags=re.MULTILINE) flags=re.MULTILINE)
bootstrap = json.loads(bootstrap_json) bootstrap = json.loads(bootstrap_json)
info = bootstrap['results'][0]['video'] info = bootstrap['results'][0]['video']
playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI'
mpxid = info['mpxId'] mpxid = info['mpxId']
all_videos = self._download_json(playlist_url, title)['videos']
# The response contains additional videos base_urls = [
info = next(v for v in all_videos if v['mpxId'] == mpxid) info['fallbackPlaylistUrl'],
info['associatedPlaylistUrl'],
]
for base_url in base_urls:
playlist_url = base_url + '?form=MPXNBCNewsAPI'
all_videos = self._download_json(playlist_url, title)['videos']
try:
info = next(v for v in all_videos if v['mpxId'] == mpxid)
break
except StopIteration:
continue
if info is None:
raise ExtractorError('Could not find video in playlists')
return { return {
'_type': 'url', '_type': 'url',

View File

@ -32,7 +32,7 @@ def _real_extract(self, url):
'http://e.omroep.nl/metadata/aflevering/%s' % video_id, 'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id, video_id,
# We have to remove the javascript callback # We have to remove the javascript callback
transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j) transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
) )
token_page = self._download_webpage( token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js', 'http://ida.omroep.nl/npoplayer/i.js',

View File

@ -35,9 +35,7 @@ def _real_extract(self, url):
r'<h1 class="videoTitle[^"]*">(.+?)</h1>', r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, u'title') webpage, u'title')
video_thumbnail = self._html_search_regex( video_thumbnail = self._og_search_thumbnail(webpage)
r'playerInnerHTML.+?<img\s+src="(.+?)"',
webpage, u'thumbnail', fatal=False)
# No self-labeling, but they describe themselves as # No self-labeling, but they describe themselves as
# "Home of Videos Porno" # "Home of Videos Porno"

View File

@ -30,7 +30,7 @@ def _real_extract(self, url):
page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id) page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
data = json.loads(self._html_search_regex( data = json.loads(self._html_search_regex(
r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data'] r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data']
video_url = data.get('downloadUrl') or data.get('url') video_url = data.get('downloadUrl') or data.get('url')

View File

@ -17,7 +17,7 @@ class RTVEALaCartaIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
'md5': '18fcd45965bdd076efdb12cd7f6d7b9e', 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
'info_dict': { 'info_dict': {
'id': '2491869', 'id': '2491869',
'ext': 'mp4', 'ext': 'mp4',

View File

@ -0,0 +1,119 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
)
class SapoIE(InfoExtractor):
IE_DESC = 'SAPO Vídeos'
_VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
_TESTS = [
{
'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
'md5': '79ee523f6ecb9233ac25075dee0eda83',
'note': 'SD video',
'info_dict': {
'id': 'UBz95kOtiWYUMTA5Ghfi',
'ext': 'mp4',
'title': 'Benfica - Marcas na Hitória',
'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
'duration': 264,
'uploader': 'tiago_1988',
'upload_date': '20080229',
'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
},
},
{
'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
'md5': '90a2f283cfb49193fe06e861613a72aa',
'note': 'HD video',
'info_dict': {
'id': 'IyusNAZ791ZdoCY5H5IF',
'ext': 'mp4',
'title': 'Codebits VII - Report',
'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
'duration': 144,
'uploader': 'codebits',
'upload_date': '20140427',
'categories': ['codebits', 'codebits2014'],
},
},
{
'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
'note': 'v2 video',
'info_dict': {
'id': 'yLqjzPtbTimsn2wWBKHz',
'ext': 'mp4',
'title': 'Hipnose Condicionativa 4',
'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
'duration': 692,
'uploader': 'sapozen',
'upload_date': '20090609',
'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
},
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
item = self._download_xml(
'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item')
title = item.find('./title').text
description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text
thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text)
uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text
upload_date = unified_strdate(item.find('./pubDate').text)
view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text)
comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text)
tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text
categories = tags.split() if tags else []
age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0
video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text
video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x')
formats = [{
'url': video_url,
'ext': 'mp4',
'format_id': 'sd',
'width': int(video_size[0]),
'height': int(video_size[1]),
}]
if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true':
formats.append({
'url': re.sub(r'/mov/1$', '/mov/39', video_url),
'ext': 'mp4',
'format_id': 'hd',
'width': 1280,
'height': 720,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'uploader': uploader,
'upload_date': upload_date,
'view_count': view_count,
'comment_count': comment_count,
'categories': categories,
'age_limit': age_limit,
'formats': formats,
}

View File

@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
'upload_date': '20120816', 'upload_date': '20120816',
'uploader': 'Howcast', 'uploader': 'Howcast',
'uploader_id': 'Howcast', 'uploader_id': 'Howcast',
'description': 'md5:4f0aac94361a12e1ce57d74f85265175', 'description': 'md5:727900f130df3dc9a25e2721497c7910',
}, },
'params': { 'params': {
'skip_download': True 'skip_download': True

View File

@ -0,0 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
float_or_none,
str_to_int,
parse_duration,
)
class SnotrIE(InfoExtractor):
_VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
_TESTS = [{
'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
'info_dict': {
'id': '13708',
'ext': 'flv',
'title': 'Drone flying through fireworks!',
'duration': 247,
'filesize_approx': 98566144,
'description': 'A drone flying through Fourth of July Fireworks',
}
}, {
'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
'info_dict': {
'id': '530',
'ext': 'flv',
'title': 'David Letteman - George W. Bush Top 10',
'duration': 126,
'filesize_approx': 8912896,
'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
}
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id
view_count = str_to_int(self._html_search_regex(
r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
webpage, 'view count', fatal=False))
duration = parse_duration(self._html_search_regex(
r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
webpage, 'duration', fatal=False))
filesize_approx = float_or_none(self._html_search_regex(
r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
webpage, 'filesize', fatal=False), invscale=1024 * 1024)
return {
'id': video_id,
'description': description,
'title': title,
'url': video_url,
'view_count': view_count,
'duration': duration,
'filesize_approx': filesize_approx,
}

View File

@ -0,0 +1,78 @@
# coding: utf-8
from __future__ import unicode_literals
from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
)
import re
from .common import InfoExtractor
class SockshareIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P<id>[0-9A-Za-z]+)'
_FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.</div>'
_TEST = {
'url': 'http://www.sockshare.com/file/437BE28B89D799D7',
'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd',
'info_dict': {
'id': '437BE28B89D799D7',
'title': 'big_buck_bunny_720p_surround.avi',
'ext': 'avi',
'thumbnail': 're:^http://.*\.jpg$',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
url = 'http://sockshare.com/file/%s' % video_id
webpage = self._download_webpage(url, video_id)
if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
raise ExtractorError('Video %s does not exist' % video_id,
expected=True)
confirm_hash = self._html_search_regex(r'''(?x)<input\s+
type="hidden"\s+
value="([^"]*)"\s+
name="hash"
''', webpage, 'hash')
fields = {
"hash": confirm_hash,
"confirm": "Continue as Free User"
}
post = compat_urllib_parse.urlencode(fields)
req = compat_urllib_request.Request(url, post)
# Apparently, this header is required for confirmation to work.
req.add_header('Host', 'www.sockshare.com')
req.add_header('Content-type', 'application/x-www-form-urlencoded')
webpage = self._download_webpage(
req, video_id, 'Downloading video page')
video_url = self._html_search_regex(
r'<a href="([^"]*)".+class="download_file_link"',
webpage, 'file url')
video_url = "http://www.sockshare.com" + video_url
title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title')
thumbnail = self._html_search_regex(
r'<img\s+src="([^"]*)".+?name="bg"',
webpage, 'thumbnail')
formats = [{
'format_id': 'sd',
'url': video_url,
}]
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@ -53,7 +53,7 @@ class SteamIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20140329', 'upload_date': '20140329',
'title': 'FRONTIERS - Final Greenlight Trailer', 'title': 'FRONTIERS - Final Greenlight Trailer',
'description': 'md5:6df4fe8dd494ae811869672b0767e025', 'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
'uploader': 'AAD Productions', 'uploader': 'AAD Productions',
'uploader_id': 'AtomicAgeDogGames', 'uploader_id': 'AtomicAgeDogGames',
} }

View File

@ -19,16 +19,6 @@ class TagesschauIE(InfoExtractor):
'description': 'md5:69da3c61275b426426d711bde96463ab', 'description': 'md5:69da3c61275b426426d711bde96463ab',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
}, },
}, {
'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
'md5': '66652566900963a3f962333579eeffcf',
'info_dict': {
'id': '5964',
'ext': 'mp4',
'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
'thumbnail': 're:http://.*\.jpg',
},
}] }]
_FORMATS = { _FORMATS = {

View File

@ -62,7 +62,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('title', webpage, 'title') title = self._html_search_meta('title', webpage, 'title', fatal=True)
TITLE_SUFFIX = ' - TeacherTube' TITLE_SUFFIX = ' - TeacherTube'
if title.endswith(TITLE_SUFFIX): if title.endswith(TITLE_SUFFIX):
title = title[:-len(TITLE_SUFFIX)].strip() title = title[:-len(TITLE_SUFFIX)].strip()
@ -101,7 +101,11 @@ class TeacherTubeUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?' _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
_MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">' _MEDIA_RE = r'''(?sx)
class="?sidebar_thumb_time"?>[0-9:]+</div>
\s*
<a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
'''
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -111,14 +115,12 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, user_id) webpage = self._download_webpage(url, user_id)
urls.extend(re.findall(self._MEDIA_RE, webpage)) urls.extend(re.findall(self._MEDIA_RE, webpage))
pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
for p in pages: for p in pages:
more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
urls.extend(re.findall(self._MEDIA_RE, webpage)) video_urls = re.findall(self._MEDIA_RE, webpage)
urls.extend(video_urls)
entries = []
for url in urls:
entries.append(self.url_result(url, 'TeacherTube'))
entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
return self.playlist_result(entries, user_id) return self.playlist_result(entries, user_id)

View File

@ -1,8 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor

View File

@ -98,7 +98,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
'info_dict': { 'info_dict': {
'id': '54469442', 'id': '54469442',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software', 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
'uploader': 'The BLN & Business of Software', 'uploader': 'The BLN & Business of Software',
'uploader_id': 'theblnbusinessofsoftware', 'uploader_id': 'theblnbusinessofsoftware',
'duration': 3610, 'duration': 3610,

View File

@ -10,7 +10,7 @@
class VodlockerIE(InfoExtractor): class VodlockerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'
_TESTS = [{ _TESTS = [{
'url': 'http://vodlocker.com/e8wvyzz4sl42', 'url': 'http://vodlocker.com/e8wvyzz4sl42',

View File

@ -55,14 +55,14 @@ class WDRIE(InfoExtractor):
}, },
}, },
{ {
'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html', 'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
'md5': '24e83813e832badb0a8d7d1ef9ef0691', 'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
'info_dict': { 'info_dict': {
'id': 'mdb-463528', 'id': 'mdb-478135',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Süpersong: Soul Bossa Nova', 'title': 'Flavia Coelho: Amar é Amar',
'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
'upload_date': '20140630', 'upload_date': '20140717',
}, },
}, },
] ]

View File

@ -1,19 +1,17 @@
# coding: utf-8 # coding: utf-8
import collections
import errno import errno
import io import io
import itertools import itertools
import json import json
import os.path import os.path
import re import re
import struct
import traceback import traceback
import zlib
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..utils import ( from ..utils import (
compat_chr, compat_chr,
compat_parse_qs, compat_parse_qs,
@ -347,8 +345,9 @@ def report_rtmp_download(self):
self.to_screen(u'RTMP download detected') self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url, slen): def _extract_signature_function(self, video_id, player_url, slen):
id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$', id_m = re.match(
player_url) r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
player_url)
player_type = id_m.group('ext') player_type = id_m.group('ext')
player_id = id_m.group('id') player_id = id_m.group('id')
@ -449,417 +448,10 @@ def _parse_sig_js(self, jscode):
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents): def _parse_sig_swf(self, file_contents):
if file_contents[1:3] != b'WS': swfi = SWFInterpreter(file_contents)
raise ExtractorError(
u'Not an SWF file; header is %r' % file_contents[:3])
if file_contents[:1] == b'C':
content = zlib.decompress(file_contents[8:])
else:
raise NotImplementedError(u'Unsupported compression format %r' %
file_contents[:1])
def extract_tags(content):
pos = 0
while pos < len(content):
header16 = struct.unpack('<H', content[pos:pos+2])[0]
pos += 2
tag_code = header16 >> 6
tag_len = header16 & 0x3f
if tag_len == 0x3f:
tag_len = struct.unpack('<I', content[pos:pos+4])[0]
pos += 4
assert pos+tag_len <= len(content)
yield (tag_code, content[pos:pos+tag_len])
pos += tag_len
code_tag = next(tag
for tag_code, tag in extract_tags(content)
if tag_code == 82)
p = code_tag.index(b'\0', 4) + 1
code_reader = io.BytesIO(code_tag[p:])
# Parse ABC (AVM2 ByteCode)
def read_int(reader=None):
if reader is None:
reader = code_reader
res = 0
shift = 0
for _ in range(5):
buf = reader.read(1)
assert len(buf) == 1
b = struct.unpack('<B', buf)[0]
res = res | ((b & 0x7f) << shift)
if b & 0x80 == 0:
break
shift += 7
return res
def u30(reader=None):
res = read_int(reader)
assert res & 0xf0000000 == 0
return res
u32 = read_int
def s32(reader=None):
v = read_int(reader)
if v & 0x80000000 != 0:
v = - ((v ^ 0xffffffff) + 1)
return v
def read_string(reader=None):
if reader is None:
reader = code_reader
slen = u30(reader)
resb = reader.read(slen)
assert len(resb) == slen
return resb.decode('utf-8')
def read_bytes(count, reader=None):
if reader is None:
reader = code_reader
resb = reader.read(count)
assert len(resb) == count
return resb
def read_byte(reader=None):
resb = read_bytes(1, reader=reader)
res = struct.unpack('<B', resb)[0]
return res
# minor_version + major_version
read_bytes(2 + 2)
# Constant pool
int_count = u30()
for _c in range(1, int_count):
s32()
uint_count = u30()
for _c in range(1, uint_count):
u32()
double_count = u30()
read_bytes((double_count-1) * 8)
string_count = u30()
constant_strings = [u'']
for _c in range(1, string_count):
s = read_string()
constant_strings.append(s)
namespace_count = u30()
for _c in range(1, namespace_count):
read_bytes(1) # kind
u30() # name
ns_set_count = u30()
for _c in range(1, ns_set_count):
count = u30()
for _c2 in range(count):
u30()
multiname_count = u30()
MULTINAME_SIZES = {
0x07: 2, # QName
0x0d: 2, # QNameA
0x0f: 1, # RTQName
0x10: 1, # RTQNameA
0x11: 0, # RTQNameL
0x12: 0, # RTQNameLA
0x09: 2, # Multiname
0x0e: 2, # MultinameA
0x1b: 1, # MultinameL
0x1c: 1, # MultinameLA
}
multinames = [u'']
for _c in range(1, multiname_count):
kind = u30()
assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
if kind == 0x07:
u30() # namespace_idx
name_idx = u30()
multinames.append(constant_strings[name_idx])
else:
multinames.append('[MULTINAME kind: %d]' % kind)
for _c2 in range(MULTINAME_SIZES[kind]):
u30()
# Methods
method_count = u30()
MethodInfo = collections.namedtuple(
'MethodInfo',
['NEED_ARGUMENTS', 'NEED_REST'])
method_infos = []
for method_id in range(method_count):
param_count = u30()
u30() # return type
for _ in range(param_count):
u30() # param type
u30() # name index (always 0 for youtube)
flags = read_byte()
if flags & 0x08 != 0:
# Options present
option_count = u30()
for c in range(option_count):
u30() # val
read_bytes(1) # kind
if flags & 0x80 != 0:
# Param names present
for _ in range(param_count):
u30() # param name
mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
method_infos.append(mi)
# Metadata
metadata_count = u30()
for _c in range(metadata_count):
u30() # name
item_count = u30()
for _c2 in range(item_count):
u30() # key
u30() # value
def parse_traits_info():
trait_name_idx = u30()
kind_full = read_byte()
kind = kind_full & 0x0f
attrs = kind_full >> 4
methods = {}
if kind in [0x00, 0x06]: # Slot or Const
u30() # Slot id
u30() # type_name_idx
vindex = u30()
if vindex != 0:
read_byte() # vkind
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
u30() # disp_id
method_idx = u30()
methods[multinames[trait_name_idx]] = method_idx
elif kind == 0x04: # Class
u30() # slot_id
u30() # classi
elif kind == 0x05: # Function
u30() # slot_id
function_idx = u30()
methods[function_idx] = multinames[trait_name_idx]
else:
raise ExtractorError(u'Unsupported trait kind %d' % kind)
if attrs & 0x4 != 0: # Metadata present
metadata_count = u30()
for _c3 in range(metadata_count):
u30() # metadata index
return methods
# Classes
TARGET_CLASSNAME = u'SignatureDecipher' TARGET_CLASSNAME = u'SignatureDecipher'
searched_idx = multinames.index(TARGET_CLASSNAME) searched_class = swfi.extract_class(TARGET_CLASSNAME)
searched_class_id = None initial_function = swfi.extract_function(searched_class, u'decipher')
class_count = u30()
for class_id in range(class_count):
name_idx = u30()
if name_idx == searched_idx:
# We found the class we're looking for!
searched_class_id = class_id
u30() # super_name idx
flags = read_byte()
if flags & 0x08 != 0: # Protected namespace is present
u30() # protected_ns_idx
intrf_count = u30()
for _c2 in range(intrf_count):
u30()
u30() # iinit
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
if searched_class_id is None:
raise ExtractorError(u'Target class %r not found' %
TARGET_CLASSNAME)
method_names = {}
method_idxs = {}
for class_id in range(class_count):
u30() # cinit
trait_count = u30()
for _c2 in range(trait_count):
trait_methods = parse_traits_info()
if class_id == searched_class_id:
method_names.update(trait_methods.items())
method_idxs.update(dict(
(idx, name)
for name, idx in trait_methods.items()))
# Scripts
script_count = u30()
for _c in range(script_count):
u30() # init
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
# Method bodies
method_body_count = u30()
Method = collections.namedtuple('Method', ['code', 'local_count'])
methods = {}
for _c in range(method_body_count):
method_idx = u30()
u30() # max_stack
local_count = u30()
u30() # init_scope_depth
u30() # max_scope_depth
code_length = u30()
code = read_bytes(code_length)
if method_idx in method_idxs:
m = Method(code, local_count)
methods[method_idxs[method_idx]] = m
exception_count = u30()
for _c2 in range(exception_count):
u30() # from
u30() # to
u30() # target
u30() # exc_type
u30() # var_name
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
assert p + code_reader.tell() == len(code_tag)
assert len(methods) == len(method_idxs)
method_pyfunctions = {}
def extract_function(func_name):
if func_name in method_pyfunctions:
return method_pyfunctions[func_name]
if func_name not in methods:
raise ExtractorError(u'Cannot find function %r' % func_name)
m = methods[func_name]
def resfunc(args):
registers = ['(this)'] + list(args) + [None] * m.local_count
stack = []
coder = io.BytesIO(m.code)
while True:
opcode = struct.unpack('!B', coder.read(1))[0]
if opcode == 36: # pushbyte
v = struct.unpack('!B', coder.read(1))[0]
stack.append(v)
elif opcode == 44: # pushstring
idx = u30(coder)
stack.append(constant_strings[idx])
elif opcode == 48: # pushscope
# We don't implement the scope register, so we'll just
# ignore the popped value
stack.pop()
elif opcode == 70: # callproperty
index = u30(coder)
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if mname == u'split':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, compat_str)
if args[0] == u'':
res = list(obj)
else:
res = obj.split(args[0])
stack.append(res)
elif mname == u'slice':
assert len(args) == 1
assert isinstance(args[0], int)
assert isinstance(obj, list)
res = obj[args[0]:]
stack.append(res)
elif mname == u'join':
assert len(args) == 1
assert isinstance(args[0], compat_str)
assert isinstance(obj, list)
res = args[0].join(obj)
stack.append(res)
elif mname in method_pyfunctions:
stack.append(method_pyfunctions[mname](args))
else:
raise NotImplementedError(
u'Unsupported property %r on %r'
% (mname, obj))
elif opcode == 72: # returnvalue
res = stack.pop()
return res
elif opcode == 79: # callpropvoid
index = u30(coder)
mname = multinames[index]
arg_count = u30(coder)
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if mname == u'reverse':
assert isinstance(obj, list)
obj.reverse()
else:
raise NotImplementedError(
u'Unsupported (void) property %r on %r'
% (mname, obj))
elif opcode == 93: # findpropstrict
index = u30(coder)
mname = multinames[index]
res = extract_function(mname)
stack.append(res)
elif opcode == 97: # setproperty
index = u30(coder)
value = stack.pop()
idx = stack.pop()
obj = stack.pop()
assert isinstance(obj, list)
assert isinstance(idx, int)
obj[idx] = value
elif opcode == 98: # getlocal
index = u30(coder)
stack.append(registers[index])
elif opcode == 99: # setlocal
index = u30(coder)
value = stack.pop()
registers[index] = value
elif opcode == 102: # getproperty
index = u30(coder)
pname = multinames[index]
if pname == u'length':
obj = stack.pop()
assert isinstance(obj, list)
stack.append(len(obj))
else: # Assume attribute access
idx = stack.pop()
assert isinstance(idx, int)
obj = stack.pop()
assert isinstance(obj, list)
stack.append(obj[idx])
elif opcode == 128: # coerce
u30(coder)
elif opcode == 133: # coerce_s
assert isinstance(stack[-1], (type(None), compat_str))
elif opcode == 164: # modulo
value2 = stack.pop()
value1 = stack.pop()
res = value1 % value2
stack.append(res)
elif opcode == 208: # getlocal_0
stack.append(registers[0])
elif opcode == 209: # getlocal_1
stack.append(registers[1])
elif opcode == 210: # getlocal_2
stack.append(registers[2])
elif opcode == 211: # getlocal_3
stack.append(registers[3])
elif opcode == 214: # setlocal_2
registers[2] = stack.pop()
elif opcode == 215: # setlocal_3
registers[3] = stack.pop()
else:
raise NotImplementedError(
u'Unsupported opcode %d' % opcode)
method_pyfunctions[func_name] = resfunc
return resfunc
initial_function = extract_function(u'decipher')
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False): def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
@ -1014,14 +606,11 @@ def _real_extract(self, url):
age_gate = True age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id} # We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube # this can be viewed without login into Youtube
data = compat_urllib_parse.urlencode({'video_id': video_id, data = compat_urllib_parse.urlencode({
'el': 'player_embedded', 'video_id': video_id,
'gl': 'US', 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'hl': 'en', 'sts':'16268',
'eurl': 'https://youtube.googleapis.com/v/' + video_id, })
'asv': 3,
'sts':'1588',
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id, video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False, note=False,
@ -1220,30 +809,37 @@ def _map_to_format_list(urlmap):
url += '&signature=' + url_data['sig'][0] url += '&signature=' + url_data['sig'][0]
elif 's' in url_data: elif 's' in url_data:
encrypted_sig = url_data['s'][0] encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'):
if age_gate:
if player_url is None:
player_version = 'unknown'
else:
player_version = self._search_regex(
r'-(.+)\.swf$', player_url,
u'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
if not age_gate: if not age_gate:
jsplayer_url_json = self._search_regex( jsplayer_url_json = self._search_regex(
r'"assets":.+?"js":\s*("[^"]+")', r'"assets":.+?"js":\s*("[^"]+")',
video_webpage, u'JS player URL') video_webpage, u'JS player URL')
player_url = json.loads(jsplayer_url_json) player_url = json.loads(jsplayer_url_json)
if player_url is None:
player_url_json = self._search_regex(
r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
video_webpage, u'age gate player URL')
player_url = json.loads(player_url_json)
if self._downloader.params.get('verbose'):
if player_url is None:
player_version = 'unknown'
player_desc = 'unknown'
else:
if player_url.endswith('swf'):
player_version = self._search_regex(
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
u'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
signature = self._decrypt_signature( signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate) encrypted_sig, video_id, player_url, age_gate)

609
youtube_dl/swfinterp.py Normal file
View File

@ -0,0 +1,609 @@
from __future__ import unicode_literals
import collections
import io
import zlib
from .utils import (
compat_str,
ExtractorError,
struct_unpack,
)
def _extract_tags(file_contents):
if file_contents[1:3] != b'WS':
raise ExtractorError(
'Not an SWF file; header is %r' % file_contents[:3])
if file_contents[:1] == b'C':
content = zlib.decompress(file_contents[8:])
else:
raise NotImplementedError(
'Unsupported compression format %r' %
file_contents[:1])
# Determine number of bits in framesize rectangle
framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
framesize_len = (5 + 4 * framesize_nbits + 7) // 8
pos = framesize_len + 2 + 2
while pos < len(content):
header16 = struct_unpack('<H', content[pos:pos + 2])[0]
pos += 2
tag_code = header16 >> 6
tag_len = header16 & 0x3f
if tag_len == 0x3f:
tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
pos += 4
assert pos + tag_len <= len(content), \
('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
% (tag_code, pos, tag_len, len(content)))
yield (tag_code, content[pos:pos + tag_len])
pos += tag_len
class _AVMClass_Object(object):
def __init__(self, avm_class):
self.avm_class = avm_class
def __repr__(self):
return '%s#%x' % (self.avm_class.name, id(self))
class _ScopeDict(dict):
def __init__(self, avm_class):
super(_ScopeDict, self).__init__()
self.avm_class = avm_class
def __repr__(self):
return '%s__Scope(%s)' % (
self.avm_class.name,
super(_ScopeDict, self).__repr__())
class _AVMClass(object):
def __init__(self, name_idx, name):
self.name_idx = name_idx
self.name = name
self.method_names = {}
self.method_idxs = {}
self.methods = {}
self.method_pyfunctions = {}
self.variables = _ScopeDict(self)
def make_object(self):
return _AVMClass_Object(self)
def __repr__(self):
return '_AVMClass(%s)' % (self.name)
def register_methods(self, methods):
self.method_names.update(methods.items())
self.method_idxs.update(dict(
(idx, name)
for name, idx in methods.items()))
class _Multiname(object):
def __init__(self, kind):
self.kind = kind
def __repr__(self):
return '[MULTINAME kind: 0x%x]' % self.kind
def _read_int(reader):
res = 0
shift = 0
for _ in range(5):
buf = reader.read(1)
assert len(buf) == 1
b = struct_unpack('<B', buf)[0]
res = res | ((b & 0x7f) << shift)
if b & 0x80 == 0:
break
shift += 7
return res
def _u30(reader):
res = _read_int(reader)
assert res & 0xf0000000 == 0
return res
_u32 = _read_int
def _s32(reader):
v = _read_int(reader)
if v & 0x80000000 != 0:
v = - ((v ^ 0xffffffff) + 1)
return v
def _s24(reader):
bs = reader.read(3)
assert len(bs) == 3
last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'
return struct_unpack('<i', bs + last_byte)[0]
def _read_string(reader):
slen = _u30(reader)
resb = reader.read(slen)
assert len(resb) == slen
return resb.decode('utf-8')
def _read_bytes(count, reader):
assert count >= 0
resb = reader.read(count)
assert len(resb) == count
return resb
def _read_byte(reader):
resb = _read_bytes(1, reader=reader)
res = struct_unpack('<B', resb)[0]
return res
class SWFInterpreter(object):
def __init__(self, file_contents):
code_tag = next(tag
for tag_code, tag in _extract_tags(file_contents)
if tag_code == 82)
p = code_tag.index(b'\0', 4) + 1
code_reader = io.BytesIO(code_tag[p:])
# Parse ABC (AVM2 ByteCode)
# Define a couple convenience methods
u30 = lambda *args: _u30(*args, reader=code_reader)
s32 = lambda *args: _s32(*args, reader=code_reader)
u32 = lambda *args: _u32(*args, reader=code_reader)
read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
read_byte = lambda *args: _read_byte(*args, reader=code_reader)
# minor_version + major_version
read_bytes(2 + 2)
# Constant pool
int_count = u30()
for _c in range(1, int_count):
s32()
uint_count = u30()
for _c in range(1, uint_count):
u32()
double_count = u30()
read_bytes(max(0, (double_count - 1)) * 8)
string_count = u30()
self.constant_strings = ['']
for _c in range(1, string_count):
s = _read_string(code_reader)
self.constant_strings.append(s)
namespace_count = u30()
for _c in range(1, namespace_count):
read_bytes(1) # kind
u30() # name
ns_set_count = u30()
for _c in range(1, ns_set_count):
count = u30()
for _c2 in range(count):
u30()
multiname_count = u30()
MULTINAME_SIZES = {
0x07: 2, # QName
0x0d: 2, # QNameA
0x0f: 1, # RTQName
0x10: 1, # RTQNameA
0x11: 0, # RTQNameL
0x12: 0, # RTQNameLA
0x09: 2, # Multiname
0x0e: 2, # MultinameA
0x1b: 1, # MultinameL
0x1c: 1, # MultinameLA
}
self.multinames = ['']
for _c in range(1, multiname_count):
kind = u30()
assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
if kind == 0x07:
u30() # namespace_idx
name_idx = u30()
self.multinames.append(self.constant_strings[name_idx])
else:
self.multinames.append(_Multiname(kind))
for _c2 in range(MULTINAME_SIZES[kind]):
u30()
# Methods
method_count = u30()
MethodInfo = collections.namedtuple(
'MethodInfo',
['NEED_ARGUMENTS', 'NEED_REST'])
method_infos = []
for method_id in range(method_count):
param_count = u30()
u30() # return type
for _ in range(param_count):
u30() # param type
u30() # name index (always 0 for youtube)
flags = read_byte()
if flags & 0x08 != 0:
# Options present
option_count = u30()
for c in range(option_count):
u30() # val
read_bytes(1) # kind
if flags & 0x80 != 0:
# Param names present
for _ in range(param_count):
u30() # param name
mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
method_infos.append(mi)
# Metadata
metadata_count = u30()
for _c in range(metadata_count):
u30() # name
item_count = u30()
for _c2 in range(item_count):
u30() # key
u30() # value
def parse_traits_info():
trait_name_idx = u30()
kind_full = read_byte()
kind = kind_full & 0x0f
attrs = kind_full >> 4
methods = {}
if kind in [0x00, 0x06]: # Slot or Const
u30() # Slot id
u30() # type_name_idx
vindex = u30()
if vindex != 0:
read_byte() # vkind
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
u30() # disp_id
method_idx = u30()
methods[self.multinames[trait_name_idx]] = method_idx
elif kind == 0x04: # Class
u30() # slot_id
u30() # classi
elif kind == 0x05: # Function
u30() # slot_id
function_idx = u30()
methods[function_idx] = self.multinames[trait_name_idx]
else:
raise ExtractorError('Unsupported trait kind %d' % kind)
if attrs & 0x4 != 0: # Metadata present
metadata_count = u30()
for _c3 in range(metadata_count):
u30() # metadata index
return methods
# Classes
class_count = u30()
classes = []
for class_id in range(class_count):
name_idx = u30()
cname = self.multinames[name_idx]
avm_class = _AVMClass(name_idx, cname)
classes.append(avm_class)
u30() # super_name idx
flags = read_byte()
if flags & 0x08 != 0: # Protected namespace is present
u30() # protected_ns_idx
intrf_count = u30()
for _c2 in range(intrf_count):
u30()
u30() # iinit
trait_count = u30()
for _c2 in range(trait_count):
trait_methods = parse_traits_info()
avm_class.register_methods(trait_methods)
assert len(classes) == class_count
self._classes_by_name = dict((c.name, c) for c in classes)
for avm_class in classes:
u30() # cinit
trait_count = u30()
for _c2 in range(trait_count):
trait_methods = parse_traits_info()
avm_class.register_methods(trait_methods)
# Scripts
script_count = u30()
for _c in range(script_count):
u30() # init
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
# Method bodies
method_body_count = u30()
Method = collections.namedtuple('Method', ['code', 'local_count'])
for _c in range(method_body_count):
method_idx = u30()
u30() # max_stack
local_count = u30()
u30() # init_scope_depth
u30() # max_scope_depth
code_length = u30()
code = read_bytes(code_length)
for avm_class in classes:
if method_idx in avm_class.method_idxs:
m = Method(code, local_count)
avm_class.methods[avm_class.method_idxs[method_idx]] = m
exception_count = u30()
for _c2 in range(exception_count):
u30() # from
u30() # to
u30() # target
u30() # exc_type
u30() # var_name
trait_count = u30()
for _c2 in range(trait_count):
parse_traits_info()
assert p + code_reader.tell() == len(code_tag)
def extract_class(self, class_name):
try:
return self._classes_by_name[class_name]
except KeyError:
raise ExtractorError('Class %r not found' % class_name)
def extract_function(self, avm_class, func_name):
if func_name in avm_class.method_pyfunctions:
return avm_class.method_pyfunctions[func_name]
if func_name in self._classes_by_name:
return self._classes_by_name[func_name].make_object()
if func_name not in avm_class.methods:
raise ExtractorError('Cannot find function %s.%s' % (
avm_class.name, func_name))
m = avm_class.methods[func_name]
def resfunc(args):
# Helper functions
coder = io.BytesIO(m.code)
s24 = lambda: _s24(coder)
u30 = lambda: _u30(coder)
registers = [avm_class.variables] + list(args) + [None] * m.local_count
stack = []
scopes = collections.deque([
self._classes_by_name, avm_class.variables])
while True:
opcode = _read_byte(coder)
if opcode == 17: # iftrue
offset = s24()
value = stack.pop()
if value:
coder.seek(coder.tell() + offset)
elif opcode == 18: # iffalse
offset = s24()
value = stack.pop()
if not value:
coder.seek(coder.tell() + offset)
elif opcode == 36: # pushbyte
v = _read_byte(coder)
stack.append(v)
elif opcode == 42: # dup
value = stack[-1]
stack.append(value)
elif opcode == 44: # pushstring
idx = u30()
stack.append(self.constant_strings[idx])
elif opcode == 48: # pushscope
new_scope = stack.pop()
scopes.append(new_scope)
elif opcode == 66: # construct
arg_count = u30()
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
res = obj.avm_class.make_object()
stack.append(res)
elif opcode == 70: # callproperty
index = u30()
mname = self.multinames[index]
arg_count = u30()
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if isinstance(obj, _AVMClass_Object):
func = self.extract_function(obj.avm_class, mname)
res = func(args)
stack.append(res)
continue
elif isinstance(obj, _ScopeDict):
if mname in obj.avm_class.method_names:
func = self.extract_function(obj.avm_class, mname)
res = func(args)
else:
res = obj[mname]
stack.append(res)
continue
elif isinstance(obj, compat_str):
if mname == 'split':
assert len(args) == 1
assert isinstance(args[0], compat_str)
if args[0] == '':
res = list(obj)
else:
res = obj.split(args[0])
stack.append(res)
continue
elif isinstance(obj, list):
if mname == 'slice':
assert len(args) == 1
assert isinstance(args[0], int)
res = obj[args[0]:]
stack.append(res)
continue
elif mname == 'join':
assert len(args) == 1
assert isinstance(args[0], compat_str)
res = args[0].join(obj)
stack.append(res)
continue
raise NotImplementedError(
'Unsupported property %r on %r'
% (mname, obj))
elif opcode == 72: # returnvalue
res = stack.pop()
return res
elif opcode == 74: # constructproperty
index = u30()
arg_count = u30()
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
mname = self.multinames[index]
assert isinstance(obj, _AVMClass)
# We do not actually call the constructor for now;
# we just pretend it does nothing
stack.append(obj.make_object())
elif opcode == 79: # callpropvoid
index = u30()
mname = self.multinames[index]
arg_count = u30()
args = list(reversed(
[stack.pop() for _ in range(arg_count)]))
obj = stack.pop()
if mname == 'reverse':
assert isinstance(obj, list)
obj.reverse()
else:
raise NotImplementedError(
'Unsupported (void) property %r on %r'
% (mname, obj))
elif opcode == 86: # newarray
arg_count = u30()
arr = []
for i in range(arg_count):
arr.append(stack.pop())
arr = arr[::-1]
stack.append(arr)
elif opcode == 93: # findpropstrict
index = u30()
mname = self.multinames[index]
for s in reversed(scopes):
if mname in s:
res = s
break
else:
res = scopes[0]
stack.append(res[mname])
elif opcode == 94: # findproperty
index = u30()
mname = self.multinames[index]
for s in reversed(scopes):
if mname in s:
res = s
break
else:
res = avm_class.variables
stack.append(res)
elif opcode == 96: # getlex
index = u30()
mname = self.multinames[index]
for s in reversed(scopes):
if mname in s:
scope = s
break
else:
scope = avm_class.variables
# I cannot find where static variables are initialized
# so let's just return None
res = scope.get(mname)
stack.append(res)
elif opcode == 97: # setproperty
index = u30()
value = stack.pop()
idx = self.multinames[index]
if isinstance(idx, _Multiname):
idx = stack.pop()
obj = stack.pop()
obj[idx] = value
elif opcode == 98: # getlocal
index = u30()
stack.append(registers[index])
elif opcode == 99: # setlocal
index = u30()
value = stack.pop()
registers[index] = value
elif opcode == 102: # getproperty
index = u30()
pname = self.multinames[index]
if pname == 'length':
obj = stack.pop()
assert isinstance(obj, list)
stack.append(len(obj))
else: # Assume attribute access
idx = stack.pop()
assert isinstance(idx, int)
obj = stack.pop()
assert isinstance(obj, list)
stack.append(obj[idx])
elif opcode == 115: # convert_
value = stack.pop()
intvalue = int(value)
stack.append(intvalue)
elif opcode == 128: # coerce
u30()
elif opcode == 133: # coerce_s
assert isinstance(stack[-1], (type(None), compat_str))
elif opcode == 160: # add
value2 = stack.pop()
value1 = stack.pop()
res = value1 + value2
stack.append(res)
elif opcode == 161: # subtract
value2 = stack.pop()
value1 = stack.pop()
res = value1 - value2
stack.append(res)
elif opcode == 164: # modulo
value2 = stack.pop()
value1 = stack.pop()
res = value1 % value2
stack.append(res)
elif opcode == 175: # greaterequals
value2 = stack.pop()
value1 = stack.pop()
result = value1 >= value2
stack.append(result)
elif opcode == 208: # getlocal_0
stack.append(registers[0])
elif opcode == 209: # getlocal_1
stack.append(registers[1])
elif opcode == 210: # getlocal_2
stack.append(registers[2])
elif opcode == 211: # getlocal_3
stack.append(registers[3])
elif opcode == 212: # setlocal_0
registers[0] = stack.pop()
elif opcode == 213: # setlocal_1
registers[1] = stack.pop()
elif opcode == 214: # setlocal_2
registers[2] = stack.pop()
elif opcode == 215: # setlocal_3
registers[3] = stack.pop()
else:
raise NotImplementedError(
'Unsupported opcode %d' % opcode)
avm_class.method_pyfunctions[func_name] = resfunc
return resfunc

View File

@ -91,11 +91,9 @@
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
try: try:
from urllib.parse import parse_qs as compat_parse_qs from urllib.parse import unquote as compat_urllib_parse_unquote
except ImportError: # Python 2 except ImportError:
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
# Python 2's version is apparently totally broken
def _unquote(string, encoding='utf-8', errors='replace'):
if string == '': if string == '':
return string return string
res = string.split('%') res = string.split('%')
@ -130,6 +128,13 @@ def _unquote(string, encoding='utf-8', errors='replace'):
string += pct_sequence.decode(encoding, errors) string += pct_sequence.decode(encoding, errors)
return string return string
try:
from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
# Python 2's version is apparently totally broken
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
encoding='utf-8', errors='replace'): encoding='utf-8', errors='replace'):
qs, _coerce_result = qs, unicode qs, _coerce_result = qs, unicode
@ -149,10 +154,12 @@ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
continue continue
if len(nv[1]) or keep_blank_values: if len(nv[1]) or keep_blank_values:
name = nv[0].replace('+', ' ') name = nv[0].replace('+', ' ')
name = _unquote(name, encoding=encoding, errors=errors) name = compat_urllib_parse_unquote(
name, encoding=encoding, errors=errors)
name = _coerce_result(name) name = _coerce_result(name)
value = nv[1].replace('+', ' ') value = nv[1].replace('+', ' ')
value = _unquote(value, encoding=encoding, errors=errors) value = compat_urllib_parse_unquote(
value, encoding=encoding, errors=errors)
value = _coerce_result(value) value = _coerce_result(value)
r.append((name, value)) r.append((name, value))
return r return r
@ -1193,11 +1200,6 @@ def format_bytes(bytes):
return u'%.2f%s' % (converted, suffix) return u'%.2f%s' % (converted, suffix)
def str_to_int(int_str):
int_str = re.sub(r'[,\.]', u'', int_str)
return int(int_str)
def get_term_width(): def get_term_width():
columns = os.environ.get('COLUMNS', None) columns = os.environ.get('COLUMNS', None)
if columns: if columns:
@ -1265,15 +1267,22 @@ def get_method(self):
return "HEAD" return "HEAD"
def int_or_none(v, scale=1, default=None, get_attr=None): def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
if get_attr: if get_attr:
if v is not None: if v is not None:
v = getattr(v, get_attr, None) v = getattr(v, get_attr, None)
return default if v is None else (int(v) // scale) return default if v is None else (int(v) * invscale // scale)
def float_or_none(v, scale=1, default=None): def str_to_int(int_str):
return default if v is None else (float(v) / scale) if int_str is None:
return None
int_str = re.sub(r'[,\.]', u'', int_str)
return int(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
return default if v is None else (float(v) * invscale / scale)
def parse_duration(s): def parse_duration(s):

View File

@ -1,2 +1,2 @@
__version__ = '2014.07.15' __version__ = '2014.07.22'