Merge pull request #842 - myvideo, rtmp support

@dersphere code, from dersphere/plugin.video.myvideo_de.git
rewritten by @mc2avr
released in the Public Domain by the author
ref: https://github.com/rg3/youtube-dl/pull/842
This commit is contained in:
Filippo Valsorda 2013-05-20 09:49:58 +02:00
commit 5b68ea215b
2 changed files with 145 additions and 16 deletions

View File

@ -748,7 +748,7 @@ def post_process(self, filename, ie_info):
except (IOError, OSError): except (IOError, OSError):
self.report_warning(u'Unable to remove downloaded video file') self.report_warning(u'Unable to remove downloaded video file')
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path): def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
@ -769,6 +769,8 @@ def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path
basic_args += ['--pageUrl', page_url] basic_args += ['--pageUrl', page_url]
if play_path is not None: if play_path is not None:
basic_args += ['-y', play_path] basic_args += ['-y', play_path]
if tc_url is not None:
basic_args += ['--tcUrl', url]
args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)] args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
if self.params.get('verbose', False): if self.params.get('verbose', False):
try: try:
@ -824,7 +826,8 @@ def _do_download(self, filename, info_dict):
return self._download_with_rtmpdump(filename, url, return self._download_with_rtmpdump(filename, url,
info_dict.get('player_url', None), info_dict.get('player_url', None),
info_dict.get('page_url', None), info_dict.get('page_url', None),
info_dict.get('play_path', None)) info_dict.get('play_path', None),
info_dict.get('tc_url', None))
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
stream = None stream = None

View File

@ -16,6 +16,9 @@
import random import random
import math import math
import operator import operator
import hashlib
import binascii
import urllib
from .utils import * from .utils import *
@ -1979,30 +1982,60 @@ class MyVideoIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
IE_NAME = u'myvideo' IE_NAME = u'myvideo'
# Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
# Copyright (C) 2013 Tristan Fischer (sphere@dersphere.de) - GPLv3
def __rc4crypt(self, data, key):
    """Apply the RC4 stream cipher to ``data`` using ``key``.

    RC4 is symmetric, so the same call both encrypts and decrypts.

    ``data`` and ``key`` may each be a byte string or a text string.
    Iterating a byte string yields ints on Python 3 but 1-character
    strings on Python 2, so every item is normalised to its integer
    code before it is used.

    Returns a string with one character per input item, each being
    ``chr()`` of the decrypted byte value.

    Raises ZeroDivisionError if ``key`` is empty.
    """
    def as_int(item):
        # Already an int (Python 3 bytes) or a 1-char string (otherwise).
        return item if isinstance(item, int) else ord(item)

    key_codes = [as_int(k) for k in key]

    # Key-scheduling: permute the 256-entry state box under the key.
    box = list(range(256))
    x = 0
    for i in range(256):
        x = (x + box[i] + key_codes[i % len(key_codes)]) % 256
        box[i], box[x] = box[x], box[i]

    # Keystream generation: XOR one keystream byte into each input item.
    x = 0
    y = 0
    out = []
    for item in data:
        x = (x + 1) % 256
        y = (y + box[x]) % 256
        box[x], box[y] = box[y], box[x]
        out.append(chr(as_int(item) ^ box[(box[x] + box[y]) % 256]))
    return ''.join(out)
def __md5(self, s):
    """Return the MD5 digest of the byte string ``s`` as lowercase hex."""
    hasher = hashlib.md5()
    hasher.update(s)
    return hasher.hexdigest()
def _real_extract(self,url): def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None: if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'invalid URL: %s' % url)
video_id = mobj.group(1) video_id = mobj.group(1)
GK = (
b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
b'TnpsbA0KTVRkbU1tSTRNdz09'
)
# Get video webpage # Get video webpage
webpage_url = 'http://www.myvideo.de/watch/%s' % video_id webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
webpage = self._download_webpage(webpage_url, video_id) webpage = self._download_webpage(webpage_url, video_id)
mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
if mobj is not None:
self.report_extraction(video_id) self.report_extraction(video_id)
mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/.*?\.jpg\'', video_url = mobj.group(1) + '.flv'
webpage)
if mobj is None:
raise ExtractorError(u'Unable to extract media URL')
video_url = mobj.group(1) + ('/%s.flv' % video_id)
mobj = re.search('<title>([^<]+)</title>', webpage) mobj = re.search('<title>([^<]+)</title>', webpage)
if mobj is None: if mobj is None:
raise ExtractorError(u'Unable to extract title') raise ExtractorError(u'Unable to extract title')
video_title = mobj.group(1) video_title = mobj.group(1)
mobj = re.search('[.](.+?)$', video_url)
if mobj is None:
raise ExtractorError(u'Unable to extract extention')
video_ext = mobj.group(1)
return [{ return [{
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
@ -2012,6 +2045,99 @@ def _real_extract(self,url):
'ext': u'flv', 'ext': u'flv',
}] }]
# try encxml
params = {}
encxml = ''
sec = re.search('var flashvars={(.+?)}', webpage).group(1)
for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
if not a == '_encxml':
params[a] = b
else:
encxml = compat_urllib_parse.unquote(b)
if not params.get('domain'):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
self._downloader.report_warning(u'avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
) % video_id
# get enc data
enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
enc_data_b = binascii.unhexlify(enc_data)
sk = self.__md5(
base64.b64decode(base64.b64decode(GK)) +
self.__md5(
str(video_id).encode('utf-8')
).encode('utf-8')
)
dec_data = self.__rc4crypt(enc_data_b, sk)
# extracting infos
self.report_extraction(video_id)
mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract rtmpurl')
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1))
if 'myvideo2flash' in video_rtmpurl:
self._downloader.report_warning(u'forcing RTMPT ...')
video_rtmpurl = video_rtmpurl.replace('rtmpe://', 'rtmpt://')
# extract non rtmp videos
if (video_rtmpurl is None) or (video_rtmpurl == ''):
mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract url')
video_rtmpurl = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
mobj = re.search('source=\'(.*?)\'', dec_data)
if mobj is None:
raise ExtractorError(u'unable to extract swfobj')
video_file = compat_urllib_parse.unquote(mobj.group(1))
# mobj = re.search('path=\'(.*?)\'', dec_data)
# if mobj is None:
# raise ExtractorError(u'unable to extract filepath')
# video_filepath = mobj.group(1)
if not video_file.endswith('f4m'):
ppath, prefix = video_file.split('.')
video_playpath = '%s:%s' % (prefix, ppath)
video_hls_playlist = ''
else:
video_playpath = ''
video_hls_playlist = (
video_filepath + video_file
).replace('.f4m', '.m3u8')
mobj = re.search('swfobject.embedSWF\(\'(.+?)\'', webpage)
if mobj is None:
raise ExtractorError(u'unable to extract swfobj')
video_swfobj = compat_urllib_parse.unquote(mobj.group(1))
mobj = re.search("<h1(?: class='globalHd')?>(.*?)</h1>", webpage)
if mobj is None:
raise ExtractorError(u'unable to extract title')
video_title = mobj.group(1)
return [{
'id': video_id,
'url': video_rtmpurl,
'tc_url': video_rtmpurl,
'uploader': None,
'upload_date': None,
'title': video_title,
'ext': u'flv',
'play_path': video_playpath,
'video_file': video_file,
# 'file_path': video_filepath,
'video_hls_playlist': video_hls_playlist,
'player_url': video_swfobj,
}]
class ComedyCentralIE(InfoExtractor): class ComedyCentralIE(InfoExtractor):
"""Information extractor for The Daily Show and Colbert Report """ """Information extractor for The Daily Show and Colbert Report """