From b281aad2dc658e3c6535579d75b42a5634487b83 Mon Sep 17 00:00:00 2001 From: steven7851 Date: Fri, 26 Aug 2016 07:32:54 +0800 Subject: [PATCH] [douyutv] Use new API: lapi for flv info, and html5 api for room info (#10153, #10318) --- youtube_dl/extractor/douyutv.py | 87 ++++++++++++++++----------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index ce69627558..33efc993ee 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -3,9 +3,10 @@ import hashlib import time +import uuid from .common import InfoExtractor from ..utils import (ExtractorError, unescapeHTML) -from ..compat import (compat_str, compat_basestring) +from ..compat import (compat_str, compat_basestring, compat_urllib_parse_urlencode) class DouyuTVIE(InfoExtractor): @@ -21,7 +22,6 @@ class DouyuTVIE(InfoExtractor): 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { @@ -37,7 +37,6 @@ class DouyuTVIE(InfoExtractor): 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'douyu小漠', - 'uploader_id': '3769985', 'is_live': True, }, 'params': { @@ -54,7 +53,6 @@ class DouyuTVIE(InfoExtractor): 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { @@ -75,19 +73,39 @@ def _real_extract(self, url): room_id = self._html_search_regex( r'"room_id"\s*:\s*(\d+),', page, 'room id') - config = None + room_url = 'http://m.douyu.com/html5/live?roomId=%s' % room_id + room_content = self._download_webpage(room_url, video_id) + room_json = self._parse_json(room_content, video_id, fatal=False) + + room = room_json['data'] + + show_status = room.get('show_status') + # 1 = live, 2 = offline + if show_status == '2': + raise ExtractorError( + 'Live stream 
is offline', expected=True) + + flv_json = None # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache" # Retry with different parameters - same parameters cause same errors for i in range(5): - prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( - room_id, int(time.time())) - auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() + tt = int(time.time() / 60) + did = uuid.uuid4().hex.upper() - config_page = self._download_webpage( - 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), - video_id) + # Decompile core.swf in webpage by ffdec "Search SWFs in memory" + # core.swf is encrypted originally, but ffdec can dump memory to get the decrypted one + # If API changes in the future, just use this way to update + sign_content = '{room_id}{did}A12Svb&%1UUmf@hC{tt}'.format(room_id = room_id, did = did, tt = tt) + sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() + + payload = {'cdn': 'ws', 'rate': '0', 'tt': tt, 'did': did, 'sign': sign} + flv_data = compat_urllib_parse_urlencode(payload) + + flv_request_url = 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id + flv_content = self._download_webpage(flv_request_url, video_id, data=flv_data, + headers={'Content-Type': 'application/x-www-form-urlencoded'}) try: - config = self._parse_json(config_page, video_id, fatal=False) + flv_json = self._parse_json(flv_content, video_id, fatal=False) except ExtractorError: # Wait some time before retrying to get a different time() value self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. 
' @@ -95,54 +113,35 @@ def _real_extract(self, url): continue else: break - if config is None: + if flv_json is None: raise ExtractorError('Unable to fetch API result') - data = config['data'] + flv = flv_json['data'] - error_code = config.get('error', 0) + error_code = flv_json.get('error', 0) if error_code is not 0: error_desc = 'Server reported error %i' % error_code - if isinstance(data, (compat_str, compat_basestring)): - error_desc += ': ' + data + if isinstance(flv, (compat_str, compat_basestring)): + error_desc += ': ' + flv raise ExtractorError(error_desc, expected=True) - show_status = data.get('show_status') - # 1 = live, 2 = offline - if show_status == '2': - raise ExtractorError( - 'Live stream is offline', expected=True) + base_url = flv['rtmp_url'] + live_path = flv['rtmp_live'] - base_url = data['rtmp_url'] - live_path = data['rtmp_live'] + video_url = '%s/%s' % (base_url, live_path) - title = self._live_title(unescapeHTML(data['room_name'])) - description = data.get('show_details') - thumbnail = data.get('room_src') - - uploader = data.get('nickname') - uploader_id = data.get('owner_uid') - - multi_formats = data.get('rtmp_multi_bitrate') - if not isinstance(multi_formats, dict): - multi_formats = {} - multi_formats['live'] = live_path - - formats = [{ - 'url': '%s/%s' % (base_url, format_path), - 'format_id': format_id, - 'preference': 1 if format_id == 'live' else 0, - } for format_id, format_path in multi_formats.items()] - self._sort_formats(formats) + title = self._live_title(unescapeHTML(room['room_name'])) + description = room.get('notice') + thumbnail = room.get('room_src') + uploader = room.get('nickname') return { 'id': room_id, 'display_id': video_id, + 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, - 'uploader_id': uploader_id, - 'formats': formats, 'is_live': True, }