# coding: utf-8
# Source: youtube_dl/extractor/qqmusic.py, as added by the patch
# "[QQMusic] Add new extractor" (Yen Chi Hsuan, 2015-03-17).
# The same patch registers this extractor in
# youtube_dl/extractor/__init__.py with the line:
#     from .qqmusic import QQMusicIE
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import strip_jsonp

# guid is a random number generated in JavaScript on the site, but a fixed
# number also seems to work.
guid = '1'


class QQMusicIE(InfoExtractor):
    """Extractor for single songs addressed as y.qq.com/#type=song&mid=<id>."""

    # The capture group must be named ``id``: _real_extract() obtains the
    # song mid through self._match_id(url), which presumably reads that
    # named group (NOTE(review): confirm against InfoExtractor._match_id).
    # Dots are escaped so they only match a literal '.' in the host name.
    _VALID_URL = r'http://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
    _TESTS = [{
        'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
        'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
        'info_dict': {
            'id': '004295Et37taLD',
            'ext': 'm4a',
            'title': '可惜没如果',
            'upload_date': '20141227',
            'creator': '林俊杰',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the song referenced by *url*.

        Two remote resources are queried: the song detail page (scraped for
        the song name, publish date and singer) and the "musicexpress"
        endpoint (which supplies the ``key`` used as ``vkey`` in the stream
        URL).

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``upload_date`` (publish date with the dashes removed, i.e.
        YYYYMMDD) and ``creator``.
        """
        mid = self._match_id(url)

        detail_info_page = self._download_webpage(
            'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
            mid, note='Download song detail info',
            errnote='Unable to get song detail info')

        # The metadata is scraped out of single-quoted fields embedded in
        # the detail page (``songname: '...'`` / ``singer: '...'``).
        song_name = self._html_search_regex(
            r"songname:\s*'([^']+)'", detail_info_page, 'song name')

        # The page carries the date as YYYY-MM-DD; dropping the dashes
        # yields the YYYYMMDD form returned as 'upload_date'.
        publish_time = self._html_search_regex(
            r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
            'publish time').replace('-', '')

        singer = self._html_search_regex(
            r"singer:\s*'([^']+)", detail_info_page, 'singer')

        # The response is passed through strip_jsonp before JSON decoding
        # (presumably it is wrapped in a JSONP callback -- TODO confirm).
        # The same guid is sent here and in the stream URL below.
        vkey = self._download_json(
            'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
            mid, note='Retrieve vkey', errnote='Unable to get vkey',
            transform_source=strip_jsonp)['key']
        song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)

        return {
            'id': mid,
            'url': song_url,
            'title': song_name,
            'upload_date': publish_time,
            'creator': singer,
        }