[myspace] improve extraction

This commit is contained in:
remitamine 2016-03-30 21:18:07 +01:00
parent e621a344e6
commit 6b820a2376

View File

@ -2,13 +2,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..utils import (
compat_str, ExtractorError,
int_or_none,
parse_iso8601,
) )
from ..utils import ExtractorError
class MySpaceIE(InfoExtractor): class MySpaceIE(InfoExtractor):
@ -24,6 +24,8 @@ class MySpaceIE(InfoExtractor):
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.', 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Five Minutes to the Stage', 'uploader': 'Five Minutes to the Stage',
'uploader_id': 'fiveminutestothestage', 'uploader_id': 'fiveminutestothestage',
'timestamp': 1414108751,
'upload_date': '20141023',
}, },
'params': { 'params': {
# rtmp download # rtmp download
@ -64,7 +66,7 @@ class MySpaceIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Starset - First Light', 'title': 'Starset - First Light',
'description': 'md5:2d5db6c9d11d527683bcda818d332414', 'description': 'md5:2d5db6c9d11d527683bcda818d332414',
'uploader': 'Jacob Soren', 'uploader': 'Yumi K',
'uploader_id': 'SorenPromotions', 'uploader_id': 'SorenPromotions',
'upload_date': '20140725', 'upload_date': '20140725',
} }
@ -78,6 +80,19 @@ def _real_extract(self, url):
player_url = self._search_regex( player_url = self._search_regex(
r'playerSwf":"([^"?]*)', webpage, 'player URL') r'playerSwf":"([^"?]*)', webpage, 'player URL')
def rtmp_format_from_stream_url(stream_url, width=None, height=None):
    """Build an RTMP format dict from a ``<rtmp url>;<play path>`` string.

    ``stream_url`` is split on its first ``';'``: the part before it is
    used as the format ``url`` and the part after it as ``play_path``.
    ``width`` and ``height`` are copied into the result unchanged (both
    default to ``None``).  ``player_url`` is read from the enclosing scope.

    Raises ``ValueError`` (from the tuple unpacking) when ``stream_url``
    contains no ``';'`` separator.
    """
    # maxsplit=1 keeps any further ';' characters inside the play path.
    pieces = stream_url.split(';', 1)
    rtmp_url, play_path = pieces

    rtmp_format = {
        'format_id': 'rtmp',
        'url': rtmp_url,
        'play_path': play_path,
        'player_url': player_url,
        'protocol': 'rtmp',
        'ext': 'flv',
    }
    # Dimensions are optional; callers that do not know them get None.
    rtmp_format['width'] = width
    rtmp_format['height'] = height
    return rtmp_format
if mobj.group('mediatype').startswith('music/song'): if mobj.group('mediatype').startswith('music/song'):
# songs don't store any useful info in the 'context' variable # songs don't store any useful info in the 'context' variable
song_data = self._search_regex( song_data = self._search_regex(
@ -93,8 +108,8 @@ def search_data(name):
return self._search_regex( return self._search_regex(
r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
song_data, name, default='', group='data') song_data, name, default='', group='data')
streamUrl = search_data('stream-url') stream_url = search_data('stream-url')
if not streamUrl: if not stream_url:
vevo_id = search_data('vevo-id') vevo_id = search_data('vevo-id')
youtube_id = search_data('youtube-id') youtube_id = search_data('youtube-id')
if vevo_id: if vevo_id:
@ -106,36 +121,47 @@ def search_data(name):
else: else:
raise ExtractorError( raise ExtractorError(
'Found song but don\'t know how to download it') 'Found song but don\'t know how to download it')
info = { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'uploader': search_data('artist-name'), 'uploader': search_data('artist-name'),
'uploader_id': search_data('artist-username'), 'uploader_id': search_data('artist-username'),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'duration': int_or_none(search_data('duration')),
'formats': [rtmp_format_from_stream_url(stream_url)]
} }
else: else:
context = json.loads(self._search_regex( video = self._parse_json(self._search_regex(
r'context = ({.*?});', webpage, 'context')) r'context = ({.*?});', webpage, 'context'),
video = context['video'] video_id)['video']
streamUrl = video['streamUrl'] formats = []
info = { hls_stream_url = video.get('hlsStreamUrl')
'id': compat_str(video['mediaId']), if hls_stream_url:
formats.append({
'format_id': 'hls',
'url': hls_stream_url,
'protocol': 'm3u8_native',
'ext': 'mp4',
})
stream_url = video.get('streamUrl')
if stream_url:
formats.append(rtmp_format_from_stream_url(
stream_url,
int_or_none(video.get('width')),
int_or_none(video.get('height'))))
self._sort_formats(formats)
return {
'id': video_id,
'title': video['title'], 'title': video['title'],
'description': video['description'], 'description': video.get('description'),
'thumbnail': video['imageUrl'], 'thumbnail': video.get('imageUrl'),
'uploader': video['artistName'], 'uploader': video.get('artistName'),
'uploader_id': video['artistUsername'], 'uploader_id': video.get('artistUsername'),
'duration': int_or_none(video.get('duration')),
'timestamp': parse_iso8601(video.get('dateAdded')),
'formats': formats,
} }
rtmp_url, play_path = streamUrl.split(';', 1)
info.update({
'url': rtmp_url,
'play_path': play_path,
'player_url': player_url,
'ext': 'flv',
})
return info
class MySpaceAlbumIE(InfoExtractor): class MySpaceAlbumIE(InfoExtractor):
IE_NAME = 'MySpace:album' IE_NAME = 'MySpace:album'