[picarto] Improve extraction (closes #6205, closes #12514, closes #15276, closes #15551)

This commit is contained in:
Sergey M․ 2018-04-16 00:31:25 +07:00
parent d6166a7602
commit a42839e548
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 116 additions and 38 deletions

View File

@@ -816,8 +816,8 @@
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .picarto import ( from .picarto import (
PicartoVodIE,
PicartoIE, PicartoIE,
PicartoVodIE,
) )
from .piksel import PikselIE from .piksel import PikselIE
from .pinkbike import PinkbikeIE from .pinkbike import PinkbikeIE

View File

@@ -1,12 +1,21 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, js_to_json, urlencode_postdata from ..compat import compat_str
from ..utils import (
ExtractorError,
js_to_json,
try_get,
update_url_query,
urlencode_postdata,
)
class PicartoIE(InfoExtractor): class PicartoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)[^/]*$' _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
_TEST = { _TEST = {
'url': 'https://picarto.tv/Setz', 'url': 'https://picarto.tv/Setz',
'info_dict': { 'info_dict': {
@@ -16,72 +25,141 @@ class PicartoIE(InfoExtractor):
'timestamp': int, 'timestamp': int,
'is_live': True 'is_live': True
}, },
'params': { 'skip': 'Stream is offline',
'skip_download': True
}
} }
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs accepted by PicartoVodIE are rejected here; every other URL is
    judged by the inherited ``suitable`` check.
    """
    # Give the VOD extractor priority over the channel extractor.
    if PicartoVodIE.suitable(url):
        return False
    return super(PicartoIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) channel_id = self._match_id(url)
stream_page = self._download_webpage(url, channel_id) stream_page = self._download_webpage(url, channel_id)
if 'This channel does not exist.' in stream_page: if '>This channel does not exist' in stream_page:
raise ExtractorError('Channel does not exist', expected=True) raise ExtractorError(
'Channel %s does not exist' % channel_id, expected=True)
player_settings_js = self._html_search_regex( player = self._parse_json(
r'(?s)playerSettings\[1\]\s*=\s*(\{.+?\}\n)', stream_page, 'player-settings') self._search_regex(
player_settings = self._parse_json(player_settings_js, channel_id, r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
transform_source=js_to_json) 'player settings'),
if not player_settings.get('online'): channel_id, transform_source=js_to_json)
if player.get('online') is False:
raise ExtractorError('Stream is offline', expected=True) raise ExtractorError('Stream is offline', expected=True)
cdn_data = self._download_json('https://picarto.tv/process/channel', channel_id, cdn_data = self._download_json(
'https://picarto.tv/process/channel', channel_id,
data=urlencode_postdata({'loadbalancinginfo': channel_id}), data=urlencode_postdata({'loadbalancinginfo': channel_id}),
note='Fetching load balancer info') note='Downloading load balancing info')
edge = [edge['ep'] for edge in cdn_data['edges'] if edge['id'] == cdn_data['preferedEdge']][0]
formats = self._extract_m3u8_formats('https://%s/hls/%s/index.m3u8' % (edge, channel_id), def get_event(key):
channel_id, 'mp4') return try_get(player, lambda x: x['event'][key], compat_str) or ''
formats.append({'url': 'https://%s/mp4/%s.mp4' % (edge, channel_id)})
params = {
'token': player.get('token') or '',
'ticket': get_event('ticket'),
'con': int(time.time() * 1000),
'type': get_event('ticket'),
'scope': get_event('scope'),
}
prefered_edge = cdn_data.get('preferedEdge')
default_tech = player.get('defaultTech')
formats = []
for edge in cdn_data['edges']:
edge_ep = edge.get('ep')
if not edge_ep or not isinstance(edge_ep, compat_str):
continue
edge_id = edge.get('id')
for tech in cdn_data['techs']:
tech_label = tech.get('label')
tech_type = tech.get('type')
preference = 0
if edge_id == prefered_edge:
preference += 1
if tech_type == default_tech:
preference += 1
format_id = []
if edge_id:
format_id.append(edge_id)
if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
format_id.append('hls')
formats.extend(self._extract_m3u8_formats(
update_url_query(
'https://%s/hls/%s/index.m3u8'
% (edge_ep, channel_id), params),
channel_id, 'mp4', preference=preference,
m3u8_id='-'.join(format_id), fatal=False))
continue
elif tech_type == 'video/mp4' or tech_label == 'MP4':
format_id.append('mp4')
formats.append({
'url': update_url_query(
'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
params),
'format_id': '-'.join(format_id),
'preference': preference,
})
else:
# rtmp format does not seem to work
continue
self._sort_formats(formats) self._sort_formats(formats)
mature = player.get('mature')
if mature is None:
age_limit = None
else:
age_limit = 18 if mature is True else 0
return { return {
'id': channel_id, 'id': channel_id,
'formats': formats,
'ext': 'mp4',
'title': self._live_title(channel_id), 'title': self._live_title(channel_id),
'is_live': True, 'is_live': True,
'thumbnail': player_settings.get('vodThumb'), 'thumbnail': player.get('vodThumb'),
'age_limit': 18 if player_settings.get('mature') else None, 'age_limit': age_limit,
'formats': formats,
} }
class PicartoVodIE(InfoExtractor): class PicartoVodIE(InfoExtractor):
_VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[a-zA-Z0-9_\-\.]+).flv' _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
_TEST = { _TESTS = [{
'url': 'https://picarto.tv/videopopout/Carrot_2018.01.11.07.55.12.flv', 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
'md5': '80765b67813053ff31d4df2bd5e900ce', 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
'info_dict': { 'info_dict': {
'id': 'Carrot_2018.01.11.07.55.12', 'id': 'ArtofZod_2017.12.12.00.13.23.flv',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Carrot_2018.01.11.07.55.12', 'title': 'ArtofZod_2017.12.12.00.13.23.flv',
'thumbnail': r're:^https?://.*\.jpg$' 'thumbnail': r're:^https?://.*\.jpg'
} },
} }, {
'url': 'https://picarto.tv/videopopout/Plague',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
vod_info_js = self._html_search_regex(r'(?s)"#vod-player",\s*(\{.+?\})\)', vod_info = self._parse_json(
webpage, video_id) self._search_regex(
vod_info = self._parse_json(vod_info_js, video_id, transform_source=js_to_json) r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
video_id),
video_id, transform_source=js_to_json)
formats = self._extract_m3u8_formats(
vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': video_id, 'title': video_id,
'ext': 'mp4',
'protocol': 'm3u8',
'url': vod_info['vod'],
'thumbnail': vod_info.get('vodThumb'), 'thumbnail': vod_info.get('vodThumb'),
'formats': formats,
} }