[hotstar] fix and improve extraction

- fix format extraction (closes #26690)
- extract thumbnail URL (closes #16079, closes #20412)
- support country specific playlist URLs (closes #23496)
- select the last id in video URL (closes #26412)
This commit is contained in:
Remita Amine 2020-12-12 23:02:11 +01:00
parent bcc8ef0a5a
commit bb38a12157

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
import hashlib import hashlib
import hmac import hmac
import json
import re import re
import time import time
import uuid import uuid
@ -25,43 +26,50 @@ from ..utils import (
class HotStarBaseIE(InfoExtractor): class HotStarBaseIE(InfoExtractor):
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
def _call_api_impl(self, path, video_id, query): def _call_api_impl(self, path, video_id, headers, query, data=None):
st = int(time.time()) st = int(time.time())
exp = st + 6000 exp = st + 6000
auth = 'st=%d~exp=%d~acl=/*' % (st, exp) auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
response = self._download_json( h = {'hotstarauth': auth}
'https://api.hotstar.com/' + path, video_id, headers={ h.update(headers)
'hotstarauth': auth, return self._download_json(
'x-country-code': 'IN', 'https://api.hotstar.com/' + path,
'x-platform-code': 'JIO', video_id, headers=h, query=query, data=data)
}, query=query)
def _call_api(self, path, video_id, query_name='contentId'):
response = self._call_api_impl(path, video_id, {
'x-country-code': 'IN',
'x-platform-code': 'JIO',
}, {
query_name: video_id,
'tas': 10000,
})
if response['statusCode'] != 'OK': if response['statusCode'] != 'OK':
raise ExtractorError( raise ExtractorError(
response['body']['message'], expected=True) response['body']['message'], expected=True)
return response['body']['results'] return response['body']['results']
def _call_api(self, path, video_id, query_name='contentId'): def _call_api_v2(self, path, video_id, headers, query=None, data=None):
return self._call_api_impl(path, video_id, { h = {'X-Request-Id': compat_str(uuid.uuid4())}
query_name: video_id, h.update(headers)
'tas': 10000, try:
}) return self._call_api_impl(
path, video_id, h, query, data)
def _call_api_v2(self, path, video_id): except ExtractorError as e:
return self._call_api_impl( if isinstance(e.cause, compat_HTTPError):
'%s/in/contents/%s' % (path, video_id), video_id, { if e.cause.code == 402:
'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash', self.raise_login_required()
'client': 'mweb', message = self._parse_json(e.cause.read().decode(), video_id)['message']
'clientVersion': '6.18.0', if message in ('Content not available in region', 'Country is not supported'):
'deviceId': compat_str(uuid.uuid4()), raise self.raise_geo_restricted(message)
'osName': 'Windows', raise ExtractorError(message)
'osVersion': '10', raise e
})
class HotStarIE(HotStarBaseIE): class HotStarIE(HotStarBaseIE):
IE_NAME = 'hotstar' IE_NAME = 'hotstar'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})'
_TESTS = [{ _TESTS = [{
# contentData # contentData
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
@ -92,8 +100,13 @@ class HotStarIE(HotStarBaseIE):
# only available via api v2 # only available via api v2
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717',
'only_matching': True,
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_DEVICE_ID = None
_USER_TOKEN = None
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -121,7 +134,30 @@ class HotStarIE(HotStarBaseIE):
headers = {'Referer': url} headers = {'Referer': url}
formats = [] formats = []
geo_restricted = False geo_restricted = False
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
if not self._USER_TOKEN:
self._DEVICE_ID = compat_str(uuid.uuid4())
self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, {
'X-HS-Platform': 'PCTV',
'Content-Type': 'application/json',
}, data=json.dumps({
'device_ids': [{
'id': self._DEVICE_ID,
'type': 'device_id',
}],
}).encode())['user_identity']
playback_sets = self._call_api_v2(
'play/v2/playback/content/' + video_id, video_id, {
'X-HS-Platform': 'web',
'X-HS-AppVersion': '6.99.1',
'X-HS-UserToken': self._USER_TOKEN,
}, query={
'device-id': self._DEVICE_ID,
'desired-config': 'encryption:plain',
'os-name': 'Windows',
'os-version': '10',
})['data']['playBackSets']
for playback_set in playback_sets: for playback_set in playback_sets:
if not isinstance(playback_set, dict): if not isinstance(playback_set, dict):
continue continue
@ -163,19 +199,22 @@ class HotStarIE(HotStarBaseIE):
for f in formats: for f in formats:
f.setdefault('http_headers', {}).update(headers) f.setdefault('http_headers', {}).update(headers)
image = try_get(video_data, lambda x: x['image']['h'], compat_str)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None,
'description': video_data.get('description'), 'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')), 'duration': int_or_none(video_data.get('duration')),
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
'formats': formats, 'formats': formats,
'channel': video_data.get('channelName'), 'channel': video_data.get('channelName'),
'channel_id': video_data.get('channelId'), 'channel_id': str_or_none(video_data.get('channelId')),
'series': video_data.get('showName'), 'series': video_data.get('showName'),
'season': video_data.get('seasonName'), 'season': video_data.get('seasonName'),
'season_number': int_or_none(video_data.get('seasonNo')), 'season_number': int_or_none(video_data.get('seasonNo')),
'season_id': video_data.get('seasonId'), 'season_id': str_or_none(video_data.get('seasonId')),
'episode': title, 'episode': title,
'episode_number': int_or_none(video_data.get('episodeNo')), 'episode_number': int_or_none(video_data.get('episodeNo')),
} }
@ -183,7 +222,7 @@ class HotStarIE(HotStarBaseIE):
class HotStarPlaylistIE(HotStarBaseIE): class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist' IE_NAME = 'hotstar:playlist'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'info_dict': { 'info_dict': {
@ -193,6 +232,9 @@ class HotStarPlaylistIE(HotStarBaseIE):
}, { }, {
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):