[radiojavan] Improve extraction (closes #17151)

This commit is contained in:
Sergey M․ 2018-09-03 02:53:26 +07:00
parent 0a9a8118ce
commit 93284ff2ea
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -4,15 +4,16 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
unified_strdate, parse_resolution,
str_to_int, str_to_int,
unified_strdate,
urlencode_postdata, urlencode_postdata,
urljoin,
) )
class RadioJavanIE(InfoExtractor): class RadioJavanIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
_HOST_TRACKER_URL = 'https://www.radiojavan.com/videos/video_host'
_TEST = { _TEST = {
'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
'md5': 'e85208ffa3ca8b83534fca9fe19af95b', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
@ -31,23 +32,26 @@ class RadioJavanIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
download_host = self._download_json( download_host = self._download_json(
self._HOST_TRACKER_URL, 'https://www.radiojavan.com/videos/video_host', video_id,
video_id,
data=urlencode_postdata({'id': video_id}), data=urlencode_postdata({'id': video_id}),
headers={ headers={
'Content-Type': 'application/x-www-form-urlencoded', 'Content-Type': 'application/x-www-form-urlencoded',
'Referer': url, 'Referer': url,
} }).get('host', 'https://host1.rjmusicmedia.com')
)['host']
formats = [{ webpage = self._download_webpage(url, video_id)
'url': '%s/%s' % (download_host, video_path),
'format_id': '%sp' % height, formats = []
'height': int(height), for format_id, _, video_path in re.findall(
} for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
webpage):
f = parse_resolution(format_id)
f.update({
'url': urljoin(download_host, video_path),
'format_id': format_id,
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
title = self._og_search_title(webpage) title = self._og_search_title(webpage)