[plays] Improve extraction and add support for embed URLs

This commit is contained in:
Sergey M․ 2016-11-12 23:08:05 +07:00
parent f076d7972c
commit 3d2729514f
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -8,8 +8,8 @@
class PlaysTVIE(InfoExtractor): class PlaysTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})' _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})'
_TEST = { _TESTS = [{
'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall',
'md5': 'dfeac1198506652b5257a62762cec7bc', 'md5': 'dfeac1198506652b5257a62762cec7bc',
'info_dict': { 'info_dict': {
@@ -18,14 +18,18 @@ class PlaysTVIE(InfoExtractor):
'title': 'Bjergsen - When you outplay the Azir wall', 'title': 'Bjergsen - When you outplay the Azir wall',
'description': 'Posted by Bjergsen', 'description': 'Posted by Bjergsen',
} }
} }, {
'url': 'https://plays.tv/embeds/56af17f56c95335490',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(
'https://plays.tv/video/%s' % video_id, video_id)
info = self._search_json_ld(webpage, video_id,)
content = self._search_json_ld(webpage, video_id)
title = content['title']
mpd_url, sources = re.search( mpd_url, sources = re.search(
r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>',
webpage).groups() webpage).groups()
@@ -39,10 +43,11 @@ def _real_extract(self, url):
}) })
self._sort_formats(formats) self._sort_formats(formats)
return { info.update({
'id': video_id, 'id': video_id,
'title': title,
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage),
'formats': formats, 'formats': formats,
} })
return info