[ellentv] Improve extraction (Closes #10067)

This commit is contained in:
Sergey M․ 2016-07-13 22:41:46 +07:00
parent 0385aa6199
commit a0560d8ab8
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -6,12 +6,13 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
NO_DEFAULT,
) )
class EllenTVIE(InfoExtractor): class EllenTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
_TEST = { _TESTS = [{
'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
'md5': '4294cf98bc165f218aaa0b89e0fd8042', 'md5': '4294cf98bc165f218aaa0b89e0fd8042',
'info_dict': { 'info_dict': {
@ -22,24 +23,47 @@ class EllenTVIE(InfoExtractor):
'timestamp': 1428035648, 'timestamp': 1428035648,
'upload_date': '20150403', 'upload_date': '20150403',
'uploader_id': 'batchUser', 'uploader_id': 'batchUser',
} },
} }, {
# not available via http://widgets.ellentube.com/
'url': 'http://www.ellentv.com/videos/1-szkgu2m2/',
'info_dict': {
'id': '1_szkgu2m2',
'ext': 'flv',
'title': "Ellen's Amazingly Talented Audience",
'description': 'md5:86ff1e376ff0d717d7171590e273f0a5',
'timestamp': 1255140900,
'upload_date': '20091010',
'uploader_id': 'ellenkaltura@gmail.com',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
URLS = ('http://widgets.ellentube.com/videos/%s' % video_id, url)
for num, url_ in enumerate(URLS, 1):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://widgets.ellentube.com/videos/%s' % video_id, url_, video_id, fatal=num == len(URLS))
video_id)
default = NO_DEFAULT if num == len(URLS) else None
partner_id = self._search_regex( partner_id = self._search_regex(
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id') r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id',
default=default)
kaltura_id = self._search_regex( kaltura_id = self._search_regex(
[r'id="kaltura_player_([^"]+)"', [r'id="kaltura_player_([^"]+)"',
r"_wb_entry_id\s*:\s*'([^']+)", r"_wb_entry_id\s*:\s*'([^']+)",
r'data-kaltura-entry-id="([^"]+)'], r'data-kaltura-entry-id="([^"]+)'],
webpage, 'kaltura id') webpage, 'kaltura id', default=default)
if partner_id and kaltura_id:
break
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura') return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')