[vier] Add support for vijf.be

vier.be and vijf.be run on the same CMS and are owned by the same company,
so the same extractor can be used for both of them.
This commit is contained in:
Lars Vierbergen 2017-03-04 17:47:19 +01:00 committed by Sergey M
parent 054a587de8
commit a3ba8a7acf

View File

@ -9,7 +9,7 @@
class VierIE(InfoExtractor): class VierIE(InfoExtractor):
IE_NAME = 'vier' IE_NAME = 'vier'
_VALID_URL = r'https?://(?:www\.)?vier\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))' _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?:[^/]+/videos/(?P<display_id>[^/]+)(?:/(?P<id>\d+))?|video/v3/embed/(?P<embed_id>\d+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129',
'info_dict': { 'info_dict': {
@ -23,6 +23,19 @@ class VierIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://www.vijf.be/temptationisland/videos/zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas/2561614',
'info_dict': {
'id': '2561614',
'display_id': 'zo-grappig-temptation-island-hosts-moeten-kiezen-tussen-onmogelijke-dilemmas',
'ext': 'mp4',
'title': 'ZO grappig: Temptation Island hosts moeten kiezen tussen onmogelijke dilemma\'s',
'description': 'Het spel is simpel: Annelien Coorevits en Rick Brandsteder krijgen telkens 2 dilemma\'s voorgeschoteld en ze MOETEN een keuze maken.',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen', 'url': 'http://www.vier.be/planb/videos/mieren-herders-van-de-bladluizen',
'only_matching': True, 'only_matching': True,
@ -35,6 +48,7 @@ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
embed_id = mobj.group('embed_id') embed_id = mobj.group('embed_id')
display_id = mobj.group('display_id') or embed_id display_id = mobj.group('display_id') or embed_id
site = mobj.group('site')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
@ -43,7 +57,7 @@ def _real_extract(self, url):
webpage, 'video id') webpage, 'video id')
application = self._search_regex( application = self._search_regex(
[r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'],
webpage, 'application', default='vier_vod') webpage, 'application', default=site + '_vod')
filename = self._search_regex( filename = self._search_regex(
[r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'],
webpage, 'filename') webpage, 'filename')
@ -68,13 +82,19 @@ def _real_extract(self, url):
class VierVideosIE(InfoExtractor): class VierVideosIE(InfoExtractor):
IE_NAME = 'vier:videos' IE_NAME = 'vier:videos'
_VALID_URL = r'https?://(?:www\.)?vier\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)' _VALID_URL = r'https?://(?:www\.)?(?P<site>vier|vijf)\.be/(?P<program>[^/]+)/videos(?:\?.*\bpage=(?P<page>\d+)|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vier.be/demoestuin/videos', 'url': 'http://www.vier.be/demoestuin/videos',
'info_dict': { 'info_dict': {
'id': 'demoestuin', 'id': 'demoestuin',
}, },
'playlist_mincount': 153, 'playlist_mincount': 153,
}, {
'url': 'http://www.vijf.be/temptationisland/videos',
'info_dict': {
'id': 'temptationisland',
},
'playlist_mincount': 159,
}, { }, {
'url': 'http://www.vier.be/demoestuin/videos?page=6', 'url': 'http://www.vier.be/demoestuin/videos?page=6',
'info_dict': { 'info_dict': {
@ -92,6 +112,7 @@ class VierVideosIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
program = mobj.group('program') program = mobj.group('program')
site = mobj.group('site')
page_id = mobj.group('page') page_id = mobj.group('page')
if page_id: if page_id:
@ -105,13 +126,13 @@ def _real_extract(self, url):
entries = [] entries = []
for current_page_id in itertools.count(start_page): for current_page_id in itertools.count(start_page):
current_page = self._download_webpage( current_page = self._download_webpage(
'http://www.vier.be/%s/videos?page=%d' % (program, current_page_id), 'http://www.%s.be/%s/videos?page=%d' % (site, program, current_page_id),
program, program,
'Downloading page %d' % (current_page_id + 1)) 'Downloading page %d' % (current_page_id + 1))
page_entries = [ page_entries = [
self.url_result('http://www.vier.be' + video_url, 'Vier') self.url_result('http://www.' + site + '.be' + video_url, 'Vier')
for video_url in re.findall( for video_url in re.findall(
r'<h3><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)] r'<h[23]><a href="(/[^/]+/videos/[^/]+(?:/\d+)?)">', current_page)]
entries.extend(page_entries) entries.extend(page_entries)
if page_id or '>Meer<' not in current_page: if page_id or '>Meer<' not in current_page:
break break