[franceculture] Add extractor for '/emission-*' urls (closes #3777, closes #8022)

This commit is contained in:
flatgreen 2015-12-27 15:30:45 +01:00 committed by Jaime Marquínez Ferrándiz
parent 7447661e9b
commit ecf17d1653
2 changed files with 42 additions and 3 deletions

View File

@ -203,7 +203,10 @@
from .foxgay import FoxgayIE from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE from .foxnews import FoxNewsIE
from .foxsports import FoxSportsIE from .foxsports import FoxSportsIE
from .franceculture import FranceCultureIE from .franceculture import (
FranceCultureIE,
FranceCultureEmissionIE,
)
from .franceinter import FranceInterIE from .franceinter import FranceInterIE
from .francetv import ( from .francetv import (
PluzzIE, PluzzIE,

View File

@ -8,6 +8,7 @@
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
ExtractorError,
) )
@ -28,8 +29,7 @@ class FranceCultureIE(InfoExtractor):
} }
} }
def _real_extract(self, url): def _extract_from_player(self, url, video_id):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_path = self._search_regex( video_path = self._search_regex(
@ -42,6 +42,9 @@ def _real_extract(self, url):
r'<a id="player".*?>\s+<img src="([^"]+)"', r'<a id="player".*?>\s+<img src="([^"]+)"',
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail', fatal=False)
display_id = self._search_regex(
r'<span class="path-diffusion">emission-(.*?)</span>', webpage, 'display_id')
title = self._html_search_regex( title = self._html_search_regex(
r'<span class="title-diffusion">(.*?)</span>', webpage, 'title') r'<span class="title-diffusion">(.*?)</span>', webpage, 'title')
alt_title = self._html_search_regex( alt_title = self._html_search_regex(
@ -66,4 +69,37 @@ def _real_extract(self, url):
'alt_title': alt_title, 'alt_title': alt_title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'description': description, 'description': description,
'display_id': display_id,
} }
def _real_extract(self, url):
    """Extract the info dict for ``url`` via the shared player-page routine."""
    # The id is taken from ``url`` by ``_match_id``; the same URL is the
    # page handed on for extraction.
    return self._extract_from_player(url, self._match_id(url))
class FranceCultureEmissionIE(FranceCultureIE):
    """Extractor for franceculture.fr ``/emission-*`` pages.

    The page is searched for a link with class ``rf-player-open``; the
    numeric ``play=`` id found in that link and the link's absolute URL are
    then passed to ``_extract_from_player``, inherited from
    ``FranceCultureIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emission-(?P<id>[^?#]+)'
    _TEST = {
        'url': 'http://www.franceculture.fr/emission-les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
        'info_dict': {
            'title': 'Jean-Gabriel Périot, cinéaste',
            'alt_title': 'Les Carnets de la création',
            'id': '5093239',
            'display_id': 'les-carnets-de-la-creation-jean-gabriel-periot-cineaste-2015-10-13',
            'ext': 'mp3',
            'timestamp': 1444762500,
            'upload_date': '20151013',
            'description': 'startswith:Aujourd\'hui dans "Les carnets de la création", le cinéaste',
        },
    }

    def _real_extract(self, url):
        """Resolve an emission page to its player page and extract from it.

        Raises:
            ExtractorError: (with ``expected=True``) when the page contains
                no ``rf-player-open`` link.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # ``default=None`` replaces the former magic-string sentinel: a
        # missing player link yields None, which can never be mistaken for
        # a real href value.
        video_path = self._html_search_regex(
            r'<a class="rf-player-open".*?href="([^"]+)"',
            webpage, 'video path', default=None)
        if video_path is None:
            raise ExtractorError(
                'no player : no sound in this page.', expected=True)

        # The player link carries the numeric id as a ``play=`` parameter.
        new_id = self._search_regex(
            r'play=(?P<id>[0-9]+)', video_path, 'new_id', group='id')
        # The href is joined against the page URL to get an absolute URL.
        video_url = compat_urlparse.urljoin(url, video_path)
        return self._extract_from_player(video_url, new_id)