[telecinco] Add support for article opening videos

This commit is contained in:
Sergey M․ 2020-03-01 03:09:19 +07:00
parent f8cbd8c963
commit 6d475d01d8
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -11,6 +11,7 @@
determine_ext, determine_ext,
int_or_none, int_or_none,
str_or_none, str_or_none,
try_get,
urljoin, urljoin,
) )
@ -24,7 +25,7 @@ class TelecincoIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1876350223', 'id': '1876350223',
'title': 'Bacalao con kokotxas al pil-pil', 'title': 'Bacalao con kokotxas al pil-pil',
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb', 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
}, },
'playlist': [{ 'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7', 'md5': 'adb28c37238b675dad0f042292f209a7',
@ -55,6 +56,26 @@ class TelecincoIE(InfoExtractor):
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477', 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
'duration': 50, 'duration': 50,
}, },
}, {
# video in opening's content
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
'info_dict': {
'id': '2907195140',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'description': 'md5:73f340a7320143d37ab895375b2bf13a',
},
'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7',
'info_dict': {
'id': 'TpI2EttSDAReWpJ1o0NVh2',
'ext': 'mp4',
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
'duration': 1015,
},
}],
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True, 'only_matching': True,
@ -138,14 +159,25 @@ def _real_extract(self, url):
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})', r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
webpage, 'article'), display_id)['article'] webpage, 'article'), display_id)['article']
title = article.get('title') title = article.get('title')
description = clean_html(article.get('leadParagraph')) description = clean_html(article.get('leadParagraph')) or ''
if article.get('editorialType') != 'VID': if article.get('editorialType') != 'VID':
entries = [] entries = []
for p in article.get('body', []): body = [article.get('opening')]
content = p.get('content') body.extend(try_get(article, lambda x: x['body'], list) or [])
if p.get('type') != 'video' or not content: for p in body:
if not isinstance(p, dict):
continue continue
entries.append(self._parse_content(content, url)) content = p.get('content')
if not content:
continue
type_ = p.get('type')
if type_ == 'paragraph':
content_str = str_or_none(content)
if content_str:
description += content_str
continue
if type_ == 'video' and isinstance(content, dict):
entries.append(self._parse_content(content, url))
return self.playlist_result( return self.playlist_result(
entries, str_or_none(article.get('id')), title, description) entries, str_or_none(article.get('id')), title, description)
content = article['opening']['content'] content = article['opening']['content']