From 9dde0e04e6d952977ecfd85ceac883106e7ac1ee Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 23 Oct 2016 23:22:09 +0800 Subject: [PATCH] [litv] Fix extraction (#11006) --- ChangeLog | 3 +++ youtube_dl/extractor/litv.py | 39 ++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index 825e357a4..7dabde861 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,9 @@ version Core * Running youtube-dl in the background is fixed (#10996, #10706, #955) +Extractors +* [litv] Fix extraction + version 2016.10.21.1 diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index a3784e6c6..ded717cf2 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import json -import re from .common import InfoExtractor from ..utils import ( @@ -52,8 +51,8 @@ class LiTVIE(InfoExtractor): 'skip': 'Georestricted to Taiwan', }] - def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=True): - episode_title = view_data['title'] + def _extract_playlist(self, season_list, video_id, program_info, prompt=True): + episode_title = program_info['title'] content_id = season_list['contentId'] if prompt: @@ -61,7 +60,7 @@ def _extract_playlist(self, season_list, video_id, vod_data, view_data, prompt=T all_episodes = [ self.url_result(smuggle_url( - self._URL_TEMPLATE % (view_data['contentType'], episode['contentId']), + self._URL_TEMPLATE % (program_info['contentType'], episode['contentId']), {'force_noplaylist': True})) # To prevent infinite recursion for episode in season_list['episode']] @@ -80,19 +79,15 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) - view_data = dict(map(lambda t: (t[0], t[2]), re.findall( - r'viewData\.([a-zA-Z]+)\s*=\s*(["\'])([^"\']+)\2', - webpage))) - - vod_data = self._parse_json(self._search_regex( - 'var\s+vod\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), + program_info = self._parse_json(self._search_regex( + 'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), video_id) - season_list = list(vod_data.get('seasonList', {}).values()) + season_list = list(program_info.get('seasonList', {}).values()) if season_list: if not noplaylist: return self._extract_playlist( - season_list[0], video_id, vod_data, view_data, + season_list[0], video_id, program_info, prompt=noplaylist_prompt) if noplaylist_prompt: @@ -102,8 +97,8 @@ def _real_extract(self, url): # endpoint gives the same result as the data embedded in the webpage. # If georestricted, there are no embedded data, so an extra request is # necessary to get the error code - if 'assetId' not in view_data: - view_data = self._download_json( + if 'assetId' not in program_info: + program_info = self._download_json( 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, query={'contentId': video_id}, headers={'Accept': 'application/json'}) @@ -112,9 +107,9 @@ def _real_extract(self, url): webpage, 'video data', default='{}'), video_id) if not video_data: payload = { - 'assetId': view_data['assetId'], - 'watchDevices': view_data['watchDevices'], - 'contentType': view_data['contentType'], + 'assetId': program_info['assetId'], + 'watchDevices': program_info['watchDevices'], + 'contentType': program_info['contentType'], } video_data = self._download_json( 'https://www.litv.tv/vod/getMainUrl', video_id, @@ -136,11 +131,11 @@ def _real_extract(self, url): # LiTV HLS segments doesn't like compressions a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = True - title = view_data['title'] + view_data.get('secondaryMark', '') - description = view_data.get('description') - thumbnail = view_data.get('imageFile') - categories = [item['name'] for item in vod_data.get('category', [])] - episode = int_or_none(view_data.get('episode')) + title = program_info['title'] + program_info.get('secondaryMark', '') + description = program_info.get('description') + thumbnail = program_info.get('imageFile') + categories = [item['name'] for item in program_info.get('category', [])] + episode = int_or_none(program_info.get('episode')) return { 'id': video_id,