[ted] Fix error when a video has no subtitles

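I created a test, but I left it commented out: TED videos get new
subtitles frequently, so a test that expects a talk to have no subtitles
would start failing as soon as that talk is subtitled.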
This commit is contained in:
Ismaël Mejía 2013-11-05 12:00:13 +01:00
parent 38fcd4597a
commit 4ed3e51080
2 changed files with 22 additions and 8 deletions

View File

@ -51,6 +51,12 @@ def test_automatic_captions(self):
self.DL.params['subtitleslang'] = ['en'] self.DL.params['subtitleslang'] = ['en']
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0) self.assertTrue(len(subtitles.keys()) == 0)
# def test_nosubtitles(self):
# self.DL.expect_warning(u'video doesn\'t have subtitles')
# self.url = 'http://www.ted.com/talks/rodrigo_canales_the_deadly_genius_of_drug_cartels.html'
# self.DL.params['writesubtitles'] = True
# self.DL.params['allsubtitles'] = True
# subtitles = self.getSubtitles()
def test_multiple_langs(self): def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de'] langs = ['es', 'fr', 'de']

View File

@ -3,6 +3,11 @@
from .subtitles import SubtitlesInfoExtractor from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_str,
RegexNotFoundError,
)
class TEDIE(SubtitlesInfoExtractor): class TEDIE(SubtitlesInfoExtractor):
_VALID_URL=r'''http://www\.ted\.com/ _VALID_URL=r'''http://www\.ted\.com/
( (
@ -105,12 +110,15 @@ def _talk_info(self, url, video_id=0):
return info return info
def _get_available_subtitles(self, video_id, webpage): def _get_available_subtitles(self, video_id, webpage):
options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) try:
languages = re.findall(r'(?:<option value=")(\S+)"', options) options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL)
if languages: languages = re.findall(r'(?:<option value=")(\S+)"', options)
sub_lang_list = {} if languages:
for l in languages: sub_lang_list = {}
url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) for l in languages:
sub_lang_list[l] = url url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
return sub_lang_list sub_lang_list[l] = url
return sub_lang_list
except RegexNotFoundError as err:
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {} return {}