[animeondemand] Add support for full length films (Closes #10031)

This commit is contained in:
Sergey M․ 2016-07-09 06:57:04 +07:00
parent 0af985069b
commit 1f7258a367
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -22,6 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply' _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
_NETRC_MACHINE = 'animeondemand' _NETRC_MACHINE = 'animeondemand'
_TESTS = [{ _TESTS = [{
# jap, OmU
'url': 'https://www.anime-on-demand.de/anime/161', 'url': 'https://www.anime-on-demand.de/anime/161',
'info_dict': { 'info_dict': {
'id': '161', 'id': '161',
@ -30,17 +31,21 @@ class AnimeOnDemandIE(InfoExtractor):
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
}, { }, {
# Film wording is used instead of Episode # Film wording is used instead of Episode, ger/jap, Dub/OmU
'url': 'https://www.anime-on-demand.de/anime/39', 'url': 'https://www.anime-on-demand.de/anime/39',
'only_matching': True, 'only_matching': True,
}, { }, {
# Episodes without titles # Episodes without titles, jap, OmU
'url': 'https://www.anime-on-demand.de/anime/162', 'url': 'https://www.anime-on-demand.de/anime/162',
'only_matching': True, 'only_matching': True,
}, { }, {
# ger/jap, Dub/OmU, account required # ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/169', 'url': 'https://www.anime-on-demand.de/anime/169',
'only_matching': True, 'only_matching': True,
}, {
# Full length film, non-series, ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/185',
'only_matching': True,
}] }]
def _login(self): def _login(self):
@ -110,35 +115,12 @@ def _real_extract(self, url):
entries = [] entries = []
for num, episode_html in enumerate(re.findall( def extract_info(html, video_id):
r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1): title, description = [None] * 2
episodebox_title = self._search_regex(
(r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
episode_html, 'episodebox title', default=None, group='title')
if not episodebox_title:
continue
episode_number = int(self._search_regex(
r'(?:Episode|Film)\s*(\d+)',
episodebox_title, 'episode number', default=num))
episode_title = self._search_regex(
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
episodebox_title, 'episode title', default=None)
video_id = 'episode-%d' % episode_number
common_info = {
'id': video_id,
'series': anime_title,
'episode': episode_title,
'episode_number': episode_number,
}
formats = [] formats = []
for input_ in re.findall( for input_ in re.findall(
r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html): r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', html):
attributes = extract_attributes(input_) attributes = extract_attributes(input_)
playlist_urls = [] playlist_urls = []
for playlist_key in ('data-playlist', 'data-otherplaylist'): for playlist_key in ('data-playlist', 'data-otherplaylist'):
@ -215,28 +197,74 @@ def _real_extract(self, url):
}) })
formats.extend(file_formats) formats.extend(file_formats)
if formats: return {
self._sort_formats(formats) 'title': title,
'description': description,
'formats': formats,
}
def extract_entries(html, video_id, common_info):
info = extract_info(html, video_id)
if info['formats']:
self._sort_formats(info['formats'])
f = common_info.copy() f = common_info.copy()
f.update({ f.update(info)
'title': title,
'description': description,
'formats': formats,
})
entries.append(f) entries.append(f)
# Extract teaser only when full episode is not available # Extract teaser/trailer only when full episode is not available
if not formats: if not info['formats']:
m = re.search( m = re.search(
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
episode_html) html)
if m: if m:
f = common_info.copy() f = common_info.copy()
f.update({ f.update({
'id': '%s-teaser' % f['id'], 'id': '%s-%s' % (f['id'], m.group('kind').lower()),
'title': m.group('title'), 'title': m.group('title'),
'url': compat_urlparse.urljoin(url, m.group('href')), 'url': compat_urlparse.urljoin(url, m.group('href')),
}) })
entries.append(f) entries.append(f)
def extract_episodes(html):
    """Find every episode box in *html* and hand each one to extract_entries.

    The episode number is parsed from the box title; when the title carries
    no number, the 1-based position of the box on the page is used instead.
    """
    episode_boxes = re.findall(
        r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html)
    for position, box_html in enumerate(episode_boxes, 1):
        box_title = self._search_regex(
            (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
             r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
            box_html, 'episodebox title', default=None, group='title')
        # Boxes without a usable title are skipped, but they still count
        # towards the positional fallback number.
        if box_title:
            number = int(self._search_regex(
                r'(?:Episode|Film)\s*(\d+)',
                box_title, 'episode number', default=position))
            name = self._search_regex(
                r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
                box_title, 'episode title', default=None)

            video_id = 'episode-%d' % number

            extract_entries(box_html, video_id, {
                'id': video_id,
                'series': anime_title,
                'episode': name,
                'episode_number': number,
            })
def extract_film(html, video_id):
    """Treat the anime page as a single full length (non-series) film.

    *html* is the markup handed on to extract_entries, and *video_id*
    becomes the 'id' of the shared entry info. Title and description come
    from the enclosing anime_title and anime_description.
    """
    common_info = {
        # Use the argument rather than the enclosing anime_id so the entry
        # id always matches the id forwarded to extract_entries.
        'id': video_id,
        'title': anime_title,
        'description': anime_description,
    }

    extract_entries(html, video_id, common_info)
extract_episodes(webpage)
if not entries:
extract_film(webpage, anime_id)
return self.playlist_result(entries, anime_id, anime_title, anime_description) return self.playlist_result(entries, anime_id, anime_title, anime_description)