From 77aa6b329da84335ac3f183e0ed04198b318a990 Mon Sep 17 00:00:00 2001 From: dst Date: Thu, 19 Dec 2013 05:28:16 +0700 Subject: [PATCH 1/5] [ivi] Add support for ivi.ru --- youtube_dl/extractor/__init__.py | 4 + youtube_dl/extractor/ivi.py | 154 +++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 youtube_dl/extractor/ivi.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7f2f8806e..677a894b3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -82,6 +82,10 @@ from .infoq import InfoQIE from .instagram import InstagramIE from .internetvideoarchive import InternetVideoArchiveIE +from .ivi import ( + IviIE, + IviCompilationIE +) from .jeuxvideo import JeuxVideoIE from .jukebox import JukeboxIE from .justintv import JustinTVIE diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py new file mode 100644 index 000000000..aa8b3b8a3 --- /dev/null +++ b/youtube_dl/extractor/ivi.py @@ -0,0 +1,154 @@ +# encoding: utf-8 + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + ExtractorError, +) + + +class IviIE(InfoExtractor): + IE_DESC = u'ivi.ru' + IE_NAME = u'ivi' + _VALID_URL = r'^https?://(?:www\.)?(?Pivi\.ru/watch(?:/(?P[^/]+))?/(?P\d+))' + + _TESTS = [ + # Single movie + { + u'url': u'http://www.ivi.ru/watch/53141', + u'file': u'53141.mp4', + u'md5': u'6ff5be2254e796ed346251d117196cf4', + u'info_dict': { + u'title': u'Иван Васильевич меняет профессию', + u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346', + u'duration': 5498, + u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', + }, + }, + # Serial's serie + { + u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791', + u'file': u'74791.mp4', + u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9', + u'info_dict': { + u'title': u'Дежурный ангел - 1 серия', + u'duration': 2490, + u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', + }, + } + ] + + # Sorted by quality + _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ'] + + # Sorted by size + _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480'] + + def _extract_description(self, html): + m = re.search(r'', html) + return m.group('description') if m is not None else None + + def _extract_comment_count(self, html): + m = re.search(u'(?s)\s*Комментарии:\s*(?P\d+)\s*', html) + return int(m.group('commentcount')) if m is not None else 0 + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + + api_url = 'http://api.digitalaccess.ru/api/json/' + + data = {u'method': u'da.content.get', + u'params': [video_id, {u'site': u's183', + u'referrer': u'http://www.ivi.ru/watch/%s' % video_id, + u'contentid': video_id + } + ] + } + + request = compat_urllib_request.Request(api_url, json.dumps(data)) + + video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON') + video_json = json.loads(video_json_page) + + if u'error' in video_json: + error = video_json[u'error'] + if error[u'origin'] == u'NoRedisValidData': + raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) + raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True) + + result = video_json[u'result'] + + formats = [{'url': x[u'url'], + 'format_id': x[u'content_format'] + } for x in result[u'files'] if x[u'content_format'] in self._known_formats] + formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id'])) + + if len(formats) == 0: + self._downloader.report_warning(u'No media links available for %s' % video_id) + return + + duration = result[u'duration'] + compilation = result[u'compilation'] + title = result[u'title'] + + title = '%s - %s' % (compilation, title) if compilation is not None else title + + previews = result[u'preview'] + previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) + thumbnail = previews[-1][u'url'] if len(previews) > 0 else None + + video_page_url = 'http://' + mobj.group('url') + video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') + + description = self._extract_description(video_page) + comment_count = self._extract_comment_count(video_page) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'description': description, + 'duration': duration, + 'comment_count': comment_count, + 'formats': formats, + } + + +class IviCompilationIE(InfoExtractor): + IE_DESC = u'ivi.ru compilations' + IE_NAME = u'ivi:compilation' + _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P[a-z\d_-]+)(?:/season(?P\d+))?$' + + def _extract_entries(self, html, compilation_id): + return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi') + for serie in re.findall(r'(?:[^<]+)' % compilation_id, html)] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + compilation_id = mobj.group('compilationid') + season_id = mobj.group('seasonid') + + if season_id is not None: # Season link + season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id) + playlist_id = '%s/season%s' % (compilation_id, season_id) + playlist_title = self._html_search_meta(u'title', season_page, u'title') + entries = self._extract_entries(season_page, compilation_id) + else: # Compilation link + compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page') + playlist_id = compilation_id + playlist_title = self._html_search_meta(u'title', compilation_page, u'title') + seasons = re.findall(r'[^<]+' % compilation_id, compilation_page) + if len(seasons) == 0: # No seasons in this compilation + entries = self._extract_entries(compilation_page, compilation_id) + else: + entries = [] + for season_id in seasons: + season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id), + compilation_id, u'Downloading season %s web page' % season_id) + entries.extend(self._extract_entries(season_page, compilation_id)) + + return self.playlist_result(entries, playlist_id, playlist_title) \ No newline at end of file From 8c21b7c647d5328388cb5be6af6cbe9f6143485c Mon Sep 17 00:00:00 2001 From: dst Date: Thu, 19 Dec 2013 05:39:22 +0700 Subject: [PATCH 2/5] [ivi] Add playlist tests --- test/test_playlists.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/test_playlists.py b/test/test_playlists.py index 5004d0464..576f7fb4e 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -6,6 +6,7 @@ import os import sys import unittest +from youtube_dl.extractor.ivi import IviCompilationIE sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL @@ -168,6 +169,24 @@ def test_AcademicEarthCourse(self): self.assertEqual(result['title'], u'Building Dynamic Websites') self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.") self.assertEqual(len(result['entries']), 10) + + def test_ivi_compilation(self): + dl = FakeYDL() + ie = IviCompilationIE(dl) + result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'dezhurnyi_angel') + self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012)') + self.assertTrue(len(result['entries']) >= 36) + + def test_ivi_compilation_season(self): + dl = FakeYDL() + ie = IviCompilationIE(dl) + result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], u'dezhurnyi_angel/season2') + self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') + self.assertTrue(len(result['entries']) >= 20) if __name__ == '__main__': From 5ce54a8205df1eea7e38ed86d3540f601f300a7e Mon Sep 17 00:00:00 2001 From: dst Date: Thu, 19 Dec 2013 05:53:34 +0700 Subject: [PATCH 3/5] [ivi] Neat import --- test/test_playlists.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 576f7fb4e..1b7b4e3d8 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -6,7 +6,6 @@ import os import sys import unittest -from youtube_dl.extractor.ivi import IviCompilationIE sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL @@ -28,7 +27,8 @@ BambuserChannelIE, BandcampAlbumIE, SmotriCommunityIE, - SmotriUserIE + SmotriUserIE, + IviCompilationIE ) From 6c6db72ed4505520d80002e37b523e4177146979 Mon Sep 17 00:00:00 2001 From: dst Date: Thu, 19 Dec 2013 06:19:41 +0700 Subject: [PATCH 4/5] [ivi] Skip tests for travis build --- youtube_dl/extractor/ivi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index aa8b3b8a3..10279478b 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -27,6 +27,7 @@ class IviIE(InfoExtractor): u'duration': 5498, u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg', }, + u'skip': u'Only works from Russia', }, # Serial's serie { @@ -38,6 +39,7 @@ class IviIE(InfoExtractor): u'duration': 2490, u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', }, + u'skip': u'Only works from Russia', } ] From a51e37af6242a3fa49ad258a63d3f1a40c0ef9f2 Mon Sep 17 00:00:00 2001 From: dst Date: Thu, 19 Dec 2013 10:53:38 +0700 Subject: [PATCH 5/5] [ivi] Simplify --- youtube_dl/extractor/ivi.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 10279478b..4bdf55f93 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -13,7 +13,7 @@ class IviIE(InfoExtractor): IE_DESC = u'ivi.ru' IE_NAME = u'ivi' - _VALID_URL = r'^https?://(?:www\.)?(?Pivi\.ru/watch(?:/(?P[^/]+))?/(?P\d+))' + _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P[^/]+))?/(?P\d+)' _TESTS = [ # Single movie @@ -103,9 +103,7 @@ def _real_extract(self, url): previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format'])) thumbnail = previews[-1][u'url'] if len(previews) > 0 else None - video_page_url = 'http://' + mobj.group('url') - video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page') - + video_page = self._download_webpage(url, video_id, u'Downloading video page') description = self._extract_description(video_page) comment_count = self._extract_comment_count(video_page)