mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-01-27 10:31:40 +01:00
[unige] Add new extractor
This enables to extract videos posted to the University of Geneva mediaserver, located at https://mediaserver.unige.ch. It supports: - Single courses/videos - Whole course "playlists" (accessible by clicking on the course's title) Signed-off-by: Frank Villaro-Dixon <frank@villaro-dixon.eu>
This commit is contained in:
parent
00ef748cc0
commit
f677ef143c
@ -1427,6 +1427,10 @@ from .ufctv import (
|
||||
UFCTVIE,
|
||||
UFCArabiaIE,
|
||||
)
|
||||
from .unige import (
|
||||
UnigeIE,
|
||||
UnigePlaylistIE,
|
||||
)
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .dlive import (
|
||||
|
99
youtube_dl/extractor/unige.py
Normal file
99
youtube_dl/extractor/unige.py
Normal file
@ -0,0 +1,99 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_HTTPError,
|
||||
)
|
||||
|
||||
|
||||
class UnigeIE(InfoExtractor):
|
||||
_VALID_URL = r'https://mediaserver.unige.ch/play/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediaserver.unige.ch/play/196613',
|
||||
'md5': 'xxxx',
|
||||
'info_dict': {
|
||||
'id': '196613',
|
||||
'display_id': '196613',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://mediaserver.unige.ch/proxy/196613/VN3-2569-2023-2024-09-19.mp4',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self, video_id):
|
||||
# Login credentials are per video group
|
||||
|
||||
username, password = self._get_login_info(netrc_machine=f'unige-mediaserver-{video_id}')
|
||||
if not username or not password:
|
||||
self.raise_login_required('You need a username/pwd to access this video')
|
||||
|
||||
try:
|
||||
secure_wp = f'https://mediaserver.unige.ch/proxy/{video_id}/secure.php?view=play&id={video_id}'
|
||||
self._download_webpage(
|
||||
secure_wp, None, 'Logging in',
|
||||
data=urlencode_postdata({
|
||||
'httpd_username': username,
|
||||
'httpd_password': password,
|
||||
}), headers={
|
||||
'Referer': secure_wp,
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
raise ExtractorError(
|
||||
'Unable to login: incorrect username and/or password',
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
try:
|
||||
# This dumb download only checks if we need to login, as authentication
|
||||
# is unique (and sometimes optional) for each video
|
||||
secure_wp = f'https://mediaserver.unige.ch/proxy/{video_id}/secure.php?view=play&id={video_id}'
|
||||
self._download_webpage(secure_wp, f'secure_{video_id}')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
self._login(video_id)
|
||||
else:
|
||||
# The video doesn't require login
|
||||
pass
|
||||
|
||||
video_title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage.replace('\n', ''), 'unige')
|
||||
course_title = self._html_search_regex(r'<a href="/collection/[-\w+]+">(?P<course>.*)</a></div>', webpage, 'unige')
|
||||
course_id = self._html_search_regex(r'<a href="/collection/(?P<courseid>[-\w+]+)">', webpage, 'unige')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'<source src="([^"]+)"', webpage, 'video URL')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'url': video_url,
|
||||
'channel': course_title,
|
||||
'channel_id': course_id,
|
||||
}
|
||||
|
||||
|
||||
class UnigePlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https://mediaserver.unige.ch/collection/(?P<id>[-\w+]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
collection_id = self._match_id(url)
|
||||
|
||||
rss = self._download_xml(url + '.rss', collection_id)
|
||||
|
||||
entries = [self.url_result(video.text, 'Unige')
|
||||
for video in rss.findall('./channel/item/link')]
|
||||
title_text = rss.find('./channel/title').text
|
||||
|
||||
return self.playlist_result(entries, collection_id, title_text)
|
Loading…
Reference in New Issue
Block a user