# Provenance: reconstructed from a whitespace-collapsed git patch.
#   Commit:  c6d4b82a8b8bce59b1c9ce5e6d349ea428dac0a7
#   From:    Daniel Vogt
#   Date:    Mon, 29 May 2023 07:21:26 +0200
#   Subject: [PATCH] [extractor/owncloud] Add extractor (#6533)
#   Authored by: C0D3D3V
#
# The patch touches two files:
#   * yt_dlp/extractor/_extractors.py -- adds `from .owncloud import OwnCloudIE`
#     between the `.outsidetv` and `.packtpub` imports;
#   * yt_dlp/extractor/owncloud.py    -- new file, whose content is this module.
#
# NOTE(review): the regex fragments `<id>`, `<label[^>` and `<div[^>` had been
# stripped from the collapsed text (they look like HTML tags to a sanitizer) and
# were restored here; confirm the tag names against the upstream commit.
# The fallback download URL also deviates from the patch (see _real_extract).

import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    determine_ext,
    url_or_none,
    urlencode_postdata,
)


class OwnCloudIE(InfoExtractor):
    """Extractor for files shared via ``/s/<id>`` links on the listed instances."""

    # Host patterns of the supported instances, OR-ed together into _VALID_URL
    _INSTANCES_RE = '|'.join((
        r'(?:[^\.]+\.)?sciebo\.de',
        r'cloud\.uni-koblenz-landau\.de',
    ))
    # The named group `id` is what `_match_id` returns
    _VALID_URL = rf'https?://(?:{_INSTANCES_RE})/s/(?P<id>[\w.-]+)'

    _TESTS = [
        {
            'url': 'https://ruhr-uni-bochum.sciebo.de/s/wWhqZzh9jTumVFN',
            'info_dict': {
                'id': 'wWhqZzh9jTumVFN',
                'ext': 'mp4',
                'title': 'CmvpJST.mp4',
            },
        },
        {
            'url': 'https://ruhr-uni-bochum.sciebo.de/s/WNDuFu0XuFtmm3f',
            'info_dict': {
                'id': 'WNDuFu0XuFtmm3f',
                'ext': 'mp4',
                'title': 'CmvpJST.mp4',
            },
            'params': {
                'videopassword': '12345',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the share at *url*.

        Downloads the share page, submits the password first when the page
        contains a password form, then reads the `filename` and `downloadURL`
        hidden inputs of the resulting page.
        """
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, video_id)

        # A <label for="password"> means the page is asking for the share password
        if re.search(r'<label[^>]+for="password"', webpage):
            # Post to the URL the response actually came from
            webpage = self._verify_video_password(webpage, urlh.geturl(), video_id)

        hidden_inputs = self._hidden_inputs(webpage)
        title = hidden_inputs.get('filename')

        download_url = url_or_none(hidden_inputs.get('downloadURL'))
        if not download_url:
            # Fallback when the page does not provide a usable downloadURL.
            # The path is built from the share id explicitly:
            # urljoin('/s/<id>', 'download') would replace the last segment
            # and yield '/s/download', dropping the id.
            download_url = urllib.parse.urlparse(url)._replace(
                path=f'/s/{video_id}/download').geturl()

        return {
            'id': video_id,
            'title': title,
            'url': download_url,
            'ext': determine_ext(title),
        }

    def _verify_video_password(self, webpage, url, video_id):
        """Submit the `videopassword` param to *url* and return the response page.

        *webpage* is the page containing the password form; its `requesttoken`
        hidden input is sent along with the password.

        Raises ExtractorError (expected=True) when no password was supplied or
        when the response still contains the password form.
        """
        password = self.get_param('videopassword')
        if password is None:
            raise ExtractorError(
                'This video is protected by a password, use the --video-password option',
                expected=True)

        validation_response = self._download_webpage(
            url, video_id, 'Validating Password', 'Wrong password?',
            data=urlencode_postdata({
                'requesttoken': self._hidden_inputs(webpage)['requesttoken'],
                'password': password,
            }))

        # Getting the password form back again means the password was rejected
        if re.search(r'<label[^>]+for="password"', validation_response):
            warning = self._search_regex(
                r'<div[^>]+class="warning">([^<]*)', validation_response,
                'warning', default='The password is wrong')
            raise ExtractorError(f'Opening the video failed, {self.IE_NAME} said: {warning!r}', expected=True)
        return validation_response