yt-dlp/yt_dlp/extractor/safari.py

import json
import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    update_url_query,
)


class SafariBaseIE(InfoExtractor):
    # Common base for the Safari/O'Reilly extractors: holds the shared API
    # constants and implements the credential-based login flow.
    _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
    _NETRC_MACHINE = 'safari'

    _API_BASE = 'https://learning.oreilly.com/api/v1'
    _API_FORMAT = 'json'

    # Set to True on the instance once _perform_login has succeeded
    LOGGED_IN = False

    def _perform_login(self, username, password):
        """Log in with the given credentials, setting LOGGED_IN on success.

        Raises ExtractorError if the login redirect carries no `next`
        parameter, if the auth endpoint reports a credentials problem, or if
        the final page after login is not the logged-in home page.
        """
        _, urlh = self._download_webpage_handle(
            'https://learning.oreilly.com/accounts/login-check/', None,
            'Downloading login page')

        def is_logged(urlh):
            # Landing on the home page is treated as proof of a valid session
            return 'learning.oreilly.com/home/' in urlh.url

        if is_logged(urlh):
            self.LOGGED_IN = True
            return

        # Not logged in: the final URL of the login check is expected to carry
        # the post-login target in its `next` query parameter
        redirect_url = urlh.url
        parsed_url = urllib.parse.urlparse(redirect_url)
        qs = urllib.parse.parse_qs(parsed_url.query)
        next_values = qs.get('next')
        if not next_values:
            # Fail with a clear message instead of a bare KeyError
            raise ExtractorError(
                'Unable to log in: login redirect URL has no "next" parameter')
        next_uri = urllib.parse.urljoin(
            'https://api.oreilly.com', next_values[0])

        # HTTP 400 is passed as an expected status, presumably so that the
        # JSON body describing a failed login can still be inspected below
        auth, urlh = self._download_json_handle(
            'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
            data=json.dumps({
                'email': username,
                'password': password,
                'redirect_uri': next_uri,
            }).encode(), headers={
                'Content-Type': 'application/json',
                'Referer': redirect_url,
            }, expected_status=400)

        credentials = auth.get('credentials')
        if (not auth.get('logged_in') and not auth.get('redirect_uri')
                and credentials):
            raise ExtractorError(
                f'Unable to login: {credentials}', expected=True)

        # oreilly serves two same instances of the following cookies
        # in Set-Cookie header and expects first one to be actually set
        for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
            self._apply_first_set_cookie_header(urlh, cookie)

        # Follow the redirect handed back by the auth endpoint (or the
        # original target) and check where it lands
        _, urlh = self._download_webpage_handle(
            auth.get('redirect_uri') or next_uri, None, 'Completing login')

        if is_logged(urlh):
            self.LOGGED_IN = True
            return

        raise ExtractorError('Unable to log in')


class SafariIE(SafariBaseIE):
    # Extracts a single video by resolving its Kaltura reference id and
    # delegating the actual download to the Kaltura extractor.
    IE_NAME = 'safari'
    IE_DESC = 'safaribooksonline.com online video'
    _VALID_URL = r'''(?x)
                        https?://
                            (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
                            (?:
                                library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
                                videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
                            )
                    '''

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
        'md5': 'dcc5a425e79f2564148652616af1f2a3',
        'info_dict': {
            'id': '0_qbqx90ic',
            'ext': 'mp4',
            'title': 'Introduction to Hadoop Fundamentals LiveLessons',
            'timestamp': 1437758058,
            'upload_date': '20150724',
            'uploader_id': 'stork',
        },
    }, {
        # non-digits in course id
        'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
        'only_matching': True,
    }, {
        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
        'only_matching': True,
    }, {
        'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
        'only_matching': True,
    }]

    # Fallback Kaltura partner / UI config ids, used when the page does not
    # provide its own (or when no page is downloaded at all)
    _PARTNER_ID = '1926081'
    _UICONF_ID = '29375172'

    def _real_extract(self, url):
        """Resolve the Kaltura reference id for `url` and hand off to Kaltura.

        `videos/...` URLs carry the reference id directly. `library/view/...`
        URLs are downloaded first; the id is then taken from the final URL
        after redirects or, failing that, from the page markup.
        """
        mobj = self._match_valid_url(url)

        reference_id = mobj.group('reference_id')
        if reference_id:
            video_id = reference_id
            partner_id = self._PARTNER_ID
            ui_id = self._UICONF_ID
        else:
            video_id = '{}-{}'.format(mobj.group('course_id'), mobj.group('part'))

            webpage, urlh = self._download_webpage_handle(url, video_id)

            # The request may have been redirected to a URL that no longer
            # matches _VALID_URL, in which case re.match returns None; fall
            # back to the page markup instead of crashing on None.group()
            final_mobj = re.match(self._VALID_URL, urlh.url)
            reference_id = final_mobj.group('reference_id') if final_mobj else None
            if not reference_id:
                reference_id = self._search_regex(
                    r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
                    webpage, 'kaltura reference id', group='id')
            partner_id = self._search_regex(
                r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
                webpage, 'kaltura widget id', default=self._PARTNER_ID,
                group='id')
            ui_id = self._search_regex(
                r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
                webpage, 'kaltura uiconf id', default=self._UICONF_ID,
                group='id')

        query = {
            'wid': f'_{partner_id}',
            'uiconf_id': ui_id,
            'flashvars[referenceId]': reference_id,
        }

        if self.LOGGED_IN:
            # Best effort: a failed session request is non-fatal and simply
            # leaves the `ks` flashvar out of the embed query
            kaltura_session = self._download_json(
                f'{self._API_BASE}/player/kaltura_session/?reference_id={reference_id}',
                video_id, 'Downloading kaltura session JSON',
                'Unable to download kaltura session JSON', fatal=False,
                headers={'Accept': 'application/json'})
            if kaltura_session:
                session = kaltura_session.get('session')
                if session:
                    query['flashvars[ks]'] = session

        return self.url_result(update_url_query(
            'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
            'Kaltura')


class SafariApiIE(SafariBaseIE):
    # Maps an API chapter URL onto the matching web URL and delegates the
    # result to SafariIE.
    IE_NAME = 'safari:api'
    _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the chapter JSON for `url` and return a SafariIE url result."""
        course_id, part_name = self._match_valid_url(url).group('course_id', 'part')
        chapter = self._download_json(
            url, f'{course_id}/{part_name}', 'Downloading part JSON')

        target_url = chapter['web_url']
        if 'library/view' not in target_url:
            return self.url_result(target_url, SafariIE.ie_key())

        # Rewrite a library/view URL into the videos/ form: swap the path
        # prefix, then replace the last path segment with
        # "<natural_key[0]>-<natural_key[1] minus its last five characters>"
        # (presumably a trailing ".html" - confirm against the API response)
        target_url = target_url.replace('library/view', 'videos')
        natural_keys = chapter['natural_key']
        parent_url = target_url.rsplit('/', 1)[0]
        target_url = f'{parent_url}/{natural_keys[0]}-{natural_keys[1][:-5]}'
        return self.url_result(target_url, SafariIE.ie_key())


class SafariCourseIE(SafariBaseIE):
    # Turns a course/book URL into a playlist with one SafariApiIE entry per
    # chapter listed in the course JSON.
    IE_NAME = 'safari:course'
    IE_DESC = 'safaribooksonline.com online courses'

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
                            (?:
                                library/view/[^/]+|
                                api/v1/book|
                                videos/[^/]+
                            )|
                            techbus\.safaribooksonline\.com
                        )
                        /(?P<id>[^/]+)
                    '''

    _TESTS = [{
        'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'info_dict': {
            'id': '9780133392838',
            'title': 'Hadoop Fundamentals LiveLessons',
        },
        'playlist_count': 22,
        'skip': 'Requires safaribooksonline account credentials',
    }, {
        'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
        'only_matching': True,
    }, {
        'url': 'http://techbus.safaribooksonline.com/9780134426365',
        'only_matching': True,
    }, {
        'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
        'only_matching': True,
    }, {
        'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
        'only_matching': True,
    }, {
        'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs claimed by SafariIE or SafariApiIE."""
        # The course pattern is not anchored at the end, so it can also match
        # single-video and API chapter URLs; those extractors take precedence
        if SafariIE.suitable(url) or SafariApiIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Download the course JSON and return its chapters as a playlist.

        Raises ExtractorError when the course JSON has no `chapters` key.
        """
        course_id = self._match_id(url)

        api_url = f'{self._API_BASE}/book/{course_id}/?override_format={self._API_FORMAT}'
        course = self._download_json(
            api_url, course_id, 'Downloading course JSON')

        if 'chapters' not in course:
            raise ExtractorError(
                f'No chapters found for course {course_id}', expected=True)

        # Each chapter item is passed on as a URL for SafariApiIE to resolve
        chapter_entries = [
            self.url_result(chapter_url, SafariApiIE.ie_key())
            for chapter_url in course['chapters']
        ]

        return self.playlist_result(chapter_entries, course_id, course['title'])
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`import json`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00			`import re`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`import urllib.parse`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
			`from .common import InfoExtractor`
			`from ..utils import (`
			`ExtractorError,`
[safari] extract free and preview videos(#7491) 2016-03-11 16:57:06 +01:00			`update_url_query,`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00			`)`


			`class SafariBaseIE(InfoExtractor):`
[safari] Add support for learning.oreilly.com (closes #18510) 2018-12-15 17:08:14 +01:00			`_LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`_NETRC_MACHINE = 'safari'`

[safari] Add support for learning.oreilly.com (closes #18510) 2018-12-15 17:08:14 +01:00			`_API_BASE = 'https://learning.oreilly.com/api/v1'`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`_API_FORMAT = 'json'`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
			`LOGGED_IN = False`

[extractor] Add `_perform_login` function (#2943) * Adds new functions `_initialize_pre_login` and `_perform_login` as part of the extractor API * Adds `ie.supports_login` to the public API 2022-03-18 21:53:33 +01:00			`def _perform_login(self, username, password):`
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`_, urlh = self._download_webpage_handle(`
			`'https://learning.oreilly.com/accounts/login-check/', None,`
			`'Downloading login page')`
[safari] Improve authentication detection (closes #13319) 2017-06-08 18:20:48 +02:00
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`def is_logged(urlh):`
[compat, networking] Deprecate old functions (#2861) Authored by: coletdjnz, pukkandan 2023-07-09 09:53:02 +02:00			`return 'learning.oreilly.com/home/' in urlh.url`
[safari] Improve authentication detection (closes #13319) 2017-06-08 18:20:48 +02:00
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`if is_logged(urlh):`
[safari] Improve authentication detection (closes #13319) 2017-06-08 18:20:48 +02:00			`self.LOGGED_IN = True`
			`return`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[compat, networking] Deprecate old functions (#2861) Authored by: coletdjnz, pukkandan 2023-07-09 09:53:02 +02:00			`redirect_url = urlh.url`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`parsed_url = urllib.parse.urlparse(redirect_url)`
			`qs = urllib.parse.parse_qs(parsed_url.query)`
			`next_uri = urllib.parse.urljoin(`
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`'https://api.oreilly.com', qs['next'][0])`

			`auth, urlh = self._download_json_handle(`
			`'https://www.oreilly.com/member/auth/login/', None, 'Logging in',`
			`data=json.dumps({`
			`'email': username,`
			`'password': password,`
			`'redirect_uri': next_uri,`
			`}).encode(), headers={`
			`'Content-Type': 'application/json',`
			`'Referer': redirect_url,`
			`}, expected_status=400)`

			`credentials = auth.get('credentials')`
			`if (not auth.get('logged_in') and not auth.get('redirect_uri')`
			`and credentials):`
			`raise ExtractorError(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`f'Unable to login: {credentials}', expected=True)`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Fix authentication (closes #22161) (#22184) 2019-08-27 05:16:04 +02:00			`# oreilly serves two same instances of the following cookies`
			`# in Set-Cookie header and expects first one to be actually set`
			`for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):`
			`self._apply_first_set_cookie_header(urlh, cookie)`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`_, urlh = self._download_webpage_handle(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`auth.get('redirect_uri') or next_uri, None, 'Completing login')`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`if is_logged(urlh):`
			`self.LOGGED_IN = True`
			`return`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`raise ExtractorError('Unable to log in')`
[safari] Fix authentication 2016-03-12 21:08:36 +01:00
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
			`class SafariIE(SafariBaseIE):`
			`IE_NAME = 'safari'`
			`IE_DESC = 'safaribooksonline.com online video'`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`_VALID_URL = r'''(?x)`
			`https?://`
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`(?:www\.)?(?:safaribooksonline\|(?:learning\.)?oreilly)\.com/`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`(?:`
			`library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html\|`
			`videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)`
			`)`
			`'''`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00
			`_TESTS = [{`
			`'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',`
[safari] extract free and preview videos(#7491) 2016-03-11 16:57:06 +01:00			`'md5': 'dcc5a425e79f2564148652616af1f2a3',`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00			`'info_dict': {`
[safari] extract free and preview videos(#7491) 2016-03-11 16:57:06 +01:00			`'id': '0_qbqx90ic',`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00			`'ext': 'mp4',`
[safari] extract free and preview videos(#7491) 2016-03-11 16:57:06 +01:00			`'title': 'Introduction to Hadoop Fundamentals LiveLessons',`
			`'timestamp': 1437758058,`
			`'upload_date': '20150724',`
			`'uploader_id': 'stork',`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`},`
[safari] Add test for #5985 2015-06-15 17:36:30 +02:00			`}, {`
			`# non-digits in course id`
			`'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',`
			`'only_matching': True,`
[safari] Relax url regexes (Closes #10202) 2016-08-01 16:48:48 +02:00			`}, {`
			`'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',`
			`'only_matching': True,`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`}, {`
			`'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',`
			`'only_matching': True,`
[safari] Add support for learning.oreilly.com (closes #18510) 2018-12-15 17:08:14 +01:00			`}, {`
			`'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',`
			`'only_matching': True,`
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`}, {`
			`'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',`
			`'only_matching': True,`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`}]`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`_PARTNER_ID = '1926081'`
			`_UICONF_ID = '29375172'`

[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00			`def _real_extract(self, url):`
[extractor] Common function `_match_valid_url` 2021-08-19 03:41:24 +02:00			`mobj = self._match_valid_url(url)`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00
			`reference_id = mobj.group('reference_id')`
			`if reference_id:`
			`video_id = reference_id`
			`partner_id = self._PARTNER_ID`
			`ui_id = self._UICONF_ID`
			`else:`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`video_id = '{}-{}'.format(mobj.group('course_id'), mobj.group('part'))`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00
			`webpage, urlh = self._download_webpage_handle(url, video_id)`

[compat, networking] Deprecate old functions (#2861) Authored by: coletdjnz, pukkandan 2023-07-09 09:53:02 +02:00			`mobj = re.match(self._VALID_URL, urlh.url)`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`reference_id = mobj.group('reference_id')`
			`if not reference_id:`
			`reference_id = self._search_regex(`
			`r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',`
			`webpage, 'kaltura reference id', group='id')`
			`partner_id = self._search_regex(`
			`r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',`
			`webpage, 'kaltura widget id', default=self._PARTNER_ID,`
			`group='id')`
			`ui_id = self._search_regex(`
			`r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',`
			`webpage, 'kaltura uiconf id', default=self._UICONF_ID,`
			`group='id')`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00
[safari] Respect kaltura session (Closes #7491) 2016-03-12 21:03:07 +01:00			`query = {`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`'wid': f'_{partner_id}',`
[safari] extract free and preview videos(#7491) 2016-03-11 16:57:06 +01:00			`'uiconf_id': ui_id,`
			`'flashvars[referenceId]': reference_id,`
[safari] Respect kaltura session (Closes #7491) 2016-03-12 21:03:07 +01:00			`}`

			`if self.LOGGED_IN:`
			`kaltura_session = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`f'{self._API_BASE}/player/kaltura_session/?reference_id={reference_id}',`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00			`video_id, 'Downloading kaltura session JSON',`
[safari] Fix kaltura session extraction (closes #23679) (#23670) 2020-01-10 19:34:26 +01:00			`'Unable to download kaltura session JSON', fatal=False,`
			`headers={'Accept': 'application/json'})`
[safari] Respect kaltura session (Closes #7491) 2016-03-12 21:03:07 +01:00			`if kaltura_session:`
			`session = kaltura_session.get('session')`
			`if session:`
			`query['flashvars[ks]'] = session`

			`return self.url_result(update_url_query(`
			`'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),`
			`'Kaltura')`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00

[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00			`class SafariApiIE(SafariBaseIE):`
			`IE_NAME = 'safari:api'`
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`_VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline\|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00
[safari] Relax url regexes (Closes #10202) 2016-08-01 16:48:48 +02:00			`_TESTS = [{`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00			`'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',`
			`'only_matching': True,`
[safari] Relax url regexes (Closes #10202) 2016-08-01 16:48:48 +02:00			`}, {`
			`'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',`
			`'only_matching': True,`
			`}]`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00
			`def _real_extract(self, url):`
[extractor] Common function `_match_valid_url` 2021-08-19 03:41:24 +02:00			`mobj = self._match_valid_url(url)`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00			`part = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`url, '{}/{}'.format(mobj.group('course_id'), mobj.group('part')),`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00			`'Downloading part JSON')`
[Oreilly] Handle new web url (#990) The change in URL is most likely a server side issue. But we can work around it by a simple substitution Authored by: MKSherbini 2021-09-18 13:33:06 +02:00			`web_url = part['web_url']`
			`if 'library/view' in web_url:`
			`web_url = web_url.replace('library/view', 'videos')`
			`natural_keys = part['natural_key']`
[Oreilly] Bugfix for 7738bd32722154a26f70006e0fe586f40d06e606 2021-09-19 10:46:11 +02:00			`web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}'`
[Oreilly] Handle new web url (#990) The change in URL is most likely a server side issue. But we can work around it by a simple substitution Authored by: MKSherbini 2021-09-18 13:33:06 +02:00			`return self.url_result(web_url, SafariIE.ie_key())`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00

[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00			`class SafariCourseIE(SafariBaseIE):`
			`IE_NAME = 'safari:course'`
			`IE_DESC = 'safaribooksonline.com online courses'`

[safari:course] Add support for techbus.safaribooksonline.com 2016-10-14 19:29:33 +02:00			`_VALID_URL = r'''(?x)`
			`https?://`
			`(?:`
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`(?:www\.)?(?:safaribooksonline\|(?:learning\.)?oreilly)\.com/`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`(?:`
			`library/view/[^/]+\|`
			`api/v1/book\|`
			`videos/[^/]+`
			`)\|`
[safari:course] Add support for techbus.safaribooksonline.com 2016-10-14 19:29:33 +02:00			`techbus\.safaribooksonline\.com`
			`)`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`/(?P<id>[^/]+)`
[safari:course] Add support for techbus.safaribooksonline.com 2016-10-14 19:29:33 +02:00			`'''`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`_TESTS = [{`
			`'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',`
			`'info_dict': {`
			`'id': '9780133392838',`
			`'title': 'Hadoop Fundamentals LiveLessons',`
			`},`
			`'playlist_count': 22,`
			`'skip': 'Requires safaribooksonline account credentials',`
			`}, {`
			`'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',`
			`'only_matching': True,`
[safari:course] Add support for techbus.safaribooksonline.com 2016-10-14 19:29:33 +02:00			`}, {`
			`'url': 'http://techbus.safaribooksonline.com/9780134426365',`
			`'only_matching': True,`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`}, {`
			`'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',`
			`'only_matching': True,`
[safari] Add support for learning.oreilly.com (closes #18510) 2018-12-15 17:08:14 +01:00			`}, {`
			`'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',`
			`'only_matching': True,`
[safari] Fix authentication (closes #21090) 2019-05-17 22:23:40 +02:00			`}, {`
			`'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',`
			`'only_matching': True,`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`}]`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00			`@classmethod`
			`def suitable(cls, url):`
			`return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`else super().suitable(url))`
[safari] Add support for new URL schema (closes #16614) 2018-06-02 19:52:22 +02:00
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00			`def _real_extract(self, url):`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`course_id = self._match_id(url)`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`course_json = self._download_json(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`f'{self._API_BASE}/book/{course_id}/?override_format={self._API_FORMAT}',`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`course_id, 'Downloading course JSON')`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
			`if 'chapters' not in course_json:`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`raise ExtractorError(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`f'No chapters found for course {course_id}', expected=True)`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
			`entries = [`
[safari:api] Separate extractor (Closes #8871) 2016-03-19 17:30:48 +01:00			`self.url_result(chapter, SafariApiIE.ie_key())`
[safari] Improve and simplify 2015-03-26 18:57:46 +01:00			`for chapter in course_json['chapters']]`
[safari] Add safaribooksonline extractor 2015-03-22 19:03:40 +01:00
			`course_title = course_json['title']`

			`return self.playlist_result(entries, course_id, course_title)`