From b14b33a2e9c35d41a1fd16cf53afae612392bf44 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 31 Jan 2022 04:28:54 +0000 Subject: [PATCH 01/15] [YouTube] Bypass age-gating for certain restricted videos * Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client * Also add and fix tests * Introduce and use new utility function `update_url()` --- youtube_dl/extractor/youtube.py | 202 +++++++++++++++++++++++++------- youtube_dl/utils.py | 11 ++ 2 files changed, 168 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 28fdb086a..65428528d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -42,6 +42,7 @@ from ..utils import ( unescapeHTML, unified_strdate, unsmuggle_url, + update_url, update_url_query, url_or_none, urlencode_postdata, @@ -286,15 +287,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|= 18): + + self.report_age_confirmation() + + # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233 + pb_context = {'html5Preference': 'HTML5_PREF_WANTS'} + query = { + 'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}}, + 'contentCheckOk': True, + 'racyCheckOk': True, + 'context': { + 'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'}, + 'thirdParty': {'embedUrl': 'https://google.com'}, + }, + 'videoId': video_id, + } + headers = { + 'X-YouTube-Client-Name': '85', + 'X-YouTube-Client-Version': '2.0', + 'Origin': 'https://www.youtube.com' + } + + video_info = self._call_api('player', query, video_id, fatal=False, headers=headers) + age_gate_status = get_playability_status(video_info) + if age_gate_status.get('status') == 'OK': + player_response = video_info + playability_status = age_gate_status trailer_video_id = try_get( playability_status, @@ -1932,12 +2048,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for thumbnail in (try_get( container, lambda x: x['thumbnail']['thumbnails'], list) or []): - thumbnail_url = thumbnail.get('url') + thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue thumbnails.append({ 'height': int_or_none(thumbnail.get('height')), - 'url': thumbnail_url, + 'url': update_url(thumbnail_url, query=None, fragment=None), 'width': int_or_none(thumbnail.get('width')), }) if thumbnails: @@ -2142,6 +2258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sbr_tooltip = try_get( vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip']) if sbr_tooltip: + # however dislike_count was hidden by YT, as if there could ever be dislikable content on YT like_count, dislike_count = sbr_tooltip.split(' / ') info.update({ 'like_count': str_to_int(like_count), @@ -2411,7 +2528,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'tags': list, 'view_count': int, 'like_count': int, - 'dislike_count': int, }, 'params': { 'skip_download': True, @@ -2438,7 +2554,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'categories': ['News & Politics'], 'tags': list, 'like_count': int, - 'dislike_count': int, }, 'params': { 'skip_download': True, @@ -2458,7 +2573,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'categories': ['News & Politics'], 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], 'like_count': int, - 'dislike_count': int, }, 'params': { 'skip_download': True, @@ -3043,8 +3157,7 @@ class 
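
[Illustrative aside] The fallback above boils down to one extra innertube /player call made as the TV embedded client. A minimal standalone sketch of that request, assuming the usual https://www.youtube.com/youtubei/v1/player endpoint and using a placeholder API key (the real code goes through self._call_api(), which supplies the key):

    import json

    from youtube_dl.compat import compat_urllib_request

    def fetch_embedded_player_response(video_id, api_key):
        # api_key is a placeholder here; _call_api() provides the real innertube key
        data = json.dumps({
            'contentCheckOk': True,
            'racyCheckOk': True,
            'context': {
                'client': {
                    'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                    'clientVersion': '2.0',
                    'hl': 'en',
                    'clientScreen': 'EMBED',
                },
                'thirdParty': {'embedUrl': 'https://google.com'},
            },
            'videoId': video_id,
        }).encode('utf-8')
        req = compat_urllib_request.Request(
            'https://www.youtube.com/youtubei/v1/player?key=' + api_key,
            data=data, headers={
                'Content-Type': 'application/json',
                'X-YouTube-Client-Name': '85',
                'X-YouTube-Client-Version': '2.0',
                'Origin': 'https://www.youtube.com',
            })
        return json.loads(
            compat_urllib_request.urlopen(req).read().decode('utf-8'))
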
YoutubeTabIE(YoutubeBaseInfoExtractor): def _real_extract(self, url): item_id = self._match_id(url) - url = compat_urlparse.urlunparse( - compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com')) + url = update_url(url, netloc='www.youtube.com') # Handle both video/playlist URLs qs = parse_qs(url) video_id = qs.get('v', [None])[0] @@ -3178,7 +3291,6 @@ class YoutubeYtBeIE(InfoExtractor): 'categories': ['Nonprofits & Activism'], 'tags': list, 'like_count': int, - 'dislike_count': int, }, 'params': { 'noplaylist': True, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e3c3ccff9..d5cc6386d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -4121,6 +4121,17 @@ def update_url_query(url, query): query=compat_urllib_parse_urlencode(qs, True))) +def update_url(url, **kwargs): + """Replace URL components specified by kwargs + url: compat_str or parsed URL tuple + returns: compat_str""" + if not kwargs: + return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url + if not isinstance(url, tuple): + url = compat_urlparse.urlparse(url) + return compat_urlparse.urlunparse(url._replace(**kwargs)) + + def update_Request(req, url=None, data=None, headers={}, query={}): req_headers = req.headers.copy() req_headers.update(headers) From 2be0cd261601e5be431723b95cf0d838621760a9 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 20 Jun 2022 23:15:20 +0100 Subject: [PATCH 02/15] [YouTube] Add `signatureTimestamp` for age-gate bypass --- youtube_dl/extractor/youtube.py | 34 +++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 65428528d..6c1cfe7f2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1642,6 +1642,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor): fmt['url'] = compat_urlparse.urlunparse( parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + # from yt-dlp, with tweaks + def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): + """ + Extract signatureTimestamp (sts) + Required to tell API what sig/player version is in use. + """ + sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None + if not sts: + # Attempt to extract from player + if player_url is None: + error_msg = 'Cannot extract signature timestamp without player_url.' 
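
                # [Illustrative aside, not part of the patch] In the player JS
                # the value appears as e.g. `signatureTimestamp:19369`; the
                # regex below captures that 5-digit number, which the caller
                # then sends to the API as
                # playbackContext.contentPlaybackContext.signatureTimestamp.
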
+ if fatal: + raise ExtractorError(error_msg) + self._downloader.report_warning(error_msg) + return + code = self._get_player_code(video_id, player_url) + sts = int_or_none(self._search_regex( + r'(?:signatureTimestamp|sts)\s*:\s*(?P[0-9]{5})', code or '', + 'JS player signature timestamp', group='sts', fatal=fatal)) + return sts + def _mark_watched(self, video_id, player_response): playback_url = url_or_none(try_get( player_response, @@ -1766,6 +1787,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) player_response = None + player_url = None if webpage: player_response = self._extract_yt_initial_variable( webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, @@ -1799,8 +1821,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233 pb_context = {'html5Preference': 'HTML5_PREF_WANTS'} + + # Use signatureTimestamp if available + # Thanks https://github.com/ytdl-org/youtube-dl/issues/31034#issuecomment-1160718026 + player_url = self._extract_player_url(webpage) + ytcfg = self._extract_ytcfg(video_id, webpage) + sts = self._extract_signature_timestamp(video_id, player_url, ytcfg) + if sts: + pb_context['signatureTimestamp'] = sts + query = { - 'playbackContext': {'contentPlaybackContext': {'html5Preference': 'HTML5_PREF_WANTS'}}, + 'playbackContext': {'contentPlaybackContext': pb_context}, 'contentCheckOk': True, 'racyCheckOk': True, 'context': { @@ -1901,7 +1932,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): formats = [] itags = [] itag_qualities = {} - player_url = None q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres']) streaming_data = player_response.get('streamingData') or {} streaming_formats = streaming_data.get('formats') or [] From ce81ae38463f81eb3ee4dd6f68063146ec22b819 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sat, 4 Feb 2023 23:18:24 +0000 Subject: [PATCH 03/15] [test] Fix TestAgeRestriction * age restriction may cause DownloadError * update obsolete test URLs [skip ci] --- test/test_age_restriction.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index 6f5513faa..db98494ab 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -11,6 +11,7 @@ from test.helper import try_rm from youtube_dl import YoutubeDL +from youtube_dl.utils import DownloadError def _download_restricted(url, filename, age): @@ -26,7 +27,10 @@ def _download_restricted(url, filename, age): ydl.add_default_info_extractors() json_filename = os.path.splitext(filename)[0] + '.info.json' try_rm(json_filename) - ydl.download([url]) + try: + ydl.download([url]) + except DownloadError: + try_rm(json_filename) res = os.path.exists(json_filename) try_rm(json_filename) return res @@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase): self.assertFalse(_download_restricted(url, filename, age)) def test_youtube(self): - self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10) def test_youporn(self): self._assert_restricted( - 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', - '505835.mp4', 2, old_age=25) + 'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/', + '16715086.mp4', 2, old_age=25) if __name__ == '__main__': From 9ca224b69708cc32010edea19b73892197e32a95 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 5 Feb 2023 
15:43:43 +0000 Subject: [PATCH 04/15] [compat] Systematise compat_ naming [skip ci] --- test/test_compat.py | 3 +- youtube_dl/compat.py | 221 +++++++++++++++++++++++++++---------------- 2 files changed, 139 insertions(+), 85 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 0986cff37..4dddd9a38 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -48,7 +48,8 @@ class TestCompat(unittest.TestCase): def test_all_present(self): import youtube_dl.compat - all_names = youtube_dl.compat.__all__ + all_names = sorted( + youtube_dl.compat.__all__ + youtube_dl.compat.legacy) present_names = set(filter( lambda c: '_' in c and not c.startswith('_'), dir(youtube_dl.compat))) - set(['unicode_literals']) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 28942a8c1..39551f810 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -21,6 +21,10 @@ import subprocess import sys import xml.etree.ElementTree +# naming convention +# 'compat_' + Python3_name.replace('.', '_') +# other aliases exist for convenience and/or legacy + # deal with critical unicode/str things first try: # Python 2 @@ -28,6 +32,7 @@ try: unicode, basestring, unichr ) from .casefold import casefold as compat_casefold + except NameError: compat_str, compat_basestring, compat_chr = ( str, str, chr @@ -53,16 +58,15 @@ try: import urllib.parse as compat_urllib_parse except ImportError: # Python 2 import urllib as compat_urllib_parse + import urlparse as _urlparse + for a in dir(_urlparse): + if not hasattr(compat_urllib_parse, a): + setattr(compat_urllib_parse, a, getattr(_urlparse, a)) + del _urlparse -try: - from urllib.parse import urlparse as compat_urllib_parse_urlparse -except ImportError: # Python 2 - from urlparse import urlparse as compat_urllib_parse_urlparse - -try: - import urllib.parse as compat_urlparse -except ImportError: # Python 2 - import urlparse as compat_urlparse +# unfavoured aliases +compat_urlparse = compat_urllib_parse +compat_urllib_parse_urlparse = compat_urllib_parse.urlparse try: import urllib.response as compat_urllib_response @@ -73,6 +77,7 @@ try: import http.cookiejar as compat_cookiejar except ImportError: # Python 2 import cookielib as compat_cookiejar +compat_http_cookiejar = compat_cookiejar if sys.version_info[0] == 2: class compat_cookiejar_Cookie(compat_cookiejar.Cookie): @@ -84,11 +89,13 @@ if sys.version_info[0] == 2: compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs) else: compat_cookiejar_Cookie = compat_cookiejar.Cookie +compat_http_cookiejar_Cookie = compat_cookiejar_Cookie try: import http.cookies as compat_cookies except ImportError: # Python 2 import Cookie as compat_cookies +compat_http_cookies = compat_cookies if sys.version_info[0] == 2: class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie): @@ -98,6 +105,7 @@ if sys.version_info[0] == 2: return super(compat_cookies_SimpleCookie, self).load(rawdata) else: compat_cookies_SimpleCookie = compat_cookies.SimpleCookie +compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie try: import html.entities as compat_html_entities @@ -2351,16 +2359,19 @@ try: from urllib.error import HTTPError as compat_HTTPError except ImportError: # Python 2 from urllib2 import HTTPError as compat_HTTPError +compat_urllib_HTTPError = compat_HTTPError try: from urllib.request import urlretrieve as compat_urlretrieve except ImportError: # Python 2 from urllib import urlretrieve as compat_urlretrieve +compat_urllib_request_urlretrieve = compat_urlretrieve try: from html.parser 
import HTMLParser as compat_HTMLParser except ImportError: # Python 2 from HTMLParser import HTMLParser as compat_HTMLParser +compat_html_parser_HTMLParser = compat_HTMLParser try: # Python 2 from HTMLParser import HTMLParseError as compat_HTMLParseError @@ -2374,6 +2385,7 @@ except ImportError: # Python <3.4 # and uniform cross-version exception handling class compat_HTMLParseError(Exception): pass +compat_html_parser_HTMLParseError = compat_HTMLParseError try: from subprocess import DEVNULL @@ -2390,6 +2402,8 @@ try: from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes from urllib.parse import unquote as compat_urllib_parse_unquote from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus + from urllib.parse import urlencode as compat_urllib_parse_urlencode + from urllib.parse import parse_qs as compat_parse_qs except ImportError: # Python 2 _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire') else re.compile(r'([\x00-\x7f]+)')) @@ -2456,9 +2470,6 @@ except ImportError: # Python 2 string = string.replace('+', ' ') return compat_urllib_parse_unquote(string, encoding, errors) -try: - from urllib.parse import urlencode as compat_urllib_parse_urlencode -except ImportError: # Python 2 # Python 2 will choke in urlencode on mixture of byte and unicode strings. # Possible solutions are to either port it from python 3 with all # the friends or manually ensure input query contains only byte strings. @@ -2480,7 +2491,62 @@ except ImportError: # Python 2 def encode_list(l): return [encode_elem(e) for e in l] - return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq) + return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq) + + # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. 
+ # Python 2's version is apparently totally broken + def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + qs, _coerce_result = qs, compat_str + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError('bad query field: %r' % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = nv[0].replace('+', ' ') + name = compat_urllib_parse_unquote( + name, encoding=encoding, errors=errors) + name = _coerce_result(name) + value = nv[1].replace('+', ' ') + value = compat_urllib_parse_unquote( + value, encoding=encoding, errors=errors) + value = _coerce_result(value) + r.append((name, value)) + return r + + def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + parsed_result = {} + pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, + encoding=encoding, errors=errors) + for name, value in pairs: + if name in parsed_result: + parsed_result[name].append(value) + else: + parsed_result[name] = [value] + return parsed_result + + setattr(compat_urllib_parse, '_urlencode', + getattr(compat_urllib_parse, 'urlencode')) + for name, fix in ( + ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes), + ('parse_unquote', compat_urllib_parse_unquote), + ('unquote_plus', compat_urllib_parse_unquote_plus), + ('urlencode', compat_urllib_parse_urlencode), + ('parse_qs', compat_parse_qs)): + setattr(compat_urllib_parse, name, fix) + +compat_urllib_parse_parse_qs = compat_parse_qs try: from urllib.request import DataHandler as compat_urllib_request_DataHandler @@ -2520,6 +2586,7 @@ try: from xml.etree.ElementTree import ParseError as compat_xml_parse_error except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error +compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error etree = xml.etree.ElementTree @@ -2533,10 +2600,11 @@ try: # xml.etree.ElementTree.Element is a method in Python <=2.6 and # the following will crash with: # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types - isinstance(None, xml.etree.ElementTree.Element) + isinstance(None, etree.Element) from xml.etree.ElementTree import Element as compat_etree_Element except TypeError: # Python <=2.6 from xml.etree.ElementTree import _ElementInterface as compat_etree_Element +compat_xml_etree_ElementTree_Element = compat_etree_Element if sys.version_info[0] >= 3: def compat_etree_fromstring(text): @@ -2592,6 +2660,7 @@ else: if k == uri or v == prefix: del etree._namespace_map[k] etree._namespace_map[uri] = prefix +compat_xml_etree_register_namespace = compat_etree_register_namespace if sys.version_info < (2, 7): # Here comes the crazy part: In 2.6, if the xpath is a unicode, @@ -2603,53 +2672,6 @@ if sys.version_info < (2, 7): else: compat_xpath = lambda xpath: xpath -try: - from urllib.parse import parse_qs as compat_parse_qs -except ImportError: # Python 2 - # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. 
- # Python 2's version is apparently totally broken - - def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - qs, _coerce_result = qs, compat_str - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError('bad query field: %r' % (name_value,)) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = nv[0].replace('+', ' ') - name = compat_urllib_parse_unquote( - name, encoding=encoding, errors=errors) - name = _coerce_result(name) - value = nv[1].replace('+', ' ') - value = compat_urllib_parse_unquote( - value, encoding=encoding, errors=errors) - value = _coerce_result(value) - r.append((name, value)) - return r - - def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): - parsed_result = {} - pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) - for name, value in pairs: - if name in parsed_result: - parsed_result[name].append(value) - else: - parsed_result[name] = [value] - return parsed_result - compat_os_name = os._name if os.name == 'java' else os.name @@ -2774,6 +2796,8 @@ else: else: compat_expanduser = os.path.expanduser +compat_os_path_expanduser = compat_expanduser + if compat_os_name == 'nt' and sys.version_info < (3, 8): # os.path.realpath on Windows does not follow symbolic links @@ -2785,6 +2809,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8): else: compat_realpath = os.path.realpath +compat_os_path_realpath = compat_realpath + if sys.version_info < (3, 0): def compat_print(s): @@ -2805,11 +2831,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32': else: compat_getpass = getpass.getpass +compat_getpass_getpass = compat_getpass + + try: compat_input = raw_input except NameError: # Python 3 compat_input = input + # Python < 2.6.5 require kwargs to be bytes try: def _testfunc(x): @@ -2915,15 +2945,16 @@ else: lines = _lines return _terminal_size(columns, lines) + try: itertools.count(start=0, step=1) compat_itertools_count = itertools.count except TypeError: # Python 2.6 def compat_itertools_count(start=0, step=1): - n = start while True: - yield n - n += step + yield start + start += step + if sys.version_info >= (3, 0): from tokenize import tokenize as compat_tokenize_tokenize @@ -3075,6 +3106,8 @@ if sys.version_info < (3, 3): else: compat_b64decode = base64.b64decode +compat_base64_b64decode = compat_b64decode + if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): # PyPy2 prior to version 5.4.0 expects byte strings as Windows function @@ -3094,30 +3127,53 @@ else: return ctypes.WINFUNCTYPE(*args, **kwargs) -__all__ = [ +legacy = [ 'compat_HTMLParseError', 'compat_HTMLParser', 'compat_HTTPError', - 'compat_Struct', 'compat_b64decode', + 'compat_cookiejar', + 'compat_cookiejar_Cookie', + 'compat_cookies', + 'compat_cookies_SimpleCookie', + 'compat_etree_Element', + 'compat_etree_register_namespace', + 'compat_expanduser', + 'compat_getpass', + 'compat_parse_qs', + 'compat_realpath', + 'compat_urllib_parse_parse_qs', + 'compat_urllib_parse_unquote', + 'compat_urllib_parse_unquote_plus', + 'compat_urllib_parse_unquote_to_bytes', + 'compat_urllib_parse_urlencode', + 'compat_urllib_parse_urlparse', 
+ 'compat_urlparse', + 'compat_urlretrieve', + 'compat_xml_parse_error', +] + + +__all__ = [ + 'compat_html_parser_HTMLParseError', + 'compat_html_parser_HTMLParser', + 'compat_Struct', + 'compat_base64_b64decode', 'compat_basestring', 'compat_casefold', 'compat_chr', 'compat_collections_abc', 'compat_collections_chain_map', - 'compat_cookiejar', - 'compat_cookiejar_Cookie', - 'compat_cookies', - 'compat_cookies_SimpleCookie', + 'compat_http_cookiejar', + 'compat_http_cookiejar_Cookie', + 'compat_http_cookies', + 'compat_http_cookies_SimpleCookie', 'compat_ctypes_WINFUNCTYPE', - 'compat_etree_Element', 'compat_etree_fromstring', - 'compat_etree_register_namespace', - 'compat_expanduser', 'compat_filter', 'compat_get_terminal_size', 'compat_getenv', - 'compat_getpass', + 'compat_getpass_getpass', 'compat_html_entities', 'compat_html_entities_html5', 'compat_http_client', @@ -3131,11 +3187,11 @@ __all__ = [ 'compat_numeric_types', 'compat_ord', 'compat_os_name', - 'compat_parse_qs', + 'compat_os_path_expanduser', + 'compat_os_path_realpath', 'compat_print', 'compat_re_Match', 'compat_re_Pattern', - 'compat_realpath', 'compat_setenv', 'compat_shlex_quote', 'compat_shlex_split', @@ -3147,17 +3203,14 @@ __all__ = [ 'compat_tokenize_tokenize', 'compat_urllib_error', 'compat_urllib_parse', - 'compat_urllib_parse_unquote', - 'compat_urllib_parse_unquote_plus', - 'compat_urllib_parse_unquote_to_bytes', - 'compat_urllib_parse_urlencode', - 'compat_urllib_parse_urlparse', 'compat_urllib_request', 'compat_urllib_request_DataHandler', 'compat_urllib_response', - 'compat_urlparse', - 'compat_urlretrieve', - 'compat_xml_parse_error', + 'compat_urllib_request_urlretrieve', + 'compat_urllib_HTTPError', + 'compat_xml_etree_ElementTree_Element', + 'compat_xml_etree_ElementTree_ParseError', + 'compat_xml_etree_register_namespace', 'compat_xpath', 'compat_zip', 'workaround_optparse_bug9161', From a6f7d10d441d25bcfeba9e63244dbef7d52d3163 Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 5 Feb 2023 13:46:43 +0000 Subject: [PATCH 05/15] [utils] Add parse_qs, update_url [skip ci] --- youtube_dl/utils.py | 64 ++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d5cc6386d..4edbfa27b 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -42,6 +42,7 @@ from .compat import ( compat_HTMLParser, compat_HTTPError, compat_basestring, + compat_casefold, compat_chr, compat_collections_abc, compat_cookiejar, @@ -54,18 +55,18 @@ from .compat import ( compat_integer_types, compat_kwargs, compat_os_name, - compat_parse_qs, + compat_re_Match, compat_shlex_quote, compat_str, compat_struct_pack, compat_struct_unpack, compat_urllib_error, compat_urllib_parse, + compat_urllib_parse_parse_qs as compat_parse_qs, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urllib_parse_unquote_plus, compat_urllib_request, - compat_urlparse, compat_xpath, ) @@ -80,12 +81,12 @@ def register_socks_protocols(): # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904 # URLs with protocols not in urlparse.uses_netloc are not handled correctly for scheme in ('socks', 'socks4', 'socks4a', 'socks5'): - if scheme not in compat_urlparse.uses_netloc: - compat_urlparse.uses_netloc.append(scheme) + if scheme not in compat_urllib_parse.uses_netloc: + compat_urllib_parse.uses_netloc.append(scheme) -# This is not clearly defined otherwise -compiled_regex_type = type(re.compile('')) +# Unfavoured alias 
+compiled_regex_type = compat_re_Match def random_user_agent(): @@ -2725,7 +2726,7 @@ def make_socks_conn_class(base_class, socks_proxy): assert issubclass(base_class, ( compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection)) - url_components = compat_urlparse.urlparse(socks_proxy) + url_components = compat_urllib_parse.urlparse(socks_proxy) if url_components.scheme.lower() == 'socks5': socks_type = ProxyType.SOCKS5 elif url_components.scheme.lower() in ('socks', 'socks4'): @@ -3673,7 +3674,7 @@ def remove_quotes(s): def url_basename(url): - path = compat_urlparse.urlparse(url).path + path = compat_urllib_parse.urlparse(url).path return path.strip('/').split('/')[-1] @@ -3693,7 +3694,7 @@ def urljoin(base, path): if not isinstance(base, compat_str) or not re.match( r'^(?:https?:)?//', base): return None - return compat_urlparse.urljoin(base, path) + return compat_urllib_parse.urljoin(base, path) class HEADRequest(compat_urllib_request.Request): @@ -4091,6 +4092,10 @@ def escape_url(url): ).geturl() +def parse_qs(url): + return compat_parse_qs(compat_urllib_parse.urlparse(url).query) + + def read_batch_urls(batch_fd): def fixup(url): if not isinstance(url, compat_str): @@ -4111,25 +4116,28 @@ def urlencode_postdata(*args, **kargs): return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii') -def update_url_query(url, query): - if not query: - return url - parsed_url = compat_urlparse.urlparse(url) - qs = compat_parse_qs(parsed_url.query) - qs.update(query) - return compat_urlparse.urlunparse(parsed_url._replace( - query=compat_urllib_parse_urlencode(qs, True))) - - def update_url(url, **kwargs): """Replace URL components specified by kwargs url: compat_str or parsed URL tuple - returns: compat_str""" + if query_update is in kwargs, update query with + its value instead of replacing (overrides any `query`) + returns: compat_str + """ if not kwargs: - return compat_urlparse.urlunparse(url) if isinstance(url, tuple) else url + return compat_urllib_parse.urlunparse(url) if isinstance(url, tuple) else url if not isinstance(url, tuple): - url = compat_urlparse.urlparse(url) - return compat_urlparse.urlunparse(url._replace(**kwargs)) + url = compat_urllib_parse.urlparse(url) + query = kwargs.pop('query_update', None) + if query: + qs = compat_parse_qs(url.query) + qs.update(query) + kwargs['query'] = compat_urllib_parse_urlencode(qs, True) + kwargs = compat_kwargs(kwargs) + return compat_urllib_parse.urlunparse(url._replace(**kwargs)) + + +def update_url_query(url, query): + return update_url(url, query_update=query) def update_Request(req, url=None, data=None, headers={}, query={}): @@ -5597,7 +5605,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy - if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): + if compat_urllib_parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): req.add_header('Ytdl-socks-proxy', proxy) # youtube-dl's http/https handlers do wrapping the socket with socks return None @@ -6035,14 +6043,6 @@ def traverse_obj(obj, *paths, **kwargs): str = compat_str is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes)) - # stand-in until compat_re_Match is added - compat_re_Match = type(re.match('a', 'a')) - # stand-in until casefold.py is added - try: - ''.casefold() - compat_casefold = lambda s: s.casefold() - except AttributeError: - compat_casefold = lambda s: s.lower() 
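
A quick usage sketch of the reworked helpers, with behaviour read off the definitions above (values illustrative):

    from youtube_dl.utils import parse_qs, update_url, update_url_query

    url = 'https://example.com/path?a=1#frag'

    parse_qs(url)
    # -> {'a': ['1']}

    update_url(url, query=None, fragment=None)
    # -> 'https://example.com/path' (components replaced, here dropped)

    update_url(url, query_update={'b': ['2']})
    # -> 'https://example.com/path?a=1&b=2#frag' (query merged, not replaced)

    update_url_query(url, {'b': ['2']})
    # -> same as above; now a thin wrapper over update_url()
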
casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k if isinstance(expected_type, type): From b337af9c62fe17cb186788d550873726d4c7386f Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 6 Feb 2023 15:50:28 +0000 Subject: [PATCH 06/15] [compat] Update test_compat [skip ci] --- test/test_compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_compat.py b/test/test_compat.py index 4dddd9a38..e233b1ae1 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -50,9 +50,9 @@ class TestCompat(unittest.TestCase): import youtube_dl.compat all_names = sorted( youtube_dl.compat.__all__ + youtube_dl.compat.legacy) - present_names = set(filter( + present_names = set(map(compat_str, filter( lambda c: '_' in c and not c.startswith('_'), - dir(youtube_dl.compat))) - set(['unicode_literals']) + dir(youtube_dl.compat)))) - set(['unicode_literals']) self.assertEqual(all_names, sorted(present_names)) def test_compat_urllib_parse_unquote(self): From f640916de124206f9a8a397d44455ab917728f4a Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 6 Feb 2023 16:19:21 +0000 Subject: [PATCH 07/15] [YouTube] Refresh compat/utils usage * import parse_qs() * import parse_qs in lazy_extractors (clears old TODO) * clean up old compiled lazy_extractors for Py2 * use update_url() --- devscripts/make_lazy_extractors.py | 10 ++++- test/test_execution.py | 12 +++--- youtube_dl/extractor/youtube.py | 61 +++++++++++------------------- 3 files changed, 39 insertions(+), 44 deletions(-) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 878ae72b1..edc19183d 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -13,6 +13,11 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) lazy_extractors_filename = sys.argv[1] if os.path.exists(lazy_extractors_filename): os.remove(lazy_extractors_filename) +# Py2: may be confused by leftover lazy_extractors.pyc +try: + os.remove(lazy_extractors_filename + 'c') +except OSError: + pass from youtube_dl.extractor import _ALL_CLASSES from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor @@ -22,7 +27,10 @@ with open('devscripts/lazy_load_template.py', 'rt') as f: module_contents = [ module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', - 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n'] + 'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n', + # needed for suitable() methods of Youtube extractor (see #28780) + 'from youtube_dl.utils import parse_qs\n', +] ie_template = ''' class {name}({bases}): diff --git a/test/test_execution.py b/test/test_execution.py index 32948d93e..704e14612 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -40,14 +40,16 @@ class TestExecution(unittest.TestCase): self.assertFalse(stderr) def test_lazy_extractors(self): + lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py' try: - subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL) subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) finally: - try: - os.remove('youtube_dl/extractor/lazy_extractors.py') - except (IOError, OSError): - pass + for x in ['', 'c'] if sys.version_info[0] < 3 else ['']: + try: + os.remove(lazy_extractors + x) + except 
(IOError, OSError): + pass if __name__ == '__main__': diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6c1cfe7f2..6c70a98d1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -14,12 +14,11 @@ from ..compat import ( compat_chr, compat_HTTPError, compat_map as map, - compat_parse_qs, compat_str, + compat_urllib_parse, + compat_urllib_parse_parse_qs as compat_parse_qs, compat_urllib_parse_unquote_plus, - compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urlparse, ) from ..jsinterp import JSInterpreter from ..utils import ( @@ -33,6 +32,7 @@ from ..utils import ( mimetype2ext, parse_codecs, parse_duration, + parse_qs, qualities, remove_start, smuggle_url, @@ -50,10 +50,6 @@ from ..utils import ( ) -def parse_qs(url): - return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - - class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' @@ -636,6 +632,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 'duration': 142, 'uploader': 'The Witcher', + 'uploader_id': 'WitcherGame', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'upload_date': '20140605', 'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg', 'age_limit': 18, @@ -671,7 +669,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { - 'note': 'Age-gated video embedable only with clientScreen=EMBED', + 'note': 'Age-gated video embeddable only with clientScreen=EMBED', 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg', 'info_dict': { 'id': 'Tq92D6wQ1mg', @@ -1392,11 +1390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): @classmethod def suitable(cls, url): - # Hack for lazy extractors until more generic solution is implemented - # (see #28780) - from .youtube import parse_qs - qs = parse_qs(url) - if qs.get('list', [None])[0]: + if parse_qs(url).get('list', [None])[0]: return False return super(YoutubeIE, cls).suitable(url) @@ -1546,7 +1540,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if player_url.startswith('//'): player_url = 'https:' + player_url elif not re.match(r'https?://', player_url): - player_url = compat_urlparse.urljoin( + player_url = compat_urllib_parse.urljoin( 'https://www.youtube.com', player_url) return player_url @@ -1628,9 +1622,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _unthrottle_format_urls(self, video_id, player_url, formats): for fmt in formats: - parsed_fmt_url = compat_urlparse.urlparse(fmt['url']) - qs = compat_urlparse.parse_qs(parsed_fmt_url.query) - n_param = qs.get('n') + parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url']) + n_param = compat_parse_qs(parsed_fmt_url.query).get('n') if not n_param: continue n_param = n_param[-1] @@ -1638,9 +1631,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if n_response is None: # give up if descrambling failed break - qs['n'] = [n_response] - fmt['url'] = compat_urlparse.urlunparse( - parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + fmt['url'] = update_url( + parsed_fmt_url, query_update={'n': [n_response]}) # from yt-dlp, with tweaks def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): @@ -1669,20 +1661,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl'])) if not playback_url: return - parsed_playback_url = 
compat_urlparse.urlparse(playback_url) - qs = compat_urlparse.parse_qs(parsed_playback_url.query) # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) - qs.update({ - 'ver': ['2'], - 'cpn': [cpn], - }) - playback_url = compat_urlparse.urlunparse( - parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + playback_url = update_url( + playback_url, query_update={ + 'ver': ['2'], + 'cpn': [cpn], + }) self._download_webpage( playback_url, video_id, 'Marking watched', @@ -2075,9 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): thumbnails = [] for container in (video_details, microformat): - for thumbnail in (try_get( + for thumbnail in try_get( container, - lambda x: x['thumbnail']['thumbnails'], list) or []): + lambda x: x['thumbnail']['thumbnails'], list) or []: thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue @@ -3287,11 +3276,7 @@ class YoutubePlaylistIE(InfoExtractor): def suitable(cls, url): if YoutubeTabIE.suitable(url): return False - # Hack for lazy extractors until more generic solution is implemented - # (see #28780) - from .youtube import parse_qs - qs = parse_qs(url) - if qs.get('v', [None])[0]: + if parse_qs(url).get('v', [None])[0]: return False return super(YoutubePlaylistIE, cls).suitable(url) @@ -3430,9 +3415,9 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor): }] def _real_extract(self, url): - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - query = (qs.get('search_query') or qs.get('q'))[0] - params = qs.get('sp', ('',))[0] + qs = parse_qs(url) + query = (qs.get('search_query') or qs.get('q'))[-1] + params = qs.get('sp', ('',))[-1] return self.playlist_result(self._search_results(query, params), query, query) From bcf597ea1736395bbb893af16a012f10e7a2b544 Mon Sep 17 00:00:00 2001 From: dirkf Date: Wed, 8 Feb 2023 18:16:51 +0000 Subject: [PATCH 08/15] [YouTube] Fix tests --- youtube_dl/extractor/youtube.py | 55 ++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6c70a98d1..ba0f5c8b6 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -27,6 +27,8 @@ from ..utils import ( dict_get, error_to_compat_str, float_or_none, + extract_attributes, + get_element_by_attribute, int_or_none, js_to_json, mimetype2ext, @@ -38,6 +40,7 @@ from ..utils import ( smuggle_url, str_or_none, str_to_int, + traverse_obj, try_get, unescapeHTML, unified_strdate, @@ -656,6 +659,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:bf77e03fcae5529475e500129b05668a', 'duration': 177, 'uploader': 'FlyingKitty', + 'uploader_id': 'FlyingKitty900', 'upload_date': '20200408', 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg', 'age_limit': 18, @@ -678,6 +682,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:17eccca93a786d51bc67646756894066', 'duration': 106, 'uploader': 'Projekt Melody', + 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', 'upload_date': '20191227', 'age_limit': 18, 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg', @@ -929,16 +934,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'lsguqyKfVQg', 'ext': 'mp4', 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', - 'alt_title': 'Dark Walk - 
Position Music', + 'alt_title': 'Dark Walk', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'duration': 133, 'upload_date': '20151119', 'uploader_id': 'IronSoulElf', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 'uploader': 'IronSoulElf', - 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', - 'track': 'Dark Walk - Position Music', - 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan', + 'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan', + 'track': 'Dark Walk', + 'artist': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan', 'album': 'Position Music - Production Music Vol. 143 - Dark Walk', }, 'params': { @@ -2091,7 +2096,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or microformat.get('lengthSeconds')) \ or parse_duration(search_meta('duration')) is_live = video_details.get('isLive') - owner_profile_url = microformat.get('ownerProfileUrl') + + def gen_owner_profile_url(): + yield microformat.get('ownerProfileUrl') + yield extract_attributes(self._search_regex( + r'''(?s)(]+\bitemprop\s*=\s*("|')url\2[^>]*>)''', + get_element_by_attribute('itemprop', 'author', webpage), + 'owner_profile_url', default='')).get('href') + + owner_profile_url = next( + (x for x in map(url_or_none, gen_owner_profile_url()) if x), + None) if not player_url: player_url = self._extract_player_url(webpage) @@ -2176,6 +2191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): info[d_k] = parse_duration(query[k][0]) if video_description: + # Youtube Music Auto-generated description mobj = re.search(r'(?s)(?P[^·\n]+)·(?P[^\n]+)\n+(?P[^\n]+)(?:.+?℗\s*(?P\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description) if mobj: release_year = mobj.group('release_year') @@ -2250,7 +2266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': info['location'] = stl else: - mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) + # •? doesn't match, but [•]? does; \xa0 = non-breaking space + mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl) if mobj: info.update({ 'series': mobj.group(1), @@ -2261,7 +2278,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): vpir, lambda x: x['videoActions']['menuRenderer']['topLevelButtons'], list) or []): - tbr = tlb.get('toggleButtonRenderer') or {} + tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {} for getter, regex in [( lambda x: x['defaultText']['accessibility']['accessibilityData'], r'(?P[\d,]+)\s*(?P(?:dis)?like)'), ([ @@ -2315,6 +2332,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif mrr_title == 'Song': info['track'] = mrr_contents_text + # this is not extraction but spelunking! + carousel_lockups = traverse_obj( + initial_data, + ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer', + 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis, + 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis), + expected_type=dict) or [] + # try to reproduce logic from metadataRowContainerRenderer above (if it still is) + fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license')) + # multiple_songs ? 
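
As background for the traverse_obj() calls used here: Ellipsis in a path means "every item at this level", and such branching paths collect a list. A small illustration with made-up data shaped like the renderer above:

    from youtube_dl.utils import traverse_obj

    data = {'infoRows': [
        {'infoRowRenderer': {'title': {'simpleText': 'ALBUM'}}},
        {'infoRowRenderer': {'title': {'simpleText': 'SONG'}}},
    ]}
    traverse_obj(data, ('infoRows', Ellipsis, 'infoRowRenderer', 'title', 'simpleText'))
    # -> ['ALBUM', 'SONG']
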
+ if len(carousel_lockups) > 1: + fields = fields[-1:] + for info_row in traverse_obj( + carousel_lockups, + (0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'), + expected_type=dict): + row_title = traverse_obj(info_row, ('title', 'simpleText')) + row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text) + if not row_text: + continue + for name, field in fields: + if name == row_title and not info.get(field): + info[field] = row_text + for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: v = info.get(s_k) if v: From 80c02287773ab2c136e0e05d27917d0aa5f6d402 Mon Sep 17 00:00:00 2001 From: Valentin Metz <31850924+Valentin-Metz@users.noreply.github.com> Date: Thu, 9 Feb 2023 12:25:28 +0100 Subject: [PATCH 09/15] [rbgtum] Add new extractor (#31305) * [rbgtum] Add new extractor * Small update, force CI --------- Co-authored-by: dirkf --- youtube_dl/extractor/extractors.py | 4 ++ youtube_dl/extractor/rbgtum.py | 97 ++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 youtube_dl/extractor/rbgtum.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 96b27b179..dfaef0cc3 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1010,6 +1010,10 @@ from .raywenderlich import ( RayWenderlichIE, RayWenderlichCourseIE, ) +from .rbgtum import ( + RbgTumIE, + RbgTumCourseIE, +) from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redbulltv import ( diff --git a/youtube_dl/extractor/rbgtum.py b/youtube_dl/extractor/rbgtum.py new file mode 100644 index 000000000..da48ebbc4 --- /dev/null +++ b/youtube_dl/extractor/rbgtum.py @@ -0,0 +1,97 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RbgTumIE(InfoExtractor): + _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P.+)' + _TESTS = [{ + # Combined view + 'url': 'https://live.rbg.tum.de/w/cpp/22128', + 'md5': '53a5e7b3e07128e33bbf36687fe1c08f', + 'info_dict': { + 'id': 'cpp/22128', + 'ext': 'mp4', + 'title': 'Lecture: October 18. 
2022', + 'series': 'Concepts of C++ programming (IN2377)', + } + }, { + # Presentation only + 'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES', + 'md5': '36c584272179f3e56b0db5d880639cba', + 'info_dict': { + 'id': 'I2DL/12349/PRES', + 'ext': 'mp4', + 'title': 'Lecture 3: Introduction to Neural Networks', + 'series': 'Introduction to Deep Learning (IN2346)', + } + }, { + # Camera only + 'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM', + 'md5': 'e04189d92ff2f56aedf5cede65d37aad', + 'info_dict': { + 'id': 'fvv-info/16130/CAM', + 'ext': 'mp4', + 'title': 'Fachschaftsvollversammlung', + 'series': 'Fachschaftsvollversammlung Informatik', + } + }, ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8') + lecture_title = self._html_search_regex(r'(?si)(.*)', webpage, 'title') + lecture_series_title = self._html_search_regex( + r'(?s)]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?', webpage, 'series') + + formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': lecture_title, + 'series': lecture_series_title, + 'formats': formats, + } + + +class RbgTumCourseIE(InfoExtractor): + _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P.+)' + _TESTS = [{ + 'url': 'https://live.rbg.tum.de/course/2022/S/fpv', + 'info_dict': { + 'title': 'Funktionale Programmierung und Verifikation (IN0003)', + 'id': '2022/S/fpv', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 13, + }, { + 'url': 'https://live.rbg.tum.de/course/2022/W/set', + 'info_dict': { + 'title': 'SET FSMPIC', + 'id': '2022/W/set', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 6, + }, ] + + def _real_extract(self, url): + course_id = self._match_id(url) + webpage = self._download_webpage(url, course_id) + + lecture_series_title = self._html_search_regex(r'(?si)(.*)', webpage, 'title') + + lecture_urls = [] + for lecture_url in re.findall(r'(?i)href="/w/(.+)(? 
Date: Fri, 10 Feb 2023 04:19:27 +0800 Subject: [PATCH 10/15] [feat]: Add support to external downloader aria2p (#31500) * feat: add class Aria2pFD * feat: create call_downloader function * feat: a colorful download interface to aria2pFD * feat: change value name * Apply suggestions from code review Co-authored-by: dirkf * Typo in suggestion * fix: remove unused value * fix: add not function to return value(0 is normal); add total_seconds to download.eta(timedelta object); add waiting status when hook progress * fix: remove unuse method ..utils.format_bytes * fix: be up to flake8 * fix: be up to flake8 * Apply suggestions from code review * [feat] test external downloader aria2p * [feat] test external downloader aria2p * [fix] test_external_downloader.py * Apply suggestions from code review Co-authored-by: dirkf * Apply suggestions from code review Co-authored-by: dirkf * Update test/test_external_downloader.py Co-authored-by: dirkf * Update test/test_external_downloader.py Co-authored-by: dirkf * Update youtube_dl/downloader/external.py Co-authored-by: dirkf * refactoring code and fix bugs * Apply suggestions from code review * Rename test_external_downloader.py to test_downloader_external.py --------- Co-authored-by: dirkf --- test/helper.py | 11 +++ test/test_downloader_external.py | 115 ++++++++++++++++++++++++++++++ test/test_downloader_http.py | 17 ++--- test/test_http.py | 16 ++--- youtube_dl/downloader/external.py | 58 +++++++++++++++ 5 files changed, 193 insertions(+), 24 deletions(-) create mode 100644 test/test_downloader_external.py diff --git a/test/helper.py b/test/helper.py index c6a2f0667..883b2e877 100644 --- a/test/helper.py +++ b/test/helper.py @@ -89,6 +89,17 @@ class FakeYDL(YoutubeDL): self.report_warning = types.MethodType(report_warning, self) +class FakeLogger(object): + def debug(self, msg): + pass + + def warning(self, msg): + pass + + def error(self, msg): + pass + + def gettestcases(include_onlymatching=False): for ie in youtube_dl.extractor.gen_extractors(): for tc in ie.get_testcases(include_onlymatching): diff --git a/test/test_downloader_external.py b/test/test_downloader_external.py new file mode 100644 index 000000000..c0239502b --- /dev/null +++ b/test/test_downloader_external.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import re +import sys +import subprocess +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import ( + FakeLogger, + http_server_port, + try_rm, +) +from youtube_dl import YoutubeDL +from youtube_dl.compat import compat_http_server +from youtube_dl.utils import encodeFilename +from youtube_dl.downloader.external import Aria2pFD +import threading + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +TEST_SIZE = 10 * 1024 + + +class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def send_content_range(self, total=None): + range_header = self.headers.get('Range') + start = end = None + if range_header: + mobj = re.match(r'bytes=(\d+)-(\d+)', range_header) + if mobj: + start, end = (int(mobj.group(i)) for i in (1, 2)) + valid_range = start is not None and end is not None + if valid_range: + content_range = 'bytes %d-%d' % (start, end) + if total: + content_range += '/%d' % total + self.send_header('Content-Range', content_range) + return (end - start + 1) if valid_range else total + + def serve(self, 
range=True, content_length=True): + self.send_response(200) + self.send_header('Content-Type', 'video/mp4') + size = TEST_SIZE + if range: + size = self.send_content_range(TEST_SIZE) + if content_length: + self.send_header('Content-Length', size) + self.end_headers() + self.wfile.write(b'#' * size) + + def do_GET(self): + if self.path == '/regular': + self.serve() + elif self.path == '/no-content-length': + self.serve(content_length=False) + elif self.path == '/no-range': + self.serve(range=False) + elif self.path == '/no-range-no-content-length': + self.serve(range=False, content_length=False) + else: + assert False, 'unrecognised server path' + + +@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found') +class TestAria2pFD(unittest.TestCase): + def setUp(self): + self.httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def download(self, params, ep): + with subprocess.Popen( + ['aria2c', '--enable-rpc'], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL + ) as process: + if not process.poll(): + filename = 'testfile.mp4' + params['logger'] = FakeLogger() + params['outtmpl'] = filename + ydl = YoutubeDL(params) + try_rm(encodeFilename(filename)) + self.assertEqual(ydl.download(['http://127.0.0.1:%d/%s' % (self.port, ep)]), 0) + self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE) + try_rm(encodeFilename(filename)) + process.kill() + + def download_all(self, params): + for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): + self.download(params, ep) + + def test_regular(self): + self.download_all({'external_downloader': 'aria2p'}) + + def test_chunked(self): + self.download_all({ + 'external_downloader': 'aria2p', + 'http_chunk_size': 1000, + }) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 750472281..4e6d7a2a0 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -9,7 +9,11 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import http_server_port, try_rm +from test.helper import ( + FakeLogger, + http_server_port, + try_rm, +) from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server from youtube_dl.downloader.http import HttpFD @@ -66,17 +70,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): assert False -class FakeLogger(object): - def debug(self, msg): - pass - - def warning(self, msg): - pass - - def error(self, msg): - pass - - class TestHttpFD(unittest.TestCase): def setUp(self): self.httpd = compat_http_server.HTTPServer( diff --git a/test/test_http.py b/test/test_http.py index 3ee0a5dda..487a9bc77 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -8,7 +8,10 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import http_server_port +from test.helper import ( + FakeLogger, + http_server_port, +) from youtube_dl import YoutubeDL from youtube_dl.compat import compat_http_server, compat_urllib_request import ssl @@ -52,17 +55,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): assert False -class FakeLogger(object): - def debug(self, msg): - pass - - def 
warning(self, msg): - pass - - def error(self, msg): - pass - - class TestHTTP(unittest.TestCase): def setUp(self): self.httpd = compat_http_server.HTTPServer( diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index a06ab2e50..bffcd10b6 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -200,6 +200,64 @@ class Aria2cFD(ExternalFD): return cmd +class Aria2pFD(ExternalFD): + ''' Aria2pFD class + This class support to use aria2p as downloader. + (Aria2p, a command-line tool and Python library to interact with an aria2c daemon process + through JSON-RPC.) + It can help you to get download progress more easily. + To use aria2p as downloader, you need to install aria2c and aria2p, aria2p can download with pip. + Then run aria2c in the background and enable with the --enable-rpc option. + ''' + try: + import aria2p + __avail = True + except ImportError: + __avail = False + + @classmethod + def available(cls): + return cls.__avail + + def _call_downloader(self, tmpfilename, info_dict): + aria2 = self.aria2p.API( + self.aria2p.Client( + host='http://localhost', + port=6800, + secret='' + ) + ) + + options = { + 'min-split-size': '1M', + 'max-connection-per-server': 4, + 'auto-file-renaming': 'false', + } + options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.') + options['out'] = os.path.basename(tmpfilename) + options['header'] = [] + for key, val in info_dict['http_headers'].items(): + options['header'].append('{0}: {1}'.format(key, val)) + download = aria2.add_uris([info_dict['url']], options) + status = { + 'status': 'downloading', + 'tmpfilename': tmpfilename, + } + started = time.time() + while download.status in ['active', 'waiting']: + download = aria2.get_download(download.gid) + status.update({ + 'downloaded_bytes': download.completed_length, + 'total_bytes': download.total_length, + 'elapsed': time.time() - started, + 'eta': download.eta.total_seconds(), + 'speed': download.download_speed, + }) + self._hook_progress(status) + time.sleep(.5) + return download.status != 'complete' + + class HttpieFD(ExternalFD): @classmethod def available(cls): From cf5fb7e13972f508758a11490a46112cc0ee4891 Mon Sep 17 00:00:00 2001 From: fonkap Date: Sat, 11 Feb 2023 03:37:45 +0100 Subject: [PATCH 11/15] [FileMoonIE] Add extractor for filemoon.sx (#31515) --------- Co-authored-by: dirkf --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/filemoon.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 youtube_dl/extractor/filemoon.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dfaef0cc3..f63a2e030 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -376,6 +376,7 @@ from .fc2 import ( FC2EmbedIE, ) from .fczenit import FczenitIE +from .filemoon import FileMoonIE from .fifa import FifaIE from .filmon import ( FilmOnIE, diff --git a/youtube_dl/extractor/filemoon.py b/youtube_dl/extractor/filemoon.py new file mode 100644 index 000000000..654df9b69 --- /dev/null +++ b/youtube_dl/extractor/filemoon.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + decode_packed_codes, + js_to_json, +) + + +class FileMoonIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P\w+)' + _TEST = { + 'url': 'https://filemoon.sx/e/dw40rxrzruqz', + 'md5': '5a713742f57ac4aef29b74733e8dda01', + 
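
Stepping back to Aria2pFD above, a minimal sketch of the setup it relies on: an aria2c daemon on the default RPC port, driven through aria2p (host, port and field names as in the class; the URL is illustrative):

    # shell: start the daemon first
    #   aria2c --enable-rpc

    import time

    import aria2p

    aria2 = aria2p.API(
        aria2p.Client(host='http://localhost', port=6800, secret=''))
    download = aria2.add_uris(
        ['https://example.com/file.mp4'], {'out': 'file.mp4'})
    while download.status in ('active', 'waiting'):
        download = aria2.get_download(download.gid)
        print(download.completed_length, '/', download.total_length)
        time.sleep(.5)
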
From cf5fb7e13972f508758a11490a46112cc0ee4891 Mon Sep 17 00:00:00 2001
From: fonkap
Date: Sat, 11 Feb 2023 03:37:45 +0100
Subject: [PATCH 11/15] [FileMoonIE] Add extractor for filemoon.sx (#31515)

---------

Co-authored-by: dirkf
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/filemoon.py   | 43 ++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 youtube_dl/extractor/filemoon.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index dfaef0cc3..f63a2e030 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -376,6 +376,7 @@ from .fc2 import (
     FC2EmbedIE,
 )
 from .fczenit import FczenitIE
+from .filemoon import FileMoonIE
 from .fifa import FifaIE
 from .filmon import (
     FilmOnIE,
diff --git a/youtube_dl/extractor/filemoon.py b/youtube_dl/extractor/filemoon.py
new file mode 100644
index 000000000..654df9b69
--- /dev/null
+++ b/youtube_dl/extractor/filemoon.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    decode_packed_codes,
+    js_to_json,
+)
+
+
+class FileMoonIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
+    _TEST = {
+        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
+        'md5': '5a713742f57ac4aef29b74733e8dda01',
+        'info_dict': {
+            'id': 'dw40rxrzruqz',
+            'title': 'dw40rxrzruqz',
+            'ext': 'mp4'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+        matches = re.findall(r'(?s)(eval.*?)</script>', webpage)
+        packed = matches[-1]
+        unpacked = decode_packed_codes(packed)
+        jwplayer_sources = self._parse_json(
+            self._search_regex(
+                r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources'),
+            video_id, transform_source=js_to_json)
+
+        formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
+
+        return {
+            'id': video_id,
+            'title': self._generic_title(url) or video_id,
+            'formats': formats
+        }

From 99c3cf6419aac3d781aa3b82202c07802ec6160a Mon Sep 17 00:00:00 2001
From: fonkap
Date: Sat, 11 Feb 2023 03:47:43 +0100
Subject: [PATCH 12/15] [KommunetvIE] Add extractor for kommunetv.no (#31516)

* Add extractor for kommunetv.no

* Using utils.update_url instead of regex

---------

Co-authored-by: dirkf
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/kommunetv.py  | 35 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 youtube_dl/extractor/kommunetv.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f63a2e030..d8428f46f 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -557,6 +557,7 @@ from .khanacademy import (
 from .kickstarter import KickStarterIE
 from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
+from .kommunetv import KommunetvIE
 from .konserthusetplay import KonserthusetPlayIE
 from .krasview import KrasViewIE
 from .kth import KTHIE
diff --git a/youtube_dl/extractor/kommunetv.py b/youtube_dl/extractor/kommunetv.py
new file mode 100644
index 000000000..91d06a74f
--- /dev/null
+++ b/youtube_dl/extractor/kommunetv.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import update_url
+
+
+class KommunetvIE(InfoExtractor):
+    _VALID_URL = r'https://(\w+).kommunetv.no/archive/(?P<id>\w+)'
+    _TEST = {
+        'url': 'https://oslo.kommunetv.no/archive/921',
+        'md5': '5f102be308ee759be1e12b63d5da4bbc',
+        'info_dict': {
+            'id': '921',
+            'title': 'Bystyremøte',
+            'ext': 'mp4'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        headers = {
+            'Accept': 'application/json'
+        }
+        data = self._download_json('https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id, video_id, headers=headers)
+        title = data['stream']['title']
+        file = data['playlist'][0]['playlist'][0]['file']
+        url = update_url(file, query=None, fragment=None)
+        formats = self._extract_m3u8_formats(url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title
+        }
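For reference, the update_url() call above strips the query string and fragment from the manifest URL before it is fetched (a standalone sketch; the manifest URL is illustrative):

    from youtube_dl.utils import update_url

    # Passing query=None and fragment=None drops those URL components.
    manifest = 'https://example.com/stream/index.m3u8?token=abc#t=30'
    print(update_url(manifest, query=None, fragment=None))
    # -> https://example.com/stream/index.m3u8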
From 70a4a8b7528a3acd34e9c6e1a654da834d107d5a Mon Sep 17 00:00:00 2001
From: fonkap
Date: Sat, 11 Feb 2023 03:54:45 +0100
Subject: [PATCH 13/15] [StreamsbIE] Add extractor for streamsb.com (viewsb.com)
 (#31517)

* Add extractor for streamsb.com (viewsb.com)

* make data url using app.js version

---------

Co-authored-by: dirkf
---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/streamsb.py   | 61 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 youtube_dl/extractor/streamsb.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index d8428f46f..3a87f9e33 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1206,6 +1206,7 @@ from .storyfire import (
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .streamsb import StreamsbIE
 from .streetvoice import StreetVoiceIE
 from .stretchinternet import StretchInternetIE
 from .stv import STVPlayerIE
diff --git a/youtube_dl/extractor/streamsb.py b/youtube_dl/extractor/streamsb.py
new file mode 100644
index 000000000..bffcb3de1
--- /dev/null
+++ b/youtube_dl/extractor/streamsb.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import binascii
+import random
+import re
+import string
+
+from .common import InfoExtractor
+from ..utils import urljoin, url_basename
+
+
+def to_ascii_hex(str1):
+    return binascii.hexlify(str1.encode('utf-8')).decode('ascii')
+
+
+def generate_random_string(length):
+    return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length))
+
+
+class StreamsbIE(InfoExtractor):
+    _DOMAINS = ('viewsb.com', )
+    _VALID_URL = r'https://(?P<domain>%s)/(?P<id>.+)' % '|'.join(_DOMAINS)
+    _TEST = {
+        'url': 'https://viewsb.com/dxfvlu4qanjx',
+        'md5': '488d111a63415369bf90ea83adc8a325',
+        'info_dict': {
+            'id': 'dxfvlu4qanjx',
+            'ext': 'mp4',
+            'title': 'Sintel'
+        }
+    }
+
+    def _real_extract(self, url):
+        domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id')
+        webpage = self._download_webpage(url, video_id)
+
+        iframe_rel_url = self._search_regex(r'''(?i)<iframe\b[^>]+\bsrc\s*=\s*('|")(?P<path>/.*\.html)\1''', webpage, 'iframe', group='path')
+        iframe_url = urljoin('https://' + domain, iframe_rel_url)
+
+        iframe_data = self._download_webpage(iframe_url, video_id)
+        app_version = self._search_regex(r'''<script\b[^>]+\bsrc\s*=\s*["|'].*/app\.min\.(\d+)\.js''', iframe_data, 'app version', fatal=False) or '50'
+
+        video_code = url_basename(iframe_url).rsplit('.')[0]
+
+        length = 12
+        req = '||'.join((generate_random_string(length), video_code, generate_random_string(length), 'streamsb'))
+        ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req))
+
+        video_data = self._download_webpage(ereq, video_id, headers={
+            'Referer': iframe_url,
+            'watchsb': 'sbstream',
+        })
+        player_data = self._parse_json(video_data, video_id)
+        title = player_data['stream_data']['title']
+        formats = self._extract_m3u8_formats(player_data['stream_data']['file'], video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+        }
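The sources request URL built in _real_extract() can be reproduced standalone (a sketch reusing the module's own helpers; the domain, video code and app version are illustrative values):

    import binascii
    import random
    import string

    def to_ascii_hex(str1):
        return binascii.hexlify(str1.encode('utf-8')).decode('ascii')

    def generate_random_string(length):
        return ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(length))

    # Two 12-character random pads around the video code, joined with '||'
    # and hex-encoded, form the final path segment of /sources<app_version>/.
    req = '||'.join((generate_random_string(12), 'dxfvlu4qanjx', generate_random_string(12), 'streamsb'))
    print('https://{0}/sources{1}/{2}'.format('viewsb.com', '50', to_ascii_hex(req)))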
From d524ac1898ad680e3476be30e218f44e42961374 Mon Sep 17 00:00:00 2001
From: Epsilon Spider <105926847+epsilonSpider@users.noreply.github.com>
Date: Sat, 11 Feb 2023 14:42:25 -0500
Subject: [PATCH 14/15] Check for oreilly login with new url

Resolves #30884
---
 youtube_dl/extractor/safari.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 2cc665122..2d41482e5 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -38,7 +38,8 @@ class SafariBaseIE(InfoExtractor):
             'Downloading login page')
 
         def is_logged(urlh):
-            return 'learning.oreilly.com/home/' in urlh.geturl()
+            return ('https://learning.oreilly.com/member/login/' == urlh.geturl()
+                    or 'learning.oreilly.com/home/' in urlh.geturl())
 
         if is_logged(urlh):
             self.LOGGED_IN = True

From f7ca7e77be69b7028d68b92332d6de499d528601 Mon Sep 17 00:00:00 2001
From: Epsilon Spider <105926847+epsilonSpider@users.noreply.github.com>
Date: Tue, 4 Apr 2023 16:29:00 -0400
Subject: [PATCH 15/15] [safari] Refactor login check

---
 youtube_dl/extractor/safari.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 2d41482e5..e923d7641 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -38,8 +38,11 @@ class SafariBaseIE(InfoExtractor):
             'Downloading login page')
 
         def is_logged(urlh):
-            return ('https://learning.oreilly.com/member/login/' == urlh.geturl()
-                    or 'learning.oreilly.com/home/' in urlh.geturl())
+            url = urlh.geturl()
+            parsed_url = compat_urlparse.urlparse(url)
+            return parsed_url.hostname.endswith('learning.oreilly.com') and (
+                parsed_url.path.startswith('/home/')
+                or (parsed_url.path == '/member/login/' and not parsed_url.query))
 
         if is_logged(urlh):
             self.LOGGED_IN = True
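The refactored predicate can be sanity-checked outside the extractor (a sketch with urllib.parse standing in for compat_urlparse; is_logged_url is a hypothetical standalone mirror of is_logged, and the URLs are illustrative):

    from urllib.parse import urlparse

    def is_logged_url(url):
        # True when we stay on learning.oreilly.com and either land on
        # /home/, or on the bare login path with no query string (i.e.
        # no ?next=... redirect back to the login form).
        parsed = urlparse(url)
        return parsed.hostname.endswith('learning.oreilly.com') and (
            parsed.path.startswith('/home/')
            or (parsed.path == '/member/login/' and not parsed.query))

    assert is_logged_url('https://learning.oreilly.com/home/')
    assert is_logged_url('https://learning.oreilly.com/member/login/')
    assert not is_logged_url('https://learning.oreilly.com/member/login/?next=%2Fhome%2F')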