[spiegel] Test format video URLs for 404 (Closes #4579)

2025-03-02 03:21:20 +01:00 · 2015-01-14 20:27:14 +06:00 · 2015-01-14 20:27:14 +06:00 · e92d4a11f5
commit e92d4a11f5
parent f2cbc96c3e
1 changed files with 33 additions and 16 deletions
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -4,7 +4,14 @@
 import re
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
    compat_urlparse,
    compat_HTTPError,
 )
 from ..utils import (
    HEADRequest,
    ExtractorError,
 )
 from .spiegeltv import SpiegeltvIE
@@ -60,21 +67,31 @@ def _real_extract(self, url):
        xml_url = base_url + video_id + '.xml'
        idoc = self._download_xml(xml_url, video_id)
-        formats = [
+        formats = []
-            {
+        for n in list(idoc):
-                'format_id': n.tag.rpartition('type')[2],
+            if n.tag.startswith('type') and n.tag != 'type6':
-                'url': base_url + n.find('./filename').text,
+                format_id = n.tag.rpartition('type')[2]
-                'width': int(n.find('./width').text),
+                video_url = base_url + n.find('./filename').text
-                'height': int(n.find('./height').text),
+                # Test video URLs beforehand as some of them are invalid
-                'abr': int(n.find('./audiobitrate').text),
+                try:
-                'vbr': int(n.find('./videobitrate').text),
+                    self._request_webpage(
-                'vcodec': n.find('./codec').text,
+                        HEADRequest(video_url), video_id,
-                'acodec': 'MP4A',
+                        'Checking %s video URL' % format_id)
-            }
+                except ExtractorError as e:
-            for n in list(idoc)
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
-            # Blacklist type 6, it's extremely LQ and not available on the same server
+                        self.report_warning(
-            if n.tag.startswith('type') and n.tag != 'type6'
+                            '%s video URL is invalid, skipping' % format_id, video_id)
-        ]
+                        continue
                formats.append({
                    'format_id': format_id,
                    'url': video_url,
                    'width': int(n.find('./width').text),
                    'height': int(n.find('./height').text),
                    'abr': int(n.find('./audiobitrate').text),
                    'vbr': int(n.find('./videobitrate').text),
                    'vcodec': n.find('./codec').text,
                    'acodec': 'MP4A',
                })
        duration = float(idoc[0].findall('./duration')[0].text)
        self._sort_formats(formats)