[tumblr] Detect and report sensitive media (closes #13829)

This commit is contained in:
Sergey M․ 2018-05-26 20:00:00 +07:00
parent 56cd31f320
commit 97b01144bd
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@@ -150,11 +151,19 @@ class TumblrIE(InfoExtractor):
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id) webpage, urlh = self._download_webpage_handle(url, video_id)
redirect_url = compat_str(urlh.geturl())
if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
raise ExtractorError(
'This Tumblr may contain sensitive media. '
'Disable safe mode in your account settings '
'at https://www.tumblr.com/settings/account#safe_mode',
expected=True)
iframe_url = self._search_regex( iframe_url = self._search_regex(
r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'', r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
webpage, 'iframe url', default=None) webpage, 'iframe url', default=None)
if iframe_url is None: if iframe_url is None:
return self.url_result(urlh.geturl(), 'Generic') return self.url_result(redirect_url, 'Generic')
iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page') iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')