2013-06-23 21:34:03 +02:00
import re
2023-11-14 21:28:18 +01:00
import xml . etree . ElementTree
2013-06-23 21:34:03 +02:00
2015-02-18 18:27:45 +01:00
from . common import InfoExtractor
2023-07-09 09:53:02 +02:00
from . . networking import HEADRequest , Request
2014-12-13 12:24:42 +01:00
from . . utils import (
2013-06-23 21:34:03 +02:00
ExtractorError ,
2023-07-09 09:53:02 +02:00
RegexNotFoundError ,
2014-01-22 00:17:33 +01:00
find_xpath_attr ,
2014-01-20 22:11:34 +01:00
fix_xml_ampersands ,
2016-02-19 18:52:56 +01:00
float_or_none ,
2021-03-08 14:40:27 +01:00
int_or_none ,
2021-11-06 02:05:24 +01:00
join_nonempty ,
2016-07-17 11:56:39 +02:00
strip_or_none ,
2016-07-26 05:03:43 +02:00
timeconvert ,
2017-01-14 00:17:03 +01:00
try_get ,
2014-03-08 20:06:20 +01:00
unescapeHTML ,
2016-08-24 17:58:22 +02:00
update_url_query ,
2014-01-21 20:54:47 +01:00
url_basename ,
2016-03-24 17:23:31 +01:00
xpath_text ,
2013-06-23 21:34:03 +02:00
)
2014-01-22 00:17:33 +01:00
2013-07-14 14:02:04 +02:00
def _media_xml_tag ( tag ) :
2024-06-12 01:09:58 +02:00
return f ' {{ http://search.yahoo.com/mrss/ }} { tag } '
2013-06-23 21:34:03 +02:00
2013-07-14 13:41:46 +02:00
2015-02-18 18:27:45 +01:00
class MTVServicesInfoExtractor(InfoExtractor):
    """Shared logic for extractors that resolve an ``mgid`` URI via an MRSS feed.

    Subclasses are expected to provide ``_FEED_URL`` (read by
    ``_get_feed_url``) or override ``_get_feed_url`` / ``_get_feed_query``.
    """

    # printf-style template for the mobile page URL; formatted with the short
    # mtvn id in _extract_mobile_video_formats. None disables that fallback.
    _MOBILE_TEMPLATE = None
    # Optional value sent as the ``lang`` feed query parameter.
    _LANG = None

    @staticmethod
    def _id_from_uri(uri):
        """Return the last colon-separated component of *uri*."""
        return uri.split(':')[-1]

    @staticmethod
    def _remove_template_parameter(url):
        """Strip unfilled ``&name={placeholder}`` query parameters from *url*."""
        # Remove the templates, like &device={device}
        return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)

    def _get_feed_url(self, uri, url=None):
        """Return the feed URL to query for *uri* (here: the class constant)."""
        return self._FEED_URL

    def _get_thumbnail_url(self, uri, itemdoc):
        """Return the thumbnail URL found in *itemdoc*, or None.

        Looks up ``media:group/media:thumbnail`` and prefers its ``url``
        attribute over its text content.
        """
        search_path = '{}/{}'.format(_media_xml_tag('group'), _media_xml_tag('thumbnail'))
        thumb_node = itemdoc.find(search_path)
        if thumb_node is None:
            return None
        return thumb_node.get('url') or thumb_node.text or None

    def _extract_mobile_video_formats(self, mtvn_id):
        """Return a single-format list resolved through the mobile page."""
        webpage_url = self._MOBILE_TEMPLATE % mtvn_id
        req = Request(webpage_url)
        # Otherwise we get a webpage that would execute some javascript
        req.headers['User-Agent'] = 'curl/7'
        webpage = self._download_webpage(
            req, mtvn_id, 'Downloading mobile page')
        metrics_url = unescapeHTML(self._search_regex(
            r'<a href="(http://metrics.+?)"', webpage, 'url'))
        req = HEADRequest(metrics_url)
        response = self._request_webpage(req, mtvn_id, 'Resolving url')
        url = response.url
        # Transform the url to get the best quality:
        url = re.sub(
            r'.+pxE=mp4',
            'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4',
            url, count=1)
        return [{'url': url, 'ext': 'mp4'}]

    def _extract_video_formats(self, mdoc, mtvn_id, video_id):
        """Build the formats list from a mediagen document.

        If the first ``<src>`` points at a known geo-block placeholder, fall
        back to the mobile page when ``mtvn_id`` and ``_MOBILE_TEMPLATE`` are
        both set; otherwise raise an expected ExtractorError.

        Raises ExtractorError for unavailable videos and for renditions whose
        required fields are missing or malformed.
        """
        # A document without any <src> (or with an empty one) is simply not a
        # block page; do not dereference None.
        src_el = mdoc.find('.//src')
        first_src = (src_el.text if src_el is not None else None) or ''
        if re.match(
                r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$',
                first_src) is not None:
            if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
                self.to_screen('The normal version is not available from your '
                               'country, trying with the mobile version')
                return self._extract_mobile_video_formats(mtvn_id)
            raise ExtractorError('This video is not available from your country.',
                                 expected=True)

        formats = []
        for rendition in mdoc.findall('.//rendition'):
            if rendition.get('method') == 'hls':
                hls_url = rendition.find('./src').text
                formats.extend(self._extract_m3u8_formats(
                    hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                # fms
                try:
                    _, _, ext = rendition.attrib['type'].partition('/')
                    rtmp_video_url = rendition.find('./src').text
                    if 'error_not_available.swf' in rtmp_video_url:
                        raise ExtractorError(
                            f'{self.IE_NAME} said: video is not available',
                            expected=True)
                    if rtmp_video_url.endswith('siteunavail.png'):
                        continue
                    is_rtmp = rtmp_video_url.startswith('rtmp')
                    formats.append({
                        'ext': 'flv' if is_rtmp else ext,
                        'url': rtmp_video_url,
                        'format_id': join_nonempty(
                            'rtmp' if is_rtmp else None,
                            rendition.get('bitrate')),
                        'width': int(rendition.get('width')),
                        'height': int(rendition.get('height')),
                    })
                except (KeyError, TypeError, ValueError, AttributeError):
                    # Missing type/src/width/height or a non-numeric dimension
                    # are all reported the same way.
                    raise ExtractorError('Invalid rendition field.')
        return formats

    def _extract_subtitles(self, mdoc, mtvn_id):
        """Collect caption tracks from *mdoc*, keyed by ``srclang``."""
        subtitles = {}
        for transcript in mdoc.findall('.//transcript'):
            if transcript.get('kind') != 'captions':
                continue
            lang = transcript.get('srclang')
            for typographic in transcript.findall('./typographic'):
                sub_src = typographic.get('src')
                if not sub_src:
                    continue
                ext = typographic.get('format')
                # The feed labels SCC captions by their encoding name.
                if ext == 'cea-608':
                    ext = 'scc'
                subtitles.setdefault(lang, []).append({
                    'url': str(sub_src),
                    'ext': ext,
                })
        return subtitles

    def _get_video_info(self, itemdoc, use_hls=True):
        """Turn one feed ``<item>`` into an info dict.

        Returns None when the mediagen document cannot be downloaded or
        yields no formats. Raises ExtractorError when the service reports an
        error item or no title can be found.
        """
        uri = itemdoc.find('guid').text
        video_id = self._id_from_uri(uri)
        self.report_extraction(video_id)
        content_el = itemdoc.find(
            '{}/{}'.format(_media_xml_tag('group'), _media_xml_tag('content')))
        mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
        mediagen_url = mediagen_url.replace('device={device}', '')
        if 'acceptMethods' not in mediagen_url:
            mediagen_url += '&' if '?' in mediagen_url else '?'
            mediagen_url += 'acceptMethods='
            mediagen_url += 'hls' if use_hls else 'fms'

        mediagen_doc = self._download_xml(
            mediagen_url, video_id, 'Downloading video urls', fatal=False)
        # With fatal=False a failed download does not yield an Element.
        if not isinstance(mediagen_doc, xml.etree.ElementTree.Element):
            return None

        item = mediagen_doc.find('./video/item')
        if item is not None and item.get('type') == 'text':
            message = f'{self.IE_NAME} returned error: '
            code = item.get('code')
            if code is not None:
                message += f'{code} - '
            # The element may carry no text at all.
            message += item.text or ''
            raise ExtractorError(message, expected=True)

        description = strip_or_none(xpath_text(itemdoc, 'description'))
        timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))

        mrss_category = './/{http://search.yahoo.com/mrss/}category'

        # Title candidates in order of preference.
        title_el = find_xpath_attr(
            itemdoc, mrss_category, 'scheme', 'urn:mtvn:video_title')
        if title_el is None:
            title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
        if title_el is None:
            title_el = itemdoc.find('.//title')
        # No candidate element, or one without text, both mean "no title".
        title = title_el.text if title_el is not None else None
        if title is None:
            raise ExtractorError('Could not find video title')
        title = title.strip()

        series = find_xpath_attr(
            itemdoc, mrss_category, 'scheme', 'urn:mtvn:franchise')
        season = find_xpath_attr(
            itemdoc, mrss_category, 'scheme', 'urn:mtvn:seasonN')
        episode = find_xpath_attr(
            itemdoc, mrss_category, 'scheme', 'urn:mtvn:episodeN')
        series = series.text if series is not None else None
        season = season.text if season is not None else None
        episode = episode.text if episode is not None else None
        if season and episode:
            # episode number includes season, so remove it
            # (escaped: the season text is data, not a pattern)
            episode = re.sub(rf'^{re.escape(season)}', '', episode)

        # This a short id that's used in the webpage urls
        mtvn_id = None
        mtvn_id_node = find_xpath_attr(
            itemdoc, mrss_category, 'scheme', 'urn:mtvn:id')
        if mtvn_id_node is not None:
            mtvn_id = mtvn_id_node.text

        formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)

        # Some parts of complete video may be missing (e.g. missing Act 3 in
        # http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
        if not formats:
            return None

        return {
            'title': title,
            'formats': formats,
            'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
            'id': video_id,
            'thumbnail': self._get_thumbnail_url(uri, itemdoc),
            'description': description,
            'duration': float_or_none(content_el.attrib.get('duration')),
            'timestamp': timestamp,
            'series': series,
            'season_number': int_or_none(season),
            'episode_number': int_or_none(episode),
        }

    def _get_feed_query(self, uri):
        """Return the query parameters appended to the feed URL for *uri*."""
        data = {'uri': uri}
        if self._LANG:
            data['lang'] = self._LANG
        return data

    def _get_videos_info(self, uri, use_hls=True, url=None):
        """Resolve *uri* to its feed URL and extract all items from it."""
        video_id = self._id_from_uri(uri)
        feed_url = self._get_feed_url(uri, url)
        info_url = update_url_query(feed_url, self._get_feed_query(uri))
        return self._get_videos_info_from_url(info_url, video_id, use_hls)

    def _get_videos_info_from_url(self, url, video_id, use_hls=True):
        """Download the feed at *url* and return a playlist of its items.

        Items for which ``_get_video_info`` returns a falsy value are skipped.
        """
        idoc = self._download_xml(
            url, video_id,
            'Downloading info', transform_source=fix_xml_ampersands)

        title = xpath_text(idoc, './channel/title')
        description = xpath_text(idoc, './channel/description')

        entries = []
        for item in idoc.findall('.//item'):
            info = self._get_video_info(item, use_hls)
            if info:
                entries.append(info)

        # TODO: should be multi-video
        return self.playlist_result(
            entries, playlist_title=title, playlist_description=description)

    def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
        """Return the mgid referenced by the page's triforce manifest, or None."""
        triforce_feed = self._parse_json(self._search_regex(
            r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
            'triforce feed', default='{}'), video_id, fatal=False)

        # A data-zone attribute in the page takes precedence over the
        # caller-supplied default.
        data_zone = self._search_regex(
            r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage,
            'data zone', default=data_zone, group='zone')

        feed_url = try_get(
            triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'],
            str)
        if not feed_url:
            return

        feed = self._download_json(feed_url, video_id, fatal=False)
        if not feed:
            return

        return try_get(feed, lambda x: x['result']['data']['id'], str)

    @staticmethod
    def _extract_child_with_type(parent, t):
        """Return the first entry of ``parent['children']`` whose type is *t*.

        Returns None when *parent* is None, has no children, or no child
        matches; callers chain these lookups and pass earlier misses back in.
        """
        for c in (parent or {}).get('children') or []:
            if c.get('type') == t:
                return c
        return None

    def _extract_mgid(self, webpage):
        """Find the mgid in *webpage*, trying several page layouts in turn.

        Raises ExtractorError if none of the strategies yields an mgid.
        """
        try:
            # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
            # or http://media.mtvnservices.com/{mgid}
            og_url = self._og_search_video_url(webpage)
            mgid = url_basename(og_url)
            if mgid.endswith('.swf'):
                mgid = mgid[:-4]
        except RegexNotFoundError:
            mgid = None

        if mgid is None or ':' not in mgid:
            mgid = self._search_regex(
                [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
                webpage, 'mgid', default=None)

        if not mgid:
            sm4_embed = self._html_search_meta(
                'sm4:video:embed', webpage, 'sm4 embed', default='')
            mgid = self._search_regex(
                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)

        if not mgid:
            mgid = self._extract_triforce_mgid(webpage)

        if not mgid:
            # Last resort: walk the component tree embedded as __DATA__.
            data = self._parse_json(self._search_regex(
                r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
            main_container = self._extract_child_with_type(data, 'MainContainer')
            ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
            video_player = self._extract_child_with_type(
                ab_testing or main_container, 'VideoPlayer')
            if video_player:
                mgid = try_get(
                    video_player,
                    lambda x: x['props']['media']['video']['config']['uri'])
            else:
                flex_wrapper = self._extract_child_with_type(
                    ab_testing or main_container, 'FlexWrapper')
                auth_suite_wrapper = self._extract_child_with_type(
                    flex_wrapper, 'AuthSuiteWrapper')
                player = self._extract_child_with_type(
                    auth_suite_wrapper or flex_wrapper, 'Player')
                if player:
                    mgid = try_get(
                        player, lambda x: x['props']['videoDetail']['mgid'])

        if not mgid:
            raise ExtractorError('Could not extract mgid')

        return mgid

    def _real_extract(self, url):
        """Download *url*, locate its mgid and extract the matching feed."""
        title = url_basename(url)
        webpage = self._download_webpage(url, title)
        mgid = self._extract_mgid(webpage)
        return self._get_videos_info(mgid, url=url)
2014-01-21 20:54:47 +01:00
2013-12-03 14:58:24 +01:00
2014-06-22 19:19:26 +02:00
class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
    """Extractor for ``media.mtvnservices.com/embed/<mgid>`` player URLs."""

    IE_NAME = 'mtvservices:embedded'
    _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1']

    _TEST = {
        # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
        'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
        'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
        'info_dict': {
            'id': '1043906',
            'ext': 'mp4',
            'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
            'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
            'timestamp': 1400126400,
            'upload_date': '20140515',
        },
    }

    def _get_feed_url(self, uri, url=None):
        """Look up the feed URL for *uri* in the player's edge config."""
        video_id = self._id_from_uri(uri)
        config = self._download_json(
            f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge', video_id)
        # The configured feed URL still carries {placeholder} parameters.
        return self._remove_template_parameter(config['feedWithQueryParams'])

    def _real_extract(self, url):
        """Extract directly from the mgid embedded in *url*."""
        mobj = self._match_valid_url(url)
        mgid = mobj.group('mgid')
        return self._get_videos_info(mgid)
2013-12-03 14:58:24 +01:00
class MTVIE(MTVServicesInfoExtractor):
    """Extractor for mtv.com video-clip and (full-)episode pages.

    Relies entirely on the inherited extraction flow; only the URL pattern
    and feed endpoint are specific to this site.
    """

    IE_NAME = 'mtv'
    _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
    _FEED_URL = 'http://www.mtv.com/feeds/mrss/'

    _TESTS = [{
        'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
        'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
        'info_dict': {
            'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
            'ext': 'mp4',
            'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
            'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
            'timestamp': 1468846800,
            'upload_date': '20160718',
        },
    }, {
        'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
        'only_matching': True,
    }, {
        'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
        'only_matching': True,
    }]
2019-10-28 23:31:14 +01:00
class MTVJapanIE(MTVServicesInfoExtractor):
    """Extractor for mtvjapan.com video pages."""

    IE_NAME = 'mtvjapan'
    _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P<id>[0-9a-z]+)'

    _TEST = {
        'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade',
        'info_dict': {
            'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5',
            'ext': 'mp4',
            'title': '【Fresh Info】Cadillac ESCALADE Sport Edition',
        },
        'params': {
            'skip_download': True,
        },
    }
    _GEO_COUNTRIES = ['JP']
    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'

    def _get_feed_query(self, uri):
        """Return the feed query: this site's ``arcEp`` plus the mgid."""
        return {
            'arcEp': 'mtvjapan.com',
            'mgid': uri,
        }
2017-01-28 14:29:22 +01:00
2016-09-24 11:41:38 +02:00
class MTVVideoIE(MTVServicesInfoExtractor):
    """Extractor for legacy mtv.com ``/videos/`` pages and m.mtv.com links."""

    IE_NAME = 'mtv:video'
    # Verbose pattern: either a desktop URL with a numeric video id, or a
    # mobile URL that already carries the mgid in its query string.
    _VALID_URL = r'''(?x)^https?://
        (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
           m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''

    _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'

    _TESTS = [
        {
            'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
            'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
            'info_dict': {
                'id': '853555',
                'ext': 'mp4',
                'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
                'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
                'timestamp': 1352610000,
                'upload_date': '20121111',
            },
        },
    ]

    def _get_thumbnail_url(self, uri, itemdoc):
        """Build the thumbnail URL from *uri*; *itemdoc* is not consulted."""
        return 'http://mtv.mtvnimages.com/uri/' + uri

    def _real_extract(self, url):
        """Extract from *url*, delegating to the Vevo extractor when flagged."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('videoid')
        uri = mobj.groupdict().get('mgid')
        if uri is None:
            # Desktop URL: the mgid has to be scraped from the page.
            webpage = self._download_webpage(url, video_id)

            # Some videos come from Vevo.com
            m_vevo = re.search(
                r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
            if m_vevo:
                vevo_id = m_vevo.group(1)
                self.to_screen(f'Vevo video detected: {vevo_id}')
                return self.url_result(f'vevo:{vevo_id}', ie='Vevo')

            uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
        return self._get_videos_info(uri)
2014-01-21 20:59:31 +01:00
2015-08-26 00:06:44 +02:00
class MTVDEIE(MTVServicesInfoExtractor):
    """Extractor for mtv.de music video, episode and news pages."""

    # Flagged as not working in this file.
    _WORKING = False
    IE_NAME = 'mtv.de'
    _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum',
        'info_dict': {
            'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5',
            'ext': 'mp4',
            'title': 'Traum',
            'description': 'Traum',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Blocked at Travis CI',
    }, {
        # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
        'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1',
        'info_dict': {
            'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285',
            'ext': 'mp4',
            'title': 'Teen Mom 2',
            'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Blocked at Travis CI',
    }, {
        'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3',
        'info_dict': {
            'id': 'local_playlist-4e760566473c4c8c5344',
            'ext': 'mp4',
            'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
            'description': 'MTV Movies Supercut',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
    }]
    _GEO_COUNTRIES = ['DE']
    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'

    def _get_feed_query(self, uri):
        """Return the feed query: this site's ``arcEp`` plus the mgid."""
        return {
            'arcEp': 'mtv.de',
            'mgid': uri,
        }
2021-03-08 14:40:27 +01:00
class MTVItaliaIE(MTVServicesInfoExtractor):
    """Extractor for mtv.it episode, video and music pages."""

    IE_NAME = 'mtv.it'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
        'info_dict': {
            'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
            'ext': 'mp4',
            'title': 'Cavoli amario (episodio completo)',
            'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
            'series': 'Mario - Una Serie Di Maccio Capatonda',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }]
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'

    def _get_feed_query(self, uri):
        """Return the feed query: this site's ``arcEp`` plus the mgid."""
        return {
            'arcEp': 'mtv.it',
            'mgid': uri,
        }
2022-11-16 01:57:43 +01:00
class MTVItaliaProgrammaIE(MTVItaliaIE):  # XXX: Do not subclass from concrete IE
    """Playlist extractor for mtv.it programme and playlist pages."""

    IE_NAME = 'mtv.it:programma'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        # program page: general
        'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
        'info_dict': {
            'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
            'title': 'Mario - Una Serie Di Maccio Capatonda',
            'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
        },
        'playlist_count': 2,
        'params': {
            'skip_download': True,
        },
    }, {
        # program page: specific season
        'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
        'info_dict': {
            'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
            'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
        },
        'playlist_count': 34,
        'params': {
            'skip_download': True,
        },
    }, {
        # playlist page + redirect
        'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
        'info_dict': {
            'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
            'title': 'Sexy Videos',
        },
        'playlist_mincount': 145,
        'params': {
            'skip_download': True,
        },
    }]
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'

    def _get_entries(self, title, url):
        """Yield a url_result per entry, following ``nextPageURL`` links.

        Each page is read from ``result.data.items`` or, failing that,
        ``result.data.seasons``; entries without ``canonicalURL`` are skipped.
        """
        while True:
            # The trailing path component is used as the page number in the
            # progress note; pages without one are reported as page 1.
            pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
            entries = self._download_json(url, title, f'page {pg}')
            url = try_get(
                entries, lambda x: x['result']['nextPageURL'], str)
            entries = try_get(
                entries, (
                    lambda x: x['result']['data']['items'],
                    lambda x: x['result']['data']['seasons']),
                list)
            for entry in entries or []:
                if entry.get('canonicalURL'):
                    yield self.url_result(entry['canonicalURL'])
            if not url:
                break

    def _real_extract(self, url):
        """Resolve the page manifest and return a lazily paged playlist.

        Raises ExtractorError when the manifest is missing or a matching
        info zone supplies no feed.
        """
        query = {'url': url}
        info_url = update_url_query(self._FEED_URL, query)
        video_id = self._match_id(url)
        info = self._download_json(info_url, video_id).get('manifest')
        # Everything below reads from the manifest; fail clearly without it.
        if not info:
            raise ExtractorError('No info found')

        redirect = try_get(
            info, lambda x: x['newLocation']['url'], str)
        if redirect:
            return self.url_result(redirect)

        title = info.get('title')
        video_id = try_get(
            info, lambda x: x['reporting']['itemId'], str)
        parent_id = try_get(
            info, lambda x: x['reporting']['parentId'], str)

        playlist_url = current_url = None
        for z in (info.get('zones') or {}).values():
            if z.get('moduleName') in ('INTL_M304', 'INTL_M209'):
                info_url = z.get('feed')
            if z.get('moduleName') in ('INTL_M308', 'INTL_M317'):
                playlist_url = playlist_url or z.get('feed')
            if z.get('moduleName') in ('INTL_M300',):
                current_url = current_url or z.get('feed')

        # NOTE(review): info_url starts out as the manifest URL, so this only
        # fires when a matching zone has no feed - confirm that is intended.
        if not info_url:
            raise ExtractorError('No info found')

        if video_id == parent_id:
            video_id = self._search_regex(
                r'([^\/]+)/[^\/]+$', info_url, 'video_id')

        info = self._download_json(info_url, video_id, 'Show infos')
        info = try_get(info, lambda x: x['result']['data'], dict)
        title = title or try_get(
            info, (
                lambda x: x['title'],
                lambda x: x['headline']),
            str)
        description = try_get(info, lambda x: x['content'], str)

        if current_url:
            season = try_get(
                self._download_json(playlist_url, video_id, 'Seasons info'),
                lambda x: x['result']['data'], dict)
            current = try_get(
                season, lambda x: x['currentSeason'], str)
            seasons = try_get(
                season, lambda x: x['seasons'], list) or []

            # Use the current-season feed when that season is among the
            # listed ones.
            if current in [s.get('eTitle') for s in seasons]:
                playlist_url = current_url

        # Neither source may have supplied a title; only clean a real one.
        if title:
            title = re.sub(
                r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
                '', title, flags=re.IGNORECASE).strip()

        return self.playlist_result(
            self._get_entries(title, playlist_url),
            video_id, title, description)