- adding alura support

This commit is contained in:
Hugo Alves De Azevedo 2020-08-28 10:02:31 -03:00
parent 74dc105210
commit 4d75c363a7
2 changed files with 81 additions and 23 deletions

View File

@ -6,21 +6,15 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str,
compat_urlparse, compat_urlparse,
) )
from ..utils import ( from ..utils import (
dict_get,
ExtractorError,
float_or_none,
int_or_none,
parse_duration,
qualities,
srt_subtitles_timecode,
try_get,
update_url_query,
urlencode_postdata, urlencode_postdata,
urljoin,
int_or_none,
clean_html,
ExtractorError
) )
@ -28,17 +22,26 @@ class AluraIE(InfoExtractor):
_VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<course_name>[^/]+)/task/(?P<id>\d+)' _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<course_name>[^/]+)/task/(?P<id>\d+)'
_LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm' _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
_VIDEO_URL = 'https://cursos.alura.com.br/course/%s/task/%s/video' _VIDEO_URL = 'https://cursos.alura.com.br/course/%s/task/%s/video'
_TEST = { _NETRC_MACHINE = 'alura'
'url': 'https://cursos.alura.com.br/course/design-patterns-python/task/9651', _TESTS = [{
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs/task/60095',
'info_dict': { 'info_dict': {
'id': '9651', 'id': '60095',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Video title goes here', 'title': 'Referências, ref-set e alter'
'thumbnail': r're:^https?://.*\.jpg$', },
# TODO more properties, either as: 'skip': 'Requires alura account credentials',
},
{
# URL without video
'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs/task/60098',
'only_matching': True,
},
{
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
'only_matching': True,
} }
} ]
def _real_extract(self, url): def _real_extract(self, url):
@ -50,9 +53,9 @@ def _real_extract(self, url):
if video_dict: if video_dict:
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_title = self._search_regex( video_title = clean_html(self._search_regex(
r'<span[^>]+class=(["\'])task-body-header-title-text\1[^>]*>(?P<title>[^<]+)', r'<span[^>]+class=(["\'])task-body-header-title-text\1[^>]*>(?P<title>[^<]+)',
webpage, 'title', group='title') webpage, 'title', group='title'))
formats = [] formats = []
for video_obj in video_dict: for video_obj in video_dict:
@ -60,9 +63,15 @@ def _real_extract(self, url):
video_format = self._extract_m3u8_formats( video_format = self._extract_m3u8_formats(
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native', video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
for f in video_format:
m = re.search(r'^[\w \W]*-(?P<res>\w*).mp4[\W \w]*', f['url'])
if m:
if not f.get('height'):
f['height'] = int('720' if m.group('res') == 'hd' else '480')
formats.extend(video_format) formats.extend(video_format)
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
@ -111,9 +120,55 @@ def is_logged(webpage):
if not post_url.startswith('http'): if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
self._download_webpage( response = self._download_webpage(
post_url, None, 'Logging in', post_url, None, 'Logging in',
data=urlencode_postdata(login_form), data=urlencode_postdata(login_form),
headers={'Content-Type': 'application/x-www-form-urlencoded'}) headers={'Content-Type': 'application/x-www-form-urlencoded'})
if not is_logged(response):
error = self._html_search_regex(
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
class AluraCourseIE(AluraIE):
    """Playlist extractor for a whole Alura course.

    Downloads the course page, follows every section link found on it and
    emits one ``url_transparent`` entry per video task link, naming
    ``AluraIE`` as the extractor that should resolve each entry.
    """

    # NOTE(review): this pattern is a prefix of AluraIE._VALID_URL, so it also
    # matches ".../course/<name>/task/<id>" URLs. Presumably dispatch relies on
    # AluraIE being tried first -- confirm.
    _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P<id>[^/]+)'
    _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm'
    _NETRC_MACHINE = 'aluracourse'
    _TESTS = [{
        'url': 'https://cursos.alura.com.br/course/clojure-mutabilidade-com-atoms-e-refs',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist with one entry per video task of the course.

        The course slug captured by ``_VALID_URL`` is used as the playlist id
        and as the fallback title when the heading regex does not match.
        """
        course_path = self._match_id(url)
        webpage = self._download_webpage(url, course_path)

        course_title = self._search_regex(
            r'<h1.*?>(.*?)<strong>(?P<course_title>.*?)</strong></h[0-9]>', webpage,
            'course title', default=course_path, group='course_title')

        # Links to the individual course sections listed on the course page.
        section_paths = re.findall(
            r'<a\b(?=[^>]* class="[^"]*(?<=[" ])courseSectionList-section[" ])(?=[^>]* href="([^"]*))',
            webpage) if webpage else []

        entries = []
        for path in section_paths:
            page_url = urljoin(url, path)
            section_page = self._download_webpage(page_url, course_path)

            video_paths = re.findall(
                r'<a\b(?=[^>]* class="[^"]*(?<=[" ])task-menu-nav-item-link-VIDEO[" ])(?=[^>]* href="([^"]*))',
                section_page)
            if not video_paths:
                # As before, the chapter regexes are only evaluated for
                # sections that actually list at least one video link.
                continue

            # Chapter metadata depends only on the section page, so it is
            # extracted once per section instead of once per video link.
            chapter = clean_html(self._search_regex(
                r'<h3[^>]+class=(["\'])task-menu-section-title-text\1[^>]*>(?P<chapter>[^<]+)',
                section_page, 'chapter', group='chapter'))
            chapter_number = int_or_none(self._search_regex(
                r'<span[^>]+class=(["\'])task-menu-section-title-number[^>]*>(.*?)<strong>(?P<chapter_number>[^<]+)</strong>',
                section_page, 'chapter number', group='chapter_number'))

            for path_video in video_paths:
                video_url = urljoin(url, path_video)
                entries.append({
                    '_type': 'url_transparent',
                    # NOTE(review): _match_id applies this class's _VALID_URL,
                    # whose `id` group is the course slug, so this is likely
                    # the slug rather than the task id -- confirm intent.
                    'id': self._match_id(video_url),
                    'url': video_url,
                    # The key was misspelled 'id_key' and pointed back at this
                    # course extractor; name the single-video extractor.
                    'ie_key': AluraIE.ie_key(),
                    'chapter': chapter,
                    'chapter_number': chapter_number,
                })

        return self.playlist_result(entries, course_path, course_title)

View File

@ -36,7 +36,10 @@
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE from .alphaporno import AlphaPornoIE
from .alura import AluraIE from .alura import (
AluraIE,
AluraCourseIE
)
from .amcnetworks import AMCNetworksIE from .amcnetworks import AMCNetworksIE
from .americastestkitchen import AmericasTestKitchenIE from .americastestkitchen import AmericasTestKitchenIE
from .animeondemand import AnimeOnDemandIE from .animeondemand import AnimeOnDemandIE