Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-02-27 02:51:30 +01:00)
[ie/instagram] Add `app_id` extractor-arg (#12359)
Authored by: chrisellsworth
This commit is contained in:
parent
65c3c58c0a
commit
a90641c836
@ -1812,6 +1812,9 @@ #### hotstar
|
|||||||
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
|
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
|
||||||
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
|
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
|
||||||
|
|
||||||
|
#### instagram
|
||||||
|
* `app_id`: The value of the `X-IG-App-ID` header used for API requests. Default is the web app ID, `936619743392459`
|
||||||
|
|
||||||
#### niconicochannelplus
|
#### niconicochannelplus
|
||||||
* `max_comments`: Maximum number of comments to extract - default is `120`
|
* `max_comments`: Maximum number of comments to extract - default is `120`
|
||||||
|
|
||||||
|
@ -28,7 +28,8 @@
|
|||||||
|
|
||||||
def _pk_to_id(media_id):
|
def _pk_to_id(media_id):
|
||||||
"""Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
|
"""Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
|
||||||
return encode_base_n(int(media_id.split('_')[0]), table=_ENCODING_CHARS)
|
pk = int(str(media_id).split('_')[0])
|
||||||
|
return encode_base_n(pk, table=_ENCODING_CHARS)
|
||||||
|
|
||||||
|
|
||||||
def _id_to_pk(shortcode):
|
def _id_to_pk(shortcode):
|
||||||
@ -42,8 +43,11 @@ class InstagramBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
_API_BASE_URL = 'https://i.instagram.com/api/v1'
|
_API_BASE_URL = 'https://i.instagram.com/api/v1'
|
||||||
_LOGIN_URL = 'https://www.instagram.com/accounts/login'
|
_LOGIN_URL = 'https://www.instagram.com/accounts/login'
|
||||||
_API_HEADERS = {
|
|
||||||
'X-IG-App-ID': '936619743392459',
|
@property
|
||||||
|
def _api_headers(self):
|
||||||
|
return {
|
||||||
|
'X-IG-App-ID': self._configuration_arg('app_id', ['936619743392459'], ie_key=InstagramIE)[0],
|
||||||
'X-ASBD-ID': '198387',
|
'X-ASBD-ID': '198387',
|
||||||
'X-IG-WWW-Claim': '0',
|
'X-IG-WWW-Claim': '0',
|
||||||
'Origin': 'https://www.instagram.com',
|
'Origin': 'https://www.instagram.com',
|
||||||
@ -62,7 +66,7 @@ def _perform_login(self, username, password):
|
|||||||
|
|
||||||
login = self._download_json(
|
login = self._download_json(
|
||||||
f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
|
f'{self._LOGIN_URL}/ajax/', None, note='Logging in', headers={
|
||||||
**self._API_HEADERS,
|
**self._api_headers,
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
'X-CSRFToken': shared_data['config']['csrf_token'],
|
'X-CSRFToken': shared_data['config']['csrf_token'],
|
||||||
'X-Instagram-AJAX': shared_data['rollout_hash'],
|
'X-Instagram-AJAX': shared_data['rollout_hash'],
|
||||||
@ -209,7 +213,7 @@ def _extract_product(self, product_info):
|
|||||||
def _get_comments(self, video_id):
|
def _get_comments(self, video_id):
|
||||||
comments_info = self._download_json(
|
comments_info = self._download_json(
|
||||||
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/comments/?can_support_threading=true&permalink_enabled=false', video_id,
|
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/comments/?can_support_threading=true&permalink_enabled=false', video_id,
|
||||||
fatal=False, errnote='Comments extraction failed', note='Downloading comments info', headers=self._API_HEADERS) or {}
|
fatal=False, errnote='Comments extraction failed', note='Downloading comments info', headers=self._api_headers) or {}
|
||||||
|
|
||||||
comment_data = traverse_obj(comments_info, ('edge_media_to_parent_comment', 'edges'), 'comments')
|
comment_data = traverse_obj(comments_info, ('edge_media_to_parent_comment', 'edges'), 'comments')
|
||||||
for comment_dict in comment_data or []:
|
for comment_dict in comment_data or []:
|
||||||
@ -402,14 +406,14 @@ def _real_extract(self, url):
|
|||||||
info = traverse_obj(self._download_json(
|
info = traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
|
f'{self._API_BASE_URL}/media/{_id_to_pk(video_id)}/info/', video_id,
|
||||||
fatal=False, errnote='Video info extraction failed',
|
fatal=False, errnote='Video info extraction failed',
|
||||||
note='Downloading video info', headers=self._API_HEADERS), ('items', 0))
|
note='Downloading video info', headers=self._api_headers), ('items', 0))
|
||||||
if info:
|
if info:
|
||||||
media.update(info)
|
media.update(info)
|
||||||
return self._extract_product(media)
|
return self._extract_product(media)
|
||||||
|
|
||||||
api_check = self._download_json(
|
api_check = self._download_json(
|
||||||
f'{self._API_BASE_URL}/web/get_ruling_for_content/?content_type=MEDIA&target_id={_id_to_pk(video_id)}',
|
f'{self._API_BASE_URL}/web/get_ruling_for_content/?content_type=MEDIA&target_id={_id_to_pk(video_id)}',
|
||||||
video_id, headers=self._API_HEADERS, fatal=False, note='Setting up session', errnote=False) or {}
|
video_id, headers=self._api_headers, fatal=False, note='Setting up session', errnote=False) or {}
|
||||||
csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
|
csrf_token = self._get_cookies('https://www.instagram.com').get('csrftoken')
|
||||||
|
|
||||||
if not csrf_token:
|
if not csrf_token:
|
||||||
@ -429,7 +433,7 @@ def _real_extract(self, url):
|
|||||||
general_info = self._download_json(
|
general_info = self._download_json(
|
||||||
'https://www.instagram.com/graphql/query/', video_id, fatal=False, errnote=False,
|
'https://www.instagram.com/graphql/query/', video_id, fatal=False, errnote=False,
|
||||||
headers={
|
headers={
|
||||||
**self._API_HEADERS,
|
**self._api_headers,
|
||||||
'X-CSRFToken': csrf_token or '',
|
'X-CSRFToken': csrf_token or '',
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
@ -727,7 +731,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
videos = traverse_obj(self._download_json(
|
videos = traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
||||||
display_id, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels')
|
display_id, errnote=False, fatal=False, headers=self._api_headers), 'reels')
|
||||||
if not videos:
|
if not videos:
|
||||||
self.raise_login_required('You need to log in to access this content')
|
self.raise_login_required('You need to log in to access this content')
|
||||||
user_info = traverse_obj(videos, (user_id, 'user', {dict})) or {}
|
user_info = traverse_obj(videos, (user_id, 'user', {dict})) or {}
|
||||||
|
Loading…
Reference in New Issue
Block a user