diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 6fcc4ac932..554a570059 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -7,6 +7,7 @@
from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
+ compat_str,
compat_parse_qs,
compat_urlparse,
)
@@ -15,6 +16,7 @@
int_or_none,
float_or_none,
parse_iso8601,
+ try_get,
smuggle_url,
str_or_none,
strip_jsonp,
@@ -113,6 +115,13 @@ class BiliBiliIE(InfoExtractor):
# new BV video id format
'url': 'https://www.bilibili.com/video/BV1JE411F741',
'only_matching': True,
+ }, {
+ # Anthology
+ 'url': 'https://www.bilibili.com/video/BV1bK411W797',
+ 'info_dict': {
+ 'id': 'BV1bK411W797',
+ },
+ 'playlist_count': 17,
}]
_APP_KEY = 'iVGUTjsxvpLeuDCf'
@@ -139,9 +148,19 @@ def _real_extract(self, url):
page_id = mobj.group('page')
webpage = self._download_webpage(url, video_id)
+ # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
+ # If the video has no page argument, check to see if it's an anthology
+ if page_id is None:
+ if not self._downloader.params.get('noplaylist'):
+ r = self._extract_anthology_entries(bv_id, video_id, webpage)
+ if r is not None:
+ self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
+ return r
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
if 'anime/' not in url:
cid = self._search_regex(
- r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
+ r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + compat_str(page_id), webpage, 'cid',
default=None
) or self._search_regex(
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
@@ -224,7 +243,18 @@ def _real_extract(self, url):
title = self._html_search_regex(
(r'
]+\btitle=(["\'])(?P(?:(?!\1).)+)\1',
r'(?s)
]*>(?P.+?)
'), webpage, 'title',
- group='title') + ('_p' + str(page_id) if page_id is not None else '')
+ group='title')
+
+ # Get part title for anthologies
+ if page_id is not None:
+ # TODO: The JSON page list is already downloaded by _extract_anthology_entries; reuse it instead of re-downloading it for each video
+ part_title = try_get(
+ self._download_json(
+ "https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id,
+ video_id, note='Extracting videos in anthology'),
+ lambda x: x['data'][int(page_id) - 1]['part'])
+ title = part_title or title
+
description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex(
r'