Sitemaps: Add XML sitemaps functionality to WordPress.

While web crawlers are able to discover pages from links within the site and from other sites, XML sitemaps supplement this approach by allowing crawlers to quickly and comprehensively identify all URLs included in the sitemap and learn other signals about those URLs using the associated metadata.

See https://make.wordpress.org/core/2020/06/10/merge-announcement-extensible-core-sitemaps/ for more details.

This feature exposes the sitemap index via `/wp-sitemap.xml` and exposes a variety of new filters and hooks for developers to modify the behavior. Users can disable sitemaps completely by turning off search engine visibility in WordPress admin.

This change also introduces a new `esc_xml()` function to escape strings for output in XML, as well as XML support to `wp_kses_normalize_entities()`.

Props Adrian McShane, afragen, adamsilverstein, casiepa, flixos90, garrett-eclipse, joemcgill, kburgoine, kraftbj, milana_cap, pacifika, pbiron, pfefferle, Ruxandra Gradina, swissspidy, szepeviktor, tangrufus, tweetythierry.
Fixes #50117.
See #3670. See #19998.

Built from https://develop.svn.wordpress.org/trunk@48072


git-svn-id: http://core.svn.wordpress.org/trunk@47839 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Pascal Birchler 2020-06-17 15:24:07 +00:00
parent 689fdc6319
commit 609dd1d14f
16 changed files with 1997 additions and 9 deletions

View File

@ -509,6 +509,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
$redirect['path'] = trailingslashit( $redirect['path'] ) . $addl_path;
}
// Remove trailing slash for sitemaps requests.
if ( ! empty( get_query_var( 'sitemap' ) ) ) {
$redirect['path'] = untrailingslashit( $redirect['path'] );
}
$redirect_url = $redirect['scheme'] . '://' . $redirect['host'] . $redirect['path'];
}
@ -651,6 +656,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
$redirect['path'] = trailingslashit( $redirect['path'] );
}
// Remove trailing slash for sitemaps requests.
if ( ! empty( get_query_var( 'sitemap' ) ) || ! empty( get_query_var( 'sitemap-stylesheet' ) ) ) {
$redirect['path'] = untrailingslashit( $redirect['path'] );
}
// Strip multiple slashes out of the URL.
if ( strpos( $redirect['path'], '//' ) > -1 ) {
$redirect['path'] = preg_replace( '|/+|', '/', $redirect['path'] );

View File

@ -456,6 +456,9 @@ add_action( 'rest_api_init', 'register_initial_settings', 10 );
add_action( 'rest_api_init', 'create_initial_rest_routes', 99 );
add_action( 'parse_request', 'rest_api_loaded' );
// Sitemaps actions.
add_action( 'init', 'wp_sitemaps_get_server' );
/**
* Filters formerly mixed into wp-includes.
*/

View File

@ -935,6 +935,7 @@ function seems_utf8( $str ) {
* ", or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
*
* @since 1.2.2
* @since 5.5.0 `$quote_style` also accepts `ENT_XML1`.
* @access private
*
* @staticvar string $_charset
@ -942,7 +943,10 @@ function seems_utf8( $str ) {
* @param string $string The text which is to be encoded.
* @param int|string $quote_style Optional. Converts double quotes if set to ENT_COMPAT,
* both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES.
* Also compatible with old values; converting single quotes if set to 'single',
* Converts single and double quotes, as well as converting HTML
* named entities (that are not also XML named entities) to their
* code points if set to ENT_XML1. Also compatible with old values;
* converting single quotes if set to 'single',
* double if set to 'double' or both if otherwise set.
* Default is ENT_NOQUOTES.
* @param false|string $charset Optional. The character encoding of the string. Default is false.
@ -964,7 +968,9 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals
// Account for the previous behaviour of the function when the $quote_style is not an accepted value.
if ( empty( $quote_style ) ) {
$quote_style = ENT_NOQUOTES;
} elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
} elseif ( ENT_XML1 === $quote_style ) {
$quote_style = ENT_QUOTES | ENT_XML1;
} elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) {
$quote_style = ENT_QUOTES;
}
@ -994,7 +1000,7 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals
if ( ! $double_encode ) {
// Guarantee every &entity; is valid, convert &garbage; into &amp;garbage;
// This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable.
$string = wp_kses_normalize_entities( $string );
$string = wp_kses_normalize_entities( $string, ( $quote_style & ENT_XML1 ) ? 'xml' : 'html' );
}
$string = htmlspecialchars( $string, $quote_style, $charset, $double_encode );
@ -4536,6 +4542,63 @@ function esc_textarea( $text ) {
return apply_filters( 'esc_textarea', $safe_text, $text );
}
/**
 * Escaping for XML blocks.
 *
 * CDATA Sections (`<![CDATA[ ... ]]>`) found in the text are returned unchanged;
 * all text outside of them is passed through `_wp_specialchars()` with `ENT_XML1`.
 *
 * @since 5.5.0
 *
 * @param string $text Text to escape.
 * @return string Escaped text.
 */
function esc_xml( $text ) {
	$safe_text = wp_check_invalid_utf8( $text );

	$cdata_regex = '\<\!\[CDATA\[.*?\]\]\>';

	// The pattern uses the `x` modifier, so whitespace and `#` comments inside it are ignored.
	// The closing heredoc marker stays at column 0 for compatibility with PHP < 7.3.
	$regex = <<<EOF
/
(?=.*?{$cdata_regex}) # lookahead that will match anything followed by a CDATA Section
(?<non_cdata_followed_by_cdata>(.*?)) # the "anything" matched by the lookahead
(?<cdata>({$cdata_regex})) # the CDATA Section matched by the lookahead
| # alternative
(?<non_cdata>(.*)) # non-CDATA Section
/sx
EOF;

	$safe_text = (string) preg_replace_callback(
		$regex,
		static function( $matches ) {
			// A non-empty `cdata` capture means the first alternative matched:
			// return the CDATA Section unchanged, escape HTML entities in the rest.
			if ( isset( $matches['cdata'] ) && '' !== $matches['cdata'] ) {
				return _wp_specialchars( $matches['non_cdata_followed_by_cdata'], ENT_XML1 ) . $matches['cdata'];
			}

			// Compare against the empty string explicitly: a truthiness check would
			// wrongly discard the text "0".
			if ( ! isset( $matches['non_cdata'] ) || '' === $matches['non_cdata'] ) {
				return '';
			}

			// Escape HTML entities in the non-CDATA Section.
			return _wp_specialchars( $matches['non_cdata'], ENT_XML1 );
		},
		$safe_text
	);

	/**
	 * Filters a string cleaned and escaped for output in XML.
	 *
	 * Text passed to esc_xml() is stripped of invalid or special characters
	 * before output. HTML named character references are converted to their
	 * equivalent code points.
	 *
	 * @since 5.5.0
	 *
	 * @param string $safe_text The text after it has been escaped.
	 * @param string $text      The text prior to being escaped.
	 */
	return apply_filters( 'esc_xml', $safe_text, $text );
}
/**
* Escape an HTML tag name.
*

View File

@ -47,7 +47,7 @@ if ( ! defined( 'CUSTOM_TAGS' ) ) {
// Ensure that these variables are added to the global namespace
// (e.g. if using namespaces / autoload in the current PHP environment).
global $allowedposttags, $allowedtags, $allowedentitynames;
// The XML entity list is assigned and read as $allowedxmlnamedentities, so that is the name that must be global.
global $allowedposttags, $allowedtags, $allowedentitynames, $allowedxmlnamedentities;
if ( ! CUSTOM_TAGS ) {
/**
@ -704,6 +704,18 @@ if ( ! CUSTOM_TAGS ) {
'there4',
);
/**
* Named entities that are left untouched when normalizing entities for XML output.
*
* Read by wp_kses_xml_named_entities(); these are the five entities predefined by XML itself.
*
* @var string[] $allowedxmlnamedentities Array of KSES allowed XML entity names.
* @since 5.5.0
*/
$allowedxmlnamedentities = array(
'amp',
'lt',
'gt',
'apos',
'quot',
);
$allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags );
} else {
$allowedtags = wp_kses_array_lc( $allowedtags );
@ -1745,17 +1757,27 @@ function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) {
* This function normalizes HTML entities. It will convert `AT&T` to the correct
* `AT&amp;T`, `&#00058;` to `&#58;`, `&#XYZZY;` to `&amp;#XYZZY;` and so on.
*
* @since 1.0.0
* When `$context` is set to 'xml', HTML entities are converted to their code points. For
* example, `AT&T&hellip;&#XYZZY;` is converted to `AT&amp;T…&amp;#XYZZY;`.
*
* @param string $string Content to normalize entities.
* @since 1.0.0
* @since 5.5.0 Added `$context` parameter.
*
* @param string $string Content to normalize entities.
* @param string $context Context for normalization. Can be either 'html' or 'xml'.
* Default 'html'.
* @return string Content with normalized entities.
*/
function wp_kses_normalize_entities( $string ) {
function wp_kses_normalize_entities( $string, $context = 'html' ) {
// Disarm all entities by converting & to &amp;
$string = str_replace( '&', '&amp;', $string );
// Change back the allowed entities in our entity whitelist.
$string = preg_replace_callback( '/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string );
if ( 'xml' === $context ) {
$string = preg_replace_callback( '/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_xml_named_entities', $string );
} else {
$string = preg_replace_callback( '/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string );
}
$string = preg_replace_callback( '/&amp;#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string );
$string = preg_replace_callback( '/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string );
@ -1786,6 +1808,39 @@ function wp_kses_named_entities( $matches ) {
return ( ! in_array( $i, $allowedentitynames, true ) ) ? "&amp;$i;" : "&$i;";
}
/**
 * Callback for `wp_kses_normalize_entities()` regular expression (XML context).
 *
 * Entity names allowed in XML are kept as entity references. Names that are only
 * allowed as HTML entities are decoded to the character they stand for. Anything
 * else has its ampersand kept encoded.
 *
 * @since 5.5.0
 *
 * @global array $allowedentitynames
 * @global array $allowedxmlnamedentities
 *
 * @param array $matches preg_replace_callback() matches array.
 * @return string Correctly encoded entity.
 */
function wp_kses_xml_named_entities( $matches ) {
	global $allowedentitynames, $allowedxmlnamedentities;

	if ( empty( $matches[1] ) ) {
		return '';
	}

	$entity_name = $matches[1];
	$reference   = '&' . $entity_name . ';';

	// Valid in XML as-is.
	if ( in_array( $entity_name, $allowedxmlnamedentities, true ) ) {
		return $reference;
	}

	// Known HTML entity: replace it with the character itself.
	if ( in_array( $entity_name, $allowedentitynames, true ) ) {
		return html_entity_decode( $reference, ENT_HTML5 );
	}

	// Unknown name: leave the ampersand disarmed.
	return '&amp;' . $entity_name . ';';
}
/**
* Callback for `wp_kses_normalize_entities()` regular expression.
*

119
wp-includes/sitemaps.php Normal file
View File

@ -0,0 +1,119 @@
<?php
/**
* Sitemaps: Public functions
*
* This file contains a variety of public functions developers can use to interact with
* the XML Sitemaps API.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
 * Returns the global Sitemaps server, creating and bootstrapping it on first use.
 *
 * @since 5.5.0
 *
 * @return WP_Sitemaps|null Sitemaps instance, or null if sitemaps are disabled.
 */
function wp_sitemaps_get_server() {
	/**
	 * Global Core Sitemaps instance.
	 *
	 * @since 5.5.0
	 *
	 * @var WP_Sitemaps $wp_sitemaps
	 */
	global $wp_sitemaps;

	$public_site = (bool) get_option( 'blog_public' );

	/**
	 * Filters whether XML Sitemaps are enabled or not.
	 *
	 * @since 5.5.0
	 *
	 * @param bool $is_enabled Whether XML Sitemaps are enabled or not. Defaults to true for public sites.
	 */
	$sitemaps_enabled = (bool) apply_filters( 'wp_sitemaps_is_enabled', $public_site );

	if ( ! $sitemaps_enabled ) {
		return null;
	}

	// Reuse the existing global instance when there is one.
	if ( ! empty( $wp_sitemaps ) ) {
		return $wp_sitemaps;
	}

	// Otherwise set and bootstrap the sitemaps system.
	$wp_sitemaps = new WP_Sitemaps();
	$wp_sitemaps->init();

	/**
	 * Fires when initializing the Sitemaps object.
	 *
	 * Additional sitemaps should be registered on this hook.
	 *
	 * @since 5.5.0
	 *
	 * @param WP_Sitemaps $sitemaps Server object.
	 */
	do_action( 'wp_sitemaps_init', $wp_sitemaps );

	return $wp_sitemaps;
}
/**
 * Retrieves the sitemap providers held by the Sitemaps server's registry.
 *
 * @since 5.5.0
 *
 * @return array $sitemaps A list of registered sitemap providers. Empty when no server is available.
 */
function wp_get_sitemaps() {
	$server = wp_sitemaps_get_server();

	return $server ? $server->registry->get_sitemaps() : array();
}
/**
 * Adds a sitemap provider to the Sitemaps server's registry.
 *
 * @since 5.5.0
 *
 * @param string               $name     Unique name for the sitemap provider.
 * @param WP_Sitemaps_Provider $provider The `Sitemaps_Provider` instance implementing the sitemap.
 * @return bool Returns true if the sitemap was added. False on failure.
 */
function wp_register_sitemap( $name, WP_Sitemaps_Provider $provider ) {
	$server = wp_sitemaps_get_server();

	// No server is available, so there is nothing to register with.
	return $server ? $server->registry->add_sitemap( $name, $provider ) : false;
}
/**
 * Returns the (filterable) maximum number of URLs a single sitemap may list.
 *
 * @since 5.5.0
 *
 * @param string $object_type Object type for sitemap to be filtered (e.g. 'post', 'term', 'user').
 * @return int The maximum number of URLs.
 */
function wp_sitemaps_get_max_urls( $object_type ) {
	/**
	 * Filters the maximum number of URLs displayed on a sitemap.
	 *
	 * @since 5.5.0
	 *
	 * @param int    $max_urls    The maximum number of URLs included in a sitemap. Default 2000.
	 * @param string $object_type Object type for sitemap to be filtered (e.g. 'post', 'term', 'user').
	 */
	$max_urls = apply_filters( 'wp_sitemaps_max_urls', 2000, $object_type );

	return $max_urls;
}

View File

@ -0,0 +1,82 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Index class.
*
* Generates the sitemap index.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
 * Class WP_Sitemaps_Index.
 *
 * Collects the sitemap entries of every registered provider into the list shown
 * on the sitemap index, and knows the URL of that index.
 *
 * @since 5.5.0
 */
class WP_Sitemaps_Index {
	/**
	 * Registry the index reads its providers from.
	 *
	 * @since 5.5.0
	 * @var WP_Sitemaps_Registry
	 */
	protected $registry;

	/**
	 * Stores the registry used to look up sitemap providers.
	 *
	 * @since 5.5.0
	 *
	 * @param WP_Sitemaps_Registry $registry Sitemap provider registry.
	 */
	public function __construct( WP_Sitemaps_Registry $registry ) {
		$this->registry = $registry;
	}

	/**
	 * Builds the combined list of sitemap entries from all registered providers.
	 *
	 * @since 5.5.0
	 *
	 * @return array List of all sitemaps.
	 */
	public function get_sitemap_list() {
		$sitemap_list = array();

		/* @var WP_Sitemaps_Provider $provider */
		foreach ( $this->registry->get_sitemaps() as $provider ) {
			$entries = $provider->get_sitemap_entries();

			// Only push when there is something to add; this also prevents issues
			// with array_push and empty arrays on PHP < 7.3.
			if ( $entries ) {
				// Using array_push is more efficient than array_merge in a loop.
				array_push( $sitemap_list, ...$entries );
			}
		}

		return $sitemap_list;
	}

	/**
	 * Returns the URL of the sitemap index.
	 *
	 * @since 5.5.0
	 *
	 * @return string The sitemap index url.
	 */
	public function get_index_url() {
		/* @var WP_Rewrite $wp_rewrite */
		global $wp_rewrite;

		if ( $wp_rewrite->using_permalinks() ) {
			return home_url( '/wp-sitemap.xml' );
		}

		// Without permalinks the index is addressed through a query argument.
		return add_query_arg( 'sitemap', 'index', home_url( '/' ) );
	}
}

View File

@ -0,0 +1,190 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Provider class
*
* This class is a base class for other sitemap providers to extend and contains shared functionality.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
 * Class WP_Sitemaps_Provider.
 *
 * Base class for sitemap providers. Subclasses supply the URL list and page count;
 * this class turns that into the per-page sitemap entries and their URLs.
 *
 * @since 5.5.0
 */
abstract class WP_Sitemaps_Provider {
	/**
	 * Provider name.
	 *
	 * This will also be used as the public-facing name in URLs.
	 *
	 * @since 5.5.0
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Object type name (e.g. 'post', 'term', 'user').
	 *
	 * @since 5.5.0
	 *
	 * @var string
	 */
	protected $object_type = '';

	/**
	 * Gets a URL list for a sitemap.
	 *
	 * @since 5.5.0
	 *
	 * @param int    $page_num       Page of results.
	 * @param string $object_subtype Optional. Object subtype name. Default empty.
	 * @return array $url_list Array of URLs for a sitemap.
	 */
	abstract public function get_url_list( $page_num, $object_subtype = '' );

	/**
	 * Gets the max number of pages available for the object type.
	 *
	 * @since 5.5.0
	 *
	 * @param string $object_subtype Optional. Object subtype. Default empty.
	 * @return int Total number of pages.
	 */
	abstract public function get_max_num_pages( $object_subtype = '' );

	/**
	 * Gets data about each sitemap type.
	 *
	 * @since 5.5.0
	 *
	 * @return array List of sitemap types including object subtype name and number of pages.
	 */
	public function get_sitemap_type_data() {
		$sitemap_data = array();

		$object_subtypes = $this->get_object_subtypes();

		// If there are no object subtypes, include a single sitemap for the
		// entire object type.
		if ( empty( $object_subtypes ) ) {
			$sitemap_data[] = array(
				'name'  => '',
				'pages' => $this->get_max_num_pages(),
			);

			return $sitemap_data;
		}

		// Otherwise, include individual sitemaps for every object subtype.
		foreach ( $object_subtypes as $object_subtype_name => $data ) {
			// Array keys may have been cast to int by PHP; the name is always used as a string.
			$object_subtype_name = (string) $object_subtype_name;

			$sitemap_data[] = array(
				'name'  => $object_subtype_name,
				'pages' => $this->get_max_num_pages( $object_subtype_name ),
			);
		}

		return $sitemap_data;
	}

	/**
	 * Lists sitemap pages exposed by this provider.
	 *
	 * The returned data is used to populate the sitemap entries of the index.
	 *
	 * @since 5.5.0
	 *
	 * @return array List of sitemaps.
	 */
	public function get_sitemap_entries() {
		$sitemaps = array();

		$sitemap_types = $this->get_sitemap_type_data();

		foreach ( $sitemap_types as $type ) {
			for ( $page = 1; $page <= $type['pages']; $page ++ ) {
				// Build the URL once per page and reuse it for the entry.
				$loc           = $this->get_sitemap_url( $type['name'], $page );
				$sitemap_entry = array(
					'loc' => $loc,
				);

				/**
				 * Filters the sitemap entry for the sitemap index.
				 *
				 * @since 5.5.0
				 *
				 * @param array  $sitemap_entry  Sitemap entry for the post.
				 * @param string $object_type    Object type name.
				 * @param string $object_subtype Object subtype name.
				 *                               Empty string if the object type does not support subtypes.
				 * @param int    $page           Page of results.
				 */
				$sitemap_entry = apply_filters( 'wp_sitemaps_index_entry', $sitemap_entry, $this->object_type, $type['name'], $page );

				$sitemaps[] = $sitemap_entry;
			}
		}

		return $sitemaps;
	}

	/**
	 * Gets the URL of a sitemap entry.
	 *
	 * @since 5.5.0
	 *
	 * @param string $name The name of the sitemap.
	 * @param int    $page The page of the sitemap.
	 * @return string The composed URL for a sitemap entry.
	 */
	public function get_sitemap_url( $name, $page ) {
		/* @var WP_Rewrite $wp_rewrite */
		global $wp_rewrite;

		if ( ! $wp_rewrite->using_permalinks() ) {
			return add_query_arg(
				// array_filter() drops empty values, so an empty subtype name is left out of the query.
				array_filter(
					array(
						'sitemap'         => $this->name,
						'sitemap-subtype' => $name,
						'paged'           => $page,
					)
				),
				home_url( '/' )
			);
		}

		$basename = sprintf(
			'/wp-sitemap-%1$s.xml',
			implode(
				'-',
				// Accounts for cases where name is not included, ex: wp-sitemap-users-1.xml.
				array_filter(
					array(
						$this->name,
						$name,
						(string) $page,
					)
				)
			)
		);

		return home_url( $basename );
	}

	/**
	 * Returns the list of supported object subtypes exposed by the provider.
	 *
	 * The base implementation reports no subtypes; providers that have them override this.
	 *
	 * @since 5.5.0
	 *
	 * @return array List of object subtypes objects keyed by their name.
	 */
	public function get_object_subtypes() {
		return array();
	}
}

View File

@ -0,0 +1,87 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Registry class
*
* Handles registering sitemaps.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
 * Class WP_Sitemaps_Registry.
 *
 * Keeps the sitemap providers that have been registered, keyed by name.
 *
 * @since 5.5.0
 */
class WP_Sitemaps_Registry {
	/**
	 * Registered sitemap providers, keyed by sitemap name.
	 *
	 * @since 5.5.0
	 *
	 * @var array Array of registered sitemaps.
	 */
	private $sitemaps = array();

	/**
	 * Upper bound on how many sitemaps get_sitemaps() returns.
	 *
	 * @since 5.5.0
	 *
	 * @var int Maximum number of sitemaps.
	 */
	private $max_sitemaps = 50000;

	/**
	 * Registers a provider under the given name unless that name is already taken.
	 *
	 * @since 5.5.0
	 *
	 * @param string               $name     Name of the sitemap.
	 * @param WP_Sitemaps_Provider $provider Instance of a WP_Sitemaps_Provider.
	 * @return bool True if the sitemap was added, false if it is already registered.
	 */
	public function add_sitemap( $name, WP_Sitemaps_Provider $provider ) {
		$is_new = ! isset( $this->sitemaps[ $name ] );

		if ( $is_new ) {
			$this->sitemaps[ $name ] = $provider;
		}

		return $is_new;
	}

	/**
	 * Looks up a registered provider by name.
	 *
	 * @since 5.5.0
	 *
	 * @param string $name Sitemap provider name.
	 * @return WP_Sitemaps_Provider|null Sitemaps provider if it exists, null otherwise.
	 */
	public function get_sitemap( $name ) {
		return isset( $this->sitemaps[ $name ] ) ? $this->sitemaps[ $name ] : null;
	}

	/**
	 * Returns the registered providers, truncated to the maximum allowed number.
	 *
	 * @since 5.5.0
	 *
	 * @return array List of sitemaps.
	 */
	public function get_sitemaps() {
		if ( count( $this->sitemaps ) <= $this->max_sitemaps ) {
			return $this->sitemaps;
		}

		// Too many entries: keep the first ones, preserving their keys.
		return array_slice( $this->sitemaps, 0, $this->max_sitemaps, true );
	}
}

View File

@ -0,0 +1,269 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Renderer class
*
* Responsible for rendering Sitemaps data to XML in accordance with sitemap protocol.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
 * Class WP_Sitemaps_Renderer
 *
 * Turns sitemap and sitemap index data into XML using SimpleXML and outputs it.
 *
 * @since 5.5.0
 */
class WP_Sitemaps_Renderer {
	/**
	 * XSL stylesheet processing instruction for styling a sitemap for web browsers.
	 *
	 * Stays empty when no stylesheet URL is available.
	 *
	 * @since 5.5.0
	 *
	 * @var string
	 */
	protected $stylesheet = '';

	/**
	 * XSL stylesheet processing instruction for styling a sitemap index for web browsers.
	 *
	 * Stays empty when no index stylesheet URL is available.
	 *
	 * @since 5.5.0
	 *
	 * @var string
	 */
	protected $stylesheet_index = '';

	/**
	 * WP_Sitemaps_Renderer constructor.
	 *
	 * Prepares the `<?xml-stylesheet ?>` processing instructions for sitemaps and the sitemap index.
	 *
	 * @since 5.5.0
	 */
	public function __construct() {
		$stylesheet_url = $this->get_sitemap_stylesheet_url();

		// A falsy URL (e.g. returned by a filter) means no stylesheet is referenced.
		if ( $stylesheet_url ) {
			$this->stylesheet = '<?xml-stylesheet type="text/xsl" href="' . esc_url( $stylesheet_url ) . '" ?>';
		}

		$stylesheet_index_url = $this->get_sitemap_index_stylesheet_url();

		if ( $stylesheet_index_url ) {
			$this->stylesheet_index = '<?xml-stylesheet type="text/xsl" href="' . esc_url( $stylesheet_index_url ) . '" ?>';
		}
	}

	/**
	 * Gets the URL for the sitemap stylesheet.
	 *
	 * @since 5.5.0
	 *
	 * @return string The sitemap stylesheet url.
	 */
	public function get_sitemap_stylesheet_url() {
		/* @var WP_Rewrite $wp_rewrite */
		global $wp_rewrite;

		$sitemap_url = home_url( '/wp-sitemap.xsl' );

		// Without permalinks the stylesheet is addressed through a query argument.
		if ( ! $wp_rewrite->using_permalinks() ) {
			$sitemap_url = add_query_arg( 'sitemap-stylesheet', 'sitemap', home_url( '/' ) );
		}

		/**
		 * Filters the URL for the sitemap stylesheet.
		 *
		 * If a falsy value is returned, no stylesheet will be used and
		 * the "raw" XML of the sitemap will be displayed.
		 *
		 * @since 5.5.0
		 *
		 * @param string $sitemap_url Full URL for the sitemaps xsl file.
		 */
		return apply_filters( 'wp_sitemaps_stylesheet_url', $sitemap_url );
	}

	/**
	 * Gets the URL for the sitemap index stylesheet.
	 *
	 * @since 5.5.0
	 *
	 * @return string The sitemap index stylesheet url.
	 */
	public function get_sitemap_index_stylesheet_url() {
		/* @var WP_Rewrite $wp_rewrite */
		global $wp_rewrite;

		$sitemap_url = home_url( '/wp-sitemap-index.xsl' );

		// Without permalinks the stylesheet is addressed through a query argument.
		if ( ! $wp_rewrite->using_permalinks() ) {
			$sitemap_url = add_query_arg( 'sitemap-stylesheet', 'index', home_url( '/' ) );
		}

		/**
		 * Filters the URL for the sitemap index stylesheet.
		 *
		 * If a falsy value is returned, no stylesheet will be used and
		 * the "raw" XML of the sitemap index will be displayed.
		 *
		 * @since 5.5.0
		 *
		 * @param string $sitemap_url Full URL for the sitemaps index xsl file.
		 */
		return apply_filters( 'wp_sitemaps_stylesheet_index_url', $sitemap_url );
	}

	/**
	 * Renders a sitemap index.
	 *
	 * Sends the XML content type header and echoes the generated index XML.
	 *
	 * @since 5.5.0
	 *
	 * @param array $sitemaps Array of sitemap URLs.
	 */
	public function render_index( $sitemaps ) {
		header( 'Content-type: application/xml; charset=UTF-8' );

		$this->check_for_simple_xml_availability();

		$index_xml = $this->get_sitemap_index_xml( $sitemaps );

		if ( ! empty( $index_xml ) ) {
			// All output is escaped within get_sitemap_index_xml().
			// phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped
			echo $index_xml;
		}
	}

	/**
	 * Gets XML for a sitemap index.
	 *
	 * @since 5.5.0
	 *
	 * @param array $sitemaps Array of sitemap URLs.
	 * @return string|false A well-formed XML string for a sitemap index. False on error.
	 */
	public function get_sitemap_index_xml( $sitemaps ) {
		$sitemap_index = new SimpleXMLElement(
			sprintf(
				'%1$s%2$s%3$s',
				'<?xml version="1.0" encoding="UTF-8" ?>',
				$this->stylesheet_index,
				'<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" />'
			)
		);

		foreach ( $sitemaps as $entry ) {
			$sitemap = $sitemap_index->addChild( 'sitemap' );

			// Add each element as a child node to the <sitemap> entry.
			foreach ( $entry as $name => $value ) {
				if ( 'loc' === $name ) {
					$sitemap->addChild( $name, esc_url( $value ) );
				} elseif ( 'lastmod' === $name ) {
					$sitemap->addChild( $name, esc_xml( $value ) );
				} else {
					// Unsupported fields are reported and left out of the XML.
					_doing_it_wrong(
						__METHOD__,
						sprintf(
							/* translators: %s: list of element names */
							__( 'Fields other than %s are not currently supported for the sitemap index.' ),
							implode( ',', array( 'loc', 'lastmod' ) )
						),
						'5.5.0'
					);
				}
			}
		}

		return $sitemap_index->asXML();
	}

	/**
	 * Renders a sitemap.
	 *
	 * Sends the XML content type header and echoes the generated sitemap XML.
	 *
	 * @since 5.5.0
	 *
	 * @param array $url_list Array of URLs for a sitemap.
	 */
	public function render_sitemap( $url_list ) {
		header( 'Content-type: application/xml; charset=UTF-8' );

		$this->check_for_simple_xml_availability();

		$sitemap_xml = $this->get_sitemap_xml( $url_list );

		if ( ! empty( $sitemap_xml ) ) {
			// All output is escaped within get_sitemap_xml().
			// phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped
			echo $sitemap_xml;
		}
	}

	/**
	 * Gets XML for a sitemap.
	 *
	 * @since 5.5.0
	 *
	 * @param array $url_list Array of URLs for a sitemap.
	 * @return string|false A well-formed XML string for a sitemap. False on error.
	 */
	public function get_sitemap_xml( $url_list ) {
		$urlset = new SimpleXMLElement(
			sprintf(
				'%1$s%2$s%3$s',
				'<?xml version="1.0" encoding="UTF-8" ?>',
				$this->stylesheet,
				'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" />'
			)
		);

		foreach ( $url_list as $url_item ) {
			$url = $urlset->addChild( 'url' );

			// Add each element as a child node to the <url> entry.
			foreach ( $url_item as $name => $value ) {
				if ( 'loc' === $name ) {
					$url->addChild( $name, esc_url( $value ) );
				} elseif ( in_array( $name, array( 'lastmod', 'changefreq', 'priority' ), true ) ) {
					$url->addChild( $name, esc_xml( $value ) );
				} else {
					// Unsupported fields are reported and left out of the XML.
					_doing_it_wrong(
						__METHOD__,
						sprintf(
							/* translators: %s: list of element names */
							__( 'Fields other than %s are not currently supported for sitemaps.' ),
							implode( ',', array( 'loc', 'lastmod', 'changefreq', 'priority' ) )
						),
						'5.5.0'
					);
				}
			}
		}

		return $urlset->asXML();
	}

	/**
	 * Checks for the availability of the SimpleXML extension and errors if missing.
	 *
	 * @since 5.5.0
	 */
	private function check_for_simple_xml_availability() {
		if ( ! class_exists( 'SimpleXMLElement' ) ) {
			// Switch wp_die() to the XML handler before reporting the error.
			add_filter(
				'wp_die_handler',
				static function () {
					return '_xml_wp_die_handler';
				}
			);

			wp_die(
				sprintf(
					/* translators: %s: SimpleXML */
					esc_xml( __( 'Could not generate XML sitemap due to missing %s extension' ) ),
					'SimpleXML'
				),
				esc_xml( __( 'WordPress &rsaquo; Error' ) ),
				array(
					'response' => 501, // "Not implemented".
				)
			);
		}
	}
}

View File

@ -0,0 +1,288 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Stylesheet class
*
* This class provides the XSL stylesheets to style all sitemaps.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
* Stylesheet provider class.
*
* @since 5.5.0
*/
class WP_Sitemaps_Stylesheet {
/**
* Renders the xsl stylesheet depending on whether its the sitemap index or not.
*
* @param string $type Stylesheet type. Either 'sitemap' or 'index'.
*/
public function render_stylesheet( $type ) {
header( 'Content-type: application/xml; charset=UTF-8' );
if ( 'sitemap' === $type ) {
// phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- All content escaped below.
echo $this->get_sitemap_stylesheet();
}
if ( 'index' === $type ) {
// phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- All content escaped below.
echo $this->get_sitemap_index_stylesheet();
}
exit;
}
/**
* Returns the escaped xsl for all sitemaps, except index.
*
* @since 5.5.0
*/
public function get_sitemap_stylesheet() {
$css = $this->get_stylesheet_css();
$title = esc_xml( __( 'XML Sitemap' ) );
$description = esc_xml( __( 'This XML Sitemap is generated by WordPress to make your content more visible for search engines.' ) );
$learn_more = sprintf(
'<a href="%s">%s</a>',
esc_url( __( 'https://www.sitemaps.org/' ) ),
esc_xml( __( 'Learn more about XML sitemaps.' ) )
);
$text = sprintf(
/* translators: %s: number of URLs. */
esc_xml( __( 'Number of URLs in this XML Sitemap: %s.' ) ),
'<xsl:value-of select="count( sitemap:urlset/sitemap:url )" />'
);
$lang = get_language_attributes( 'html' );
$url = esc_xml( __( 'URL' ) );
$lastmod = esc_xml( __( 'Last Modified' ) );
$changefreq = esc_xml( __( 'Change Frequency' ) );
$priority = esc_xml( __( 'Priority' ) );
$xsl_content = <<<XSL
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
exclude-result-prefixes="sitemap"
>
<xsl:output method="html" encoding="UTF-8" indent="yes"/>
<!--
Set variables for whether lastmod, changefreq or priority occur for any url in the sitemap.
We do this up front because it can be expensive in a large sitemap.
-->
<xsl:variable name="has-lastmod" select="count( /sitemap:urlset/sitemap:url/sitemap:lastmod )" />
<xsl:variable name="has-changefreq" select="count( /sitemap:urlset/sitemap:url/sitemap:changefreq )" />
<xsl:variable name="has-priority" select="count( /sitemap:urlset/sitemap:url/sitemap:priority )" />
<xsl:template match="/">
<html {$lang}>
<head>
<title>{$title}</title>
<style>{$css}</style>
</head>
<body>
<div id="sitemap__header">
<h1>{$title}</h1>
<p>{$description}</p>
<p>{$learn_more}</p>
</div>
<div id="sitemap__content">
<p class="text">{$text}</p>
<table id="sitemap__table">
<thead>
<tr>
<th class="loc">{$url}</th>
<xsl:if test="\$has-lastmod">
<th class="lastmod">{$lastmod}</th>
</xsl:if>
<xsl:if test="\$has-changefreq">
<th class="changefreq">{$changefreq}</th>
</xsl:if>
<xsl:if test="\$has-priority">
<th class="priority">{$priority}</th>
</xsl:if>
</tr>
</thead>
<tbody>
<xsl:for-each select="sitemap:urlset/sitemap:url">
<tr>
<td class="loc"><a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc" /></a></td>
<xsl:if test="\$has-lastmod">
<td class="lastmod"><xsl:value-of select="sitemap:lastmod" /></td>
</xsl:if>
<xsl:if test="\$has-changefreq">
<td class="changefreq"><xsl:value-of select="sitemap:changefreq" /></td>
</xsl:if>
<xsl:if test="\$has-priority">
<td class="priority"><xsl:value-of select="sitemap:priority" /></td>
</xsl:if>
</tr>
</xsl:for-each>
</tbody>
</table>
</div>
</body>
</html>
</xsl:template>
</xsl:stylesheet>
XSL;
/**
* Filters the content of the sitemap stylesheet.
*
* @since 5.5.0
*
* @param string $xsl Full content for the xml stylesheet.
*/
return apply_filters( 'wp_sitemaps_stylesheet_content', $xsl_content );
}
/**
* Returns the escaped xsl for the index sitemaps.
*
* @since 5.5.0
*/
public function get_sitemap_index_stylesheet() {
$css = $this->get_stylesheet_css();
$title = esc_xml( __( 'XML Sitemap' ) );
$description = esc_xml( __( 'This XML Sitemap is generated by WordPress to make your content more visible for search engines.' ) );
$learn_more = sprintf(
'<a href="%s">%s</a>',
esc_url( __( 'https://www.sitemaps.org/' ) ),
esc_xml( __( 'Learn more about XML sitemaps.' ) )
);
$text = sprintf(
/* translators: %s: number of URLs. */
esc_xml( __( 'Number of URLs in this XML Sitemap: %s.' ) ),
'<xsl:value-of select="count( sitemap:sitemapindex/sitemap:sitemap )" />'
);
$lang = get_language_attributes( 'html' );
$url = esc_xml( __( 'URL' ) );
$lastmod = esc_xml( __( 'Last Modified' ) );
$xsl_content = <<<XSL
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
exclude-result-prefixes="sitemap"
>
<xsl:output method="html" encoding="UTF-8" indent="yes" />
<!--
Set variables for whether lastmod occurs for any sitemap in the index.
We do this up front because it can be expensive in a large sitemap.
-->
<xsl:variable name="has-lastmod" select="count( /sitemap:sitemapindex/sitemap:sitemap/sitemap:lastmod )" />
<xsl:template match="/">
<html {$lang}>
<head>
<title>{$title}</title>
<style>{$css}</style>
</head>
<body>
<div id="sitemap__header">
<h1>{$title}</h1>
<p>{$description}</p>
<p>{$learn_more}</p>
</div>
<div id="sitemap__content">
<p class="text">{$text}</p>
<table id="sitemap__table">
<thead>
<tr>
<th class="loc">{$url}</th>
<xsl:if test="\$has-lastmod">
<th class="lastmod">{$lastmod}</th>
</xsl:if>
</tr>
</thead>
<tbody>
<xsl:for-each select="sitemap:sitemapindex/sitemap:sitemap">
<tr>
<td class="loc"><a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc" /></a></td>
<xsl:if test="\$has-lastmod">
<td class="lastmod"><xsl:value-of select="sitemap:lastmod" /></td>
</xsl:if>
</tr>
</xsl:for-each>
</tbody>
</table>
</div>
</body>
</html>
</xsl:template>
</xsl:stylesheet>
XSL;
/**
* Filters the content of the sitemap index stylesheet.
*
* @since 5.5.0
*
* @param string $xsl Full content for the xml stylesheet.
*/
return apply_filters( 'wp_sitemaps_stylesheet_index_content', $xsl_content );
}
/**
* Gets the CSS to be included in sitemap XSL stylesheets.
*
* @since 5.5.0
*
* @return string The CSS.
*/
public function get_stylesheet_css() {
	// Default rules: body typography, table border/padding, striped rows, link hover.
	$default_css = '
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
color: #444;
}
#sitemap__table {
border: solid 1px #ccc;
border-collapse: collapse;
}
#sitemap__table tr th {
text-align: left;
}
#sitemap__table tr td,
#sitemap__table tr th {
padding: 10px;
}
#sitemap__table tr:nth-child(odd) td {
background-color: #eee;
}
a:hover {
text-decoration: none;
}';

	/**
	 * Filters the CSS used by the sitemap stylesheets.
	 *
	 * @since 5.5.0
	 *
	 * @param string $css CSS to be applied to the default XSL file.
	 */
	$filtered_css = apply_filters( 'wp_sitemaps_stylesheet_css', $default_css );

	return $filtered_css;
}
}

View File

@ -0,0 +1,235 @@
<?php
/**
* Sitemaps: WP_Sitemaps class
*
* This is the main class integrating all other classes.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
* Class WP_Sitemaps.
*
* @since 5.5.0
*/
class WP_Sitemaps {
	/**
	 * The main index of supported sitemaps.
	 *
	 * @since 5.5.0
	 *
	 * @var WP_Sitemaps_Index
	 */
	public $index;

	/**
	 * The main registry of supported sitemaps.
	 *
	 * @since 5.5.0
	 *
	 * @var WP_Sitemaps_Registry
	 */
	public $registry;

	/**
	 * An instance of the renderer class.
	 *
	 * @since 5.5.0
	 *
	 * @var WP_Sitemaps_Renderer
	 */
	public $renderer;

	/**
	 * WP_Sitemaps constructor.
	 *
	 * Creates the registry and renderer, then the index, which is built on top
	 * of the registry.
	 *
	 * @since 5.5.0
	 */
	public function __construct() {
		$this->registry = new WP_Sitemaps_Registry();
		$this->renderer = new WP_Sitemaps_Renderer();
		$this->index    = new WP_Sitemaps_Index( $this->registry );
	}

	/**
	 * Initiates all sitemap functionality.
	 *
	 * Registers the rewrite rules and sitemap providers, then hooks up rendering,
	 * the `sitemap-xml` redirect, and the robots.txt entry.
	 *
	 * @since 5.5.0
	 */
	public function init() {
		// These will all fire on the init hook.
		$this->register_rewrites();
		$this->register_sitemaps();

		// Add additional action callbacks.
		add_action( 'template_redirect', array( $this, 'render_sitemaps' ) );
		add_filter( 'pre_handle_404', array( $this, 'redirect_sitemapxml' ), 10, 2 );
		add_filter( 'robots_txt', array( $this, 'add_robots' ), 0, 2 );
	}

	/**
	 * Registers and sets up the functionality for all supported sitemaps.
	 *
	 * @since 5.5.0
	 */
	public function register_sitemaps() {
		/**
		 * Filters the list of registered sitemap providers.
		 *
		 * @since 5.5.0
		 *
		 * @param array $providers {
		 *     Array of WP_Sitemaps_Provider objects keyed by their name.
		 *
		 *     @type object $posts      The WP_Sitemaps_Posts object.
		 *     @type object $taxonomies The WP_Sitemaps_Taxonomies object.
		 *     @type object $users      The WP_Sitemaps_Users object.
		 * }
		 */
		$providers = apply_filters(
			'wp_sitemaps_register_providers',
			array(
				'posts'      => new WP_Sitemaps_Posts(),
				'taxonomies' => new WP_Sitemaps_Taxonomies(),
				'users'      => new WP_Sitemaps_Users(),
			)
		);

		// Register each supported provider.
		/* @var WP_Sitemaps_Provider $provider */
		foreach ( $providers as $name => $provider ) {
			$this->registry->add_sitemap( $name, $provider );
		}
	}

	/**
	 * Registers sitemap rewrite tags and routing rules.
	 *
	 * Routes registered here:
	 * - `wp-sitemap.xml` for the sitemap index.
	 * - `wp-sitemap.xsl` and `wp-sitemap-index.xsl` for the XSL stylesheets.
	 * - `wp-sitemap-{name}-{subtype}-{page}.xml` and `wp-sitemap-{name}-{page}.xml`
	 *   for individual provider sitemaps.
	 *
	 * @since 5.5.0
	 */
	public function register_rewrites() {
		// Add rewrite tags.
		add_rewrite_tag( '%sitemap%', '([^?]+)' );
		add_rewrite_tag( '%sitemap-subtype%', '([^?]+)' );

		// Register index route.
		add_rewrite_rule( '^wp-sitemap\.xml$', 'index.php?sitemap=index', 'top' );

		// Register rewrites for the XSL stylesheet.
		add_rewrite_tag( '%sitemap-stylesheet%', '([^?]+)' );
		add_rewrite_rule( '^wp-sitemap\.xsl$', 'index.php?sitemap-stylesheet=sitemap', 'top' );
		add_rewrite_rule( '^wp-sitemap-index\.xsl$', 'index.php?sitemap-stylesheet=index', 'top' );

		// Register routes for providers.
		add_rewrite_rule(
			'^wp-sitemap-([a-z]+?)-([a-z\d_-]+?)-(\d+?)\.xml$',
			'index.php?sitemap=$matches[1]&sitemap-subtype=$matches[2]&paged=$matches[3]',
			'top'
		);
		add_rewrite_rule(
			'^wp-sitemap-([a-z]+?)-(\d+?)\.xml$',
			'index.php?sitemap=$matches[1]&paged=$matches[2]',
			'top'
		);
	}

	/**
	 * Renders sitemap templates based on rewrite rules.
	 *
	 * Reads the `sitemap`, `sitemap-subtype`, `sitemap-stylesheet` and `paged`
	 * query variables and, depending on their values, outputs a stylesheet, the
	 * sitemap index, or a single provider sitemap and then exits. Returns without
	 * output when the request is not a sitemap route or the provider is unknown,
	 * and flags the request as a 404 when the requested sitemap page has no URLs.
	 *
	 * @since 5.5.0
	 *
	 * @global WP_Query $wp_query WordPress Query object.
	 */
	public function render_sitemaps() {
		global $wp_query;

		$sitemap         = sanitize_text_field( get_query_var( 'sitemap' ) );
		$object_subtype  = sanitize_text_field( get_query_var( 'sitemap-subtype' ) );
		$stylesheet_type = sanitize_text_field( get_query_var( 'sitemap-stylesheet' ) );
		$paged           = absint( get_query_var( 'paged' ) );

		// Bail early if this isn't a sitemap or stylesheet route.
		if ( ! ( $sitemap || $stylesheet_type ) ) {
			return;
		}

		// Render stylesheet if this is stylesheet route.
		if ( $stylesheet_type ) {
			$stylesheet = new WP_Sitemaps_Stylesheet();

			$stylesheet->render_stylesheet( $stylesheet_type );
			exit;
		}

		// Render the index.
		if ( 'index' === $sitemap ) {
			$sitemap_list = $this->index->get_sitemap_list();

			$this->renderer->render_index( $sitemap_list );
			exit;
		}

		$provider = $this->registry->get_sitemap( $sitemap );

		if ( ! $provider ) {
			return;
		}

		// A missing or zero page number means the first page.
		if ( empty( $paged ) ) {
			$paged = 1;
		}

		$url_list = $provider->get_url_list( $paged, $object_subtype );

		// Force a 404 and bail early if no URLs are present.
		if ( empty( $url_list ) ) {
			$wp_query->set_404();
			// Send the 404 HTTP status explicitly so it agrees with the query's 404 state.
			status_header( 404 );
			return;
		}

		$this->renderer->render_sitemap( $url_list );
		exit;
	}

	/**
	 * Redirects requests whose page name is `sitemap-xml` to the sitemap index URL.
	 *
	 * Hooked to the `pre_handle_404` filter. If an earlier callback has already
	 * set a truthy bypass value, nothing is done.
	 *
	 * @since 5.5.0
	 *
	 * @param bool     $bypass Pass-through of the pre_handle_404 filter value.
	 * @param WP_Query $query  The WP_Query object.
	 * @return bool Bypass value.
	 */
	public function redirect_sitemapxml( $bypass, $query ) {
		// If a plugin has already utilized the pre_handle_404 function, return without action to avoid conflicts.
		if ( $bypass ) {
			return $bypass;
		}

		// 'pagename' is for most permalink types, name is for when the %postname% is used as a top-level field.
		if ( 'sitemap-xml' === $query->get( 'pagename' ) ||
			'sitemap-xml' === $query->get( 'name' ) ) {
			wp_safe_redirect( $this->index->get_index_url() );
			exit();
		}

		return $bypass;
	}

	/**
	 * Adds the sitemap index to robots.txt.
	 *
	 * The `Sitemap:` line is appended only when the site is public.
	 *
	 * @since 5.5.0
	 *
	 * @param string $output robots.txt output.
	 * @param bool   $public Whether the site is public or not.
	 * @return string The robots.txt output.
	 */
	public function add_robots( $output, $public ) {
		if ( $public ) {
			$output .= "\nSitemap: " . esc_url( $this->index->get_index_url() ) . "\n";
		}

		return $output;
	}
}

View File

@ -0,0 +1,221 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Posts class
*
* Builds the sitemaps for the 'post' object type.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
* Posts XML sitemap provider.
*
* @since 5.5.0
*/
class WP_Sitemaps_Posts extends WP_Sitemaps_Provider {
	/**
	 * WP_Sitemaps_Posts constructor.
	 *
	 * @since 5.5.0
	 */
	public function __construct() {
		$this->name        = 'posts';
		$this->object_type = 'post';
	}

	/**
	 * Returns the public post types, which excludes nav_items and similar types.
	 * Attachments are also excluded. This includes custom post types with public = true.
	 *
	 * @since 5.5.0
	 *
	 * @return array Map of registered post type objects (WP_Post_Type) keyed by their name.
	 */
	public function get_object_subtypes() {
		$post_types = get_post_types( array( 'public' => true ), 'objects' );
		unset( $post_types['attachment'] );

		/**
		 * Filters the list of post object sub types available within the sitemap.
		 *
		 * @since 5.5.0
		 *
		 * @param array $post_types Map of registered post type objects (WP_Post_Type) keyed by their name.
		 */
		return apply_filters( 'wp_sitemaps_post_types', $post_types );
	}

	/**
	 * Gets a URL list for a post type sitemap.
	 *
	 * Returns an empty array when the post type is not one of the supported
	 * subtypes. For the first page of the 'page' post type, the home URL is
	 * added as the first entry when the front page shows the latest posts.
	 *
	 * @since 5.5.0
	 *
	 * @param int    $page_num  Page of results.
	 * @param string $post_type Optional. Post type name. Default empty.
	 * @return array $url_list Array of URLs for a sitemap.
	 */
	public function get_url_list( $page_num, $post_type = '' ) {
		// Bail early if the queried post type is not supported.
		$supported_types = $this->get_object_subtypes();

		if ( ! isset( $supported_types[ $post_type ] ) ) {
			return array();
		}

		/**
		 * Filters the posts URL list before it is generated.
		 *
		 * Passing a non-null value will effectively short-circuit the generation,
		 * returning that value instead.
		 *
		 * @since 5.5.0
		 *
		 * @param array  $url_list  The URL list. Default null.
		 * @param string $post_type Post type name.
		 * @param int    $page_num  Page of results.
		 */
		$url_list = apply_filters(
			'wp_sitemaps_posts_pre_url_list',
			null,
			$post_type,
			$page_num
		);

		if ( null !== $url_list ) {
			return $url_list;
		}

		$args          = $this->get_posts_query_args( $post_type );
		$args['paged'] = $page_num;

		$query = new WP_Query( $args );

		/**
		 * Returns an array of posts.
		 *
		 * @var array<int, \WP_Post> $posts
		 */
		$posts = $query->get_posts();

		$url_list = array();

		/*
		 * Add a URL for the homepage in the pages sitemap.
		 * Shows only on the first page if the reading settings are set to display latest posts.
		 */
		if ( 'page' === $post_type && 1 === $page_num && 'posts' === get_option( 'show_on_front' ) ) {
			// Extract the data needed for home URL to add to the array.
			$sitemap_entry = array(
				'loc' => home_url(),
			);

			/**
			 * Filters the sitemap entry for the home page when the 'show_on_front' option equals 'posts'.
			 *
			 * @since 5.5.0
			 *
			 * @param array $sitemap_entry Sitemap entry for the home page.
			 */
			$sitemap_entry = apply_filters( 'wp_sitemaps_posts_show_on_front_entry', $sitemap_entry );
			$url_list[]    = $sitemap_entry;
		}

		foreach ( $posts as $post ) {
			$sitemap_entry = array(
				'loc' => get_permalink( $post ),
			);

			/**
			 * Filters the sitemap entry for an individual post.
			 *
			 * @since 5.5.0
			 *
			 * @param array   $sitemap_entry Sitemap entry for the post.
			 * @param WP_Post $post          Post object.
			 * @param string  $post_type     Name of the post_type.
			 */
			$sitemap_entry = apply_filters( 'wp_sitemaps_posts_entry', $sitemap_entry, $post, $post_type );
			$url_list[]    = $sitemap_entry;
		}

		return $url_list;
	}

	/**
	 * Gets the max number of pages available for the object type.
	 *
	 * Returns 0 when no post type is given. For the 'page' post type the result
	 * is at least 1 when the front page shows the latest posts, because
	 * get_url_list() then always lists the home URL on the first page.
	 *
	 * @since 5.5.0
	 *
	 * @param string $post_type Optional. Post type name. Default empty.
	 * @return int Total number of pages.
	 */
	public function get_max_num_pages( $post_type = '' ) {
		if ( empty( $post_type ) ) {
			return 0;
		}

		/**
		 * Filters the max number of pages before it is generated.
		 *
		 * Passing a non-null value will effectively short-circuit the generation,
		 * returning that value instead.
		 *
		 * @since 5.5.0
		 *
		 * @param int    $max_num_pages The maximum number of pages. Default null.
		 * @param string $post_type     Post type name.
		 */
		$max_num_pages = apply_filters( 'wp_sitemaps_posts_pre_max_num_pages', null, $post_type );

		if ( null !== $max_num_pages ) {
			return $max_num_pages;
		}

		// Only IDs are needed here, and the total row count must be calculated to get the page count.
		$args                  = $this->get_posts_query_args( $post_type );
		$args['fields']        = 'ids';
		$args['no_found_rows'] = false;

		$query = new WP_Query( $args );

		/*
		 * The home URL entry added by get_url_list() needs a page to live on,
		 * even when there are no published pages at all.
		 */
		$min_num_pages = ( 'page' === $post_type && 'posts' === get_option( 'show_on_front' ) ) ? 1 : 0;

		// Cast so the documented int return type holds regardless of how the query stores the count.
		return isset( $query->max_num_pages ) ? (int) max( $min_num_pages, $query->max_num_pages ) : 1;
	}

	/**
	 * Returns the query args for retrieving posts to list in the sitemap.
	 *
	 * @since 5.5.0
	 *
	 * @param string $post_type Post type name.
	 * @return array $args Array of WP_Query arguments.
	 */
	protected function get_posts_query_args( $post_type ) {
		/**
		 * Filters the query arguments for post type sitemap queries.
		 *
		 * @see WP_Query for a full list of arguments.
		 *
		 * @since 5.5.0
		 *
		 * @param array  $args      Array of WP_Query arguments.
		 * @param string $post_type Post type name.
		 */
		$args = apply_filters(
			'wp_sitemaps_posts_query_args',
			array(
				'orderby'                => 'ID',
				'order'                  => 'ASC',
				'post_type'              => $post_type,
				'posts_per_page'         => wp_sitemaps_get_max_urls( $this->object_type ),
				'post_status'            => array( 'publish' ),
				'no_found_rows'          => true,
				'update_post_term_cache' => false,
				'update_post_meta_cache' => false,
			),
			$post_type
		);

		return $args;
	}
}

View File

@ -0,0 +1,193 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Taxonomies class
*
* Builds the sitemaps for the 'taxonomy' object type.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
* Taxonomies XML sitemap provider.
*
* @since 5.5.0
*/
class WP_Sitemaps_Taxonomies extends WP_Sitemaps_Provider {
	/**
	 * WP_Sitemaps_Taxonomies constructor.
	 *
	 * @since 5.5.0
	 */
	public function __construct() {
		$this->name        = 'taxonomies';
		$this->object_type = 'term';
	}

	/**
	 * Returns all public, registered taxonomies.
	 *
	 * @since 5.5.0
	 *
	 * @return array Map of registered taxonomy objects keyed by their name.
	 */
	public function get_object_subtypes() {
		$taxonomies = get_taxonomies( array( 'public' => true ), 'objects' );

		/**
		 * Filters the list of taxonomy object subtypes available within the sitemap.
		 *
		 * @since 5.5.0
		 *
		 * @param array $taxonomies Map of registered taxonomy objects keyed by their name.
		 */
		return apply_filters( 'wp_sitemaps_taxonomies', $taxonomies );
	}

	/**
	 * Gets a URL list for a taxonomy sitemap.
	 *
	 * Returns an empty array when the taxonomy is not one of the supported
	 * subtypes. Terms for which no link can be generated are left out.
	 *
	 * @since 5.5.0
	 *
	 * @param int    $page_num Page of results.
	 * @param string $taxonomy Optional. Taxonomy name. Default empty.
	 * @return array $url_list Array of URLs for a sitemap.
	 */
	public function get_url_list( $page_num, $taxonomy = '' ) {
		$supported_types = $this->get_object_subtypes();

		// Bail early if the queried taxonomy is not supported.
		if ( ! isset( $supported_types[ $taxonomy ] ) ) {
			return array();
		}

		/**
		 * Filters the taxonomies URL list before it is generated.
		 *
		 * Passing a non-null value will effectively short-circuit the generation,
		 * returning that value instead.
		 *
		 * @since 5.5.0
		 *
		 * @param array  $url_list The URL list. Default null.
		 * @param string $taxonomy Taxonomy name.
		 * @param int    $page_num Page of results.
		 */
		$url_list = apply_filters(
			'wp_sitemaps_taxonomies_pre_url_list',
			null,
			$taxonomy,
			$page_num
		);

		if ( null !== $url_list ) {
			return $url_list;
		}

		$url_list = array();

		// Offset by how many terms should be included in previous pages.
		$offset = ( $page_num - 1 ) * wp_sitemaps_get_max_urls( $this->object_type );

		$args           = $this->get_taxonomies_query_args( $taxonomy );
		$args['offset'] = $offset;

		$taxonomy_terms = new WP_Term_Query( $args );

		if ( ! empty( $taxonomy_terms->terms ) ) {
			foreach ( $taxonomy_terms->terms as $term ) {
				$term_link = get_term_link( $term );

				// get_term_link() reports failure as a WP_Error, which must not end up as a sitemap location.
				if ( is_wp_error( $term_link ) ) {
					continue;
				}

				$sitemap_entry = array(
					'loc' => $term_link,
				);

				/**
				 * Filters the sitemap entry for an individual term.
				 *
				 * @since 5.5.0
				 *
				 * @param array       $sitemap_entry Sitemap entry for the term.
				 * @param int|WP_Term $term          Term as returned by the term query. With the default
				 *                                   'fields' => 'ids' query argument this is a term ID.
				 * @param string      $taxonomy      Taxonomy name.
				 */
				$sitemap_entry = apply_filters( 'wp_sitemaps_taxonomies_entry', $sitemap_entry, $term, $taxonomy );
				$url_list[]    = $sitemap_entry;
			}
		}

		return $url_list;
	}

	/**
	 * Gets the max number of pages available for the object type.
	 *
	 * Returns 0 when no taxonomy is given or when the terms cannot be counted.
	 *
	 * @since 5.5.0
	 *
	 * @param string $taxonomy Taxonomy name.
	 * @return int Total number of pages.
	 */
	public function get_max_num_pages( $taxonomy = '' ) {
		if ( empty( $taxonomy ) ) {
			return 0;
		}

		/**
		 * Filters the max number of pages before it is generated.
		 *
		 * Passing a non-null value will effectively short-circuit the generation,
		 * returning that value instead.
		 *
		 * @since 5.5.0
		 *
		 * @param int    $max_num_pages The maximum number of pages. Default null.
		 * @param string $taxonomy      Taxonomy name.
		 */
		$max_num_pages = apply_filters( 'wp_sitemaps_taxonomies_pre_max_num_pages', null, $taxonomy );

		if ( null !== $max_num_pages ) {
			return $max_num_pages;
		}

		$term_count = wp_count_terms( $taxonomy, $this->get_taxonomies_query_args( $taxonomy ) );

		// wp_count_terms() can return a WP_Error (e.g. for an unknown taxonomy), which cannot be divided.
		if ( is_wp_error( $term_count ) ) {
			return 0;
		}

		return (int) ceil( (int) $term_count / wp_sitemaps_get_max_urls( $this->object_type ) );
	}

	/**
	 * Returns the query args for retrieving taxonomy terms to list in the sitemap.
	 *
	 * @since 5.5.0
	 *
	 * @param string $taxonomy Taxonomy name.
	 * @return array $args Array of WP_Term_Query arguments.
	 */
	protected function get_taxonomies_query_args( $taxonomy ) {
		/**
		 * Filters the taxonomy terms query arguments.
		 *
		 * Allows modification of the taxonomy query arguments before querying.
		 *
		 * @see WP_Term_Query for a full list of arguments
		 *
		 * @since 5.5.0
		 *
		 * @param array  $args     Array of WP_Term_Query arguments.
		 * @param string $taxonomy Taxonomy name.
		 */
		$args = apply_filters(
			'wp_sitemaps_taxonomies_query_args',
			array(
				'fields'                 => 'ids',
				'taxonomy'               => $taxonomy,
				'orderby'                => 'term_order',
				'number'                 => wp_sitemaps_get_max_urls( $this->object_type ),
				'hide_empty'             => true,
				'hierarchical'           => false,
				'update_term_meta_cache' => false,
			),
			$taxonomy
		);

		return $args;
	}
}

View File

@ -0,0 +1,163 @@
<?php
/**
* Sitemaps: WP_Sitemaps_Users class
*
* Builds the sitemaps for the 'user' object type.
*
* @package WordPress
* @subpackage Sitemaps
* @since 5.5.0
*/
/**
* Users XML sitemap provider.
*
* @since 5.5.0
*/
class WP_Sitemaps_Users extends WP_Sitemaps_Provider {
	/**
	 * Sets the provider name and the object type it handles.
	 *
	 * @since 5.5.0
	 */
	public function __construct() {
		$this->name        = 'users';
		$this->object_type = 'user';
	}

	/**
	 * Builds the list of author archive URLs for one page of the users sitemap.
	 *
	 * @since 5.5.0
	 *
	 * @param int    $page_num       Page of results.
	 * @param string $object_subtype Optional. Unused for users; present only to stay
	 *                               compatible with the parent provider class.
	 *                               Default empty.
	 * @return array $url_list Array of URLs for a sitemap.
	 */
	public function get_url_list( $page_num, $object_subtype = '' ) {
		/**
		 * Filters the users URL list before it is generated.
		 *
		 * Passing a non-null value will effectively short-circuit the generation,
		 * returning that value instead.
		 *
		 * @since 5.5.0
		 *
		 * @param array $url_list The URL list. Default null.
		 * @param int   $page_num Page of results.
		 */
		$pre_url_list = apply_filters( 'wp_sitemaps_users_pre_url_list', null, $page_num );

		// A filter supplied the list; skip the user query entirely.
		if ( null !== $pre_url_list ) {
			return $pre_url_list;
		}

		$query_args          = $this->get_users_query_args();
		$query_args['paged'] = $page_num;

		$user_query = new WP_User_Query( $query_args );
		$entries    = array();

		foreach ( $user_query->get_results() as $user ) {
			$entry = array(
				'loc' => get_author_posts_url( $user->ID ),
			);

			/**
			 * Filters the sitemap entry for an individual user.
			 *
			 * @since 5.5.0
			 *
			 * @param array   $sitemap_entry Sitemap entry for the user.
			 * @param WP_User $user          User object.
			 */
			$entries[] = apply_filters( 'wp_sitemaps_users_entry', $entry, $user );
		}

		return $entries;
	}

	/**
	 * Calculates how many sitemap pages are needed to list all matching users.
	 *
	 * @since 5.5.0
	 *
	 * @see WP_Sitemaps_Provider::max_num_pages
	 *
	 * @param string $object_subtype Optional. Unused for users; present only to stay
	 *                               compatible with the parent provider class.
	 *                               Default empty.
	 * @return int Total page count.
	 */
	public function get_max_num_pages( $object_subtype = '' ) {
		/**
		 * Filters the max number of pages before it is generated.
		 *
		 * Passing a non-null value will effectively short-circuit the generation,
		 * returning that value instead.
		 *
		 * @since 5.5.0
		 *
		 * @param int $max_num_pages The maximum number of pages. Default null.
		 */
		$pre_max_num_pages = apply_filters( 'wp_sitemaps_users_pre_max_num_pages', null );

		if ( null !== $pre_max_num_pages ) {
			return $pre_max_num_pages;
		}

		$user_query = new WP_User_Query( $this->get_users_query_args() );
		$user_total = $user_query->get_total();

		// Round up so a partially filled last page still counts as a page.
		return (int) ceil( $user_total / wp_sitemaps_get_max_urls( $this->object_type ) );
	}

	/**
	 * Assembles the WP_User_Query arguments used to find users for the sitemap.
	 *
	 * @since 5.5.0
	 *
	 * @return array $args Array of WP_User_Query arguments.
	 */
	protected function get_users_query_args() {
		$post_type_names = get_post_types( array( 'public' => true ) );

		// Author pages for attachments are not supported in sitemaps.
		unset( $post_type_names['attachment'] );

		$default_args = array(
			'has_published_posts' => array_keys( $post_type_names ),
			'number'              => wp_sitemaps_get_max_urls( $this->object_type ),
		);

		/**
		 * Filters the query arguments for authors with public posts.
		 *
		 * Allows modification of the authors query arguments before querying.
		 *
		 * @see WP_User_Query for a full list of arguments
		 *
		 * @since 5.5.0
		 *
		 * @param array $args Array of WP_User_Query arguments.
		 */
		return apply_filters( 'wp_sitemaps_users_query_args', $default_args );
	}
}

View File

@ -13,7 +13,7 @@
*
* @global string $wp_version
*/
$wp_version = '5.5-alpha-48071';
$wp_version = '5.5-alpha-48072';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.

View File

@ -263,6 +263,16 @@ require ABSPATH . WPINC . '/rest-api/fields/class-wp-rest-term-meta-fields.php';
require ABSPATH . WPINC . '/rest-api/fields/class-wp-rest-user-meta-fields.php';
require ABSPATH . WPINC . '/rest-api/search/class-wp-rest-search-handler.php';
require ABSPATH . WPINC . '/rest-api/search/class-wp-rest-post-search-handler.php';
require ABSPATH . WPINC . '/sitemaps.php';
require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps.php';
require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-index.php';
require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-provider.php';
require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-registry.php';
require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-renderer.php';
require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-stylesheet.php';
require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-posts.php';
require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-taxonomies.php';
require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-users.php';
require ABSPATH . WPINC . '/class-wp-block-type.php';
require ABSPATH . WPINC . '/class-wp-block-styles-registry.php';
require ABSPATH . WPINC . '/class-wp-block-type-registry.php';