From 609dd1d14f027728e1912933919d27e2f0143b23 Mon Sep 17 00:00:00 2001 From: Pascal Birchler Date: Wed, 17 Jun 2020 15:24:07 +0000 Subject: [PATCH] Sitemaps: Add XML sitemaps functionality to WordPress. While web crawlers are able to discover pages from links within the site and from other sites, XML sitemaps supplement this approach by allowing crawlers to quickly and comprehensively identify all URLs included in the sitemap and learn other signals about those URLs using the associated metadata. See https://make.wordpress.org/core/2020/06/10/merge-announcement-extensible-core-sitemaps/ for more details. This feature exposes the sitemap index via `/wp-sitemap.xml` and exposes a variety of new filters and hooks for developers to modify the behavior. Users can disable sitemaps completely by turning off search engine visibility in WordPress admin. This change also introduces a new `esc_xml()` function to escape strings for output in XML, as well as XML support to `wp_kses_normalize_entities()`. Props Adrian McShane, afragen, adamsilverstein, casiepa, flixos90, garrett-eclipse, joemcgill, kburgoine, kraftbj, milana_cap, pacifika, pbiron, pfefferle, Ruxandra Gradina, swissspidy, szepeviktor, tangrufus, tweetythierry. Fixes #50117. See #3670. See #19998. 
Built from https://develop.svn.wordpress.org/trunk@48072 git-svn-id: http://core.svn.wordpress.org/trunk@47839 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/canonical.php | 10 + wp-includes/default-filters.php | 3 + wp-includes/formatting.php | 69 ++++- wp-includes/kses.php | 65 +++- wp-includes/sitemaps.php | 119 ++++++++ .../sitemaps/class-wp-sitemaps-index.php | 82 +++++ .../sitemaps/class-wp-sitemaps-provider.php | 190 ++++++++++++ .../sitemaps/class-wp-sitemaps-registry.php | 87 ++++++ .../sitemaps/class-wp-sitemaps-renderer.php | 269 ++++++++++++++++ .../sitemaps/class-wp-sitemaps-stylesheet.php | 288 ++++++++++++++++++ wp-includes/sitemaps/class-wp-sitemaps.php | 235 ++++++++++++++ .../providers/class-wp-sitemaps-posts.php | 221 ++++++++++++++ .../class-wp-sitemaps-taxonomies.php | 193 ++++++++++++ .../providers/class-wp-sitemaps-users.php | 163 ++++++++++ wp-includes/version.php | 2 +- wp-settings.php | 10 + 16 files changed, 1997 insertions(+), 9 deletions(-) create mode 100644 wp-includes/sitemaps.php create mode 100644 wp-includes/sitemaps/class-wp-sitemaps-index.php create mode 100644 wp-includes/sitemaps/class-wp-sitemaps-provider.php create mode 100644 wp-includes/sitemaps/class-wp-sitemaps-registry.php create mode 100644 wp-includes/sitemaps/class-wp-sitemaps-renderer.php create mode 100644 wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php create mode 100644 wp-includes/sitemaps/class-wp-sitemaps.php create mode 100644 wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php create mode 100644 wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php create mode 100644 wp-includes/sitemaps/providers/class-wp-sitemaps-users.php diff --git a/wp-includes/canonical.php b/wp-includes/canonical.php index 42d6dcf44a..befe58e684 100644 --- a/wp-includes/canonical.php +++ b/wp-includes/canonical.php @@ -509,6 +509,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { $redirect['path'] = trailingslashit( 
$redirect['path'] ) . $addl_path; } + // Remove trailing slash for sitemaps requests. + if ( ! empty( get_query_var( 'sitemap' ) ) ) { + $redirect['path'] = untrailingslashit( $redirect['path'] ); + } + $redirect_url = $redirect['scheme'] . '://' . $redirect['host'] . $redirect['path']; } @@ -651,6 +656,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { $redirect['path'] = trailingslashit( $redirect['path'] ); } + // Remove trailing slash for sitemaps requests. + if ( ! empty( get_query_var( 'sitemap' ) ) || ! empty( get_query_var( 'sitemap-stylesheet' ) ) ) { + $redirect['path'] = untrailingslashit( $redirect['path'] ); + } + // Strip multiple slashes out of the URL. if ( strpos( $redirect['path'], '//' ) > -1 ) { $redirect['path'] = preg_replace( '|/+|', '/', $redirect['path'] ); diff --git a/wp-includes/default-filters.php b/wp-includes/default-filters.php index 43faaf6507..c9024e7ae0 100644 --- a/wp-includes/default-filters.php +++ b/wp-includes/default-filters.php @@ -456,6 +456,9 @@ add_action( 'rest_api_init', 'register_initial_settings', 10 ); add_action( 'rest_api_init', 'create_initial_rest_routes', 99 ); add_action( 'parse_request', 'rest_api_loaded' ); +// Sitemaps actions. +add_action( 'init', 'wp_sitemaps_get_server' ); + /** * Filters formerly mixed into wp-includes. */ diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 0f38d54bc8..bf16c10c3f 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -935,6 +935,7 @@ function seems_utf8( $str ) { * ", or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded. * * @since 1.2.2 + * @since 5.5.0 `$quote_style` also accepts `ENT_XML1`. * @access private * * @staticvar string $_charset @@ -942,7 +943,10 @@ function seems_utf8( $str ) { * @param string $string The text which is to be encoded. * @param int|string $quote_style Optional. 
Converts double quotes if set to ENT_COMPAT, * both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. - * Also compatible with old values; converting single quotes if set to 'single', + * Converts single and double quotes, as well as converting HTML + * named entities (that are not also XML named entities) to their + * code points if set to ENT_XML1. Also compatible with old values; + * converting single quotes if set to 'single', * double if set to 'double' or both if otherwise set. * Default is ENT_NOQUOTES. * @param false|string $charset Optional. The character encoding of the string. Default is false. @@ -964,7 +968,9 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals // Account for the previous behaviour of the function when the $quote_style is not an accepted value. if ( empty( $quote_style ) ) { $quote_style = ENT_NOQUOTES; - } elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) { + } elseif ( ENT_XML1 === $quote_style ) { + $quote_style = ENT_QUOTES | ENT_XML1; + } elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) { $quote_style = ENT_QUOTES; } @@ -994,7 +1000,7 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals if ( ! $double_encode ) { // Guarantee every &entity; is valid, convert &garbage; into &garbage; // This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable. - $string = wp_kses_normalize_entities( $string ); + $string = wp_kses_normalize_entities( $string, ( $quote_style & ENT_XML1 ) ? 'xml' : 'html' ); } $string = htmlspecialchars( $string, $quote_style, $charset, $double_encode ); @@ -4536,6 +4542,63 @@ function esc_textarea( $text ) { return apply_filters( 'esc_textarea', $safe_text, $text ); } +/** + * Escaping for XML blocks. + * + * @since 5.5.0 + * + * @param string $text Text to escape. + * @return string Escaped text. 
+ */ +function esc_xml( $text ) { + $safe_text = wp_check_invalid_utf8( $text ); + + $cdata_regex = '\<\!\[CDATA\[.*?\]\]\>'; + $regex = <<(.*?)) # the "anything" matched by the lookahead + (?({$cdata_regex})) # the CDATA Section matched by the lookahead + +| # alternative + + (?(.*)) # non-CDATA Section +/sx +EOF; + + $safe_text = (string) preg_replace_callback( + $regex, + static function( $matches ) { + if ( ! $matches[0] ) { + return ''; + } + + if ( ! empty( $matches['non_cdata'] ) ) { + // escape HTML entities in the non-CDATA Section. + return _wp_specialchars( $matches['non_cdata'], ENT_XML1 ); + } + + // Return the CDATA Section unchanged, escape HTML entities in the rest. + return _wp_specialchars( $matches['non_cdata_followed_by_cdata'], ENT_XML1 ) . $matches['cdata']; + }, + $safe_text + ); + + /** + * Filters a string cleaned and escaped for output in XML. + * + * Text passed to esc_xml() is stripped of invalid or special characters + * before output. HTML named character references are converted to their + * equivalent code points. + * + * @since 5.5.0 + * + * @param string $safe_text The text after it has been escaped. + * @param string $text The text prior to being escaped. + */ + return apply_filters( 'esc_xml', $safe_text, $text ); +} + /** * Escape an HTML tag name. * diff --git a/wp-includes/kses.php b/wp-includes/kses.php index 9c9d094d46..703d7c0002 100644 --- a/wp-includes/kses.php +++ b/wp-includes/kses.php @@ -47,7 +47,7 @@ if ( ! defined( 'CUSTOM_TAGS' ) ) { // Ensure that these variables are added to the global namespace // (e.g. if using namespaces / autoload in the current PHP environment). -global $allowedposttags, $allowedtags, $allowedentitynames; +global $allowedposttags, $allowedtags, $allowedentitynames, $allowedxmlentitynames; if ( ! CUSTOM_TAGS ) { /** @@ -704,6 +704,18 @@ if ( ! CUSTOM_TAGS ) { 'there4', ); + /** + * @var string[] $allowedxmlnamedentities Array of KSES allowed XML entity names. 
+ * @since 5.5.0 + */ + $allowedxmlnamedentities = array( + 'amp', + 'lt', + 'gt', + 'apos', + 'quot', + ); + $allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags ); } else { $allowedtags = wp_kses_array_lc( $allowedtags ); @@ -1745,17 +1757,27 @@ function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) { * This function normalizes HTML entities. It will convert `AT&T` to the correct * `AT&amp;T`, `&#00058;` to `&#058;`, `&#XYZZY;` to `&amp;#XYZZY;` and so on. * - * @since 1.0.0 + * When `$context` is set to 'xml', HTML entities are converted to their code points. For + * example, `AT&T&hellip;&#XYZZY;` is converted to `AT&amp;T…&amp;#XYZZY;`. * - * @param string $string Content to normalize entities. + * @since 1.0.0 + * @since 5.5.0 Added `$context` parameter. + * + * @param string $string Content to normalize entities. + * @param string $context Context for normalization. Can be either 'html' or 'xml'. + * Default 'html'. * @return string Content with normalized entities. */ -function wp_kses_normalize_entities( $string ) { +function wp_kses_normalize_entities( $string, $context = 'html' ) { // Disarm all entities by converting & to &amp; $string = str_replace( '&', '&', $string ); // Change back the allowed entities in our entity whitelist. - $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string ); + if ( 'xml' === $context ) { + $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_xml_named_entities', $string ); + } else { + $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string ); + } $string = preg_replace_callback( '/&#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string ); $string = preg_replace_callback( '/&#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string ); @@ -1786,6 +1808,39 @@ function wp_kses_named_entities( $matches ) { return ( ! in_array( $i, $allowedentitynames, true ) ) ? 
"&$i;" : "&$i;"; } +/** + * Callback for `wp_kses_normalize_entities()` regular expression. + * + * This function only accepts valid named entity references, which are finite, + * case-sensitive, and highly scrutinized by XML validators. HTML named entity + * references are converted to their code points. + * + * @since 5.5.0 + * + * @global array $allowedentitynames + * @global array $allowedxmlnamedentities + * + * @param array $matches preg_replace_callback() matches array. + * @return string Correctly encoded entity. + */ +function wp_kses_xml_named_entities( $matches ) { + global $allowedentitynames, $allowedxmlnamedentities; + + if ( empty( $matches[1] ) ) { + return ''; + } + + $i = $matches[1]; + + if ( in_array( $i, $allowedxmlnamedentities, true ) ) { + return "&$i;"; + } elseif ( in_array( $i, $allowedentitynames, true ) ) { + return html_entity_decode( "&$i;", ENT_HTML5 ); + } + + return "&$i;"; +} + /** * Callback for `wp_kses_normalize_entities()` regular expression. * diff --git a/wp-includes/sitemaps.php b/wp-includes/sitemaps.php new file mode 100644 index 0000000000..6c92763270 --- /dev/null +++ b/wp-includes/sitemaps.php @@ -0,0 +1,119 @@ +init(); + + /** + * Fires when initializing the Sitemaps object. + * + * Additional sitemaps should be registered on this hook. + * + * @since 5.5.0 + * + * @param WP_Sitemaps $sitemaps Server object. + */ + do_action( 'wp_sitemaps_init', $wp_sitemaps ); + } + + return $wp_sitemaps; +} + +/** + * Gets a list of sitemap providers. + * + * @since 5.5.0 + * + * @return array $sitemaps A list of registered sitemap providers. + */ +function wp_get_sitemaps() { + $sitemaps = wp_sitemaps_get_server(); + + if ( ! $sitemaps ) { + return array(); + } + + return $sitemaps->registry->get_sitemaps(); +} + +/** + * Registers a new sitemap provider. + * + * @since 5.5.0 + * + * @param string $name Unique name for the sitemap provider. 
+ * @param WP_Sitemaps_Provider $provider The `Sitemaps_Provider` instance implementing the sitemap. + * @return bool Returns true if the sitemap was added. False on failure. + */ +function wp_register_sitemap( $name, WP_Sitemaps_Provider $provider ) { + $sitemaps = wp_sitemaps_get_server(); + + if ( ! $sitemaps ) { + return false; + } + + return $sitemaps->registry->add_sitemap( $name, $provider ); +} + +/** + * Gets the maximum number of URLs for a sitemap. + * + * @since 5.5.0 + * + * @param string $object_type Object type for sitemap to be filtered (e.g. 'post', 'term', 'user'). + * @return int The maximum number of URLs. + */ +function wp_sitemaps_get_max_urls( $object_type ) { + /** + * Filters the maximum number of URLs displayed on a sitemap. + * + * @since 5.5.0 + * + * @param int $max_urls The maximum number of URLs included in a sitemap. Default 2000. + * @param string $object_type Object type for sitemap to be filtered (e.g. 'post', 'term', 'user'). + */ + return apply_filters( 'wp_sitemaps_max_urls', 2000, $object_type ); +} diff --git a/wp-includes/sitemaps/class-wp-sitemaps-index.php b/wp-includes/sitemaps/class-wp-sitemaps-index.php new file mode 100644 index 0000000000..abaf9e539c --- /dev/null +++ b/wp-includes/sitemaps/class-wp-sitemaps-index.php @@ -0,0 +1,82 @@ +registry = $registry; + } + + /** + * Gets a sitemap list for the index. + * + * @since 5.5.0 + * + * @return array List of all sitemaps. + */ + public function get_sitemap_list() { + $sitemaps = array(); + + $providers = $this->registry->get_sitemaps(); + /* @var WP_Sitemaps_Provider $provider */ + foreach ( $providers as $provider ) { + $sitemap_entries = $provider->get_sitemap_entries(); + + // Prevent issues with array_push and empty arrays on PHP < 7.3. + if ( ! $sitemap_entries ) { + continue; + } + + // Using array_push is more efficient than array_merge in a loop. 
+ array_push( $sitemaps, ...$sitemap_entries ); + } + + return $sitemaps; + } + + /** + * Builds the URL for the sitemap index. + * + * @since 5.5.0 + * + * @return string The sitemap index url. + */ + public function get_index_url() { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + if ( ! $wp_rewrite->using_permalinks() ) { + return add_query_arg( 'sitemap', 'index', home_url( '/' ) ); + } + + return home_url( '/wp-sitemap.xml' ); + } +} diff --git a/wp-includes/sitemaps/class-wp-sitemaps-provider.php b/wp-includes/sitemaps/class-wp-sitemaps-provider.php new file mode 100644 index 0000000000..f89d9c9d33 --- /dev/null +++ b/wp-includes/sitemaps/class-wp-sitemaps-provider.php @@ -0,0 +1,190 @@ +get_object_subtypes(); + + // If there are no object subtypes, include a single sitemap for the + // entire object type. + if ( empty( $object_subtypes ) ) { + $sitemap_data[] = array( + 'name' => '', + 'pages' => $this->get_max_num_pages(), + ); + return $sitemap_data; + } + + // Otherwise, include individual sitemaps for every object subtype. + foreach ( $object_subtypes as $object_subtype_name => $data ) { + $object_subtype_name = (string) $object_subtype_name; + + $sitemap_data[] = array( + 'name' => $object_subtype_name, + 'pages' => $this->get_max_num_pages( $object_subtype_name ), + ); + } + + return $sitemap_data; + } + + /** + * Lists sitemap pages exposed by this provider. + * + * The returned data is used to populate the sitemap entries of the index. + * + * @since 5.5.0 + * + * @return array List of sitemaps. + */ + public function get_sitemap_entries() { + $sitemaps = array(); + + $sitemap_types = $this->get_sitemap_type_data(); + + foreach ( $sitemap_types as $type ) { + for ( $page = 1; $page <= $type['pages']; $page ++ ) { + $loc = $this->get_sitemap_url( $type['name'], $page ); + $sitemap_entry = array( + 'loc' => $this->get_sitemap_url( $type['name'], $page ), + ); + + /** + * Filters the sitemap entry for the sitemap index. 
+ * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the post. + * @param string $object_type Object type name. + * @param string $object_subtype Object subtype name. + * Empty string if the object type does not support subtypes. + * @param int $page Page of results. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_index_entry', $sitemap_entry, $this->object_type, $type['name'], $page ); + + $sitemaps[] = $sitemap_entry; + } + } + + return $sitemaps; + } + + /** + * Gets the URL of a sitemap entry. + * + * @since 5.5.0 + * + * @param string $name The name of the sitemap. + * @param int $page The page of the sitemap. + * @return string The composed URL for a sitemap entry. + */ + public function get_sitemap_url( $name, $page ) { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + if ( ! $wp_rewrite->using_permalinks() ) { + return add_query_arg( + // Accounts for cases where name is not included, ex: wp-sitemap-users-1.xml. + array_filter( + array( + 'sitemap' => $this->name, + 'sitemap-subtype' => $name, + 'paged' => $page, + ) + ), + home_url( '/' ) + ); + } + + $basename = sprintf( + '/wp-sitemap-%1$s.xml', + implode( + '-', + // Accounts for cases where name is not included, ex: wp-sitemap-users-1.xml. + array_filter( + array( + $this->name, + $name, + (string) $page, + ) + ) + ) + ); + + return home_url( $basename ); + } + + /** + * Returns the list of supported object subtypes exposed by the provider. + * + * @since 5.5.0 + * + * @return array List of object subtype objects keyed by their name. 
+ */ + public function get_object_subtypes() { + return array(); + } +} diff --git a/wp-includes/sitemaps/class-wp-sitemaps-registry.php b/wp-includes/sitemaps/class-wp-sitemaps-registry.php new file mode 100644 index 0000000000..fe1bad398b --- /dev/null +++ b/wp-includes/sitemaps/class-wp-sitemaps-registry.php @@ -0,0 +1,87 @@ +sitemaps[ $name ] ) ) { + return false; + } + + $this->sitemaps[ $name ] = $provider; + + return true; + } + + /** + * Returns a single registered sitemaps provider. + * + * @since 5.5.0 + * + * @param string $name Sitemap provider name. + * @return WP_Sitemaps_Provider|null Sitemaps provider if it exists, null otherwise. + */ + public function get_sitemap( $name ) { + if ( ! isset( $this->sitemaps[ $name ] ) ) { + return null; + } + + return $this->sitemaps[ $name ]; + } + + /** + * Lists all registered sitemaps. + * + * @since 5.5.0 + * + * @return array List of sitemaps. + */ + public function get_sitemaps() { + $total_sitemaps = count( $this->sitemaps ); + + if ( $total_sitemaps > $this->max_sitemaps ) { + return array_slice( $this->sitemaps, 0, $this->max_sitemaps, true ); + } + + return $this->sitemaps; + } +} diff --git a/wp-includes/sitemaps/class-wp-sitemaps-renderer.php b/wp-includes/sitemaps/class-wp-sitemaps-renderer.php new file mode 100644 index 0000000000..860f74242d --- /dev/null +++ b/wp-includes/sitemaps/class-wp-sitemaps-renderer.php @@ -0,0 +1,269 @@ +get_sitemap_stylesheet_url(); + if ( $stylesheet_url ) { + $this->stylesheet = ''; + } + $stylesheet_index_url = $this->get_sitemap_index_stylesheet_url(); + if ( $stylesheet_index_url ) { + $this->stylesheet_index = ''; + } + } + + /** + * Gets the URL for the sitemap stylesheet. + * + * @since 5.5.0 + * + * @return string The sitemap stylesheet url. + */ + public function get_sitemap_stylesheet_url() { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + $sitemap_url = home_url( '/wp-sitemap.xsl' ); + + if ( ! 
$wp_rewrite->using_permalinks() ) { + $sitemap_url = add_query_arg( 'sitemap-stylesheet', 'sitemap', home_url( '/' ) ); + } + + /** + * Filters the URL for the sitemap stylesheet. + * + * If a falsy value is returned, no stylesheet will be used and + * the "raw" XML of the sitemap will be displayed. + * + * @since 5.5.0 + * + * @param string $sitemap_url Full URL for the sitemaps xsl file. + */ + return apply_filters( 'wp_sitemaps_stylesheet_url', $sitemap_url ); + } + + /** + * Gets the URL for the sitemap index stylesheet. + * + * @since 5.5.0 + * + * @return string The sitemap index stylesheet url. + */ + public function get_sitemap_index_stylesheet_url() { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + $sitemap_url = home_url( '/wp-sitemap-index.xsl' ); + + if ( ! $wp_rewrite->using_permalinks() ) { + $sitemap_url = add_query_arg( 'sitemap-stylesheet', 'index', home_url( '/' ) ); + } + + /** + * Filters the URL for the sitemap index stylesheet. + * + * If a falsy value is returned, no stylesheet will be used and + * the "raw" XML of the sitemap index will be displayed. + * + * @since 5.5.0 + * + * @param string $sitemap_url Full URL for the sitemaps index xsl file. + */ + return apply_filters( 'wp_sitemaps_stylesheet_index_url', $sitemap_url ); + } + + /** + * Renders a sitemap index. + * + * @since 5.5.0 + * + * @param array $sitemaps Array of sitemap URLs. + */ + public function render_index( $sitemaps ) { + header( 'Content-type: application/xml; charset=UTF-8' ); + + $this->check_for_simple_xml_availability(); + + $index_xml = $this->get_sitemap_index_xml( $sitemaps ); + + if ( ! empty( $index_xml ) ) { + // All output is escaped within get_sitemap_index_xml(). + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped + echo $index_xml; + } + } + + /** + * Gets XML for a sitemap index. + * + * @since 5.5.0 + * + * @param array $sitemaps Array of sitemap URLs. + * @return string|false A well-formed XML string for a sitemap index. 
False on error. + */ + public function get_sitemap_index_xml( $sitemaps ) { + $sitemap_index = new SimpleXMLElement( + sprintf( + '%1$s%2$s%3$s', + '', + $this->stylesheet_index, + '' + ) + ); + + foreach ( $sitemaps as $entry ) { + $sitemap = $sitemap_index->addChild( 'sitemap' ); + + // Add each element as a child node to the entry. + foreach ( $entry as $name => $value ) { + if ( 'loc' === $name ) { + $sitemap->addChild( $name, esc_url( $value ) ); + } elseif ( 'lastmod' === $name ) { + $sitemap->addChild( $name, esc_xml( $value ) ); + } else { + _doing_it_wrong( + __METHOD__, + /* translators: %s: list of element names */ + sprintf( + __( 'Fields other than %s are not currently supported for the sitemap index.' ), + implode( ',', array( 'loc', 'lastmod' ) ) + ), + '5.5.0' + ); + } + } + } + + return $sitemap_index->asXML(); + } + + /** + * Renders a sitemap. + * + * @since 5.5.0 + * + * @param array $url_list Array of URLs for a sitemap. + */ + public function render_sitemap( $url_list ) { + header( 'Content-type: application/xml; charset=UTF-8' ); + + $this->check_for_simple_xml_availability(); + + $sitemap_xml = $this->get_sitemap_xml( $url_list ); + + if ( ! empty( $sitemap_xml ) ) { + // All output is escaped within get_sitemap_xml(). + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped + echo $sitemap_xml; + } + } + + /** + * Gets XML for a sitemap. + * + * @since 5.5.0 + * + * @param array $url_list Array of URLs for a sitemap. + * @return string|false A well-formed XML string for a sitemap. False on error. + */ + public function get_sitemap_xml( $url_list ) { + $urlset = new SimpleXMLElement( + sprintf( + '%1$s%2$s%3$s', + '', + $this->stylesheet, + '' + ) + ); + + foreach ( $url_list as $url_item ) { + $url = $urlset->addChild( 'url' ); + + // Add each element as a child node to the entry. 
+ foreach ( $url_item as $name => $value ) { + if ( 'loc' === $name ) { + $url->addChild( $name, esc_url( $value ) ); + } elseif ( in_array( $name, array( 'lastmod', 'changefreq', 'priority' ), true ) ) { + $url->addChild( $name, esc_xml( $value ) ); + } else { + _doing_it_wrong( + __METHOD__, + /* translators: %s: list of element names */ + sprintf( + __( 'Fields other than %s are not currently supported for sitemaps.' ), + implode( ',', array( 'loc', 'lastmod', 'changefreq', 'priority' ) ) + ), + '5.5.0' + ); + } + } + } + + return $urlset->asXML(); + } + + /** + * Checks for the availability of the SimpleXML extension and errors if missing. + * + * @since 5.5.0 + */ + private function check_for_simple_xml_availability() { + if ( ! class_exists( 'SimpleXMLElement' ) ) { + add_filter( + 'wp_die_handler', + static function () { + return '_xml_wp_die_handler'; + } + ); + + wp_die( + sprintf( + /* translators: %s: SimpleXML */ + esc_xml( __( 'Could not generate XML sitemap due to missing %s extension' ) ), + 'SimpleXML' + ), + esc_xml( __( 'WordPress › Error' ) ), + array( + 'response' => 501, // "Not implemented". + ) + ); + } + } +} diff --git a/wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php b/wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php new file mode 100644 index 0000000000..3a80a7e2cd --- /dev/null +++ b/wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php @@ -0,0 +1,288 @@ +get_sitemap_stylesheet(); + } + + if ( 'index' === $type ) { + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- All content escaped below. + echo $this->get_sitemap_index_stylesheet(); + } + + exit; + } + + /** + * Returns the escaped xsl for all sitemaps, except index. + * + * @since 5.5.0 + */ + public function get_sitemap_stylesheet() { + $css = $this->get_stylesheet_css(); + $title = esc_xml( __( 'XML Sitemap' ) ); + $description = esc_xml( __( 'This XML Sitemap is generated by WordPress to make your content more visible for search engines.' 
) ); + $learn_more = sprintf( + '%s', + esc_url( __( 'https://www.sitemaps.org/' ) ), + esc_xml( __( 'Learn more about XML sitemaps.' ) ) + ); + + $text = sprintf( + /* translators: %s: number of URLs. */ + esc_xml( __( 'Number of URLs in this XML Sitemap: %s.' ) ), + '' + ); + + $lang = get_language_attributes( 'html' ); + $url = esc_xml( __( 'URL' ) ); + $lastmod = esc_xml( __( 'Last Modified' ) ); + $changefreq = esc_xml( __( 'Change Frequency' ) ); + $priority = esc_xml( __( 'Priority' ) ); + + $xsl_content = << + + + + + + + + + + + + + {$title} + + + +
+

{$title}

+

{$description}

+

{$learn_more}

+
+
+

{$text}

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
{$url}{$lastmod}{$changefreq}{$priority}
+
+ + +
+
+ +XSL; + + /** + * Filters the content of the sitemap stylesheet. + * + * @since 5.5.0 + * + * @param string $xsl Full content for the xml stylesheet. + */ + return apply_filters( 'wp_sitemaps_stylesheet_content', $xsl_content ); + } + + /** + * Returns the escaped xsl for the index sitemaps. + * + * @since 5.5.0 + */ + public function get_sitemap_index_stylesheet() { + $css = $this->get_stylesheet_css(); + $title = esc_xml( __( 'XML Sitemap' ) ); + $description = esc_xml( __( 'This XML Sitemap is generated by WordPress to make your content more visible for search engines.' ) ); + $learn_more = sprintf( + '%s', + esc_url( __( 'https://www.sitemaps.org/' ) ), + esc_xml( __( 'Learn more about XML sitemaps.' ) ) + ); + + $text = sprintf( + /* translators: %s: number of URLs. */ + esc_xml( __( 'Number of URLs in this XML Sitemap: %s.' ) ), + '' + ); + + $lang = get_language_attributes( 'html' ); + $url = esc_xml( __( 'URL' ) ); + $lastmod = esc_xml( __( 'Last Modified' ) ); + + $xsl_content = << + + + + + + + + + + + {$title} + + + +
+

{$title}

+

{$description}

+

{$learn_more}

+
+
+

{$text}

+ + + + + + + + + + + + + + + + + + + +
{$url}{$lastmod}
+
+ + +
+
+ +XSL; + + /** + * Filters the content of the sitemap index stylesheet. + * + * @since 5.5.0 + * + * @param string $xsl Full content for the xml stylesheet. + */ + return apply_filters( 'wp_sitemaps_stylesheet_index_content', $xsl_content ); + } + + /** + * Gets the CSS to be included in sitemap XSL stylesheets. + * + * @since 5.5.0 + * + * @return string The CSS. + */ + public function get_stylesheet_css() { + $css = ' + body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; + color: #444; + } + + #sitemap__table { + border: solid 1px #ccc; + border-collapse: collapse; + } + + #sitemap__table tr th { + text-align: left; + } + + #sitemap__table tr td, + #sitemap__table tr th { + padding: 10px; + } + + #sitemap__table tr:nth-child(odd) td { + background-color: #eee; + } + + a:hover { + text-decoration: none; + }'; + + /** + * Filters the css only for the sitemap stylesheet. + * + * @since 5.5.0 + * + * @param string $css CSS to be applied to default xsl file. + */ + return apply_filters( 'wp_sitemaps_stylesheet_css', $css ); + } +} diff --git a/wp-includes/sitemaps/class-wp-sitemaps.php b/wp-includes/sitemaps/class-wp-sitemaps.php new file mode 100644 index 0000000000..6c6a795af8 --- /dev/null +++ b/wp-includes/sitemaps/class-wp-sitemaps.php @@ -0,0 +1,235 @@ +registry = new WP_Sitemaps_Registry(); + $this->renderer = new WP_Sitemaps_Renderer(); + $this->index = new WP_Sitemaps_Index( $this->registry ); + } + + /** + * Initiates all sitemap functionality. + * + * @since 5.5.0 + */ + public function init() { + // These will all fire on the init hook. + $this->register_rewrites(); + $this->register_sitemaps(); + + // Add additional action callbacks. 
+ add_action( 'template_redirect', array( $this, 'render_sitemaps' ) ); + add_filter( 'pre_handle_404', array( $this, 'redirect_sitemapxml' ), 10, 2 ); + add_filter( 'robots_txt', array( $this, 'add_robots' ), 0, 2 ); + } + + /** + * Registers and sets up the functionality for all supported sitemaps. + * + * @since 5.5.0 + */ + public function register_sitemaps() { + /** + * Filters the list of registered sitemap providers. + * + * @since 5.5.0 + * + * @param array $providers { + * Array of WP_Sitemaps_Provider objects keyed by their name. + * + * @type object $posts The WP_Sitemaps_Posts object. + * @type object $taxonomies The WP_Sitemaps_Taxonomies object. + * @type object $users The WP_Sitemaps_Users object. + * } + */ + $providers = apply_filters( + 'wp_sitemaps_register_providers', + array( + 'posts' => new WP_Sitemaps_Posts(), + 'taxonomies' => new WP_Sitemaps_Taxonomies(), + 'users' => new WP_Sitemaps_Users(), + ) + ); + + // Register each supported provider. + /* @var WP_Sitemaps_Provider $provider */ + foreach ( $providers as $name => $provider ) { + $this->registry->add_sitemap( $name, $provider ); + } + } + + /** + * Registers sitemap rewrite tags and routing rules. + * + * @since 5.5.0 + */ + public function register_rewrites() { + // Add rewrite tags. + add_rewrite_tag( '%sitemap%', '([^?]+)' ); + add_rewrite_tag( '%sitemap-subtype%', '([^?]+)' ); + + // Register index route. + add_rewrite_rule( '^wp-sitemap\.xml$', 'index.php?sitemap=index', 'top' ); + + // Register rewrites for the XSL stylesheet. + add_rewrite_tag( '%sitemap-stylesheet%', '([^?]+)' ); + add_rewrite_rule( '^wp-sitemap\.xsl$', 'index.php?sitemap-stylesheet=sitemap', 'top' ); + add_rewrite_rule( '^wp-sitemap-index\.xsl$', 'index.php?sitemap-stylesheet=index', 'top' ); + + // Register routes for providers. 
+ add_rewrite_rule( + '^wp-sitemap-([a-z]+?)-([a-z\d_-]+?)-(\d+?)\.xml$', + 'index.php?sitemap=$matches[1]&sitemap-subtype=$matches[2]&paged=$matches[3]', + 'top' + ); + add_rewrite_rule( + '^wp-sitemap-([a-z]+?)-(\d+?)\.xml$', + 'index.php?sitemap=$matches[1]&paged=$matches[2]', + 'top' + ); + } + + /** + * Renders sitemap templates based on rewrite rules. + * + * @since 5.5.0 + */ + public function render_sitemaps() { + global $wp_query; + + $sitemap = sanitize_text_field( get_query_var( 'sitemap' ) ); + $object_subtype = sanitize_text_field( get_query_var( 'sitemap-subtype' ) ); + $stylesheet_type = sanitize_text_field( get_query_var( 'sitemap-stylesheet' ) ); + $paged = absint( get_query_var( 'paged' ) ); + + // Bail early if this isn't a sitemap or stylesheet route. + if ( ! ( $sitemap || $stylesheet_type ) ) { + return; + } + + // Render stylesheet if this is stylesheet route. + if ( $stylesheet_type ) { + $stylesheet = new WP_Sitemaps_Stylesheet(); + + $stylesheet->render_stylesheet( $stylesheet_type ); + exit; + } + + // Render the index. + if ( 'index' === $sitemap ) { + $sitemap_list = $this->index->get_sitemap_list(); + + $this->renderer->render_index( $sitemap_list ); + exit; + } + + $provider = $this->registry->get_sitemap( $sitemap ); + + if ( ! $provider ) { + return; + } + + if ( empty( $paged ) ) { + $paged = 1; + } + + $url_list = $provider->get_url_list( $paged, $object_subtype ); + + // Force a 404 and bail early if no URLs are present. + if ( empty( $url_list ) ) { + $wp_query->set_404(); + return; + } + + $this->renderer->render_sitemap( $url_list ); + exit; + } + + /** + * Redirects a URL to the wp-sitemap.xml + * + * @since 5.5.0 + * + * @param bool $bypass Pass-through of the pre_handle_404 filter value. + * @param WP_Query $query The WP_Query object. + * @return bool Bypass value. 
+ */ + public function redirect_sitemapxml( $bypass, $query ) { + // If a plugin has already utilized the pre_handle_404 function, return without action to avoid conflicts. + if ( $bypass ) { + return $bypass; + } + + // 'pagename' is for most permalink types, name is for when the %postname% is used as a top-level field. + if ( 'sitemap-xml' === $query->get( 'pagename' ) || + 'sitemap-xml' === $query->get( 'name' ) ) { + wp_safe_redirect( $this->index->get_index_url() ); + exit(); + } + + return $bypass; + } + + /** + * Adds the sitemap index to robots.txt. + * + * @since 5.5.0 + * + * @param string $output robots.txt output. + * @param bool $public Whether the site is public or not. + * @return string The robots.txt output. + */ + public function add_robots( $output, $public ) { + if ( $public ) { + $output .= "\nSitemap: " . esc_url( $this->index->get_index_url() ) . "\n"; + } + + return $output; + } +} diff --git a/wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php b/wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php new file mode 100644 index 0000000000..4cca6ea6ec --- /dev/null +++ b/wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php @@ -0,0 +1,221 @@ +name = 'posts'; + $this->object_type = 'post'; + } + + /** + * Returns the public post types, which excludes nav_items and similar types. + * Attachments are also excluded. This includes custom post types with public = true. + * + * @since 5.5.0 + * + * @return array Map of registered post type objects (WP_Post_Type) keyed by their name. + */ + public function get_object_subtypes() { + $post_types = get_post_types( array( 'public' => true ), 'objects' ); + unset( $post_types['attachment'] ); + + /** + * Filters the list of post object sub types available within the sitemap. + * + * @since 5.5.0 + * + * @param array $post_types Map of registered post type objects (WP_Post_Type) keyed by their name. 
+ */ + return apply_filters( 'wp_sitemaps_post_types', $post_types ); + } + + /** + * Gets a URL list for a post type sitemap. + * + * @since 5.5.0 + * + * @param int $page_num Page of results. + * @param string $post_type Optional. Post type name. Default empty. + * @return array $url_list Array of URLs for a sitemap. + */ + public function get_url_list( $page_num, $post_type = '' ) { + // Bail early if the queried post type is not supported. + $supported_types = $this->get_object_subtypes(); + + if ( ! isset( $supported_types[ $post_type ] ) ) { + return array(); + } + + /** + * Filters the posts URL list before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param array $url_list The URL list. Default null. + * @param string $post_type Post type name. + * @param int $page_num Page of results. + */ + $url_list = apply_filters( + 'wp_sitemaps_posts_pre_url_list', + null, + $post_type, + $page_num + ); + + if ( null !== $url_list ) { + return $url_list; + } + + $args = $this->get_posts_query_args( $post_type ); + $args['paged'] = $page_num; + + $query = new WP_Query( $args ); + + /** + * Returns an array of posts. + * + * @var array $posts + */ + $posts = $query->get_posts(); + + $url_list = array(); + + /* + * Add a URL for the homepage in the pages sitemap. + * Shows only on the first page if the reading settings are set to display latest posts. + */ + if ( 'page' === $post_type && 1 === $page_num && 'posts' === get_option( 'show_on_front' ) ) { + // Extract the data needed for home URL to add to the array. + $sitemap_entry = array( + 'loc' => home_url(), + ); + + /** + * Filters the sitemap entry for the home page when the 'show_on_front' option equals 'posts'. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the home page. 
+ */ + $sitemap_entry = apply_filters( 'wp_sitemaps_posts_show_on_front_entry', $sitemap_entry ); + $url_list[] = $sitemap_entry; + } + + foreach ( $posts as $post ) { + $sitemap_entry = array( + 'loc' => get_permalink( $post ), + ); + + /** + * Filters the sitemap entry for an individual post. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the post. + * @param WP_Post $post Post object. + * @param string $post_type Name of the post_type. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_posts_entry', $sitemap_entry, $post, $post_type ); + $url_list[] = $sitemap_entry; + } + + return $url_list; + } + + /** + * Gets the max number of pages available for the object type. + * + * @since 5.5.0 + * + * @param string $post_type Optional. Post type name. Default empty. + * @return int Total number of pages. + */ + public function get_max_num_pages( $post_type = '' ) { + if ( empty( $post_type ) ) { + return 0; + } + + /** + * Filters the max number of pages before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param int $max_num_pages The maximum number of pages. Default null. + * @param string $post_type Post type name. + */ + $max_num_pages = apply_filters( 'wp_sitemaps_posts_pre_max_num_pages', null, $post_type ); + + if ( null !== $max_num_pages ) { + return $max_num_pages; + } + + $args = $this->get_posts_query_args( $post_type ); + $args['fields'] = 'ids'; + $args['no_found_rows'] = false; + + $query = new WP_Query( $args ); + + return isset( $query->max_num_pages ) ? $query->max_num_pages : 1; + } + + /** + * Returns the query args for retrieving posts to list in the sitemap. + * + * @since 5.5.0 + * + * @param string $post_type Post type name. + * @return array $args Array of WP_Query arguments. 
+ */ + protected function get_posts_query_args( $post_type ) { + /** + * Filters the query arguments for post type sitemap queries. + * + * @see WP_Query for a full list of arguments. + * + * @since 5.5.0 + * + * @param array $args Array of WP_Query arguments. + * @param string $post_type Post type name. + */ + $args = apply_filters( + 'wp_sitemaps_posts_query_args', + array( + 'orderby' => 'ID', + 'order' => 'ASC', + 'post_type' => $post_type, + 'posts_per_page' => wp_sitemaps_get_max_urls( $this->object_type ), + 'post_status' => array( 'publish' ), + 'no_found_rows' => true, + 'update_post_term_cache' => false, + 'update_post_meta_cache' => false, + ), + $post_type + ); + + return $args; + } +} diff --git a/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php b/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php new file mode 100644 index 0000000000..45c65145b2 --- /dev/null +++ b/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php @@ -0,0 +1,193 @@ +name = 'taxonomies'; + $this->object_type = 'term'; + } + + /** + * Returns all public, registered taxonomies. + * + * @since 5.5.0 + * + * @return array Map of registered taxonomy objects keyed by their name. + */ + public function get_object_subtypes() { + $taxonomies = get_taxonomies( array( 'public' => true ), 'objects' ); + + /** + * Filter the list of taxonomy object subtypes available within the sitemap. + * + * @since 5.5.0 + * + * @param array $taxonomies Map of registered taxonomy objects keyed by their name. + */ + return apply_filters( 'wp_sitemaps_taxonomies', $taxonomies ); + } + + /** + * Gets a URL list for a taxonomy sitemap. + * + * @since 5.5.0 + * + * @param int $page_num Page of results. + * @param string $taxonomy Optional. Taxonomy name. Default empty. + * @return array $url_list Array of URLs for a sitemap. 
+ */ + public function get_url_list( $page_num, $taxonomy = '' ) { + $supported_types = $this->get_object_subtypes(); + + // Bail early if the queried taxonomy is not supported. + if ( ! isset( $supported_types[ $taxonomy ] ) ) { + return array(); + } + + /** + * Filters the taxonomies URL list before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param array $url_list The URL list. Default null. + * @param string $taxonomy Taxonomy name. + * @param int $page_num Page of results. + */ + $url_list = apply_filters( + 'wp_sitemaps_taxonomies_pre_url_list', + null, + $taxonomy, + $page_num + ); + + if ( null !== $url_list ) { + return $url_list; + } + + $url_list = array(); + + // Offset by how many terms should be included in previous pages. + $offset = ( $page_num - 1 ) * wp_sitemaps_get_max_urls( $this->object_type ); + + $args = $this->get_taxonomies_query_args( $taxonomy ); + $args['offset'] = $offset; + + $taxonomy_terms = new WP_Term_Query( $args ); + + if ( ! empty( $taxonomy_terms->terms ) ) { + foreach ( $taxonomy_terms->terms as $term ) { + $sitemap_entry = array( + 'loc' => get_term_link( $term ), + ); + + /** + * Filters the sitemap entry for an individual term. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the term. + * @param WP_Term $term Term object. + * @param string $taxonomy Taxonomy name. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_taxonomies_entry', $sitemap_entry, $term, $taxonomy ); + $url_list[] = $sitemap_entry; + } + } + + return $url_list; + } + + /** + * Gets the max number of pages available for the object type. + * + * @since 5.5.0 + * + * @param string $taxonomy Taxonomy name. + * @return int Total number of pages. + */ + public function get_max_num_pages( $taxonomy = '' ) { + if ( empty( $taxonomy ) ) { + return 0; + } + + /** + * Filters the max number of pages before it is generated. 
+ * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param int $max_num_pages The maximum number of pages. Default null. + * @param string $taxonomy Taxonomy name. + */ + $max_num_pages = apply_filters( 'wp_sitemaps_taxonomies_pre_max_num_pages', null, $taxonomy ); + + if ( null !== $max_num_pages ) { + return $max_num_pages; + } + + $term_count = wp_count_terms( $taxonomy, $this->get_taxonomies_query_args( $taxonomy ) ); + + return (int) ceil( $term_count / wp_sitemaps_get_max_urls( $this->object_type ) ); + } + + /** + * Returns the query args for retrieving taxonomy terms to list in the sitemap. + * + * @since 5.5.0 + * + * @param string $taxonomy Taxonomy name. + * @return array $args Array of WP_Term_Query arguments. + */ + protected function get_taxonomies_query_args( $taxonomy ) { + /** + * Filters the taxonomy terms query arguments. + * + * Allows modification of the taxonomy query arguments before querying. + * + * @see WP_Term_Query for a full list of arguments + * + * @since 5.5.0 + * + * @param array $args Array of WP_Term_Query arguments. + * @param string $taxonomy Taxonomy name. + */ + $args = apply_filters( + 'wp_sitemaps_taxonomies_query_args', + array( + 'fields' => 'ids', + 'taxonomy' => $taxonomy, + 'orderby' => 'term_order', + 'number' => wp_sitemaps_get_max_urls( $this->object_type ), + 'hide_empty' => true, + 'hierarchical' => false, + 'update_term_meta_cache' => false, + ), + $taxonomy + ); + + return $args; + } +} diff --git a/wp-includes/sitemaps/providers/class-wp-sitemaps-users.php b/wp-includes/sitemaps/providers/class-wp-sitemaps-users.php new file mode 100644 index 0000000000..1be9a81db1 --- /dev/null +++ b/wp-includes/sitemaps/providers/class-wp-sitemaps-users.php @@ -0,0 +1,163 @@ +name = 'users'; + $this->object_type = 'user'; + } + + /** + * Gets a URL list for a user sitemap. 
+ * + * @since 5.5.0 + * + * @param int $page_num Page of results. + * @param string $object_subtype Optional. Not applicable for Users but + * required for compatibility with the parent + * provider class. Default empty. + * @return array $url_list Array of URLs for a sitemap. + */ + public function get_url_list( $page_num, $object_subtype = '' ) { + /** + * Filters the users URL list before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param array $url_list The URL list. Default null. + * @param int $page_num Page of results. + */ + $url_list = apply_filters( + 'wp_sitemaps_users_pre_url_list', + null, + $page_num + ); + + if ( null !== $url_list ) { + return $url_list; + } + + $args = $this->get_users_query_args(); + $args['paged'] = $page_num; + + $query = new WP_User_Query( $args ); + $users = $query->get_results(); + $url_list = array(); + + foreach ( $users as $user ) { + $sitemap_entry = array( + 'loc' => get_author_posts_url( $user->ID ), + ); + + /** + * Filters the sitemap entry for an individual user. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the user. + * @param WP_User $user User object. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_users_entry', $sitemap_entry, $user ); + $url_list[] = $sitemap_entry; + } + + return $url_list; + } + + /** + * Gets the max number of pages available for the object type. + * + * @since 5.5.0 + * + * @see WP_Sitemaps_Provider::max_num_pages + * + * @param string $object_subtype Optional. Not applicable for Users but + * required for compatibility with the parent + * provider class. Default empty. + * @return int Total page count. + */ + public function get_max_num_pages( $object_subtype = '' ) { + /** + * Filters the max number of pages before it is generated. 
+ * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param int $max_num_pages The maximum number of pages. Default null. + */ + $max_num_pages = apply_filters( 'wp_sitemaps_users_pre_max_num_pages', null ); + + if ( null !== $max_num_pages ) { + return $max_num_pages; + } + + $args = $this->get_users_query_args(); + $query = new WP_User_Query( $args ); + + $total_users = $query->get_total(); + + return (int) ceil( $total_users / wp_sitemaps_get_max_urls( $this->object_type ) ); + } + + /** + * Returns the query args for retrieving users to list in the sitemap. + * + * @since 5.5.0 + * + * @return array $args Array of WP_User_Query arguments. + */ + protected function get_users_query_args() { + $public_post_types = get_post_types( + array( + 'public' => true, + ) + ); + + // We're not supporting sitemaps for author pages for attachments. + unset( $public_post_types['attachment'] ); + + /** + * Filters the query arguments for authors with public posts. + * + * Allows modification of the authors query arguments before querying. + * + * @see WP_User_Query for a full list of arguments + * + * @since 5.5.0 + * + * @param array $args Array of WP_User_Query arguments. + */ + $args = apply_filters( + 'wp_sitemaps_users_query_args', + array( + 'has_published_posts' => array_keys( $public_post_types ), + 'number' => wp_sitemaps_get_max_urls( $this->object_type ), + ) + ); + + return $args; + } +} diff --git a/wp-includes/version.php b/wp-includes/version.php index 810cb6d717..36e428f981 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -13,7 +13,7 @@ * * @global string $wp_version */ -$wp_version = '5.5-alpha-48071'; +$wp_version = '5.5-alpha-48072'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. 
diff --git a/wp-settings.php b/wp-settings.php index 1a5f296733..a78be1c091 100644 --- a/wp-settings.php +++ b/wp-settings.php @@ -263,6 +263,16 @@ require ABSPATH . WPINC . '/rest-api/fields/class-wp-rest-term-meta-fields.php'; require ABSPATH . WPINC . '/rest-api/fields/class-wp-rest-user-meta-fields.php'; require ABSPATH . WPINC . '/rest-api/search/class-wp-rest-search-handler.php'; require ABSPATH . WPINC . '/rest-api/search/class-wp-rest-post-search-handler.php'; +require ABSPATH . WPINC . '/sitemaps.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-index.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-provider.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-registry.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-renderer.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-stylesheet.php'; +require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-posts.php'; +require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-taxonomies.php'; +require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-users.php'; require ABSPATH . WPINC . '/class-wp-block-type.php'; require ABSPATH . WPINC . '/class-wp-block-styles-registry.php'; require ABSPATH . WPINC . '/class-wp-block-type-registry.php';