Robots: Introduce Robots API.

This changeset introduces a filter-based Robots API, providing central control over the `robots` meta tag.

* Introduces `wp_robots()` function which should be called anywhere a `robots` meta tag should be included.
* Introduces `wp_robots` filter which allows adding or modifying directives for the `robots` meta tag. The `wp_robots()` function is entirely filter-based, i.e. if no filter is added to `wp_robots`, no directives will be present, and therefore the entire `robots` meta tag will be omitted.
* Introduces the following `wp_robots` filter functions which replace similar existing functions that were manually rendering a `robots` meta tag:
    * `wp_robots_noindex()` replaces `noindex()`, which has been deprecated.
    * `wp_robots_no_robots()` replaces `wp_no_robots()`, which has been deprecated.
    * `wp_robots_sensitive_page()` replaces `wp_sensitive_page_meta()`, which has been deprecated. Its rendering of the `referrer` meta tag has been moved to another new function `wp_strict_cross_origin_referrer()`.

Migration to the new functions is straightforward. For example, a call to `add_action( 'wp_head', 'wp_no_robots' )` should be replaced with `add_filter( 'wp_robots', 'wp_robots_no_robots' )`.

Plugins and themes that render their own `robots` meta tags are encouraged to switch to rely on the `wp_robots` filter in order to use the central management layer now provided by WordPress core.

Props adamsilverstein, flixos90, timothyblynjacobs, westonruter.
See #51511.

Built from https://develop.svn.wordpress.org/trunk@49992


git-svn-id: http://core.svn.wordpress.org/trunk@49693 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Felix Arntz 2021-01-21 01:37:00 +00:00
parent 3eb9b3e714
commit 3716c8c20f
12 changed files with 248 additions and 57 deletions

View File

@ -114,7 +114,8 @@ function wpmu_activate_stylesheet() {
<?php
}
add_action( 'wp_head', 'wpmu_activate_stylesheet' );
add_action( 'wp_head', 'wp_sensitive_page_meta' );
add_action( 'wp_head', 'wp_strict_cross_origin_referrer' );
add_filter( 'wp_robots', 'wp_robots_sensitive_page' );
get_header( 'wp-activate' );

View File

@ -1900,7 +1900,7 @@ final class WP_Customize_Manager {
nocache_headers();
header( 'X-Robots: noindex, nofollow, noarchive' );
}
add_action( 'wp_head', 'wp_no_robots' );
add_filter( 'wp_robots', 'wp_robots_no_robots' );
add_filter( 'wp_headers', array( $this, 'filter_iframe_security_headers' ) );
/*

View File

@ -231,6 +231,9 @@ add_filter( 'the_guid', 'esc_url' );
// Email filters.
add_filter( 'wp_mail', 'wp_staticize_emoji_for_email' );
// Robots filters.
add_filter( 'wp_robots', 'wp_robots_noindex' );
// Mark site as no longer fresh.
foreach ( array( 'publish_post', 'publish_page', 'wp_ajax_save-widget', 'wp_ajax_widgets-order', 'customize_save_after' ) as $action ) {
add_action( $action, '_delete_option_fresh_site', 0 );
@ -291,7 +294,7 @@ add_action( 'wp_head', 'rsd_link' );
add_action( 'wp_head', 'wlwmanifest_link' );
add_action( 'wp_head', 'locale_stylesheet' );
add_action( 'publish_future_post', 'check_and_publish_future_post', 10, 1 );
add_action( 'wp_head', 'noindex', 1 );
add_action( 'wp_head', 'wp_robots', 1 );
add_action( 'wp_head', 'print_emoji_detection_script', 7 );
add_action( 'wp_head', 'wp_print_styles', 8 );
add_action( 'wp_head', 'wp_print_head_scripts', 9 );
@ -311,10 +314,11 @@ add_action( 'after_switch_theme', '_wp_sidebars_changed' );
add_action( 'wp_print_styles', 'print_emoji_styles' );
if ( isset( $_GET['replytocom'] ) ) {
add_action( 'wp_head', 'wp_no_robots' );
add_filter( 'wp_robots', 'wp_robots_no_robots' );
}
// Login actions.
add_action( 'login_head', 'wp_robots', 1 );
add_filter( 'login_head', 'wp_resource_hints', 8 );
add_action( 'login_head', 'wp_print_head_scripts', 9 );
add_action( 'login_head', 'print_admin_styles', 9 );
@ -585,7 +589,7 @@ add_action( 'embed_head', 'print_emoji_detection_script' );
add_action( 'embed_head', 'print_embed_styles' );
add_action( 'embed_head', 'wp_print_head_scripts', 20 );
add_action( 'embed_head', 'wp_print_styles', 20 );
add_action( 'embed_head', 'wp_no_robots' );
add_action( 'embed_head', 'wp_robots' );
add_action( 'embed_head', 'rel_canonical' );
add_action( 'embed_head', 'locale_stylesheet', 30 );
@ -596,6 +600,7 @@ add_action( 'embed_footer', 'print_embed_sharing_dialog' );
add_action( 'embed_footer', 'print_embed_scripts' );
add_action( 'embed_footer', 'wp_print_footer_scripts', 20 );
add_filter( 'wp_robots', 'wp_embed_no_robots' );
add_filter( 'excerpt_more', 'wp_embed_excerpt_more', 20 );
add_filter( 'the_excerpt_embed', 'wptexturize' );
add_filter( 'the_excerpt_embed', 'convert_chars' );

View File

@ -4135,3 +4135,71 @@ function wp_slash_strings_only( $value ) {
function addslashes_strings_only( $value ) {
return is_string( $value ) ? addslashes( $value ) : $value;
}
/**
* Displays a noindex meta tag if required by the blog configuration.
*
* If a blog is marked as not being public then the noindex meta tag will be
* output to tell web robots not to index the page content. Add this to the
* {@see 'wp_head'} action.
*
* Typical usage is as a {@see 'wp_head'} callback:
*
* add_action( 'wp_head', 'noindex' );
*
* @see wp_no_robots()
*
* @since 2.1.0
* @deprecated 5.7.0 Use wp_robots_noindex() instead on 'wp_robots' filter.
*/
function noindex() {
_deprecated_function( __FUNCTION__, '5.7.0', 'wp_robots_noindex()' );
// If the blog is not public, tell robots to go away.
if ( '0' == get_option( 'blog_public' ) ) {
wp_no_robots();
}
}
/**
* Display a noindex meta tag.
*
* Outputs a noindex meta tag that tells web robots not to index the page content.
* Typical usage is as a {@see 'wp_head'} callback. add_action( 'wp_head', 'wp_no_robots' );
*
* @since 3.3.0
* @since 5.3.0 Echo "noindex,nofollow" if search engine visibility is discouraged.
* @deprecated 5.7.0 Use wp_robots_no_robots() instead on 'wp_robots' filter.
*/
function wp_no_robots() {
_deprecated_function( __FUNCTION__, '5.7.0', 'wp_robots_no_robots()' );
if ( get_option( 'blog_public' ) ) {
echo "<meta name='robots' content='noindex,follow' />\n";
return;
}
echo "<meta name='robots' content='noindex,nofollow' />\n";
}
/**
* Display a noindex,noarchive meta tag and referrer origin-when-cross-origin meta tag.
*
* Outputs a noindex,noarchive meta tag that tells web robots not to index or cache the page content.
* Outputs a referrer origin-when-cross-origin meta tag that tells the browser not to send the full
* url as a referrer to other sites when cross-origin assets are loaded.
*
* Typical usage is as a wp_head callback. add_action( 'wp_head', 'wp_sensitive_page_meta' );
*
* @since 5.0.1
* @deprecated 5.7.0 Use wp_robots_sensitive_page() instead on 'wp_robots' filter
* and wp_strict_cross_origin_referrer() on 'wp_head' action.
*/
function wp_sensitive_page_meta() {
_deprecated_function( __FUNCTION__, '5.7.0', 'wp_robots_sensitive_page()' );
?>
<meta name='robots' content='noindex,noarchive' />
<?php
wp_strict_cross_origin_referrer();
}

View File

@ -1246,3 +1246,23 @@ function wp_filter_pre_oembed_result( $result, $url, $args ) {
return $result;
}
/**
* Adds noindex to the robots meta tag for embeds.
*
* Typical usage is as a {@see 'wp_robots'} callback:
*
* add_filter( 'wp_robots', 'wp_embed_no_robots' );
*
* @since 5.7.0
*
* @param array $robots Associative array of robots directives.
* @return array Filtered robots directives.
*/
function wp_embed_no_robots( array $robots ) {
if ( ! is_embed() ) {
return $robots;
}
return wp_robots_no_robots( $robots );
}

View File

@ -1640,7 +1640,8 @@ function do_feed_atom( $for_comments ) {
*
* @since 2.1.0
* @since 5.3.0 Remove the "Disallow: /" output if search engine visiblity is
* discouraged in favor of robots meta HTML tag in wp_no_robots().
* discouraged in favor of robots meta HTML tag via wp_robots_no_robots()
* filter callback.
*/
function do_robots() {
header( 'Content-Type: text/plain; charset=utf-8' );
@ -3491,8 +3492,9 @@ function _default_wp_die_handler( $message, $title = '', $args = array() ) {
<meta http-equiv="Content-Type" content="text/html; charset=<?php echo $parsed_args['charset']; ?>" />
<meta name="viewport" content="width=device-width">
<?php
if ( function_exists( 'wp_no_robots' ) ) {
wp_no_robots();
if ( function_exists( 'wp_robots' ) && function_exists( 'wp_robots_no_robots' ) && function_exists( 'add_filter' ) ) {
add_filter( 'wp_robots', 'wp_robots_no_robots' );
wp_robots();
}
?>
<title><?php echo $title; ?></title>

View File

@ -3191,59 +3191,17 @@ function wlwmanifest_link() {
}
/**
* Displays a noindex meta tag if required by the blog configuration.
* Displays a referrer strict-origin-when-cross-origin meta tag.
*
* If a blog is marked as not being public then the noindex meta tag will be
* output to tell web robots not to index the page content. Add this to the
* {@see 'wp_head'} action.
*
* Typical usage is as a {@see 'wp_head'} callback:
*
* add_action( 'wp_head', 'noindex' );
*
* @see wp_no_robots()
*
* @since 2.1.0
*/
function noindex() {
// If the blog is not public, tell robots to go away.
if ( '0' == get_option( 'blog_public' ) ) {
wp_no_robots();
}
}
/**
* Display a noindex meta tag.
*
* Outputs a noindex meta tag that tells web robots not to index the page content.
* Typical usage is as a {@see 'wp_head'} callback. add_action( 'wp_head', 'wp_no_robots' );
*
* @since 3.3.0
* @since 5.3.0 Echo "noindex,nofollow" if search engine visibility is discouraged.
*/
function wp_no_robots() {
if ( get_option( 'blog_public' ) ) {
echo "<meta name='robots' content='noindex,follow' />\n";
return;
}
echo "<meta name='robots' content='noindex,nofollow' />\n";
}
/**
* Display a noindex,noarchive meta tag and referrer origin-when-cross-origin meta tag.
*
* Outputs a noindex,noarchive meta tag that tells web robots not to index or cache the page content.
* Outputs a referrer origin-when-cross-origin meta tag that tells the browser not to send the full
* url as a referrer to other sites when cross-origin assets are loaded.
*
* Typical usage is as a wp_head callback. add_action( 'wp_head', 'wp_sensitive_page_meta' );
* Typical usage is as a wp_head callback. add_action( 'wp_head', 'wp_strict_cross_origin_referrer' );
*
* @since 5.0.1
* @since 5.7.0
*/
function wp_sensitive_page_meta() {
function wp_strict_cross_origin_referrer() {
?>
<meta name='robots' content='noindex,noarchive' />
<meta name='referrer' content='strict-origin-when-cross-origin' />
<?php
}

View File

@ -0,0 +1,135 @@
<?php
/**
* Robots template functions.
*
* @package WordPress
* @subpackage Robots
* @since 5.7.0
*/
/**
* Displays the robots meta tag as necessary.
*
* Gathers robots directives to include for the current context, using the
* {@see 'wp_robots'} filter. The directives are then sanitized, and the
* robots meta tag is output if there is at least one relevant directive.
*
* @since 5.7.0
*/
function wp_robots() {
/**
* Filters the directives to be included in the 'robots' meta tag.
*
* The meta tag will only be included as necessary.
*
* @since 5.7.0
*
* @param array $robots Associative array of directives. Every key must be the name of the directive, and the
* corresponding value must either be a string to provide as value for the directive or a
* boolean `true` if it is a boolean directive, i.e. without a value.
*/
$robots = apply_filters( 'wp_robots', array() );
// Don't allow mutually exclusive directives.
if ( ! empty( $robots['follow'] ) ) {
unset( $robots['nofollow'] );
}
if ( ! empty( $robots['nofollow'] ) ) {
unset( $robots['follow'] );
}
if ( ! empty( $robots['archive'] ) ) {
unset( $robots['noarchive'] );
}
if ( ! empty( $robots['noarchive'] ) ) {
unset( $robots['archive'] );
}
$robots_strings = array();
foreach ( $robots as $directive => $value ) {
if ( is_string( $value ) ) {
// If a string value, include it as value for the directive.
$robots_strings[] = "{$directive}:{$value}";
} elseif ( $value ) {
// Otherwise, include the directive if it is truthy.
$robots_strings[] = $directive;
}
}
if ( empty( $robots_strings ) ) {
return;
}
echo "<meta name='robots' content='" . esc_attr( implode( ', ', $robots_strings ) ) . "' />\n";
}
/**
* Adds noindex to the robots meta tag if required by the site configuration.
*
* If a blog is marked as not being public then noindex will be output to
* tell web robots not to index the page content. Add this to the
* {@see 'wp_robots'} filter.
*
* Typical usage is as a {@see 'wp_robots'} callback:
*
* add_filter( 'wp_robots', 'wp_robots_noindex' );
*
* @since 5.7.0
* @see wp_robots_no_robots()
*
* @param array $robots Associative array of robots directives.
* @return array Filtered robots directives.
*/
function wp_robots_noindex( array $robots ) {
if ( ! get_option( 'blog_public' ) ) {
return wp_robots_no_robots( $robots );
}
return $robots;
}
/**
* Adds noindex to the robots meta tag.
*
* This directive tells web robots not to index the page content.
*
* Typical usage is as a {@see 'wp_robots'} callback:
*
* add_filter( 'wp_robots', 'wp_robots_no_robots' );
*
* @since 5.7.0
*
* @param array $robots Associative array of robots directives.
* @return array Filtered robots directives.
*/
function wp_robots_no_robots( array $robots ) {
$robots['noindex'] = true;
if ( get_option( 'blog_public' ) ) {
$robots['follow'] = true;
} else {
$robots['nofollow'] = true;
}
return $robots;
}
/**
* Adds noindex and noarchive to the robots meta tag.
*
* This directive tells web robots not to index or archive the page content and
* is recommended to be used for sensitive pages.
*
* Typical usage is as a {@see 'wp_robots'} callback:
*
* add_filter( 'wp_robots', 'wp_robots_sensitive_page' );
*
* @since 5.7.0
*
* @param array $robots Associative array of robots directives.
* @return array Filtered robots directives.
*/
function wp_robots_sensitive_page( array $robots ) {
$robots['noindex'] = true;
$robots['noarchive'] = true;
return $robots;
}

View File

@ -13,7 +13,7 @@
*
* @global string $wp_version
*/
$wp_version = '5.7-alpha-49991';
$wp_version = '5.7-alpha-49992';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.

View File

@ -42,7 +42,8 @@ function login_header( $title = 'Log In', $message = '', $wp_error = null ) {
global $error, $interim_login, $action;
// Don't index any of these forms.
add_action( 'login_head', 'wp_sensitive_page_meta' );
add_filter( 'wp_robots', 'wp_robots_sensitive_page' );
add_action( 'login_head', 'wp_strict_cross_origin_referrer' );
add_action( 'login_head', 'wp_login_viewport_meta' );

View File

@ -181,6 +181,7 @@ require ABSPATH . WPINC . '/class-wp-metadata-lazyloader.php';
require ABSPATH . WPINC . '/general-template.php';
require ABSPATH . WPINC . '/link-template.php';
require ABSPATH . WPINC . '/author-template.php';
require ABSPATH . WPINC . '/robots-template.php';
require ABSPATH . WPINC . '/post.php';
require ABSPATH . WPINC . '/class-walker-page.php';
require ABSPATH . WPINC . '/class-walker-page-dropdown.php';

View File

@ -3,7 +3,7 @@
/** Sets up the WordPress Environment. */
require __DIR__ . '/wp-load.php';
add_action( 'wp_head', 'wp_no_robots' );
add_filter( 'wp_robots', 'wp_robots_no_robots' );
require __DIR__ . '/wp-blog-header.php';