Canonical/Rewrite: sanity-check posts that are paged with `<!--nextpage-->`. Page numbers past the maximum number of pages were returning the last page of content, causing infinite duplicate content.

Awesome rewrite bug: the `page` query var was being set to `'/4'` in `$wp`. When cast to `int`, that value becomes `0` (Bless you, PHP). `WP_Query` calls `trim( $page, '/' )` when setting its own query var. The few places that were checking `page` before posts were queried now have sanity checks, so that these changes work without flushing rewrites.

Adds/updates unit tests.

Props wonderboymusic, dd32.
See #11694.

Built from https://develop.svn.wordpress.org/trunk@34492


git-svn-id: http://core.svn.wordpress.org/trunk@34456 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Scott Taylor 2015-09-24 14:04:24 +00:00
parent f1532ccde8
commit 175d476b0e
5 changed files with 29 additions and 8 deletions

View File

@ -148,6 +148,13 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
} }
} }
if ( get_query_var( 'page' ) && $wp_query->post &&
false !== strpos( $wp_query->post->post_content, '<!--nextpage-->' ) ) {
$redirect['path'] = rtrim( $redirect['path'], (int) get_query_var( 'page' ) . '/' );
$redirect['query'] = remove_query_arg( 'page', $redirect['query'] );
$redirect_url = get_permalink( $wp_query->post->ID );
}
} elseif ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() ) { } elseif ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() ) {
// rewriting of old ?p=X, ?m=2004, ?m=200401, ?m=20040101 // rewriting of old ?p=X, ?m=2004, ?m=200401, ?m=20040101
if ( is_attachment() && ! $redirect_url ) { if ( is_attachment() && ! $redirect_url ) {
@ -560,7 +567,7 @@ function redirect_guess_404_permalink() {
return false; return false;
if ( get_query_var( 'feed' ) ) if ( get_query_var( 'feed' ) )
return get_post_comments_feed_link( $post_id, get_query_var( 'feed' ) ); return get_post_comments_feed_link( $post_id, get_query_var( 'feed' ) );
elseif ( get_query_var( 'page' ) ) elseif ( get_query_var( 'page' ) && 1 < get_query_var( 'page' ) )
return trailingslashit( get_permalink( $post_id ) ) . user_trailingslashit( get_query_var( 'page' ), 'single_paged' ); return trailingslashit( get_permalink( $post_id ) ) . user_trailingslashit( get_query_var( 'page' ), 'single_paged' );
else else
return get_permalink( $post_id ); return get_permalink( $post_id );

View File

@ -1071,8 +1071,10 @@ class WP_Rewrite {
$sub1 .= '?$'; $sub1 .= '?$';
$sub2 .= '?$'; $sub2 .= '?$';
//post pagination, e.g. <permalink>/2/ // Post pagination, e.g. <permalink>/2/
$match = $match . '(/[0-9]+)?/?$'; // Previously: '(/[0-9]+)?/?$', which produced '/2' for page.
// When cast to int, returned 0.
$match = $match . '(?:/([0-9]+))?/?$';
$query = $index . '?' . $query . '&page=' . $this->preg_index($num_toks + 1); $query = $index . '?' . $query . '&page=' . $this->preg_index($num_toks + 1);
} else { //not matching a permalink so this is a lot simpler } else { //not matching a permalink so this is a lot simpler
//close the match and finalise the query //close the match and finalise the query

View File

@ -587,7 +587,7 @@ class WP {
* @global WP_Query $wp_query * @global WP_Query $wp_query
*/ */
public function handle_404() { public function handle_404() {
global $wp_query; global $wp_query, $wp;
// If we've already issued a 404, bail. // If we've already issued a 404, bail.
if ( is_404() ) if ( is_404() )
@ -596,17 +596,27 @@ class WP {
// Never 404 for the admin, robots, or if we found posts. // Never 404 for the admin, robots, or if we found posts.
if ( is_admin() || is_robots() || $wp_query->posts ) { if ( is_admin() || is_robots() || $wp_query->posts ) {
// Only set X-Pingback for single posts. $success = true;
if ( is_singular() ) { if ( is_singular() ) {
$p = clone $wp_query->post; $p = clone $wp_query->post;
// Only set X-Pingback for single posts that allow pings.
if ( $p && pings_open( $p ) ) { if ( $p && pings_open( $p ) ) {
@header( 'X-Pingback: ' . get_bloginfo( 'pingback_url' ) ); @header( 'X-Pingback: ' . get_bloginfo( 'pingback_url' ) );
} }
// check for paged content that exceeds the max number of pages
$next = '<!--nextpage-->';
if ( $p && false !== strpos( $p->post_content, $next ) && ! empty( $wp->query_vars['page'] ) ) {
$page = trim( $wp->query_vars['page'], '/' );
$success = (int) $page <= ( substr_count( $p->post_content, $next ) + 1 );
}
} }
if ( $success ) {
status_header( 200 ); status_header( 200 );
return; return;
} }
}
// We will 404 for paged queries, as no posts were found. // We will 404 for paged queries, as no posts were found.
if ( ! is_paged() ) { if ( ! is_paged() ) {

View File

@ -256,6 +256,8 @@ function wp_resolve_numeric_slug_conflicts( $query_vars = array() ) {
} elseif ( 'monthnum' === $compare && isset( $query_vars['day'] ) ) { } elseif ( 'monthnum' === $compare && isset( $query_vars['day'] ) ) {
$maybe_page = $query_vars['day']; $maybe_page = $query_vars['day'];
} }
// Bug found in #11694 - 'page' was returning '/4'
$maybe_page = (int) trim( $maybe_page, '/' );
$post_page_count = substr_count( $post->post_content, '<!--nextpage-->' ) + 1; $post_page_count = substr_count( $post->post_content, '<!--nextpage-->' ) + 1;

View File

@ -4,7 +4,7 @@
* *
* @global string $wp_version * @global string $wp_version
*/ */
$wp_version = '4.4-alpha-34491'; $wp_version = '4.4-alpha-34492';
/** /**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.