HTTP API: Simplify wp_parse_url() to ensure consistent results.

[38694] revealed some URL formats were been parsed incorrectly, including those used by Google Fonts. This change simplifies the function to use placeholder values which cause PHP's parsing to behave consistently.

Props jrf, peterwilsoncc.
Fixes #36356.

Built from https://develop.svn.wordpress.org/trunk@38726


git-svn-id: http://core.svn.wordpress.org/trunk@38669 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Peter Wilson 2016-10-04 20:33:29 +00:00
parent 86ad7855be
commit 7a52a3aac7
2 changed files with 93 additions and 55 deletions

View File

@ -623,11 +623,16 @@ function ms_allowed_http_request_hosts( $is_external, $host ) {
}
/**
* A wrapper for PHP's parse_url() function that handles edgecases in < PHP 5.4.7
* A wrapper for PHP's parse_url() function that handles consistency in the return
* values across PHP versions.
*
* PHP 5.4.7 expanded parse_url()'s ability to handle non-absolute url's, including
* schemeless and relative url's with :// in the path, this works around those
* limitations providing a standard output on PHP 5.2~5.4+.
* schemeless and relative url's with :// in the path. This function works around
* those limitations providing a standard output on PHP 5.2~5.4+.
*
* Secondly, across various PHP versions, schemeless URLs starting containing a ":"
* in the query are being handled inconsistently. This function works around those
* differences as well.
*
* Error suppression is used as prior to PHP 5.3.3, an E_WARNING would be generated
* when URL parsing failed.
@ -640,63 +645,96 @@ function ms_allowed_http_request_hosts( $is_external, $host ) {
* predefined constants to specify which one.
* Defaults to -1 (= return all parts as an array).
* @see http://php.net/manual/en/function.parse-url.php
* @return mixed False on failure; Array of URL components on success;
* When a specific component has been requested: null if the component doesn't
* exist in the given URL; a sting or - in the case of PHP_URL_PORT - integer
* when it does; See parse_url()'s return values.
* @return mixed False on parse failure; Array of URL components on success;
* When a specific component has been requested: null if the component
* doesn't exist in the given URL; a sting or - in the case of
* PHP_URL_PORT - integer when it does. See parse_url()'s return values.
*/
function wp_parse_url( $url, $component = -1 ) {
$parts = @parse_url( $url, $component );
$to_unset = array();
$url = strval( $url );
if ( version_compare( PHP_VERSION, '5.4.7', '>=' ) ) {
if ( '//' === substr( $url, 0, 2 ) ) {
$to_unset[] = 'scheme';
$url = 'placeholder:' . $url;
} elseif ( '/' === substr( $url, 0, 1 ) ) {
$to_unset[] = 'scheme';
$to_unset[] = 'host';
$url = 'placeholder://placeholder' . $url;
}
$parts = @parse_url( $url );
if ( false === $parts ) {
// Parsing failure.
return $parts;
}
if ( false === $parts ) {
// < PHP 5.4.7 compat, trouble with relative paths including a scheme break in the path.
if ( '/' == $url[0] && false !== strpos( $url, '://' ) ) {
if ( in_array( $component, array( PHP_URL_SCHEME, PHP_URL_HOST ), true ) ) {
return null;
}
// Since we know it's a relative path, prefix with a scheme/host placeholder and try again.
if ( ! $parts = @parse_url( 'placeholder://placeholder' . $url, $component ) ) {
return $parts;
}
// Remove the placeholder values.
if ( -1 === $component ) {
unset( $parts['scheme'], $parts['host'] );
}
} else {
return $parts;
}
// Remove the placeholder values.
foreach ( $to_unset as $key ) {
unset( $parts[ $key ] );
}
// < PHP 5.4.7 compat, doesn't detect a schemeless URL's host field.
if ( '//' == substr( $url, 0, 2 ) ) {
if ( -1 === $component && ! isset( $parts['host'] ) ) {
$path_parts = explode( '/', substr( $parts['path'], 2 ), 2 );
$parts['host'] = $path_parts[0];
if ( isset( $path_parts[1] ) ) {
$parts['path'] = '/' . $path_parts[1];
} else {
unset( $parts['path'] );
}
} elseif ( PHP_URL_HOST === $component || PHP_URL_PATH === $component ) {
$all_parts = @parse_url( $url );
if ( ! isset( $all_parts['host'] ) ) {
$path_parts = explode( '/', substr( $all_parts['path'], 2 ), 2 );
if ( PHP_URL_PATH === $component ) {
if ( isset( $path_parts[1] ) ) {
$parts = '/' . $path_parts[1];
} else {
$parts = null;
}
} elseif ( PHP_URL_HOST === $component ) {
$parts = $path_parts[0];
}
}
}
}
return $parts;
return _get_component_from_parsed_url_array( $parts, $component );
}
/**
* Retrieve a specific component from a parsed URL array.
*
* @internal
*
* @since 4.7.0
*
* @param array|false $url_parts The parsed URL. Can be false if the URL failed to parse.
* @param int $component The specific component to retrieve. Use one of the PHP
* predefined constants to specify which one.
* Defaults to -1 (= return all parts as an array).
* @see http://php.net/manual/en/function.parse-url.php
* @return mixed False on parse failure; Array of URL components on success;
* When a specific component has been requested: null if the component
* doesn't exist in the given URL; a sting or - in the case of
* PHP_URL_PORT - integer when it does. See parse_url()'s return values.
*/
function _get_component_from_parsed_url_array( $url_parts, $component = -1 ) {
if ( -1 === $component ) {
return $url_parts;
}
$key = _wp_translate_php_url_constant_to_key( $component );
if ( false !== $key && is_array( $url_parts ) && isset( $url_parts[ $key ] ) ) {
return $url_parts[ $key ];
} else {
return null;
}
}
/**
* Translate a PHP_URL_* constant to the named array keys PHP uses.
*
* @internal
*
* @since 4.7.0
*
* @see http://php.net/manual/en/url.constants.php
*
* @param int $constant PHP_URL_* constant.
* @return string|bool The named key or false.
*/
function _wp_translate_php_url_constant_to_key( $constant ) {
$translation = array(
PHP_URL_SCHEME => 'scheme',
PHP_URL_HOST => 'host',
PHP_URL_PORT => 'port',
PHP_URL_USER => 'user',
PHP_URL_PASS => 'pass',
PHP_URL_PATH => 'path',
PHP_URL_QUERY => 'query',
PHP_URL_FRAGMENT => 'fragment',
);
if ( isset( $translation[ $constant ] ) ) {
return $translation[ $constant ];
} else {
return false;
}
}

View File

@ -4,7 +4,7 @@
*
* @global string $wp_version
*/
$wp_version = '4.7-alpha-38725';
$wp_version = '4.7-alpha-38726';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.