Ensure that `shortcode_unautop()` treats `&nbsp;` like whitespace.

`shortcode_unautop()` and `wptexturize()` now use `wp_spaces_regexp()` instead of raw regex.

Adds unit tests.

Props miqrogroove.
See #27588.

Built from https://develop.svn.wordpress.org/trunk@28716


git-svn-id: http://core.svn.wordpress.org/trunk@28530 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Scott Taylor 2014-06-10 01:55:15 +00:00
parent b4b0efe701
commit 66d2144e9a

View File

@ -82,14 +82,7 @@ function wptexturize($text) {
$static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn&#8211;', '...', '``', '\'\'', ' (tm)' ), $cockney ); $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn&#8211;', '...', '``', '\'\'', ' (tm)' ), $cockney );
$static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ), $cockneyreplace ); $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ), $cockneyreplace );
/* $spaces = wp_spaces_regexp();
* Regex for common whitespace characters.
*
* By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp.
* This is designed to replace the PCRE \s sequence. In #WP22692, that sequence
* was found to be unreliable due to random inclusion of the A0 byte.
*/
$spaces = '[\r\n\t ]|\xC2\xA0|&nbsp;';
// Pattern-based replacements of characters. // Pattern-based replacements of characters.
@ -370,11 +363,12 @@ function shortcode_unautop( $pee ) {
} }
$tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) );
$spaces = wp_spaces_regexp();
$pattern = $pattern =
'/' '/'
. '<p>' // Opening paragraph . '<p>' // Opening paragraph
. '\\s*+' // Optional leading whitespace . '(?:' . $spaces . ')*+' // Optional leading whitespace
. '(' // 1: The shortcode . '(' // 1: The shortcode
. '\\[' // Opening bracket . '\\[' // Opening bracket
. "($tagregexp)" // 2: Shortcode name . "($tagregexp)" // 2: Shortcode name
@ -399,7 +393,7 @@ function shortcode_unautop( $pee ) {
. ')?' . ')?'
. ')' . ')'
. ')' . ')'
. '\\s*+' // optional trailing whitespace . '(?:' . $spaces . ')*+' // optional trailing whitespace
. '<\\/p>' // closing paragraph . '<\\/p>' // closing paragraph
. '/s'; . '/s';
@ -3857,8 +3851,19 @@ function wp_spaces_regexp() {
static $spaces; static $spaces;
if ( empty( $spaces ) ) { if ( empty( $spaces ) ) {
/**
* Regexp for common whitespace characters.
*
* This string is substituted for the \s sequence as needed in regular expressions.
* For websites not written in English, different characters may represent whitespace.
* For websites not encoded in UTF-8, the 0xC2 0xA0 sequence may not be in use.
*
* @since 4.0.0
*
* @param string $spaces
*/
$spaces = apply_filters( 'wp_spaces_regexp', '[\r\n\t ]|\xC2\xA0|&nbsp;' ); $spaces = apply_filters( 'wp_spaces_regexp', '[\r\n\t ]|\xC2\xA0|&nbsp;' );
} }
return $spaces; return $spaces;
} }