From 66d2144e9a9493cd52a8ed82c65d1abe49489cef Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Tue, 10 Jun 2014 01:55:15 +0000 Subject: [PATCH] Ensure that `shortcode_unautop()` treats `&nbsp;` like whitespace. `shortcode_unautop()` and `wptexturize()` now use `wp_spaces_regexp()` instead of raw regex. Adds unit tests. Props miqrogroove. See #27588. Built from https://develop.svn.wordpress.org/trunk@28716 git-svn-id: http://core.svn.wordpress.org/trunk@28530 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/formatting.php | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index d8f78d1556..8aa6ad721d 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -82,14 +82,7 @@ function wptexturize($text) { $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney ); $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); - /* - * Regex for common whitespace characters. - * - * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp. - * This is designed to replace the PCRE \s sequence. In #WP22692, that sequence - * was found to be unreliable due to random inclusion of the A0 byte. - */ - $spaces = '[\r\n\t ]|\xC2\xA0| '; + $spaces = wp_spaces_regexp(); // Pattern-based replacements of characters. @@ -370,11 +363,12 @@ function shortcode_unautop( $pee ) { } $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); + $spaces = wp_spaces_regexp(); $pattern = '/' . '

' // Opening paragraph - . '\\s*+' // Optional leading whitespace + . '(?:' . $spaces . ')*+' // Optional leading whitespace . '(' // 1: The shortcode . '\\[' // Opening bracket . "($tagregexp)" // 2: Shortcode name @@ -399,7 +393,7 @@ function shortcode_unautop( $pee ) { . ')?' . ')' . ')' - . '\\s*+' // optional trailing whitespace + . '(?:' . $spaces . ')*+' // optional trailing whitespace . '<\\/p>' // closing paragraph . '/s'; @@ -3857,8 +3851,19 @@ function wp_spaces_regexp() { static $spaces; if ( empty( $spaces ) ) { + /** + * Regexp for common whitespace characters. + * + * This string is substituted for the \s sequence as needed in regular expressions. + * For websites not written in English, different characters may represent whitespace. + * For websites not encoded in UTF-8, the 0xC2 0xA0 sequence may not be in use. + * + * @since 4.0.0 + * + * @param string $spaces + */ $spaces = apply_filters( 'wp_spaces_regexp', '[\r\n\t ]|\xC2\xA0| ' ); } return $spaces; -} \ No newline at end of file +}