From d04cd4147aa3f74125e56f3d17b573393b331823 Mon Sep 17 00:00:00 2001 From: Andrew Nacin Date: Sat, 29 Mar 2014 07:16:16 +0000 Subject: [PATCH] Texturize: Massive performance improvements (~600% faster); better handling of nbsp, double, and weird spaces; 136 new unit tests. big props miqrogroove. fixes #22692. Built from https://develop.svn.wordpress.org/trunk@27839 git-svn-id: http://core.svn.wordpress.org/trunk@27673 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/formatting.php | 73 +++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index ee3bcd0a8d..e1a0ee3104 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -73,27 +73,50 @@ function wptexturize($text) { $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney ); $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); - $dynamic = array(); - if ( "'" != $apos ) { - $dynamic[ '/\'(\d\d(?:’|\')?s)/' ] = $apos . '$1'; // '99's - $dynamic[ '/\'(\d)/' ] = $apos . '$1'; // '99 - } - if ( "'" != $opening_single_quote ) - $dynamic[ '/(\s|\A|[([{<]|")\'/' ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [ - if ( '"' != $double_prime ) - $dynamic[ '/(\d)"/' ] = '$1' . $double_prime; // 9" (double prime) - if ( "'" != $prime ) - $dynamic[ '/(\d)\'/' ] = '$1' . $prime; // 9' (prime) - if ( "'" != $apos ) - $dynamic[ '/(\S)\'([^\'\s])/' ] = '$1' . $apos . '$2'; // apostrophe in a word - if ( '"' != $opening_quote ) - $dynamic[ '/(\s|\A|[([{<])"(?!\s)/' ] = '$1' . $opening_quote . '$2'; // opening double quote, even after (, {, <, [ - if ( '"' != $closing_quote ) - $dynamic[ '/"(\s|\S|\Z)/' ] = $closing_quote . 
'$1'; // closing double quote - if ( "'" != $closing_single_quote ) - $dynamic[ '/\'([\s.]|\Z)/' ] = $closing_single_quote . '$1'; // closing single quote + /* + * Regex for common whitespace characters. + * + * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp. + * This is designed to replace the PCRE \s sequence. In #WP22692, that sequence + * was found to be unreliable due to random inclusion of the A0 byte. + */ + $spaces = '[\r\n\t ]|\xC2\xA0|&nbsp;'; - $dynamic[ '/\b(\d+)x(\d+)\b/' ] = '$1×$2'; // 9x9 (times) + + // Pattern-based replacements of characters. + $dynamic = array(); + + // '99 '99s '99's (apostrophe) + if ( "'" != $apos ) + $dynamic[ '/\'(?=\d)/' ] = $apos; + + // Single quote at start, or preceded by (, {, <, [, ", or spaces. + if ( "'" != $opening_single_quote ) + $dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote; + + // 9" (double prime) + if ( '"' != $double_prime ) + $dynamic[ '/(?<=\d)"/' ] = $double_prime; + + // 9' (prime) + if ( "'" != $prime ) + $dynamic[ '/(?<=\d)\'/' ] = $prime; + + // Apostrophe in a word. No spaces or double primes. + if ( "'" != $apos ) + $dynamic[ '/(?