mirror of
https://github.com/WordPress/WordPress.git
synced 2025-03-11 22:29:48 +01:00
Texturize: Massive performance improvements (~600% faster); better handling of nbsp, double, and weird spaces; 136 new unit tests.
big props miqrogroove. fixes #22692. Built from https://develop.svn.wordpress.org/trunk@27839 git-svn-id: http://core.svn.wordpress.org/trunk@27673 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
5851e00b93
commit
d04cd4147a
@ -73,27 +73,50 @@ function wptexturize($text) {
|
||||
$static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney );
|
||||
$static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace );
|
||||
|
||||
$dynamic = array();
|
||||
if ( "'" != $apos ) {
|
||||
$dynamic[ '/\'(\d\d(?:’|\')?s)/' ] = $apos . '$1'; // '99's
|
||||
$dynamic[ '/\'(\d)/' ] = $apos . '$1'; // '99
|
||||
}
|
||||
if ( "'" != $opening_single_quote )
|
||||
$dynamic[ '/(\s|\A|[([{<]|")\'/' ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [
|
||||
if ( '"' != $double_prime )
|
||||
$dynamic[ '/(\d)"/' ] = '$1' . $double_prime; // 9" (double prime)
|
||||
if ( "'" != $prime )
|
||||
$dynamic[ '/(\d)\'/' ] = '$1' . $prime; // 9' (prime)
|
||||
if ( "'" != $apos )
|
||||
$dynamic[ '/(\S)\'([^\'\s])/' ] = '$1' . $apos . '$2'; // apostrophe in a word
|
||||
if ( '"' != $opening_quote )
|
||||
$dynamic[ '/(\s|\A|[([{<])"(?!\s)/' ] = '$1' . $opening_quote . '$2'; // opening double quote, even after (, {, <, [
|
||||
if ( '"' != $closing_quote )
|
||||
$dynamic[ '/"(\s|\S|\Z)/' ] = $closing_quote . '$1'; // closing double quote
|
||||
if ( "'" != $closing_single_quote )
|
||||
$dynamic[ '/\'([\s.]|\Z)/' ] = $closing_single_quote . '$1'; // closing single quote
|
||||
/*
|
||||
* Regex for common whitespace characters.
|
||||
*
|
||||
* By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp.
|
||||
* This is designed to replace the PCRE \s sequence. In #WP22692, that sequence
|
||||
* was found to be unreliable due to random inclusion of the A0 byte.
|
||||
*/
|
||||
$spaces = '[\r\n\t ]|\xC2\xA0| ';
|
||||
|
||||
$dynamic[ '/\b(\d+)x(\d+)\b/' ] = '$1×$2'; // 9x9 (times)
|
||||
|
||||
// Pattern-based replacements of characters.
|
||||
$dynamic = array();
|
||||
|
||||
// '99 '99s '99's (apostrophe)
|
||||
if ( "'" != $apos )
|
||||
$dynamic[ '/\'(?=\d)/' ] = $apos;
|
||||
|
||||
// Single quote at start, or preceded by (, {, <, [, ", or spaces.
|
||||
if ( "'" != $opening_single_quote )
|
||||
$dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote;
|
||||
|
||||
// 9" (double prime)
|
||||
if ( '"' != $double_prime )
|
||||
$dynamic[ '/(?<=\d)"/' ] = $double_prime;
|
||||
|
||||
// 9' (prime)
|
||||
if ( "'" != $prime )
|
||||
$dynamic[ '/(?<=\d)\'/' ] = $prime;
|
||||
|
||||
// Apostrophe in a word. No spaces or double primes.
|
||||
if ( "'" != $apos )
|
||||
$dynamic[ '/(?<!' . $spaces . ')\'(?!\'|' . $spaces . ')/' ] = $apos;
|
||||
|
||||
// Double quote at start, or preceded by (, {, <, [, or spaces, and not followed by spaces.
|
||||
if ( '"' != $opening_quote )
|
||||
$dynamic[ '/(?<=\A|[([{<]|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
|
||||
|
||||
// Any remaining double quotes.
|
||||
if ( '"' != $closing_quote )
|
||||
$dynamic[ '/"/' ] = $closing_quote;
|
||||
|
||||
// Single quotes followed by spaces, a period, or the end of the string.
|
||||
if ( "'" != $closing_single_quote )
|
||||
$dynamic[ '/\'(?=\Z|\.|' . $spaces . ')/' ] = $closing_single_quote;
|
||||
|
||||
$dynamic_characters = array_keys( $dynamic );
|
||||
$dynamic_replacements = array_values( $dynamic );
|
||||
@ -134,11 +157,21 @@ function wptexturize($text) {
|
||||
} elseif ( '[' === $first ) {
|
||||
_wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
|
||||
} elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
|
||||
|
||||
// This is neither a tag nor inside a no-texturize tag or shortcode, so apply the static string replacements.
|
||||
$curl = str_replace($static_characters, $static_replacements, $curl);
|
||||
|
||||
// regular expressions
|
||||
$curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
|
||||
|
||||
// 9x9 (times)
|
||||
if ( 1 === preg_match( '/(?<=\d)x\d/', $text ) ) {
|
||||
// Searching for a digit is 10 times more expensive than searching for the x, so the replacement below only runs when the cheaper check above has matched.
|
||||
$curl = preg_replace( '/\b(\d+)x(\d+)\b/', '$1×$2', $curl );
|
||||
}
|
||||
}
|
||||
|
||||
// Replace each & with &#038; unless it already looks like an entity.
|
||||
$curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&$1', $curl);
|
||||
}
|
||||
return implode( '', $textarr );
|
||||
|
Loading…
Reference in New Issue
Block a user