Optimize the wptexturize() loop:

* Take the ampersand pattern out of the loop for speed.
* Fix old bugs in the ampersand pattern.
* Refactor `_wptexturize_pushpop_element()` without PCRE for speed.
* Update unit tests.

Props miqrogroove.
Fixes #28623.

Built from https://develop.svn.wordpress.org/trunk@28831


git-svn-id: http://core.svn.wordpress.org/trunk@28635 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Scott Taylor 2014-06-25 17:49:14 +00:00
parent f907f0c57b
commit 8c39368644

View File

@ -178,7 +178,7 @@ function wptexturize($text, $reset = false) {
* *
* @param array $default_no_texturize_tags An array of HTML element names. * @param array $default_no_texturize_tags An array of HTML element names.
*/ */
$no_texturize_tags = '(' . implode( '|', apply_filters( 'no_texturize_tags', $default_no_texturize_tags ) ) . ')'; $no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags );
/** /**
* Filter the list of shortcodes not to texturize. * Filter the list of shortcodes not to texturize.
* *
@ -186,7 +186,7 @@ function wptexturize($text, $reset = false) {
* *
* @param array $default_no_texturize_shortcodes An array of shortcode names. * @param array $default_no_texturize_shortcodes An array of shortcode names.
*/ */
$no_texturize_shortcodes = '(' . implode( '|', apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes ) ) . ')'; $no_texturize_shortcodes = apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes );
$no_texturize_tags_stack = array(); $no_texturize_tags_stack = array();
$no_texturize_shortcodes_stack = array(); $no_texturize_shortcodes_stack = array();
@ -206,7 +206,7 @@ function wptexturize($text, $reset = false) {
. '(?:' . '(?:'
. '[^\[\]<>]' // Shortcodes do not contain other shortcodes. . '[^\[\]<>]' // Shortcodes do not contain other shortcodes.
. '|' . '|'
. '<.+?>' // HTML elements permitted. Prevents matching ] before >. . '<.+?>' // HTML elements permitted. Prevents matching ] before >.
. ')+' . ')+'
. '\]' // Find end of shortcode. . '\]' // Find end of shortcode.
. '\]?' // Shortcodes may end with ]] . '\]?' // Shortcodes may end with ]]
@ -221,13 +221,13 @@ function wptexturize($text, $reset = false) {
// This is an HTML delimiter. // This is an HTML delimiter.
if ( '<!--' !== substr( $curl, 0, 4 ) ) { if ( '<!--' !== substr( $curl, 0, 4 ) ) {
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>' ); _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
} }
} elseif ( '[' === $first && 1 === preg_match( '/^\[(?:[^\[\]<>]|<.+?>)+\]$/', $curl ) ) { } elseif ( '[' === $first && 1 === preg_match( '/^\[(?:[^\[\]<>]|<.+?>)+\]$/', $curl ) ) {
// This is a shortcode delimiter. // This is a shortcode delimiter.
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']' ); _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<.+?>)+\]\]?$/', $curl ) ) { } elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<.+?>)+\]\]?$/', $curl ) ) {
// This is an escaped shortcode delimiter. // This is an escaped shortcode delimiter.
@ -235,11 +235,11 @@ function wptexturize($text, $reset = false) {
// Do not texturize. // Do not texturize.
// Do not push to the shortcodes stack. // Do not push to the shortcodes stack.
} elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) { } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.
$curl = str_replace($static_characters, $static_replacements, $curl); $curl = str_replace( $static_characters, $static_replacements, $curl );
$curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl); $curl = preg_replace( $dynamic_characters, $dynamic_replacements, $curl );
// 9x9 (times), but never 0x9999 // 9x9 (times), but never 0x9999
if ( 1 === preg_match( '/(?<=\d)x-?\d/', $curl ) ) { if ( 1 === preg_match( '/(?<=\d)x-?\d/', $curl ) ) {
@ -247,31 +247,54 @@ function wptexturize($text, $reset = false) {
$curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(-?\d[\d\.,]*)\b/', '$1&#215;$2', $curl ); $curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(-?\d[\d\.,]*)\b/', '$1&#215;$2', $curl );
} }
} }
// Replace each & with &#038; unless it already looks like an entity.
$curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
} }
return implode( '', $textarr ); $text = implode( '', $textarr );
// Replace each & with &#038; unless it already looks like an entity.
$text = preg_replace('/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $text);
return $text;
} }
/** /**
* Search for disabled element tags. Push element to stack on tag open and pop * Search for disabled element tags. Push element to stack on tag open and pop
* on tag close. Assumes first character of $text is tag opening. * on tag close.
*
* Assumes first char of $text is tag opening and last char is tag closing.
* Assumes second char of $text is optionally '/' to indicate closing as in </html>.
* *
* @since 2.9.0 * @since 2.9.0
* @access private * @access private
* *
* @param string $text Text to check. First character is assumed to be $opening * @param string $text Text to check. Must be a tag like <html> or [shortcode].
* @param array $stack Array used as stack of opened tag elements * @param array $stack List of open tag elements.
* @param string $disabled_elements Tags to match against formatted as regexp sub-expression * @param array $disabled_elements The tag names to match against. Spaces are not allowed in tag names.
* @param string $opening Tag opening character, assumed to be 1 character long
* @param string $closing Tag closing character
*/ */
function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') { function _wptexturize_pushpop_element($text, &$stack, $disabled_elements) {
// Check if it is a closing tag -- otherwise assume opening tag // Is it an opening tag or closing tag?
if (strncmp($opening . '/', $text, 2)) { if ( '/' !== $text[1] ) {
// Opening? Check $text+1 against disabled elements $opening_tag = true;
if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) { $name_offset = 1;
} elseif ( 0 == count( $stack ) ) {
// Stack is empty. Just stop.
return;
} else {
$opening_tag = false;
$name_offset = 2;
}
// Parse out the tag name.
$space = strpos( $text, ' ' );
if ( FALSE === $space ) {
$space = -1;
} else {
$space -= $name_offset;
}
$tag = substr( $text, $name_offset, $space );
// Handle disabled tags.
if ( in_array( $tag, $disabled_elements ) ) {
if ( $opening_tag ) {
/* /*
* This disables texturize until we find a closing tag of our type * This disables texturize until we find a closing tag of our type
* (e.g. <pre>) even if there was invalid nesting before that * (e.g. <pre>) even if there was invalid nesting before that
@ -280,20 +303,9 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $openi
* "baba" won't be texturize * "baba" won't be texturize
*/ */
array_push($stack, $matches[1]); array_push( $stack, $tag );
} } elseif ( end( $stack ) == $tag ) {
} elseif ( 0 == count( $stack ) ) { array_pop( $stack );
// Stack is empty. Just stop.
} else {
// Closing? Check $text+2 against disabled elements
$c = preg_quote($closing, '/');
if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) {
$last = array_pop($stack);
// Make sure it matches the opening tag
if ( $last != $matches[1] ) {
array_push( $stack, $last );
}
} }
} }
} }