Optimize the `wptexturize()` loop:

* Take the ampersand pattern out of the loop for speed.
* Fix old bugs in the ampersand pattern.
* Refactor `_wptexturize_pushpop_element()` without PCRE for speed.
* Update unit tests.

Props miqrogroove.
Fixes #28623.

Built from https://develop.svn.wordpress.org/trunk@28831


git-svn-id: http://core.svn.wordpress.org/trunk@28635 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Scott Taylor 2014-06-25 17:49:14 +00:00
parent f907f0c57b
commit 8c39368644
1 changed file with 49 additions and 37 deletions

View File

@ -178,7 +178,7 @@ function wptexturize($text, $reset = false) {
*
* @param array $default_no_texturize_tags An array of HTML element names.
*/
$no_texturize_tags = '(' . implode( '|', apply_filters( 'no_texturize_tags', $default_no_texturize_tags ) ) . ')';
$no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags );
/**
* Filter the list of shortcodes not to texturize.
*
@ -186,7 +186,7 @@ function wptexturize($text, $reset = false) {
*
* @param array $default_no_texturize_shortcodes An array of shortcode names.
*/
$no_texturize_shortcodes = '(' . implode( '|', apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes ) ) . ')';
$no_texturize_shortcodes = apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes );
$no_texturize_tags_stack = array();
$no_texturize_shortcodes_stack = array();
@ -206,7 +206,7 @@ function wptexturize($text, $reset = false) {
. '(?:'
. '[^\[\]<>]' // Shortcodes do not contain other shortcodes.
. '|'
. '<.+?>' // HTML elements permitted. Prevents matching ] before >.
. '<.+?>' // HTML elements permitted. Prevents matching ] before >.
. ')+'
. '\]' // Find end of shortcode.
. '\]?' // Shortcodes may end with ]]
@ -221,13 +221,13 @@ function wptexturize($text, $reset = false) {
// This is an HTML delimiter.
if ( '<!--' !== substr( $curl, 0, 4 ) ) {
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>' );
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
}
} elseif ( '[' === $first && 1 === preg_match( '/^\[(?:[^\[\]<>]|<.+?>)+\]$/', $curl ) ) {
// This is a shortcode delimiter.
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']' );
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<.+?>)+\]\]?$/', $curl ) ) {
// This is an escaped shortcode delimiter.
@ -235,11 +235,11 @@ function wptexturize($text, $reset = false) {
// Do not texturize.
// Do not push to the shortcodes stack.
} elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
} elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.
$curl = str_replace($static_characters, $static_replacements, $curl);
$curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
$curl = str_replace( $static_characters, $static_replacements, $curl );
$curl = preg_replace( $dynamic_characters, $dynamic_replacements, $curl );
// 9x9 (times), but never 0x9999
if ( 1 === preg_match( '/(?<=\d)x-?\d/', $curl ) ) {
@ -247,31 +247,54 @@ function wptexturize($text, $reset = false) {
$curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(-?\d[\d\.,]*)\b/', '$1&#215;$2', $curl );
}
}
// Replace each & with &#038; unless it already looks like an entity.
$curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
}
return implode( '', $textarr );
$text = implode( '', $textarr );
// Replace each & with &#038; unless it already looks like an entity.
$text = preg_replace('/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&#038;', $text);
return $text;
}
/**
* Search for disabled element tags. Push element to stack on tag open and pop
* on tag close. Assumes first character of $text is tag opening.
* on tag close.
*
* Assumes first char of $text is tag opening and last char is tag closing.
* Assumes second char of $text is optionally '/' to indicate closing as in </html>.
*
* @since 2.9.0
* @access private
*
* @param string $text Text to check. First character is assumed to be $opening
* @param array $stack Array used as stack of opened tag elements
* @param string $disabled_elements Tags to match against formatted as regexp sub-expression
* @param string $opening Tag opening character, assumed to be 1 character long
* @param string $closing Tag closing character
* @param string $text Text to check. Must be a tag like <html> or [shortcode].
* @param array $stack List of open tag elements.
* @param array $disabled_elements The tag names to match against. Spaces are not allowed in tag names.
*/
function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') {
// Check if it is a closing tag -- otherwise assume opening tag
if (strncmp($opening . '/', $text, 2)) {
// Opening? Check $text+1 against disabled elements
if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) {
function _wptexturize_pushpop_element($text, &$stack, $disabled_elements) {
// Is it an opening tag or closing tag?
if ( '/' !== $text[1] ) {
$opening_tag = true;
$name_offset = 1;
} elseif ( 0 == count( $stack ) ) {
// Stack is empty. Just stop.
return;
} else {
$opening_tag = false;
$name_offset = 2;
}
// Parse out the tag name.
$space = strpos( $text, ' ' );
if ( FALSE === $space ) {
$space = -1;
} else {
$space -= $name_offset;
}
$tag = substr( $text, $name_offset, $space );
// Handle disabled tags.
if ( in_array( $tag, $disabled_elements ) ) {
if ( $opening_tag ) {
/*
* This disables texturize until we find a closing tag of our type
* (e.g. <pre>) even if there was invalid nesting before that
@ -280,20 +303,9 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $openi
* "baba" won't be texturized
*/
array_push($stack, $matches[1]);
}
} elseif ( 0 == count( $stack ) ) {
// Stack is empty. Just stop.
} else {
// Closing? Check $text+2 against disabled elements
$c = preg_quote($closing, '/');
if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) {
$last = array_pop($stack);
// Make sure it matches the opening tag
if ( $last != $matches[1] ) {
array_push( $stack, $last );
}
array_push( $stack, $tag );
} elseif ( end( $stack ) == $tag ) {
array_pop( $stack );
}
}
}