From c29de44907245279495b3b91f0ee1668406b28d8 Mon Sep 17 00:00:00 2001 From: westi Date: Wed, 21 Oct 2009 21:57:27 +0000 Subject: [PATCH] Improve wptexturize performance. Fixes #10987 props johanee. git-svn-id: http://svn.automattic.com/wordpress/trunk@12084 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/formatting.php | 112 ++++++++++++++++++++++++++----------- 1 file changed, 79 insertions(+), 33 deletions(-) diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 5ca9435fd1..b9eae6b5fc 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -28,48 +28,67 @@ */ function wptexturize($text) { global $wp_cockneyreplace; + static $static_setup = false, $opening_quote, $closing_quote, $default_no_texturize_tags, $default_no_texturize_shortcodes, $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements; $output = ''; $curl = ''; $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); $stop = count($textarr); + + // No need to setup these variables more than once + if (!$static_setup) { + /* translators: opening curly quote */ + $opening_quote = _x('“', 'opening curly quote'); + /* translators: closing curly quote */ + $closing_quote = _x('”', 'closing curly quote'); - /* translators: opening curly quote */ - $opening_quote = _x('“', 'opening curly quote'); - /* translators: closing curly quote */ - $closing_quote = _x('”', 'closing curly quote'); + $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); + $default_no_texturize_shortcodes = array('code'); - $no_texturize_tags = apply_filters('no_texturize_tags', array('pre', 'code', 'kbd', 'style', 'script', 'tt')); - $no_texturize_shortcodes = apply_filters('no_texturize_shortcodes', array('code')); - $no_texturize_tags_stack = array(); - $no_texturize_shortcodes_stack = array(); + // if a plugin has provided an autocorrect array, use it + if ( isset($wp_cockneyreplace) ) { + $cockney = 
array_keys($wp_cockneyreplace); + $cockneyreplace = array_values($wp_cockneyreplace); + } else { + $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause"); + $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause"); + } - // if a plugin has provided an autocorrect array, use it - if ( isset($wp_cockneyreplace) ) { - $cockney = array_keys($wp_cockneyreplace); - $cockneyreplace = array_values($wp_cockneyreplace); - } else { - $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause"); - $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round","’cause"); + $static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney); + $static_replacements = array_merge(array('—', ' — ', '–', ' – ', 'xn--', '…', $opening_quote, '’s', $closing_quote, ' ™'), $cockneyreplace); + + $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/(\s|\A|[([{<]|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/'); + $dynamic_replacements = array('’$1','$1‘', '$1″', '$1′', '$1’$2', '$1' . $opening_quote . '$2', $closing_quote . '$1', '’$1', '$1×$2'); + + $static_setup = true; } - $static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney); - $static_replacements = array_merge(array('—', ' — ', '–', ' – ', 'xn--', '…', $opening_quote, '’s', $closing_quote, ' ™'), $cockneyreplace); + // Transform into regexp sub-expression used in _wptexturize_pushpop_element + // Must do this everytime in case plugins use these filters in a context sensitive manner + $no_texturize_tags = '(' . implode('|', apply_filters('no_texturize_tags', $default_no_texturize_tags) ) . ')'; + $no_texturize_shortcodes = '(' . 
implode('|', apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes) ) . ')'; - $dynamic_characters = array('/\'(\d\d(?:’|\')?s)/', '/(\s|\A|[([{<]|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/'); - $dynamic_replacements = array('’$1','$1‘', '$1″', '$1′', '$1’$2', '$1' . $opening_quote . '$2', $closing_quote . '$1', '’$1', '$1×$2'); + $no_texturize_tags_stack = array(); + $no_texturize_shortcodes_stack = array(); for ( $i = 0; $i < $stop; $i++ ) { $curl = $textarr[$i]; if ( !empty($curl) && '<' != $curl{0} && '[' != $curl{0} - && empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) { // If it's not a tag + && empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) { + // This is not a tag, nor is the texturization disabled // static strings $curl = str_replace($static_characters, $static_replacements, $curl); // regular expressions $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl); - } else { - wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); - wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); + } elseif (!empty($curl)) { + /* + * Only call _wptexturize_pushpop_element if first char is correct + * tag opening + */ + if ('<' == $curl{0}) + _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); + elseif ('[' == $curl{0}) + _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); } $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&$1', $curl); @@ -79,17 +98,44 @@ function wptexturize($text) { return $output; } -function wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') { - $o = preg_quote($opening, '/'); - $c = preg_quote($closing, '/'); - foreach($disabled_elements as 
$element) { - if (preg_match('/^'.$o.$element.'\b/', $text)) array_push($stack, $element); - if (preg_match('/^'.$o.'\/'.$element.$c.'/', $text)) { +/** + * Search for disabled element tags. Push element to stack on tag open and pop + * on tag close. Assumes first character of $text is tag opening. + * + * @access private + * @since 2.9.0 + * + * @param string $text Text to check. First character is assumed to be $opening + * @param array $stack Array used as stack of opened tag elements + * @param string $disabled_elements Tags to match against formatted as regexp sub-expression + * @param string $opening Tag opening character, assumed to be 1 character long + * @param string $closing Tag closing character + * @return void + */ +function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') { + // Check if it is a closing tag -- otherwise assume opening tag + if (strncmp($opening . '/', $text, 2)) { + // Opening? Check $text+1 against disabled elements + if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) { + /* + * This disables texturize until we find a closing tag of our type + * (e.g.
<pre>) even if there was invalid nesting before that
+			 * 
+			 * Example: in the case <pre>sadsadasd</code>"baba"</pre>
+ * "baba" won't be texturized + */ + + array_push($stack, $matches[1]); + } + } else { + // Closing? Check $text+2 against disabled elements + $c = preg_quote($closing, '/'); + if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) { $last = array_pop($stack); - // disable texturize until we find a closing tag of our type (e.g.
<pre>)
-			// even if there was invalid nesting before that
-			// Example: in the case <pre>sadsadasd</code>"baba"</pre>
"baba" won't be texturized - if ($last != $element) array_push($stack, $last); + + // Make sure it matches the opening tag + if ($last != $matches[1]) + array_push($stack, $last); } } }