Improve wptexturize performance. Fixes #10987 props johanee.

git-svn-id: http://svn.automattic.com/wordpress/trunk@12084 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
westi 2009-10-21 21:57:27 +00:00
parent b5f93c8431
commit c29de44907
1 changed files with 79 additions and 33 deletions

View File

@ -28,48 +28,67 @@
*/
function wptexturize($text) {
global $wp_cockneyreplace;
static $static_setup = false, $opening_quote, $closing_quote, $default_no_texturize_tags, $default_no_texturize_shortcodes, $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements;
$output = '';
$curl = '';
$textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
$stop = count($textarr);
// No need to set up these variables more than once
if (!$static_setup) {
/* translators: opening curly quote */
$opening_quote = _x('&#8220;', 'opening curly quote');
/* translators: closing curly quote */
$closing_quote = _x('&#8221;', 'closing curly quote');
/* translators: opening curly quote */
$opening_quote = _x('&#8220;', 'opening curly quote');
/* translators: closing curly quote */
$closing_quote = _x('&#8221;', 'closing curly quote');
$default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
$default_no_texturize_shortcodes = array('code');
$no_texturize_tags = apply_filters('no_texturize_tags', array('pre', 'code', 'kbd', 'style', 'script', 'tt'));
$no_texturize_shortcodes = apply_filters('no_texturize_shortcodes', array('code'));
$no_texturize_tags_stack = array();
$no_texturize_shortcodes_stack = array();
// if a plugin has provided an autocorrect array, use it
if ( isset($wp_cockneyreplace) ) {
$cockney = array_keys($wp_cockneyreplace);
$cockneyreplace = array_values($wp_cockneyreplace);
} else {
$cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause");
$cockneyreplace = array("&#8217;tain&#8217;t","&#8217;twere","&#8217;twas","&#8217;tis","&#8217;twill","&#8217;til","&#8217;bout","&#8217;nuff","&#8217;round","&#8217;cause");
}
// if a plugin has provided an autocorrect array, use it
if ( isset($wp_cockneyreplace) ) {
$cockney = array_keys($wp_cockneyreplace);
$cockneyreplace = array_values($wp_cockneyreplace);
} else {
$cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause");
$cockneyreplace = array("&#8217;tain&#8217;t","&#8217;twere","&#8217;twas","&#8217;tis","&#8217;twill","&#8217;til","&#8217;bout","&#8217;nuff","&#8217;round","&#8217;cause");
$static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn&#8211;', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney);
$static_replacements = array_merge(array('&#8212;', ' &#8212; ', '&#8211;', ' &#8211; ', 'xn--', '&#8230;', $opening_quote, '&#8217;s', $closing_quote, ' &#8482;'), $cockneyreplace);
$dynamic_characters = array('/\'(\d\d(?:&#8217;|\')?s)/', '/(\s|\A|[([{<]|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/');
$dynamic_replacements = array('&#8217;$1','$1&#8216;', '$1&#8243;', '$1&#8242;', '$1&#8217;$2', '$1' . $opening_quote . '$2', $closing_quote . '$1', '&#8217;$1', '$1&#215;$2');
$static_setup = true;
}
$static_characters = array_merge(array('---', ' -- ', '--', ' - ', 'xn&#8211;', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney);
$static_replacements = array_merge(array('&#8212;', ' &#8212; ', '&#8211;', ' &#8211; ', 'xn--', '&#8230;', $opening_quote, '&#8217;s', $closing_quote, ' &#8482;'), $cockneyreplace);
// Transform into regexp sub-expression used in _wptexturize_pushpop_element
// Must do this every time in case plugins use these filters in a context-sensitive manner
$no_texturize_tags = '(' . implode('|', apply_filters('no_texturize_tags', $default_no_texturize_tags) ) . ')';
$no_texturize_shortcodes = '(' . implode('|', apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes) ) . ')';
$dynamic_characters = array('/\'(\d\d(?:&#8217;|\')?s)/', '/(\s|\A|[([{<]|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A|[([{<])"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/');
$dynamic_replacements = array('&#8217;$1','$1&#8216;', '$1&#8243;', '$1&#8242;', '$1&#8217;$2', '$1' . $opening_quote . '$2', $closing_quote . '$1', '&#8217;$1', '$1&#215;$2');
$no_texturize_tags_stack = array();
$no_texturize_shortcodes_stack = array();
for ( $i = 0; $i < $stop; $i++ ) {
$curl = $textarr[$i];
if ( !empty($curl) && '<' != $curl{0} && '[' != $curl{0}
&& empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) { // If it's not a tag
&& empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack)) {
// This is not a tag, nor is the texturization disabled
// static strings
$curl = str_replace($static_characters, $static_replacements, $curl);
// regular expressions
$curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
} else {
wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
} elseif (!empty($curl)) {
/*
* Only call _wptexturize_pushpop_element if first char is correct
* tag opening
*/
if ('<' == $curl{0})
_wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
elseif ('[' == $curl{0})
_wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
}
$curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
@ -79,17 +98,44 @@ function wptexturize($text) {
return $output;
}
function wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') {
$o = preg_quote($opening, '/');
$c = preg_quote($closing, '/');
foreach($disabled_elements as $element) {
if (preg_match('/^'.$o.$element.'\b/', $text)) array_push($stack, $element);
if (preg_match('/^'.$o.'\/'.$element.$c.'/', $text)) {
/**
* Search for disabled element tags. Push element to stack on tag open and pop
* on tag close. Assumes first character of $text is tag opening.
*
* @access private
* @since 2.9.0
*
* @param string $text Text to check. First character is assumed to be $opening
* @param array $stack Array used as stack of opened tag elements
* @param string $disabled_elements Tags to match against formatted as regexp sub-expression
* @param string $opening Tag opening character, assumed to be 1 character long
* @param string $closing Tag closing character
* @return void Nothing is returned; $stack is modified by reference
*/
function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') {
// Check if it is a closing tag -- otherwise assume opening tag
if (strncmp($opening . '/', $text, 2)) {
// Opening? Check $text+1 against disabled elements
if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) {
/*
* This disables texturize until we find a closing tag of our type
* (e.g. <pre>) even if there was invalid nesting before that
*
* Example: in the case <pre>sadsadasd</code>"baba"</pre>
* "baba" won't be texturized
*/
array_push($stack, $matches[1]);
}
} else {
// Closing? Check $text+2 against disabled elements
$c = preg_quote($closing, '/');
if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) {
$last = array_pop($stack);
// disable texturize until we find a closing tag of our type (e.g. <pre>)
// even if there was invalid nesting before that
// Example: in the case <pre>sadsadasd</code>"baba"</pre> "baba" won't be texturized
if ($last != $element) array_push($stack, $last);
// Make sure it matches the opening tag
if ($last != $matches[1])
array_push($stack, $last);
}
}
}