mirror of
https://github.com/WordPress/WordPress.git
synced 2024-12-22 17:18:32 +01:00
Shortcodes/Formatting: Add PCRE Performance Testing
* Move pattern from `wptexturize()` into a separate function.
* Move pattern from `wp_html_split()` into a separate function.
* Beautify code for `wp_html_split()`.
* Remove unnecessary instances of `/s` modifier in patterns that don't use dots.
* Add `tests/phpunit/data/formatting/whole-posts.php` for testing larger strings.
* Add function `benchmark_pcre_backtracking()`.
* Add tests for `wp_html_split()`.
* Add tests for `wptexturize()`.
* Add tests for `get_shortcode_regex()`.

Props miqrogroove.
Fixes #34121.

Built from https://develop.svn.wordpress.org/trunk@34761

git-svn-id: http://core.svn.wordpress.org/trunk@34726 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
da1c938a5c
commit
7a0f8602f3
@ -219,43 +219,8 @@ function wptexturize( $text, $reset = false ) {
|
|||||||
preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches );
|
preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20]++)@', $text, $matches );
|
||||||
$tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
|
$tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] );
|
||||||
$found_shortcodes = ! empty( $tagnames );
|
$found_shortcodes = ! empty( $tagnames );
|
||||||
if ( $found_shortcodes ) {
|
$shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : '';
|
||||||
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
|
$regex = _get_wptexturize_split_regex( $shortcode_regex );
|
||||||
$tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
|
|
||||||
$shortcode_regex =
|
|
||||||
'\[' // Find start of shortcode.
|
|
||||||
. '[\/\[]?' // Shortcodes may begin with [/ or [[
|
|
||||||
. $tagregexp // Only match registered shortcodes, because performance.
|
|
||||||
. '(?:'
|
|
||||||
. '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical.
|
|
||||||
. '|'
|
|
||||||
. '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
|
|
||||||
. ')*+' // Possessive critical.
|
|
||||||
. '\]' // Find end of shortcode.
|
|
||||||
. '\]?'; // Shortcodes may end with ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
$comment_regex =
|
|
||||||
'!' // Start of comment, after the <.
|
|
||||||
. '(?:' // Unroll the loop: Consume everything until --> is found.
|
|
||||||
. '-(?!->)' // Dash not followed by end of comment.
|
|
||||||
. '[^\-]*+' // Consume non-dashes.
|
|
||||||
. ')*+' // Loop possessively.
|
|
||||||
. '(?:-->)?'; // End of comment. If not found, match all input.
|
|
||||||
|
|
||||||
$html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap.
|
|
||||||
'<' // Find start of element.
|
|
||||||
. '(?(?=!--)' // Is this a comment?
|
|
||||||
. $comment_regex // Find end of comment.
|
|
||||||
. '|'
|
|
||||||
. '[^>]*>?' // Find end of element. If not found, match all input.
|
|
||||||
. ')';
|
|
||||||
|
|
||||||
if ( $found_shortcodes ) {
|
|
||||||
$regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/s';
|
|
||||||
} else {
|
|
||||||
$regex = '/(' . $html_regex . ')/s';
|
|
||||||
}
|
|
||||||
|
|
||||||
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
||||||
|
|
||||||
@ -264,7 +229,7 @@ function wptexturize( $text, $reset = false ) {
|
|||||||
$first = $curl[0];
|
$first = $curl[0];
|
||||||
if ( '<' === $first ) {
|
if ( '<' === $first ) {
|
||||||
if ( '<!--' === substr( $curl, 0, 4 ) ) {
|
if ( '<!--' === substr( $curl, 0, 4 ) ) {
|
||||||
// This is an HTML comment delimeter.
|
// This is an HTML comment delimiter.
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
// This is an HTML element delimiter.
|
// This is an HTML element delimiter.
|
||||||
@ -615,6 +580,17 @@ function wpautop( $pee, $br = true ) {
|
|||||||
* @return array The formatted text.
|
* @return array The formatted text.
|
||||||
*/
|
*/
|
||||||
function wp_html_split( $input ) {
|
function wp_html_split( $input ) {
|
||||||
|
return preg_split( get_html_split_regex(), $input, -1, PREG_SPLIT_DELIM_CAPTURE );
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the regular expression for an HTML element.
|
||||||
|
*
|
||||||
|
* @since 4.4.0
|
||||||
|
*
|
||||||
|
* @return string The regular expression
|
||||||
|
*/
|
||||||
|
function get_html_split_regex() {
|
||||||
static $regex;
|
static $regex;
|
||||||
|
|
||||||
if ( ! isset( $regex ) ) {
|
if ( ! isset( $regex ) ) {
|
||||||
@ -635,22 +611,100 @@ function wp_html_split( $input ) {
|
|||||||
. ')*+' // Loop possessively.
|
. ')*+' // Loop possessively.
|
||||||
. '(?:]]>)?'; // End of comment. If not found, match all input.
|
. '(?:]]>)?'; // End of comment. If not found, match all input.
|
||||||
|
|
||||||
|
$escaped =
|
||||||
|
'(?=' // Is the element escaped?
|
||||||
|
. '!--'
|
||||||
|
. '|'
|
||||||
|
. '!\[CDATA\['
|
||||||
|
. ')'
|
||||||
|
. '(?(?=!-)' // If yes, which type?
|
||||||
|
. $comments
|
||||||
|
. '|'
|
||||||
|
. $cdata
|
||||||
|
. ')';
|
||||||
|
|
||||||
$regex =
|
$regex =
|
||||||
'/(' // Capture the entire match.
|
'/(' // Capture the entire match.
|
||||||
. '<' // Find start of element.
|
. '<' // Find start of element.
|
||||||
. '(?(?=!--)' // Is this a comment?
|
. '(?' // Conditional expression follows.
|
||||||
. $comments // Find end of comment.
|
. $escaped // Find end of escaped element.
|
||||||
. '|'
|
. '|' // ... else ...
|
||||||
. '(?(?=!\[CDATA\[)' // Is this a comment?
|
. '[^>]*>?' // Find end of normal element.
|
||||||
. $cdata // Find end of comment.
|
|
||||||
. '|'
|
|
||||||
. '[^>]*>?' // Find end of element. If not found, match all input.
|
|
||||||
. ')'
|
|
||||||
. ')'
|
. ')'
|
||||||
. ')/s';
|
. ')/';
|
||||||
}
|
}
|
||||||
|
|
||||||
return preg_split( $regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE );
|
return $regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the combined regular expression for HTML and shortcodes.
|
||||||
|
*
|
||||||
|
* @access private
|
||||||
|
* @ignore
|
||||||
|
* @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
|
||||||
|
* @since 4.4.0
|
||||||
|
*
|
||||||
|
* @param string $shortcode_regex The result from _get_wptexturize_shortcode_regex(). Optional.
|
||||||
|
* @return string The regular expression
|
||||||
|
*/
|
||||||
|
function _get_wptexturize_split_regex( $shortcode_regex = '' ) {
|
||||||
|
static $html_regex;
|
||||||
|
|
||||||
|
if ( ! isset( $html_regex ) ) {
|
||||||
|
$comment_regex =
|
||||||
|
'!' // Start of comment, after the <.
|
||||||
|
. '(?:' // Unroll the loop: Consume everything until --> is found.
|
||||||
|
. '-(?!->)' // Dash not followed by end of comment.
|
||||||
|
. '[^\-]*+' // Consume non-dashes.
|
||||||
|
. ')*+' // Loop possessively.
|
||||||
|
. '(?:-->)?'; // End of comment. If not found, match all input.
|
||||||
|
|
||||||
|
$html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap.
|
||||||
|
'<' // Find start of element.
|
||||||
|
. '(?(?=!--)' // Is this a comment?
|
||||||
|
. $comment_regex // Find end of comment.
|
||||||
|
. '|'
|
||||||
|
. '[^>]*>?' // Find end of element. If not found, match all input.
|
||||||
|
. ')';
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( empty( $shortcode_regex ) ) {
|
||||||
|
$regex = '/(' . $html_regex . ')/';
|
||||||
|
} else {
|
||||||
|
$regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/';
|
||||||
|
}
|
||||||
|
|
||||||
|
return $regex;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the regular expression for shortcodes.
|
||||||
|
*
|
||||||
|
* @access private
|
||||||
|
* @ignore
|
||||||
|
* @internal This function will be removed in 4.5.0 per Shortcode API Roadmap.
|
||||||
|
* @since 4.4.0
|
||||||
|
*
|
||||||
|
* @param array $tagnames List of shortcodes to find.
|
||||||
|
* @return string The regular expression
|
||||||
|
*/
|
||||||
|
function _get_wptexturize_shortcode_regex( $tagnames ) {
|
||||||
|
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
|
||||||
|
$tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex().
|
||||||
|
$regex =
|
||||||
|
'\[' // Find start of shortcode.
|
||||||
|
. '[\/\[]?' // Shortcodes may begin with [/ or [[
|
||||||
|
. $tagregexp // Only match registered shortcodes, because performance.
|
||||||
|
. '(?:'
|
||||||
|
. '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical.
|
||||||
|
. '|'
|
||||||
|
. '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >.
|
||||||
|
. ')*+' // Possessive critical.
|
||||||
|
. '\]' // Find end of shortcode.
|
||||||
|
. '\]?'; // Shortcodes may end with ]]
|
||||||
|
|
||||||
|
return $regex;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -768,7 +822,7 @@ function shortcode_unautop( $pee ) {
|
|||||||
. ')'
|
. ')'
|
||||||
. '(?:' . $spaces . ')*+' // optional trailing whitespace
|
. '(?:' . $spaces . ')*+' // optional trailing whitespace
|
||||||
. '<\\/p>' // closing paragraph
|
. '<\\/p>' // closing paragraph
|
||||||
. '/s';
|
. '/';
|
||||||
|
|
||||||
return preg_replace( $pattern, '$1', $pee );
|
return preg_replace( $pattern, '$1', $pee );
|
||||||
}
|
}
|
||||||
|
@ -168,7 +168,7 @@ function has_shortcode( $content, $tag ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ( shortcode_exists( $tag ) ) {
|
if ( shortcode_exists( $tag ) ) {
|
||||||
preg_match_all( '/' . get_shortcode_regex() . '/s', $content, $matches, PREG_SET_ORDER );
|
preg_match_all( '/' . get_shortcode_regex() . '/', $content, $matches, PREG_SET_ORDER );
|
||||||
if ( empty( $matches ) )
|
if ( empty( $matches ) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -219,7 +219,7 @@ function do_shortcode( $content, $ignore_html = false ) {
|
|||||||
$content = do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames );
|
$content = do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames );
|
||||||
|
|
||||||
$pattern = get_shortcode_regex( $tagnames );
|
$pattern = get_shortcode_regex( $tagnames );
|
||||||
$content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
|
$content = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $content );
|
||||||
|
|
||||||
// Always restore square braces so we don't break things like <!--[if IE ]>
|
// Always restore square braces so we don't break things like <!--[if IE ]>
|
||||||
$content = unescape_invalid_shortcodes( $content );
|
$content = unescape_invalid_shortcodes( $content );
|
||||||
@ -378,7 +378,7 @@ function do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames ) {
|
|||||||
if ( false === $attributes ) {
|
if ( false === $attributes ) {
|
||||||
// Some plugins are doing things like [name] <[email]>.
|
// Some plugins are doing things like [name] <[email]>.
|
||||||
if ( 1 === preg_match( '%^<\s*\[\[?[^\[\]]+\]%', $element ) ) {
|
if ( 1 === preg_match( '%^<\s*\[\[?[^\[\]]+\]%', $element ) ) {
|
||||||
$element = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $element );
|
$element = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $element );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Looks like we found some crazy unfiltered HTML. Skipping it for sanity.
|
// Looks like we found some crazy unfiltered HTML. Skipping it for sanity.
|
||||||
@ -407,12 +407,12 @@ function do_shortcodes_in_html_tags( $content, $ignore_html, $tagnames ) {
|
|||||||
// In this specific situation we assume KSES did not run because the input
|
// In this specific situation we assume KSES did not run because the input
|
||||||
// was written by an administrator, so we should avoid changing the output
|
// was written by an administrator, so we should avoid changing the output
|
||||||
// and we do not need to run KSES here.
|
// and we do not need to run KSES here.
|
||||||
$attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
|
$attr = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $attr );
|
||||||
} else {
|
} else {
|
||||||
// $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
|
// $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
|
||||||
// We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
|
// We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
|
||||||
$count = 0;
|
$count = 0;
|
||||||
$new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
|
$new_attr = preg_replace_callback( "/$pattern/", 'do_shortcode_tag', $attr, -1, $count );
|
||||||
if ( $count > 0 ) {
|
if ( $count > 0 ) {
|
||||||
// Sanitize the shortcode output using KSES.
|
// Sanitize the shortcode output using KSES.
|
||||||
$new_attr = wp_kses_one_attr( $new_attr, $elname );
|
$new_attr = wp_kses_one_attr( $new_attr, $elname );
|
||||||
@ -572,7 +572,7 @@ function strip_shortcodes( $content ) {
|
|||||||
$content = do_shortcodes_in_html_tags( $content, true, $tagnames );
|
$content = do_shortcodes_in_html_tags( $content, true, $tagnames );
|
||||||
|
|
||||||
$pattern = get_shortcode_regex( $tagnames );
|
$pattern = get_shortcode_regex( $tagnames );
|
||||||
$content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
|
$content = preg_replace_callback( "/$pattern/", 'strip_shortcode_tag', $content );
|
||||||
|
|
||||||
// Always restore square braces so we don't break things like <!--[if IE ]>
|
// Always restore square braces so we don't break things like <!--[if IE ]>
|
||||||
$content = unescape_invalid_shortcodes( $content );
|
$content = unescape_invalid_shortcodes( $content );
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*
|
*
|
||||||
* @global string $wp_version
|
* @global string $wp_version
|
||||||
*/
|
*/
|
||||||
$wp_version = '4.4-alpha-34760';
|
$wp_version = '4.4-alpha-34761';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||||
|
Loading…
Reference in New Issue
Block a user