From 7fcf396c9e45b792bd122547431a55f7d277cb99 Mon Sep 17 00:00:00 2001 From: Bernhard Reiter Date: Wed, 13 Sep 2023 12:55:27 +0000 Subject: [PATCH] HTML API: Skip over contents of RAWTEXT elements such as STYLE. When encountering elements that imply switching into the RAWTEXT parsing state, the Tag Processor should skip processing until exiting the RAWTEXT state. In this patch the Tag Processor does just that, except for the case of the deprecated XMP element which implies further and more complicated rules. There's an implicit assumption that the SCRIPT ENABLED flag in HTML parsing is enabled so that the contents of NOSCRIPT can be skipped. Otherwise, it would be required to parse the contents of that tag. Props dmsnell. Merges [56563] to the 6.3 branch. Fixes #59292. Built from https://develop.svn.wordpress.org/branches/6.3@56564 git-svn-id: http://core.svn.wordpress.org/branches/6.3@56076 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- .../html-api/class-wp-html-tag-processor.php | 52 +++++++++++++++++-- wp-includes/version.php | 2 +- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php index b9214f2d4c..4ff4b8d124 100644 --- a/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/wp-includes/html-api/class-wp-html-tag-processor.php @@ -242,6 +242,8 @@ * unquoted values will appear in the output with double-quotes. * * @since 6.2.0 + * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive. + * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE. */ class WP_HTML_Tag_Processor { /** @@ -568,7 +570,14 @@ class WP_HTML_Tag_Processor { * of the tag name as a pre-check avoids a string allocation when it's not needed. */ $t = $this->html[ $this->tag_name_starts_at ]; - if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) { + if ( + ! $this->is_closing_tag && + ( + 'i' === $t || 'I' === $t || + 'n' === $t || 'N' === $t || + 's' === $t || 'S' === $t || + 't' === $t || 'T' === $t + ) ) { $tag_name = $this->get_tag(); if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { @@ -580,6 +589,25 @@ class WP_HTML_Tag_Processor { ) { $this->bytes_already_parsed = strlen( $this->html ); return false; + } elseif ( + ( + 'IFRAME' === $tag_name || + 'NOEMBED' === $tag_name || + 'NOFRAMES' === $tag_name || + 'NOSCRIPT' === $tag_name || + 'STYLE' === $tag_name + ) && + ! $this->skip_rawtext( $tag_name ) + ) { + /* + * "XMP" should be here too but its rules are more complicated and require the + * complexity of the HTML Processor (it needs to close out any open P element, + * meaning it can't be skipped here or else the HTML Processor will lose its + * place). For now, it can be ignored as it's a rare HTML tag in practice and + * any normative HTML should be using PRE instead. + */ + $this->bytes_already_parsed = strlen( $this->html ); + return false; } } } while ( $already_found < $this->sought_match_offset ); @@ -710,15 +738,33 @@ class WP_HTML_Tag_Processor { return true; } + /** + * Skips contents of generic rawtext elements. + * + * @since 6.3.2 + * + * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm + * + * @param string $tag_name The uppercase tag name which will close the RAWTEXT region. + * @return bool Whether an end to the RAWTEXT region was found before the end of the document. + */ + private function skip_rawtext( $tag_name ) { + /* + * These two functions distinguish themselves on whether character references are + * decoded, and since functionality to read the inner markup isn't supported, it's + * not necessary to implement these two functions separately. + */ + return $this->skip_rcdata( $tag_name ); + } /** - * Skips contents of title and textarea tags. + * Skips contents of RCDATA elements, namely title and textarea tags. * * @since 6.2.0 * * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state * - * @param string $tag_name The lowercase tag name which will close the RCDATA region. + * @param string $tag_name The uppercase tag name which will close the RCDATA region. * @return bool Whether an end to the RCDATA region was found before the end of the document. */ private function skip_rcdata( $tag_name ) { diff --git a/wp-includes/version.php b/wp-includes/version.php index 3a1d0091a5..9eb2bbc1e4 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.3.2-alpha-56539'; +$wp_version = '6.3.2-alpha-56564'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.