From e174cbef2e3b7f1901892cb501f683c6e122231b Mon Sep 17 00:00:00 2001 From: Bernhard Reiter Date: Wed, 27 Nov 2024 14:35:16 +0000 Subject: [PATCH] HTML API: Allow more contexts in `create_fragment`. This changeset modifies `WP_HTML_Processor::create_fragment( $html, $context )` to use a full processor and `create_fragment_at_node` instead of the other way around. This makes more sense and makes the main factory methods more clear, where the state required for fragments is set up in `create_fragment_at_node` instead of in both `create_fragment` and `create_fragment_at_current_node`. This allows for more HTML contexts to be provided to the basic `create_fragment` where the provided context HTML is appended to ``, a full processor is created, the last tag opener is found, and a fragment parser is created at that node via `create_fragment_at_current_node`. The HTML5lib tests are updated accordingly to use this new method to create fragments. Props jonsurrell, dmsnell, bernhard-reiter. Fixes #62584. Built from https://develop.svn.wordpress.org/trunk@59467 git-svn-id: http://core.svn.wordpress.org/trunk@58853 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- .../html-api/class-wp-html-processor.php | 126 ++++++++++++------ wp-includes/version.php | 2 +- 2 files changed, 88 insertions(+), 40 deletions(-) diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index 48f7d7fe8c..1be795c5c7 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -279,51 +279,62 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * form is provided because a context element may have attributes that * impact the parse, such as with a SCRIPT tag and its `type` attribute. * - * ## Current HTML Support + * Example: * - * - The only supported context is ``, which is the default value. - * - The only supported document encoding is `UTF-8`, which is the default value. + * // Usually, snippets of HTML ought to be processed in the default `` context: + * $processor = WP_HTML_Processor::create_fragment( '

Hi

' ); + * + * // Some fragments should be processed in the correct context like this SVG: + * $processor = WP_HTML_Processor::create_fragment( '', '' ); + * + * // This fragment with TD tags should be processed in a TR context: + * $processor = WP_HTML_Processor::create_fragment( + * '123', + * '' + * ); + * + * In order to create a fragment processor at the correct location, the + * provided fragment will be processed as part of a full HTML document. + * The processor will search for the last opener tag in the document and + * create a fragment processor at that location. The document will be + * forced into "no-quirks" mode by including the HTML5 doctype. + * + * For advanced usage and precise control over the context element, use + * `WP_HTML_Processor::create_full_processor()` and + * `WP_HTML_Processor::create_fragment_at_current_node()`. + * + * UTF-8 is the only allowed encoding. If working with a document that + * isn't UTF-8, first convert the document to UTF-8, then pass in the + * converted HTML. * * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. + * @since 6.8.0 Can create fragments with any context element. * * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment, must be default of ``. + * @param string $context Context element for the fragment. Defaults to ``. * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. * @return static|null The created processor if successful, otherwise null. */ public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { - if ( '' !== $context || 'UTF-8' !== $encoding ) { + $context_processor = static::create_full_parser( "{$context}", $encoding ); + if ( null === $context_processor ) { return null; } - $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); - $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - $processor->state->encoding = $encoding; - $processor->state->encoding_confidence = 'certain'; + while ( $context_processor->next_tag() ) { + $context_processor->set_bookmark( 'final_node' ); + } - // @todo Create "fake" bookmarks for non-existent but implied nodes. - $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); - $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); + if ( + ! $context_processor->has_bookmark( 'final_node' ) || + ! $context_processor->seek( 'final_node' ) + ) { + _doing_it_wrong( __METHOD__, __( 'No valid context element was detected.' ), '6.8.0' ); + return null; + } - $root_node = new WP_HTML_Token( - 'root-node', - 'HTML', - false - ); - - $processor->state->stack_of_open_elements->push( $root_node ); - - $context_node = new WP_HTML_Token( - 'context-node', - 'BODY', - false - ); - - $processor->context_node = $context_node; - $processor->breadcrumbs = array( 'HTML', $context_node->node_name ); - - return $processor; + return $context_processor->create_fragment_at_current_node( $html ); } /** @@ -333,9 +344,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * entire HTML document from start to finish. Consider a fragment parser with * a context node of ``. * - * Since UTF-8 is the only currently-accepted charset, if working with a - * document that isn't UTF-8, it's important to convert the document before - * creating the processor: pass in the converted HTML. + * UTF-8 is the only allowed encoding. If working with a document that + * isn't UTF-8, first convert the document to UTF-8, then pass in the + * converted HTML. * * @param string $html Input HTML document to process. * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used @@ -459,35 +470,72 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm * + * @since 6.8.0 + * * @param string $html Input HTML fragment to process. * @return static|null The created processor if successful, otherwise null. */ public function create_fragment_at_current_node( string $html ) { if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) { + _doing_it_wrong( + __METHOD__, + __( 'The context element must be a start tag.' ), + '6.8.0' + ); return null; } + $tag_name = $this->current_element->token->node_name; $namespace = $this->current_element->token->namespace; + if ( 'html' === $namespace && self::is_void( $tag_name ) ) { + _doing_it_wrong( + __METHOD__, + sprintf( + // translators: %s: A tag name like INPUT or BR. + __( 'The context element cannot be a void element, found "%s".' ), + $tag_name + ), + '6.8.0' + ); + return null; + } + /* * Prevent creating fragments at nodes that require a special tokenizer state. * This is unsupported by the HTML Processor. */ if ( 'html' === $namespace && - in_array( $this->current_element->token->node_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true ) + in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true ) ) { + _doing_it_wrong( + __METHOD__, + sprintf( + // translators: %s: A tag name like IFRAME or TEXTAREA. + __( 'The context element "%s" is not supported.' ), + $tag_name + ), + '6.8.0' + ); return null; } - $fragment_processor = static::create_fragment( $html ); - if ( null === $fragment_processor ) { - return null; - } + $fragment_processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); $fragment_processor->compat_mode = $this->compat_mode; - $fragment_processor->context_node = clone $this->state->current_token; + // @todo Create "fake" bookmarks for non-existent but implied nodes. + $fragment_processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); + $root_node = new WP_HTML_Token( + 'root-node', + 'HTML', + false + ); + $fragment_processor->state->stack_of_open_elements->push( $root_node ); + + $fragment_processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); + $fragment_processor->context_node = clone $this->current_element->token; $fragment_processor->context_node->bookmark_name = 'context-node'; $fragment_processor->context_node->on_destroy = null; diff --git a/wp-includes/version.php b/wp-includes/version.php index d06641e588..50fa57f4fe 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.8-alpha-59466'; +$wp_version = '6.8-alpha-59467'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.