HTML API: Fix token length bug in Tag Processor.

The Tag Processor stores the byte offsets into its HTML document where
the current token starts and ends, and does the same for every bookmark.
In some cases for tags, the end offset has been off by one.

In this patch, the offset is fixed so that a bookmark always refers to
the full span of the token it's bookmarking. The current token's byte
offsets are now properly recorded as well.

While this is a defect in the Tag Processor, it hasn't been exposed
through the public interface and has not affected any of the workings
of the processor. Only subclasses which rely on the length of a bookmark
have been potentially affected, and these are not supported environments
in the ongoing work.

This fix is important for future work and for ensuring that subclasses
performing custom behaviors remain as reliable as the public interface.

Developed in https://github.com/WordPress/wordpress-develop/pull/6625
Discussed in https://core.trac.wordpress.org/ticket/61301

Props dmsnell, gziolo, jonsurrell, westonruter.
Fixes #61301.

Built from https://develop.svn.wordpress.org/trunk@58233


git-svn-id: http://core.svn.wordpress.org/trunk@57696 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
dmsnell 2024-05-29 11:42:08 +00:00
parent 69d3004bc7
commit b5894d595e
3 changed files with 6 additions and 6 deletions

View File

@ -926,8 +926,8 @@ class WP_HTML_Tag_Processor {
return false;
}
$this->parser_state = self::STATE_MATCHED_TAG;
$this->token_length = $tag_ends_at - $this->token_starts_at;
$this->bytes_already_parsed = $tag_ends_at + 1;
$this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
/*
* For non-DATA sections which might contain text that looks like HTML tags but
@ -1013,7 +1013,7 @@ class WP_HTML_Tag_Processor {
*/
$this->token_starts_at = $was_at;
$this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
$this->text_starts_at = $tag_ends_at + 1;
$this->text_starts_at = $tag_ends_at;
$this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
$this->tag_name_starts_at = $tag_name_starts_at;
$this->tag_name_length = $tag_name_length;
@ -2687,7 +2687,7 @@ class WP_HTML_Tag_Processor {
* <figure />
* ^ this appears one character before the end of the closing ">".
*/
return '/' === $this->html[ $this->token_starts_at + $this->token_length - 1 ];
return '/' === $this->html[ $this->token_starts_at + $this->token_length - 2 ];
}
/**

View File

@ -107,7 +107,7 @@ final class WP_Interactivity_API_Directives_Processor extends WP_HTML_Tag_Proces
$bookmark = 'append_content_after_template_tag_closer';
$this->set_bookmark( $bookmark );
$after_closing_tag = $this->bookmarks[ $bookmark ]->start + $this->bookmarks[ $bookmark ]->length + 1;
$after_closing_tag = $this->bookmarks[ $bookmark ]->start + $this->bookmarks[ $bookmark ]->length;
$this->release_bookmark( $bookmark );
// Appends the new content.
@ -140,7 +140,7 @@ final class WP_Interactivity_API_Directives_Processor extends WP_HTML_Tag_Proces
}
list( $opener_tag, $closer_tag ) = $bookmarks;
$after_opener_tag = $this->bookmarks[ $opener_tag ]->start + $this->bookmarks[ $opener_tag ]->length + 1;
$after_opener_tag = $this->bookmarks[ $opener_tag ]->start + $this->bookmarks[ $opener_tag ]->length;
$before_closer_tag = $this->bookmarks[ $closer_tag ]->start;
if ( $rewind ) {

View File

@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.6-alpha-58232';
$wp_version = '6.6-alpha-58233';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.