HTML API: Add support for list elements.

Adds support for the following HTML elements to the HTML Processor:

 - LI, OL, UL.
 - DD, DL, DT.

Previously, these elements were not supported and the HTML Processor would bail when encountering them.
With this patch, the HTML Processor continues parsing an HTML document when it encounters those tags, as long as no other condition causes it to bail (such as complicated format reconstruction).

Props audrasjb, jonsurrell, bernhard-reiter.
Fixes #60215.


Built from https://develop.svn.wordpress.org/trunk@57264


git-svn-id: http://core.svn.wordpress.org/trunk@56770 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
dmsnell 2024-01-10 14:05:17 +00:00
parent 28331b7aae
commit 29dd837333
3 changed files with 139 additions and 10 deletions

View File

@ -129,7 +129,7 @@ class WP_HTML_Open_Elements {
}
if ( in_array( $node->node_name, $termination_list, true ) ) {
return true;
return false;
}
}
@ -166,18 +166,22 @@ class WP_HTML_Open_Elements {
* Returns whether a particular element is in list item scope.
*
* @since 6.4.0
* @since 6.5.0 Implemented: no longer throws on every invocation.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
*
* @throws WP_HTML_Unsupported_Exception Always until this function is implemented.
*
* @param string $tag_name Name of tag to check.
* @return bool Whether given element is in scope.
*/
public function has_element_in_list_item_scope( $tag_name ) {
throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on list item scope.' );
return false; // The linter requires this unreachable code until the function is implemented and can return.
return $this->has_element_in_specific_scope(
$tag_name,
array(
// There are more elements that belong here which aren't currently supported.
'OL',
'UL',
)
);
}
/**
@ -375,10 +379,22 @@ class WP_HTML_Open_Elements {
* see WP_HTML_Open_Elements::walk_down().
*
* @since 6.4.0
* @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists.
*
* @param WP_HTML_Token|null $above_this_node Optional. Start traversing above this node, if provided.
*                                            If the node is not found in the stack, nothing is yielded.
*/
public function walk_up() {
public function walk_up( $above_this_node = null ) {
$has_found_node = null === $above_this_node;
for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
yield $this->stack[ $i ];
$node = $this->stack[ $i ];
if ( ! $has_found_node ) {
$has_found_node = $node === $above_this_node;
continue;
}
yield $node;
}
}

View File

@ -105,7 +105,7 @@
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
* - Links: A.
* - Lists: DL.
* - Lists: DD, DL, DT, LI, OL, UL.
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
* - Paragraph: P.
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
@ -648,10 +648,12 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case '+MAIN':
case '+MENU':
case '+NAV':
case '+OL':
case '+P':
case '+SEARCH':
case '+SECTION':
case '+SUMMARY':
case '+UL':
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
@ -685,9 +687,11 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case '-MAIN':
case '-MENU':
case '-NAV':
case '-OL':
case '-SEARCH':
case '-SECTION':
case '-SUMMARY':
case '-UL':
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
// @todo Report parse error.
// Ignore the token.
@ -755,6 +759,109 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
return true;
/*
* > A start tag whose tag name is "li"
* > A start tag whose tag name is one of: "dd", "dt"
*/
case '+DD':
case '+DT':
case '+LI':
$this->state->frameset_ok = false;
$node = $this->state->stack_of_open_elements->current_node();
$is_li = 'LI' === $tag_name;
in_body_list_loop:
/*
* The logic for LI and DT/DD is the same except for one point: LI elements _only_
* close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
*/
if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
$node_name = $is_li ? 'LI' : $node->node_name;
$this->generate_implied_end_tags( $node_name );
if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
}
$this->state->stack_of_open_elements->pop_until( $node_name );
goto in_body_list_done;
}
if (
'ADDRESS' !== $node->node_name &&
'DIV' !== $node->node_name &&
'P' !== $node->node_name &&
$this->is_special( $node->node_name )
) {
/*
* > If node is in the special category, but is not an address, div,
* > or p element, then jump to the step labeled done below.
*/
goto in_body_list_done;
} else {
/*
* > Otherwise, set node to the previous entry in the stack of open elements
* > and return to the step labeled loop.
*/
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
$node = $item;
break;
}
goto in_body_list_loop;
}
in_body_list_done:
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
$this->insert_html_element( $this->state->current_token );
return true;
/*
* > An end tag whose tag name is "li"
* > An end tag whose tag name is one of: "dd", "dt"
*/
case '-DD':
case '-DT':
case '-LI':
if (
/*
* An end tag whose tag name is "li":
* If the stack of open elements does not have an li element in list item scope,
* then this is a parse error; ignore the token.
*/
(
'LI' === $tag_name &&
! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
) ||
/*
* An end tag whose tag name is one of: "dd", "dt":
* If the stack of open elements does not have an element in scope that is an
* HTML element with the same tag name as that of the token, then this is a
* parse error; ignore the token.
*/
(
'LI' !== $tag_name &&
! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name )
)
) {
/*
* This is a parse error, ignore the token.
*
* @todo Indicate a parse error once it's possible.
*/
return $this->step();
}
$this->generate_implied_end_tags( $tag_name );
if ( $tag_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
}
$this->state->stack_of_open_elements->pop_until( $tag_name );
return true;
/*
* > An end tag whose tag name is "p"
*/
@ -1223,6 +1330,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private function generate_implied_end_tags( $except_for_this_element = null ) {
$elements_with_implied_end_tags = array(
'DD',
'DT',
'LI',
'P',
);
@ -1248,6 +1358,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private function generate_implied_end_tags_thoroughly() {
$elements_with_implied_end_tags = array(
'DD',
'DT',
'LI',
'P',
);

View File

@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.5-alpha-57263';
$wp_version = '6.5-alpha-57264';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.