Introduce HTML API with HTML Tag Processor

This commit pulls in the HTML Tag Processor from the Gutenberg repository.

The Tag Processor attempts to be an HTML5-spec-compliant parser that provides the ability in PHP to find specific HTML tags and then add, remove, or update attributes on those tags. It provides a safe and reliable way to modify the attributes of HTML tags.

More information: https://github.com/WordPress/wordpress-develop/pull/3920.

Props: antonvlasenko, bernhard-reiter, costdev, dmsnell, felixarntz, gziolo, hellofromtonya, zieladam, flixos90, ntsekouras, peterwilsoncc, swissspidy, andrewserong, onemaggie, get_dave, aristath, scruffian, justlevine, andraganescu, noisysocks, dlh, soean, cbirdsong, revgeorge, azaozz.
Fixes #57575.
Built from https://develop.svn.wordpress.org/trunk@55203


git-svn-id: http://core.svn.wordpress.org/trunk@54736 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Andrew Ozz 2023-02-03 01:05:17 +00:00
parent 94ede5b18c
commit be73904dc7
6 changed files with 2469 additions and 1 deletions

View File

@ -0,0 +1,89 @@
<?php
/**
* HTML Tag Processor: Attribute token structure class.
*
* @package WordPress
* @subpackage HTML-API
* @since 6.2.0
*/
/**
 * Data structure holding a single attribute token.
 *
 * It exists to allow a drastic performance improvement and is meant
 * for internal usage by the WP_HTML_Tag_Processor class only.
 *
 * @access private
 * @since 6.2.0
 *
 * @see WP_HTML_Tag_Processor
 */
class WP_HTML_Attribute_Token {
	/**
	 * Name of the attribute.
	 *
	 * @since 6.2.0
	 * @var string
	 */
	public $name;

	/**
	 * The string offset where the attribute value starts.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $value_starts_at;

	/**
	 * Number of bytes the value occupies in the input HTML.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $value_length;

	/**
	 * The string offset where the attribute name starts.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $start;

	/**
	 * The string offset after the attribute value or its name.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $end;

	/**
	 * Whether the attribute is a boolean attribute with value `true`.
	 *
	 * @since 6.2.0
	 * @var bool
	 */
	public $is_true;

	/**
	 * Constructor.
	 *
	 * Copies every argument into the property it describes, unmodified.
	 *
	 * @since 6.2.0
	 *
	 * @param string $name         Attribute name.
	 * @param int    $value_start  The string offset where the attribute value starts.
	 * @param int    $value_length Number of bytes the attribute value spans.
	 * @param int    $start        The string offset where the attribute name starts.
	 * @param int    $end          The string offset after the attribute value or its name.
	 * @param bool   $is_true      Whether the attribute is a boolean attribute with true value.
	 */
	public function __construct( $name, $value_start, $value_length, $start, $end, $is_true ) {
		// Identity of the attribute.
		$this->name    = $name;
		$this->is_true = $is_true;

		// Extent of the whole attribute.
		$this->start = $start;
		$this->end   = $end;

		// Extent of the value.
		$this->value_starts_at = $value_start;
		$this->value_length    = $value_length;
	}
}

View File

@ -0,0 +1,52 @@
<?php
/**
* HTML Span: Represents a textual span inside an HTML document.
*
* @package WordPress
* @subpackage HTML-API
* @since 6.2.0
*/
/**
 * A textual span inside an HTML document.
 *
 * Effectively a two-tuple in disguise: an object is used instead of an
 * array in order to avoid the memory overhead an array would involve.
 *
 * Meant for internal usage by the WP_HTML_Tag_Processor class only.
 *
 * @access private
 * @since 6.2.0
 *
 * @see WP_HTML_Tag_Processor
 */
class WP_HTML_Span {
	/**
	 * Byte offset into the document where the span begins.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $start;

	/**
	 * Byte offset into the document where the span ends.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $end;

	/**
	 * Constructor.
	 *
	 * Stores both offsets exactly as given.
	 *
	 * @since 6.2.0
	 *
	 * @param int $start Byte offset into document where span begins.
	 * @param int $end   Byte offset into document where span ends.
	 */
	public function __construct( $start, $end ) {
		$this->end   = $end;
		$this->start = $start;
	}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,59 @@
<?php
/**
* HTML Tag Processor: Text replacement class.
*
* @package WordPress
* @subpackage HTML-API
* @since 6.2.0
*/
/**
 * Data structure describing a replacement of existing content, from a start
 * offset to an end offset, with new text.
 *
 * It exists to allow a drastic performance improvement and is meant
 * for internal usage by the WP_HTML_Tag_Processor class only.
 *
 * @access private
 * @since 6.2.0
 *
 * @see WP_HTML_Tag_Processor
 */
class WP_HTML_Text_Replacement {
	/**
	 * Byte offset into the document where the replacement span begins.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $start;

	/**
	 * Byte offset into the document where the replacement span ends.
	 *
	 * @since 6.2.0
	 * @var int
	 */
	public $end;

	/**
	 * Text to insert in the document in place of the existing content from start to end.
	 *
	 * @since 6.2.0
	 * @var string
	 */
	public $text;

	/**
	 * Constructor.
	 *
	 * Stores the offsets and the replacement text exactly as given.
	 *
	 * @since 6.2.0
	 *
	 * @param int    $start Byte offset into document where replacement span begins.
	 * @param int    $end   Byte offset into document where replacement span ends.
	 * @param string $text  Span of text to insert in document to replace existing content from start to end.
	 */
	public function __construct( $start, $end, $text ) {
		// What gets inserted.
		$this->text = $text;

		// Where it gets inserted.
		$this->end   = $end;
		$this->start = $start;
	}
}

View File

@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.2-alpha-55202';
$wp_version = '6.2-alpha-55203';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.

View File

@ -234,6 +234,10 @@ require ABSPATH . WPINC . '/class-wp-oembed.php';
require ABSPATH . WPINC . '/class-wp-oembed-controller.php';
require ABSPATH . WPINC . '/media.php';
require ABSPATH . WPINC . '/http.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-attribute-token.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-span.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-text-replacement.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-tag-processor.php';
require ABSPATH . WPINC . '/class-wp-http.php';
require ABSPATH . WPINC . '/class-wp-http-streams.php';
require ABSPATH . WPINC . '/class-wp-http-curl.php';