WordPress/wp-includes/class-wp-block-parser.php
gziolo 1044eb572a Editor: Sync changes from the Gutenberg plugin 14.1 release
Updated WordPress packages necessary for releasing WordPress 6.1 Beta 1:

- @wordpress/a11y@3.17.1
 - @wordpress/annotations@2.17.2
 - @wordpress/api-fetch@6.14.1
 - @wordpress/autop@3.17.1
 - @wordpress/babel-plugin-import-jsx-pragma@4.0.1
 - @wordpress/babel-plugin-makepot@5.1.1
 - @wordpress/babel-preset-default@7.1.1
 - @wordpress/base-styles@4.8.1
 - @wordpress/blob@3.17.1
 - @wordpress/block-directory@3.15.2
 - @wordpress/block-editor@10.0.2
 - @wordpress/block-library@7.14.2
 - @wordpress/block-serialization-default-parser@4.17.1
 - @wordpress/block-serialization-spec-parser@4.17.1
 - @wordpress/blocks@11.16.2
 - @wordpress/browserslist-config@5.0.1
 - @wordpress/components@21.0.2
 - @wordpress/compose@5.15.2
 - @wordpress/core-data@5.0.2
 - @wordpress/create-block-tutorial-template@2.5.1
 - @wordpress/create-block@4.1.1
 - @wordpress/custom-templated-path-webpack-plugin@2.1.3
 - @wordpress/customize-widgets@3.14.2
 - @wordpress/data-controls@2.17.2
 - @wordpress/data@7.1.2
 - @wordpress/date@4.17.1
 - @wordpress/dependency-extraction-webpack-plugin@4.0.2
 - @wordpress/deprecated@3.17.1
 - @wordpress/docgen@1.26.1
 - @wordpress/dom-ready@3.17.1
 - @wordpress/dom@3.17.2
 - @wordpress/e2e-test-utils@8.1.1
 - @wordpress/e2e-tests@5.1.2
 - @wordpress/edit-post@6.14.2
 - @wordpress/edit-site@4.14.2
 - @wordpress/edit-widgets@4.14.2
 - @wordpress/editor@12.16.2
 - @wordpress/element@4.15.1
 - @wordpress/env@5.2.1
 - @wordpress/escape-html@2.17.1
 - @wordpress/eslint-plugin@13.1.1
 - @wordpress/format-library@3.15.2
 - @wordpress/hooks@3.17.1
 - @wordpress/html-entities@3.17.1
 - @wordpress/i18n@4.17.1
 - @wordpress/icons@9.8.1
 - @wordpress/interface@4.16.2
 - @wordpress/is-shallow-equal@4.17.1
 - @wordpress/jest-console@6.0.1
 - @wordpress/jest-preset-default@9.0.1
 - @wordpress/jest-puppeteer-axe@5.0.1
 - @wordpress/keyboard-shortcuts@3.15.2
 - @wordpress/keycodes@3.17.1
 - @wordpress/lazy-import@1.4.3
 - @wordpress/library-export-default-webpack-plugin@2.3.3
 - @wordpress/list-reusable-blocks@3.15.2
 - @wordpress/media-utils@4.8.1
 - @wordpress/notices@3.17.2
 - @wordpress/npm-package-json-lint-config@4.2.1
 - @wordpress/nux@5.15.2
 - @wordpress/plugins@4.15.2
 - @wordpress/postcss-plugins-preset@4.1.1
 - @wordpress/postcss-themes@5.0.1
 - @wordpress/preferences-persistence@1.9.1
 - @wordpress/preferences@2.9.2
 - @wordpress/prettier-config@2.0.1
 - @wordpress/primitives@3.15.1
 - @wordpress/priority-queue@2.17.2
 - @wordpress/project-management-automation@1.16.1
 - @wordpress/react-i18n@3.15.1
 - @wordpress/readable-js-assets-webpack-plugin@2.0.1
 - @wordpress/redux-routine@4.17.1
 - @wordpress/reusable-blocks@3.15.2
 - @wordpress/rich-text@5.15.2
 - @wordpress/scripts@24.1.2
 - @wordpress/server-side-render@3.15.2
 - @wordpress/shortcode@3.17.1
 - @wordpress/style-engine@1.0.1
 - @wordpress/stylelint-config@21.0.1
 - @wordpress/token-list@2.17.1
 - @wordpress/url@3.18.1
 - @wordpress/viewport@4.15.2
 - @wordpress/warning@2.17.1
 - @wordpress/widgets@2.15.2
 - @wordpress/wordcount@3.17.1


Props bernhard-reiter, cbravobernal, czapla, oandregal, isabel_brison, andrewserong, mciampini.
See #56467.


Built from https://develop.svn.wordpress.org/trunk@54257


git-svn-id: http://core.svn.wordpress.org/trunk@53816 1a063a9b-81f0-0310-95a4-ce76da25c4cd
2022-09-20 15:43:29 +00:00

556 lines
15 KiB
PHP

<?php
/**
* Block Serialization Parser
*
* @package WordPress
*/
/**
 * Class WP_Block_Parser_Block
 *
 * Holds the block structure in memory. This is a plain value holder:
 * the constructor copies its arguments onto the public properties and
 * performs no validation.
 *
 * @since 5.0.0
 */
class WP_Block_Parser_Block {
	/**
	 * Name of block
	 *
	 * The parser in this file passes null here for freeform HTML.
	 *
	 * @example "core/paragraph"
	 *
	 * @since 5.0.0
	 * @var string|null
	 */
	public $blockName;

	/**
	 * Optional set of attributes from block comment delimiters
	 *
	 * @example null
	 * @example array( 'columns' => 3 )
	 *
	 * @since 5.0.0
	 * @var array|null
	 */
	public $attrs;

	/**
	 * List of inner blocks (of this same class)
	 *
	 * Note: WP_Block_Parser::add_inner_block() appends each inner block
	 * cast to an array, so entries produced by that parser are arrays
	 * carrying the same keys as this class's properties.
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $innerBlocks;

	/**
	 * Resultant HTML from inside block comment delimiters
	 * after removing inner blocks
	 *
	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $innerHTML;

	/**
	 * List of string fragments and null markers where inner blocks were found
	 *
	 * @example array(
	 *   'innerHTML'    => 'BeforeInnerAfter',
	 *   'innerBlocks'  => array( block, block ),
	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
	 * )
	 *
	 * @since 4.2.0
	 * @var array
	 */
	public $innerContent;

	/**
	 * Constructor.
	 *
	 * Will populate object properties from the provided arguments.
	 * Arguments are stored as given; nothing is copied deeply or checked.
	 *
	 * @since 5.0.0
	 *
	 * @param string|null $name         Name of block.
	 * @param array|null  $attrs        Optional set of attributes from block comment delimiters.
	 * @param array       $innerBlocks  List of inner blocks (of this same class).
	 * @param string      $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
	 * @param array       $innerContent List of string fragments and null markers where inner blocks were found.
	 */
	public function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
		$this->blockName    = $name;
		$this->attrs        = $attrs;
		$this->innerBlocks  = $innerBlocks;
		$this->innerHTML    = $innerHTML;
		$this->innerContent = $innerContent;
	}
}
/**
 * Class WP_Block_Parser_Frame
 *
 * Holds partial blocks in memory while parsing. One frame is pushed on
 * the parser stack for every block opener that has not been closed yet.
 *
 * @internal
 * @since 5.0.0
 */
class WP_Block_Parser_Frame {
	/**
	 * Full or partial block
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block
	 */
	public $block;

	/**
	 * Byte offset into document for start of parse token
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_start;

	/**
	 * Byte length of entire parse token string
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_length;

	/**
	 * Byte offset into document for after parse token ends
	 * (used during reconstruction of stack into parse production)
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $prev_offset;

	/**
	 * Byte offset into document where leading HTML before token starts
	 *
	 * Null when no leading HTML was recorded for this frame.
	 *
	 * @since 5.0.0
	 * @var int|null
	 */
	public $leading_html_start;

	/**
	 * Constructor
	 *
	 * Will populate object properties from the provided arguments.
	 *
	 * @since 5.0.0
	 *
	 * @param WP_Block_Parser_Block $block              Full or partial block.
	 * @param int                   $token_start        Byte offset into document for start of parse token.
	 * @param int                   $token_length       Byte length of entire parse token string.
	 * @param int|null              $prev_offset        Optional. Byte offset into document for after parse token ends.
	 *                                                  Defaults to `$token_start + $token_length` when null.
	 * @param int|null              $leading_html_start Optional. Byte offset into document where leading HTML
	 *                                                  before token starts. Default null.
	 */
	public function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
		$this->block        = $block;
		$this->token_start  = $token_start;
		$this->token_length = $token_length;

		// An explicit 0 is a valid offset, so only null triggers the fallback.
		$this->prev_offset = ( null !== $prev_offset )
			? $prev_offset
			: $token_start + $token_length;

		$this->leading_html_start = $leading_html_start;
	}
}
/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block objects
 *
 * @since 5.0.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks
	 *
	 * Every entry is a WP_Block_Parser_Block cast to an array.
	 *
	 * @since 5.0.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Empty associative array, here due to PHP quirks
	 *
	 * @since 4.4.0
	 * @var array empty associative array
	 */
	public $empty_attrs;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * All parser state is reset at the start of each call, so one
	 * instance can be reused for several documents.
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		$this->document    = $document;
		$this->offset      = 0;
		$this->output      = array();
		$this->stack       = array();
		$this->empty_attrs = json_decode( '{}', true );

		do {
			// twiddle our thumbs: proceed() does all the work.
		} while ( $this->proceed() );

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool True when another token should be read, false when parsing is finished.
	 */
	public function proceed() {
		$next_token = $this->next_token();
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
		$stack_depth = count( $this->stack );

		// we may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// if not in a block then flush output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem
				 * This is an error
				 *
				 * we have options
				 * - treat it all as freeform text
				 * - assume an implicit closer (easiest when not nesting)
				 *
				 * We assume implicit closers: every frame still on the
				 * stack (one or many) is collapsed piecewise into the output.
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				$void_block = new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );

				if ( 0 === $stack_depth ) {
					/*
					 * easy case is if we stumbled upon a void block
					 * in the top-level of the document
					 */
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) $void_block;
				} else {
					// otherwise we found an inner block.
					$this->add_inner_block( $void_block, $start_offset, $token_length );
				}

				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * if we're missing an opener we're in trouble
				 * This is an error
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * we have options
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 *
					 * We give up: the rest of the document becomes freeform.
					 */
					$this->add_freeform();
					return false;
				}

				// if we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent
				 *
				 * Note that the trailing HTML is recorded here even when it
				 * is an empty string, unlike in add_inner_block().
				 */
				$stack_top = array_pop( $this->stack );
				$html      = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );

				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * The returned array is positional:
	 * `array( $token_type, $block_name, $attrs, $start_offset, $token_length )`
	 * where `$token_type` is one of 'no-more-tokens', 'void-block',
	 * 'block-opener' or 'block-closer'. For 'no-more-tokens' the other four
	 * entries are null; for 'block-closer' `$attrs` is null.
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		/*
		 * `false` means the PCRE engine failed (probably catastrophic
		 * backtracking or out-of-memory); `0` means we have no more tokens.
		 * Both end tokenization the same way.
		 */
		if ( false === $has_match || 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		$length = strlen( $match );

		// With PREG_OFFSET_CAPTURE an unmatched optional group has offset -1.
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		// Blocks written without a namespace belong to `core/`.
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 *
		 * json_decode() yields null when the attribute text is not valid JSON.
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: $this->empty_attrs;

		/*
		 * A closer that is also void or carries attributes isn't allowed,
		 * but we can ignore the extras since they don't hurt anything:
		 * the void flag simply takes precedence below.
		 */
		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * The block has a null name, empty attributes, no inner blocks, and
	 * the given HTML as both its innerHTML and its only innerContent entry.
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $innerHTML HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $innerHTML ) {
		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * The text starts at the current parse offset. Nothing is pushed
	 * when the resulting length is zero.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output.
	 *                         Any falsy value (null or 0) means "everything
	 *                         up to the end of the document".
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the output list.
	 *
	 * More precisely, the block is appended (as an array) to the inner
	 * blocks of the frame on top of the stack, preceded by any HTML found
	 * between that frame's previous offset and the start of this block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string explicitly: `empty()` would also
		 * discard the fragment "0". The is_string() guard covers PHP versions
		 * where substr() may return false.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		// A null entry marks the position of the inner block among the HTML fragments.
		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Any leading HTML recorded on the frame is emitted first as a
	 * freeform block, followed by the block itself.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 *                             When null the HTML runs to the end of the document.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string explicitly: `empty()` would also
		 * discard the fragment "0". The is_string() guard covers PHP versions
		 * where substr() may return false.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}