Formatting: Account for HTML entities in wp_extract_urls().

Prevent `wp_extract_urls()` trimming HTML entities within URLs. Correctly escaped URLs such as `https://youtube.com/watch?v=dQw4w9WgXcQ&amp;t=1` will now be extracted as `https://youtube.com/watch?v=dQw4w9WgXcQ&t=1` rather than truncated.

Props trex005, voldemortensen, johnbillion, ironprogrammer, costdev, hellofromtonya.
Fixes #30580


Built from https://develop.svn.wordpress.org/trunk@53044


git-svn-id: http://core.svn.wordpress.org/trunk@52633 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Peter Wilson 2022-04-01 03:40:02 +00:00
parent be3aa9a149
commit 33b6697191
2 changed files with 14 additions and 3 deletions

View File

@ -820,6 +820,7 @@ function xmlrpc_removepostdata( $content ) {
* Use RegEx to extract URLs from arbitrary content. * Use RegEx to extract URLs from arbitrary content.
* *
* @since 3.7.0 * @since 3.7.0
* @since 6.0.0 Fixes support for HTML entities (Trac 30580).
* *
* @param string $content Content to extract URLs from. * @param string $content Content to extract URLs from.
* @return string[] Array of URLs found in passed string. * @return string[] Array of URLs found in passed string.
@ -833,7 +834,7 @@ function wp_extract_urls( $content ) {
. '(?:' . '(?:'
. '\([\w\d]+\)|' . '\([\w\d]+\)|'
. '(?:' . '(?:'
. "[^`!()\[\]{};:'\".,<>«»“”‘’\s]|" . "[^`!()\[\]{}:'\".,<>«»“”‘’\s]|"
. '(?:[:]\d+)?/?' . '(?:[:]\d+)?/?'
. ')+' . ')+'
. ')' . ')'
@ -842,7 +843,17 @@ function wp_extract_urls( $content ) {
$post_links $post_links
); );
$post_links = array_unique( array_map( 'html_entity_decode', $post_links[2] ) ); $post_links = array_unique(
array_map(
static function( $link ) {
// Decode to replace valid entities, like &amp;.
$link = html_entity_decode( $link );
// Maintain backward compatibility by removing extraneous semi-colons (`;`).
return str_replace( ';', '', $link );
},
$post_links[2]
)
);
return array_values( $post_links ); return array_values( $post_links );
} }

View File

@ -16,7 +16,7 @@
* *
* @global string $wp_version * @global string $wp_version
*/ */
$wp_version = '6.0-alpha-53043'; $wp_version = '6.0-alpha-53044';
/** /**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.