Formatting: Account for HTML entities in wp_extract_urls().

Prevent `wp_extract_urls()` from trimming HTML entities within URLs. Correctly escaped URLs such as `https://youtube.com/watch?v=dQw4w9WgXcQ&amp;t=1` will now be extracted as `https://youtube.com/watch?v=dQw4w9WgXcQ&t=1` rather than being truncated at the entity's semicolon.

Props trex005, voldemortensen, johnbillion, ironprogrammer, costdev, hellofromtonya.
Fixes #30580


Built from https://develop.svn.wordpress.org/trunk@53044


git-svn-id: http://core.svn.wordpress.org/trunk@52633 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Peter Wilson 2022-04-01 03:40:02 +00:00
parent be3aa9a149
commit 33b6697191
2 changed files with 14 additions and 3 deletions

View File

@ -820,6 +820,7 @@ function xmlrpc_removepostdata( $content ) {
* Use RegEx to extract URLs from arbitrary content.
*
* @since 3.7.0
* @since 6.0.0 Fixes support for HTML entities (Trac 30580).
*
* @param string $content Content to extract URLs from.
* @return string[] Array of URLs found in passed string.
@ -833,7 +834,7 @@ function wp_extract_urls( $content ) {
. '(?:'
. '\([\w\d]+\)|'
. '(?:'
. "[^`!()\[\]{};:'\".,<>«»“”‘’\s]|"
. "[^`!()\[\]{}:'\".,<>«»“”‘’\s]|"
. '(?:[:]\d+)?/?'
. ')+'
. ')'
@ -842,7 +843,17 @@ function wp_extract_urls( $content ) {
$post_links
);
$post_links = array_unique( array_map( 'html_entity_decode', $post_links[2] ) );
$post_links = array_unique(
array_map(
static function( $link ) {
// Decode to replace valid entities, like &amp;.
$link = html_entity_decode( $link );
// Maintain backward compatibility by removing extraneous semi-colons (`;`).
return str_replace( ';', '', $link );
},
$post_links[2]
)
);
return array_values( $post_links );
}

View File

@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.0-alpha-53043';
$wp_version = '6.0-alpha-53044';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.