From 23b0fdc142dc1e5fb305d737cf741174b642ea50 Mon Sep 17 00:00:00 2001
From: Aaron Jorbin
Date: Thu, 18 Jul 2024 18:04:11 +0000
Subject: [PATCH] General: Provide _is_utf8_charset() in compat.php for early use

#61182 introduced is_utf8_charset() as a way of standardizing checks for charset slugs referring to UTF-8. This is called by _mb_strlen() inside of compat.php, but is_utf8_charset() is defined in functions.php, which isn't loaded early on.

Code calling mb_strlen() early on before functions.php loads in hosts without the multibyte extension therefore may crash.

Props dmsnell, jonsurrell, joemcgill, jorbin.
Fixes #61681.

Built from https://develop.svn.wordpress.org/trunk@58763

git-svn-id: http://core.svn.wordpress.org/trunk@58165 1a063a9b-81f0-0310-95a4-ce76da25c4cd
---
Review notes (below the "---" separator, so not part of the commit message):

* _is_utf8_charset() tests only the value it is given; it does not fall back
  to the 'blog_charset' option. That fallback stays in is_utf8_charset() in
  functions.php via `$blog_charset ?? get_option( 'blog_charset' )`.
* NOTE(review): _mb_substr() and _mb_strlen() now pass $encoding straight to
  _is_utf8_charset(), and both default $encoding to null. The hunks do not
  show how a null $encoding is resolved before the check; confirm it is
  resolved earlier in each function, otherwise null is treated as non-UTF-8.

 wp-includes/compat.php    | 41 +++++++++++++++++++++++++++++++++++++--
 wp-includes/functions.php | 18 ++++-------------
 wp-includes/version.php   |  2 +-
 3 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/wp-includes/compat.php b/wp-includes/compat.php
index c50fc69a04..900a7994a1 100644
--- a/wp-includes/compat.php
+++ b/wp-includes/compat.php
@@ -40,6 +40,43 @@ function _wp_can_use_pcre_u( $set = null ) {
 	return $utf8_pcre;
 }
 
+/**
+ * Indicates if a given slug for a character set represents the UTF-8 text encoding.
+ *
+ * A charset is considered to represent UTF-8 if it is a case-insensitive match
+ * of "UTF-8" with or without the hyphen.
+ *
+ * Example:
+ *
+ *     true  === _is_utf8_charset( 'UTF-8' );
+ *     true  === _is_utf8_charset( 'utf8' );
+ *     false === _is_utf8_charset( 'latin1' );
+ *     false === _is_utf8_charset( 'UTF 8' );
+ *
+ *     // Only strings match.
+ *     false === _is_utf8_charset( [ 'charset' => 'utf-8' ] );
+ *
+ * `is_utf8_charset` should be used outside of this file.
+ *
+ * @ignore
+ * @since 6.6.1
+ *
+ * @param string $charset_slug Slug representing a text character encoding, or "charset".
+ *                             E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
+ *
+ * @return bool Whether the slug represents the UTF-8 encoding.
+ */
+function _is_utf8_charset( $charset_slug ) {
+	if ( ! is_string( $charset_slug ) ) {
+		return false;
+	}
+
+	return (
+		0 === strcasecmp( 'UTF-8', $charset_slug ) ||
+		0 === strcasecmp( 'UTF8', $charset_slug )
+	);
+}
+
 if ( ! function_exists( 'mb_substr' ) ) :
 	/**
 	 * Compat function to mimic mb_substr().
@@ -91,7 +128,7 @@ function _mb_substr( $str, $start, $length = null, $encoding = null ) {
 	 * The solution below works only for UTF-8, so in case of a different
 	 * charset just use built-in substr().
 	 */
-	if ( ! is_utf8_charset( $encoding ) ) {
+	if ( ! _is_utf8_charset( $encoding ) ) {
 		return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
 	}
 
@@ -176,7 +213,7 @@ function _mb_strlen( $str, $encoding = null ) {
 	 * The solution below works only for UTF-8, so in case of a different charset
 	 * just use built-in strlen().
 	 */
-	if ( ! is_utf8_charset( $encoding ) ) {
+	if ( ! _is_utf8_charset( $encoding ) ) {
 		return strlen( $str );
 	}
 
diff --git a/wp-includes/functions.php b/wp-includes/functions.php
index 94155249fe..e821f6f2b0 100644
--- a/wp-includes/functions.php
+++ b/wp-includes/functions.php
@@ -7496,6 +7496,9 @@ function get_tag_regex( $tag ) {
  *     $is_utf8 = is_utf8_charset();
  *
  * @since 6.6.0
+ * @since 6.6.1 A wrapper for _is_utf8_charset
+ *
+ * @see _is_utf8_charset
  *
  * @param string|null $blog_charset Optional. Slug representing a text character encoding, or "charset".
  *                                  E.g. "UTF-8", "Windows-1252", "ISO-8859-1", "SJIS".
@@ -7503,20 +7506,7 @@ function get_tag_regex( $tag ) {
  * @return bool Whether the slug represents the UTF-8 encoding.
  */
 function is_utf8_charset( $blog_charset = null ) {
-	$charset_to_examine = $blog_charset ?? get_option( 'blog_charset' );
-
-	/*
-	 * Only valid string values count: the absence of a charset
-	 * does not imply any charset, let alone UTF-8.
-	 */
-	if ( ! is_string( $charset_to_examine ) ) {
-		return false;
-	}
-
-	return (
-		0 === strcasecmp( 'UTF-8', $charset_to_examine ) ||
-		0 === strcasecmp( 'UTF8', $charset_to_examine )
-	);
+	return _is_utf8_charset( $blog_charset ?? get_option( 'blog_charset' ) );
 }
 
 /**
diff --git a/wp-includes/version.php b/wp-includes/version.php
index 66610f4ba8..5ffc996084 100644
--- a/wp-includes/version.php
+++ b/wp-includes/version.php
@@ -16,7 +16,7 @@
  *
  * @global string $wp_version
  */
-$wp_version = '6.7-alpha-58757';
+$wp_version = '6.7-alpha-58763';
 
 /**
  * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.