From 69147eb345859b76b5208cbab39bb9f3852d7a56 Mon Sep 17 00:00:00 2001 From: Gary Pendergast Date: Wed, 1 Jun 2016 02:38:29 +0000 Subject: [PATCH] Database: Split the logic of `wpdb::init_charset()` into a separate method. The logic for determining the appropriate character set and collation to use is becoming more complex, particularly with the recent additions of [37522] and [37523]. As `init_charset()` has side effects, and makes use of constants instead of parameters, it's not possible to unit test this logic. This commit splits the logic part of `init_charset()` out into a new method, `wpdb::determine_charset()`, along with appropriate unit tests. See #32105, #37522. Fixes #36917. Built from https://develop.svn.wordpress.org/trunk@37601 git-svn-id: http://core.svn.wordpress.org/trunk@37569 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/version.php | 2 +- wp-includes/wp-db.php | 49 +++++++++++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/wp-includes/version.php b/wp-includes/version.php index cd30903a3e..51376d00f7 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -4,7 +4,7 @@ * * @global string $wp_version */ -$wp_version = '4.6-alpha-37600'; +$wp_version = '4.6-alpha-37601'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. 
diff --git a/wp-includes/wp-db.php b/wp-includes/wp-db.php
index d261254926..50d71c6c13 100644
--- a/wp-includes/wp-db.php
+++ b/wp-includes/wp-db.php
@@ -735,41 +735,67 @@ class wpdb {
 	 */
 	public function init_charset() {
+		// Start from empty strings so both values are always defined, even when
+		// neither multisite nor the DB_CHARSET / DB_COLLATE constants supply them.
+		$charset = '';
+		$collate = '';
+
 		if ( function_exists('is_multisite') && is_multisite() ) {
-			$this->charset = 'utf8';
+			$charset = 'utf8';
 			if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
-				$this->collate = DB_COLLATE;
+				$collate = DB_COLLATE;
 			} else {
-				$this->collate = 'utf8_general_ci';
+				$collate = 'utf8_general_ci';
 			}
 		} elseif ( defined( 'DB_COLLATE' ) ) {
-			$this->collate = DB_COLLATE;
+			$collate = DB_COLLATE;
 		}
 
 		if ( defined( 'DB_CHARSET' ) ) {
-			$this->charset = DB_CHARSET;
+			$charset = DB_CHARSET;
 		}
 
+		$charset_collate = $this->determine_charset( $charset, $collate );
+
+		$this->charset = $charset_collate['charset'];
+		$this->collate = $charset_collate['collate'];
+	}
+
+	/**
+	 * Given a charset and collation, determine the best charset and collation to use.
+	 *
+	 * For example, when able, utf8mb4 should be used instead of utf8.
+	 *
+	 * @since 4.6.0
+	 *
+	 * @param string $charset The character set to check.
+	 * @param string $collate The collation to check.
+	 *
+	 * @return array The most appropriate character set and collation to use, keyed by 'charset' and 'collate'.
+	 */
+	public function determine_charset( $charset, $collate ) {
 		if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
-			return;
+			return compact( 'charset', 'collate' );
 		}
 
-		if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
-			$this->charset = 'utf8mb4';
+		if ( 'utf8' === $charset && $this->has_cap( 'utf8mb4' ) ) {
+			$charset = 'utf8mb4';
 		}
 
-		if ( 'utf8mb4' === $this->charset ) {
+		if ( 'utf8mb4' === $charset ) {
 			// _general_ is outdated, so we can upgrade it to _unicode_, instead.
-			if ( ! $this->collate || 'utf8_general_ci' === $this->collate ) {
-				$this->collate = 'utf8mb4_unicode_ci';
+			if ( ! $collate || 'utf8_general_ci' === $collate ) {
+				$collate = 'utf8mb4_unicode_ci';
 			} else {
-				$this->collate = str_replace( 'utf8_', 'utf8mb4_', $this->collate );
+				$collate = str_replace( 'utf8_', 'utf8mb4_', $collate );
 			}
 		}
 
 		// _unicode_520_ is a better collation, we should use that when it's available.
-		if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $this->collate ) {
-			$this->collate = 'utf8mb4_unicode_520_ci';
+		if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $collate ) {
+			$collate = 'utf8mb4_unicode_520_ci';
 		}
+
+		return compact( 'charset', 'collate' );
 	}
 
 	/**