Database: Add %i placeholder support to $wpdb->prepare to escape table and column names, take 2.

[53575] during the 6.1 cycle was reverted in [54734] to address issues around multiple `%` placeholders not being properly quoted as reported in #56933.  Since then, this issue has been resolved and the underlying code improved significantly.  Additionally, the unit tests have been expanded and the inline docs have been improved as well.

This change reintroduces `%i` placeholder support in `$wpdb->prepare()` to give extenders the ability to safely escape table and column names in database queries.

Follow-up to [53575] and [54734].

Props craigfrancis, jrf, xknown, costdev, ironprogrammer, SergeyBiryukov.
Fixes #52506.
Built from https://develop.svn.wordpress.org/trunk@55151


git-svn-id: http://core.svn.wordpress.org/trunk@54684 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
davidbaumwald 2023-01-27 18:49:16 +00:00
parent d4085bc90d
commit d25ae5dd33
2 changed files with 283 additions and 37 deletions

View File

@ -654,6 +654,45 @@ class wpdb {
'ANSI',
);
/**
* Backward compatibility, where wpdb::prepare() has not quoted formatted/argnum placeholders.
*
* This is often used for table/field names (before %i was supported), and sometimes string formatting, e.g.
*
* $wpdb->prepare( 'WHERE `%1$s` = "%2$s something %3$s" OR %1$s = "%4$-10s"', 'field_1', 'a', 'b', 'c' );
*
* But it's risky, e.g. forgetting to add quotes, resulting in SQL Injection vulnerabilities:
*
* $wpdb->prepare( 'WHERE (id = %1s) OR (id = %2$s)', $_GET['id'], $_GET['id'] ); // ?id=id
*
* This feature is preserved while plugin authors update their code to use safer approaches:
*
* $_GET['key'] = 'a`b';
*
* $wpdb->prepare( 'WHERE %1s = %s', $_GET['key'], $_GET['value'] ); // WHERE a`b = 'value'
* $wpdb->prepare( 'WHERE `%1$s` = "%2$s"', $_GET['key'], $_GET['value'] ); // WHERE `a`b` = "value"
*
* $wpdb->prepare( 'WHERE %i = %s', $_GET['key'], $_GET['value'] ); // WHERE `a``b` = 'value'
*
* While changing to false will be fine for queries not using formatted/argnum placeholders,
* any remaining cases are most likely going to result in SQL errors (good, in a way):
*
* $wpdb->prepare( 'WHERE %1$s = "%2$-10s"', 'my_field', 'my_value' );
* true = WHERE my_field = "my_value "
* false = WHERE 'my_field' = "'my_value '"
*
* But there may be some queries that result in an SQL Injection vulnerability:
*
* $wpdb->prepare( 'WHERE id = %1$s', $_GET['id'] ); // ?id=id
*
* So there may need to be a `_doing_it_wrong()` phase, after we know everyone can use
* identifier placeholders (%i), but before this feature is disabled or removed.
*
* @since 6.2.0
* @var bool
*/
private $allow_unsafe_unquoted_parameters = true;
/**
* Whether to use mysqli over mysql. Default false.
*
@ -763,6 +802,7 @@ class wpdb {
'col_meta',
'table_charset',
'check_current_query',
'allow_unsafe_unquoted_parameters',
);
if ( in_array( $name, $protected_members, true ) ) {
return;
@ -1362,6 +1402,36 @@ class wpdb {
}
}
/**
* Quotes an identifier for a MySQL database, e.g. table/field names.
*
* @since 6.2.0
*
* @param string $identifier Identifier to escape.
* @return string Escaped identifier.
*/
public function quote_identifier( $identifier ) {
return '`' . $this->_escape_identifier_value( $identifier ) . '`';
}
/**
* Escapes an identifier value without adding the surrounding quotes.
*
* - Permitted characters in quoted identifiers include the full Unicode
* Basic Multilingual Plane (BMP), except U+0000.
* - To quote the identifier itself, you need to double the character, e.g. `a``b`.
*
* @since 6.2.0
*
* @link https://dev.mysql.com/doc/refman/8.0/en/identifiers.html
*
* @param string $identifier Identifier to escape.
* @return string Escaped identifier.
*/
private function _escape_identifier_value( $identifier ) {
return str_replace( '`', '``', $identifier );
}
/**
* Prepares a SQL query for safe execution.
*
@ -1370,6 +1440,7 @@ class wpdb {
* - %d (integer)
* - %f (float)
* - %s (string)
* - %i (identifier, e.g. table/field names)
*
* All placeholders MUST be left unquoted in the query string. A corresponding argument
* MUST be passed for each placeholder.
@ -1402,6 +1473,10 @@ class wpdb {
* @since 5.3.0 Formalized the existing and already documented `...$args` parameter
* by updating the function signature. The second parameter was changed
* from `$args` to `...$args`.
* @since 6.2.0 Added `%i` for identifiers, e.g. table or field names.
* Check support via `wpdb::has_cap( 'identifier_placeholders' )`.
* This preserves compatibility with sprintf(), as the C version uses
* `%d` and `$i` as a signed integer, whereas PHP only supports `%d`.
*
* @link https://www.php.net/sprintf Description of syntax.
*
@ -1433,28 +1508,6 @@ class wpdb {
);
}
// If args were passed as an array (as in vsprintf), move them up.
$passed_as_array = false;
if ( isset( $args[0] ) && is_array( $args[0] ) && 1 === count( $args ) ) {
$passed_as_array = true;
$args = $args[0];
}
foreach ( $args as $arg ) {
if ( ! is_scalar( $arg ) && ! is_null( $arg ) ) {
wp_load_translations_early();
_doing_it_wrong(
'wpdb::prepare',
sprintf(
/* translators: %s: Value type. */
__( 'Unsupported value type (%s).' ),
gettype( $arg )
),
'4.8.2'
);
}
}
/*
* Specify the formatting allowed in a placeholder. The following are allowed:
*
@ -1475,20 +1528,169 @@ class wpdb {
*/
$query = str_replace( "'%s'", '%s', $query ); // Strip any existing single quotes.
$query = str_replace( '"%s"', '%s', $query ); // Strip any existing double quotes.
$query = preg_replace( '/(?<!%)%s/', "'%s'", $query ); // Quote the strings, avoiding escaped strings like %%s.
$query = preg_replace( "/(?<!%)(%($allowed_format)?f)/", '%\\2F', $query ); // Force floats to be locale-unaware.
// Escape any unescaped percents (i.e. anything unrecognised).
$query = preg_replace( "/%(?:%|$|(?!($allowed_format)?[sdfFi]))/", '%%\\1', $query );
$query = preg_replace( "/%(?:%|$|(?!($allowed_format)?[sdF]))/", '%%\\1', $query ); // Escape any unescaped percents.
// Extract placeholders from the query.
$split_query = preg_split( "/(^|[^%]|(?:%%)+)(%(?:$allowed_format)?[sdfFi])/", $query, -1, PREG_SPLIT_DELIM_CAPTURE );
// Count the number of valid placeholders in the query.
$placeholders = preg_match_all( "/(^|[^%]|(%%)+)%($allowed_format)?[sdF]/", $query, $matches );
$split_query_count = count( $split_query );
/*
* Split always returns with 1 value before the first placeholder (even with $query = "%s"),
* then 3 additional values per placeholder.
*/
$placeholder_count = ( ( $split_query_count - 1 ) / 3 );
// If args were passed as an array, as in vsprintf(), move them up.
$passed_as_array = ( isset( $args[0] ) && is_array( $args[0] ) && 1 === count( $args ) );
if ( $passed_as_array ) {
$args = $args[0];
}
$new_query = '';
$key = 2; // Keys 0 and 1 in $split_query contain values before the first placeholder.
$arg_id = 0;
$arg_identifiers = array();
$arg_strings = array();
while ( $key < $split_query_count ) {
$placeholder = $split_query[ $key ];
$format = substr( $placeholder, 1, -1 );
$type = substr( $placeholder, -1 );
if ( 'f' === $type && true === $this->allow_unsafe_unquoted_parameters && str_ends_with( $split_query[ $key - 1 ], '%' ) ) {
/*
* Before WP 6.2 the "force floats to be locale-unaware" RegEx didn't
* convert "%%%f" to "%%%F" (note the uppercase F).
* This was because it didn't check to see if the leading "%" was escaped.
* And because the "Escape any unescaped percents" RegEx used "[sdF]" in its
* negative lookahead assertion, when there was an odd number of "%", it added
* an extra "%", to give the fully escaped "%%%%f" (not a placeholder).
*/
$s = $split_query[ $key - 2 ] . $split_query[ $key - 1 ];
$k = 1;
$l = strlen( $s );
while ( $k <= $l && '%' === $s[ $l - $k ] ) {
$k++;
}
$placeholder = '%' . ( $k % 2 ? '%' : '' ) . $format . $type;
--$placeholder_count;
} else {
// Force floats to be locale-unaware.
if ( 'f' === $type ) {
$type = 'F';
$placeholder = '%' . $format . $type;
}
if ( 'i' === $type ) {
$placeholder = '`%' . $format . 's`';
// Using a simple strpos() due to previous checking (e.g. $allowed_format).
$argnum_pos = strpos( $format, '$' );
if ( false !== $argnum_pos ) {
// sprintf() argnum starts at 1, $arg_id from 0.
$arg_identifiers[] = ( ( (int) substr( $format, 0, $argnum_pos ) ) - 1 );
} else {
$arg_identifiers[] = $arg_id;
}
} elseif ( 'd' !== $type && 'F' !== $type ) {
/*
* i.e. ( 's' === $type ), where 'd' and 'F' keeps $placeholder unchanged,
* and we ensure string escaping is used as a safe default (e.g. even if 'x').
*/
$argnum_pos = strpos( $format, '$' );
if ( false !== $argnum_pos ) {
$arg_strings[] = ( ( (int) substr( $format, 0, $argnum_pos ) ) - 1 );
} else {
$arg_strings[] = $arg_id;
}
/*
* Unquoted strings for backward compatibility (dangerous).
* First, "numbered or formatted string placeholders (eg, %1$s, %5s)".
* Second, if "%s" has a "%" before it, even if it's unrelated (e.g. "LIKE '%%%s%%'").
*/
if ( true !== $this->allow_unsafe_unquoted_parameters || ( '' === $format && ! str_ends_with( $split_query[ $key - 1 ], '%' ) ) ) {
$placeholder = "'%" . $format . "s'";
}
}
}
// Glue (-2), any leading characters (-1), then the new $placeholder.
$new_query .= $split_query[ $key - 2 ] . $split_query[ $key - 1 ] . $placeholder;
$key += 3;
$arg_id++;
}
// Replace $query; and add remaining $query characters, or index 0 if there were no placeholders.
$query = $new_query . $split_query[ $key - 2 ];
$dual_use = array_intersect( $arg_identifiers, $arg_strings );
if ( count( $dual_use ) > 0 ) {
wp_load_translations_early();
$used_placeholders = array();
$key = 2;
$arg_id = 0;
// Parse again (only used when there is an error).
while ( $key < $split_query_count ) {
$placeholder = $split_query[ $key ];
$format = substr( $placeholder, 1, -1 );
$argnum_pos = strpos( $format, '$' );
if ( false !== $argnum_pos ) {
$arg_pos = ( ( (int) substr( $format, 0, $argnum_pos ) ) - 1 );
} else {
$arg_pos = $arg_id;
}
$used_placeholders[ $arg_pos ][] = $placeholder;
$key += 3;
$arg_id++;
}
$conflicts = array();
foreach ( $dual_use as $arg_pos ) {
$conflicts[] = implode( ' and ', $used_placeholders[ $arg_pos ] );
}
_doing_it_wrong(
'wpdb::prepare',
sprintf(
/* translators: %s: A list of placeholders found to be a problem. */
__( 'Arguments cannot be prepared as both an Identifier and Value. Found the following conflicts: %s' ),
implode( ', ', $conflicts )
),
'6.2.0'
);
return;
}
$args_count = count( $args );
if ( $args_count !== $placeholders ) {
if ( 1 === $placeholders && $passed_as_array ) {
// If the passed query only expected one argument, but the wrong number of arguments were sent as an array, bail.
if ( $args_count !== $placeholder_count ) {
if ( 1 === $placeholder_count && $passed_as_array ) {
/*
* If the passed query only expected one argument,
* but the wrong number of arguments was sent as an array, bail.
*/
wp_load_translations_early();
_doing_it_wrong(
'wpdb::prepare',
@ -1509,7 +1711,7 @@ class wpdb {
sprintf(
/* translators: 1: Number of placeholders, 2: Number of arguments passed. */
__( 'The query does not contain the correct number of placeholders (%1$d) for the number of arguments passed (%2$d).' ),
$placeholders,
$placeholder_count,
$args_count
),
'4.8.3'
@ -1519,8 +1721,17 @@ class wpdb {
* If we don't have enough arguments to match the placeholders,
* return an empty string to avoid a fatal error on PHP 8.
*/
if ( $args_count < $placeholders ) {
$max_numbered_placeholder = ! empty( $matches[3] ) ? max( array_map( 'intval', $matches[3] ) ) : 0;
if ( $args_count < $placeholder_count ) {
$max_numbered_placeholder = 0;
for ( $i = 2, $l = $split_query_count; $i < $l; $i += 3 ) {
// Assume a leading number is for a numbered placeholder, e.g. '%3$s'.
$argnum = (int) substr( $split_query[ $i ], 1 );
if ( $max_numbered_placeholder < $argnum ) {
$max_numbered_placeholder = $argnum;
}
}
if ( ! $max_numbered_placeholder || $args_count < $max_numbered_placeholder ) {
return '';
@ -1529,8 +1740,35 @@ class wpdb {
}
}
array_walk( $args, array( $this, 'escape_by_ref' ) );
$query = vsprintf( $query, $args );
$args_escaped = array();
foreach ( $args as $i => $value ) {
if ( in_array( $i, $arg_identifiers, true ) ) {
$args_escaped[] = $this->_escape_identifier_value( $value );
} elseif ( is_int( $value ) || is_float( $value ) ) {
$args_escaped[] = $value;
} else {
if ( ! is_scalar( $value ) && ! is_null( $value ) ) {
wp_load_translations_early();
_doing_it_wrong(
'wpdb::prepare',
sprintf(
/* translators: %s: Value type. */
__( 'Unsupported value type (%s).' ),
gettype( $value )
),
'4.8.2'
);
// Preserving old behavior, where values are escaped as strings.
$value = '';
}
$args_escaped[] = $this->_real_escape( $value );
}
}
$query = vsprintf( $query, $args_escaped );
return $this->add_placeholder_escape( $query );
}
@ -3779,11 +4017,13 @@ class wpdb {
* @since 2.7.0
* @since 4.1.0 Added support for the 'utf8mb4' feature.
* @since 4.6.0 Added support for the 'utf8mb4_520' feature.
* @since 6.2.0 Added support for the 'identifier_placeholders' feature.
*
* @see wpdb::db_version()
*
* @param string $db_cap The feature to check for. Accepts 'collation', 'group_concat',
* 'subqueries', 'set_charset', 'utf8mb4', or 'utf8mb4_520'.
* 'subqueries', 'set_charset', 'utf8mb4', 'utf8mb4_520',
* or 'identifier_placeholders'.
* @return bool True when the database feature is supported, false otherwise.
*/
public function has_cap( $db_cap ) {
@ -3828,6 +4068,12 @@ class wpdb {
}
case 'utf8mb4_520': // @since 4.6.0
return version_compare( $db_version, '5.6', '>=' );
case 'identifier_placeholders': // @since 6.2.0
/*
* As of WordPress 6.2, wpdb::prepare() supports identifiers via '%i',
* e.g. table/field names.
*/
return true;
}
return false;

View File

@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.2-alpha-55150';
$wp_version = '6.2-alpha-55151';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.