From 19848b9d90b63469d3c70b7e79400d10795601d0 Mon Sep 17 00:00:00 2001
From: azaozz
Date: Sun, 4 Jan 2009 22:25:50 +0000
Subject: [PATCH] Refactor filters to avoid potential XSS attacks, props sambauers and DD32, see #8767

git-svn-id: http://svn.automattic.com/wordpress/trunk@10297 1a063a9b-81f0-0310-95a4-ce76da25c4cd
---
 wp-includes/compat.php     |  11 ++-
 wp-includes/formatting.php | 165 +++++++++++++++++++++++++++++++------
 2 files changed, 149 insertions(+), 27 deletions(-)

diff --git a/wp-includes/compat.php b/wp-includes/compat.php
index aa5ab941aa..d4ef4f11e8 100644
--- a/wp-includes/compat.php
+++ b/wp-includes/compat.php
@@ -96,11 +96,14 @@ function _mb_strcut( $str, $start, $length=null, $encoding=null ) {
 	return implode( '', $chars );
 }
 
-// from php.net
-if ( !function_exists('htmlspecialchars_decode') ) {
+if ( !function_exists( 'htmlspecialchars_decode' ) ) {
+	// Added in PHP 5.1.0
+	// from php.net (modified by Sam Bauers to deal with some quirks in HTML_SPECIALCHARS constant)
 	function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT ) {
-		return strtr( $str, array_flip( get_html_translation_table(HTML_SPECIALCHARS, $quote_style) ) );
-	}
+		$table = array_flip( get_html_translation_table( HTML_SPECIALCHARS, $quote_style ) );
+		$table = array_merge( array( '&#039;' => "'" ), $table, array( '&#38;' => "&", '&#038;' => "&" ) );
+		return strtr( $str, $table );
+	}
 }
 
 ?>
diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php
index a18c6b7a3d..a0f7c9108a 100644
--- a/wp-includes/formatting.php
+++ b/wp-includes/formatting.php
@@ -186,34 +186,152 @@ function seems_utf8($Str) { # by bmorel at ssi dot fr
 /**
  * Converts a number of special characters into their HTML entities.
  *
- * Differs from htmlspecialchars as existing HTML entities will not be encoded.
  * Specifically changes: & to &#038;, < to &lt; and > to &gt;.
  *
- * $quotes can be set to 'single' to encode ' to &#039;, 'double' to encode " to
- * &quot;, or '1' to do both. Default is 0 where no quotes are encoded.
+ * $quote_style can be set to ENT_COMPAT to encode " to
+ * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
  *
  * @since 1.2.2
  *
- * @param string $text The text which is to be encoded.
- * @param mixed $quotes Optional. Converts single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default 0.
+ * @param string $string The text which is to be encoded.
+ * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
+ * @param string $charset Optional. The character encoding of the string. Default is false.
+ * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
  * @return string The encoded text with HTML entities.
  */
-function wp_specialchars( $text, $quotes = 0 ) {
-	// Like htmlspecialchars except don't double-encode HTML entities
-	$text = str_replace('&&', '&#038;&', $text);
-	$text = str_replace('&&', '&#038;&', $text);
-	$text = preg_replace('/&(?:$|([^#])(?![a-z1-4]{1,8};))/', '&#038;$1', $text);
-	$text = str_replace('<', '&lt;', $text);
-	$text = str_replace('>', '&gt;', $text);
-	if ( 'double' === $quotes ) {
-		$text = str_replace('"', '&quot;', $text);
-	} elseif ( 'single' === $quotes ) {
-		$text = str_replace("'", '&#039;', $text);
-	} elseif ( $quotes ) {
-		$text = str_replace('"', '&quot;', $text);
-		$text = str_replace("'", '&#039;', $text);
+function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
+	if ( 0 === strlen( $string ) ) {
+		return '';
+	}
+
+	if ( !$charset ) {
+		$alloptions = wp_load_alloptions();
+		$charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
+	}
+	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
+		$charset = 'UTF-8';
+	}
+
+	// Backwards compatibility
+	switch ( $quote_style ) {
+		// Handle expected values first for speed
+		case ENT_NOQUOTES:
+			$_quote_style = ENT_NOQUOTES;
+			break;
+		case ENT_COMPAT:
+			$_quote_style = ENT_COMPAT;
+			break;
+		case ENT_QUOTES:
+			$_quote_style = ENT_QUOTES;
+			break;
+		// Old values
+		case false:
+		case 0:
+		case '':
+		case null:
+		case 'single':
+			$_quote_style = ENT_NOQUOTES;
+			break;
+		case 'double':
+			$_quote_style = ENT_COMPAT;
+			break;
+		default:
+			$_quote_style = ENT_QUOTES;
+			break;
+	}
+
+	if ( version_compare( PHP_VERSION, '5.2.3', '>=' ) ) {
+		$string = htmlspecialchars( $string, $_quote_style, $charset, $double_encode );
+	} else {
+		// Handle double encoding for PHP versions that don't support it in htmlspecialchars()
+		if ( !$double_encode ) {
+			$string = htmlspecialchars_decode( $string, $_quote_style );
+			// Backwards compatibility
+			if ( 'single' === $quote_style ) {
+				$string = str_replace( array( '&#039;', '&#39;' ), "'", $string );
+			}
+		}
+		$string = htmlspecialchars( $string, $_quote_style, $charset );
+	}
+
+	// Backwards compatibility
+	if ( 'single' === $quote_style ) {
+		$string = str_replace( "'", '&#039;', $string );
+	}
+
+	return $string;
+}
+
+/**
+ * Converts all special characters into their HTML entities.
+ *
+ * $quote_style can be set to ENT_COMPAT to encode " to
+ * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
+ *
+ * @since 2.8
+ *
+ * @param string $string The text which is to be encoded.
+ * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Default is ENT_NOQUOTES.
+ * @param string $charset Optional. The character encoding of the string. Default is false.
+ * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
+ * @return string The encoded text with HTML entities.
+ */
+function wp_entities( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
+	if ( 0 === strlen( $string ) ) {
+		return '';
+	}
+
+	if ( !$charset ) {
+		$charset = get_option( 'blog_charset' );
+	}
+	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
+		$charset = 'UTF-8';
+	}
+
+	if ( version_compare( PHP_VERSION, '5.2.3', '>=' ) ) {
+		$string = htmlentities( $string, $quote_style, $charset, $double_encode );
+	} else {
+		// Handle double encoding for PHP versions that don't support it in htmlentities()
+		if ( !$double_encode ) {
+			// Multi-byte charsets are not supported below PHP 5.0.0
+			// 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2
+			$string = html_entity_decode( $string, $quote_style, $charset );
+		}
+		// 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2
+		$string = htmlentities( $string, $quote_style, $charset );
+	}
+
+	return $string;
+}
+
+/**
+ * Checks for invalid UTF8 in a string.
+ *
+ * @since 2.8
+ *
+ * @param string $string The text which is to be checked.
+ * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
+ * @return string The checked text.
+ */
+function wp_check_invalid_utf8( $string, $strip = false ) {
+	if ( 0 === strlen( $string ) ) {
+		return '';
+	}
+
+	if ( !in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
+		return $string;
+	}
+
+	// preg_match fails when it encounters invalid UTF8 in $string
+	if ( 1 === @preg_match( '@^.@us', $string ) ) {
+		return $string;
+	}
+
+	if ( $strip && function_exists( 'iconv' ) ) {
+		return iconv( 'utf-8', 'utf-8', $string );
+	} else {
+		return '';
 	}
-	return $text;
 }
 
 /**
@@ -1756,9 +1874,10 @@ function js_escape($text) {
  * @param string $text
  * @return string
  */
-function attribute_escape($text) {
-	$safe_text = wp_specialchars($text, true);
-	return apply_filters('attribute_escape', $safe_text, $text);
+function attribute_escape( $text ) {
+	$safe_text = wp_check_invalid_utf8( $text );
+	$safe_text = wp_entities( $safe_text, ENT_QUOTES );
+	return apply_filters( 'attribute_escape', $safe_text, $text );
 }
 
 /**