Refactor filters to avoid potential XSS attacks, props sambauers and DD32, see #8767

git-svn-id: http://svn.automattic.com/wordpress/trunk@10297 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
azaozz 2009-01-04 22:25:50 +00:00
parent 60de32b047
commit 19848b9d90
2 changed files with 149 additions and 27 deletions

View File

@ -96,11 +96,14 @@ function _mb_strcut( $str, $start, $length=null, $encoding=null ) {
return implode( '', $chars );
}
if ( !function_exists( 'htmlspecialchars_decode' ) ) {
	/**
	 * Fallback for htmlspecialchars_decode(), which PHP only provides natively from 5.1.0.
	 *
	 * Based on the php.net example (modified by Sam Bauers to deal with some
	 * quirks in the HTML_SPECIALCHARS translation table).
	 *
	 * @param string $str         The text to decode.
	 * @param int    $quote_style Optional. One of ENT_COMPAT, ENT_QUOTES or ENT_NOQUOTES. Default ENT_COMPAT.
	 * @return string The text with special-character entities converted back to characters.
	 */
	function htmlspecialchars_decode( $str, $quote_style = ENT_COMPAT ) {
		// Flip the character => entity table so that it maps entity => character.
		$table = array_flip( get_html_translation_table( HTML_SPECIALCHARS, $quote_style ) );
		// Add the numeric single-quote entity and both ampersand spellings on top of
		// whatever the built-in table contains; later arrays win on duplicate keys.
		$table = array_merge( array( '&#039;' => "'" ), $table, array( '&amp;' => "&", '&#038;' => "&" ) );
		return strtr( $str, $table );
	}
}
?>

View File

@ -186,34 +186,152 @@ function seems_utf8($Str) { # by bmorel at ssi dot fr
/**
 * Converts a number of special characters into their HTML entities.
 *
 * Thin wrapper around htmlspecialchars(): encodes &, < and > and, depending on
 * $quote_style, double and/or single quotes. By default existing HTML entities
 * are left alone instead of being encoded a second time.
 *
 * $quote_style can be set to ENT_COMPAT to encode " to &quot;, or ENT_QUOTES to
 * also encode ' to &#039;. Default is ENT_NOQUOTES where no quotes are encoded.
 *
 * @since 1.2.2
 *
 * @param string $string The text which is to be encoded.
 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double', none for any empty value, or both if otherwise set (including boolean true). Default is ENT_NOQUOTES.
 * @param string $charset Optional. The character encoding of the string. Default is false, in which case the 'blog_charset' entry of wp_load_alloptions() is used.
 * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
 * @return string The encoded text with HTML entities.
 */
function wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
	if ( 0 === strlen( $string ) ) {
		return '';
	}

	if ( !$charset ) {
		$alloptions = wp_load_alloptions();
		$charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
	}
	// Normalise the common UTF-8 spellings to the name htmlspecialchars() expects.
	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
		$charset = 'UTF-8';
	}

	// Backwards compatibility: map legacy $quote_style values onto ENT_* flags.
	// Deliberately not a switch: switch compares loosely, so boolean true would
	// match ENT_COMPAT (2) and, before PHP 8, every non-numeric string would
	// match ENT_NOQUOTES (0) before the legacy cases were ever reached.
	$single_only = ( 'single' === $quote_style );
	$is_number   = is_numeric( $quote_style );
	if ( $single_only || empty( $quote_style ) || ( $is_number && ENT_NOQUOTES == $quote_style ) ) {
		// 'single' is emulated below: encode no quotes here, then replace ' by hand.
		$_quote_style = ENT_NOQUOTES;
	} elseif ( 'double' === $quote_style || ( $is_number && ENT_COMPAT == $quote_style ) ) {
		$_quote_style = ENT_COMPAT;
	} else {
		// ENT_QUOTES itself, plus every other "set" legacy value (true, 1, '1', ...).
		$_quote_style = ENT_QUOTES;
	}

	if ( version_compare( PHP_VERSION, '5.2.3', '>=' ) ) {
		$string = htmlspecialchars( $string, $_quote_style, $charset, $double_encode );
	} else {
		// Handle double encoding for PHP versions that don't support it in htmlspecialchars()
		if ( !$double_encode ) {
			// Decode first so that already-encoded entities are not encoded twice.
			$string = htmlspecialchars_decode( $string, $_quote_style );
			// Backwards compatibility
			if ( $single_only ) {
				$string = str_replace( array( '&#039;', '&#39;' ), "'", $string );
			}
		}
		$string = htmlspecialchars( $string, $_quote_style, $charset );
	}

	// Backwards compatibility: 'single' encodes single quotes only.
	if ( $single_only ) {
		$string = str_replace( "'", '&#039;', $string );
	}

	return $string;
}
/**
 * Encodes every character that has an HTML entity equivalent.
 *
 * Wrapper around htmlentities(). $quote_style is passed straight through:
 * ENT_COMPAT encodes " to &quot;, ENT_QUOTES encodes both quote characters and
 * ENT_NOQUOTES (the default) leaves quotes untouched.
 *
 * @since 2.8
 *
 * @param string $string The text which is to be encoded.
 * @param mixed $quote_style Optional. ENT_COMPAT, ENT_QUOTES or ENT_NOQUOTES. Default is ENT_NOQUOTES.
 * @param string $charset Optional. The character encoding of the string. Default is false, in which case the 'blog_charset' option is used.
 * @param boolean $double_encode Optional. Whether or not to encode existing html entities. Default is false.
 * @return string The encoded text with HTML entities.
 */
function wp_entities( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
	// Nothing to encode.
	if ( strlen( $string ) === 0 ) {
		return '';
	}

	// Fall back to the blog's configured charset when none was supplied.
	if ( !$charset ) {
		$charset = get_option( 'blog_charset' );
	}

	// Collapse the common UTF-8 spellings into the canonical name.
	$utf8_spellings = array( 'utf8', 'utf-8', 'UTF8' );
	$charset = in_array( $charset, $utf8_spellings ) ? 'UTF-8' : $charset;

	// htmlentities() understands the $double_encode argument from PHP 5.2.3 on.
	$native_double_encode = version_compare( PHP_VERSION, '5.2.3', '>=' );
	if ( $native_double_encode ) {
		return htmlentities( $string, $quote_style, $charset, $double_encode );
	}

	// Older PHP: emulate "don't double encode" by decoding existing entities first.
	if ( !$double_encode ) {
		// Multi-byte charsets are not supported below PHP 5.0.0
		// 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2
		$string = html_entity_decode( $string, $quote_style, $charset );
	}

	// 'cp866', 'cp1251', 'KOI8-R' charsets are not supported below PHP 4.3.2
	return htmlentities( $string, $quote_style, $charset );
}
/**
 * Checks for invalid UTF8 in a string.
 *
 * Only acts when the 'blog_charset' option names UTF-8; for any other charset
 * the string is returned untouched.
 *
 * @since 2.8
 *
 * @param string $string The text which is to be checked.
 * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
 * @return string The checked text; an empty string if it is invalid and could not be (or was not asked to be) stripped.
 */
function wp_check_invalid_utf8( $string, $strip = false ) {
	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Validation only makes sense when the blog itself is UTF-8.
	if ( !in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
		return $string;
	}

	// preg_match fails when it encounters invalid UTF8 in $string
	if ( 1 === @preg_match( '@^.@us', $string ) ) {
		return $string;
	}

	// Invalid UTF8: try to salvage the text through iconv if requested and available.
	if ( $strip && function_exists( 'iconv' ) ) {
		$converted = iconv( 'utf-8', 'utf-8', $string );
		// iconv() returns false on failure; keep the documented string return type.
		return ( false === $converted ) ? '' : $converted;
	}

	return '';
}
/**
@ -1756,9 +1874,10 @@ function js_escape($text) {
* @param string $text
* @return string
*/
function attribute_escape( $text ) {
	// Reject (empty out) text that is not valid UTF8 before encoding it.
	$safe_text = wp_check_invalid_utf8( $text );
	// Encode all entity-capable characters, including both quote characters.
	$safe_text = wp_entities( $safe_text, ENT_QUOTES );
	// Filters receive the escaped text plus the original, unescaped input.
	return apply_filters( 'attribute_escape', $safe_text, $text );
}
/**