Whitelist entities. Props miqrogroove. see #12284

git-svn-id: http://svn.automattic.com/wordpress/trunk@13358 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
ryan 2010-02-24 05:37:20 +00:00
parent efe8a967be
commit 0a81feaaeb
2 changed files with 279 additions and 3 deletions

View File

@ -334,6 +334,12 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals
// Handle double encoding ourselves
if ( !$double_encode ) {
$string = wp_specialchars_decode( $string, $_quote_style );
/* Critical */
// The previous line decodes &amp;phrase; into &phrase; We must guarantee that &phrase; is valid before proceeding.
$string = wp_kses_normalize_entities($string);
// Now proceed with custom double-encoding silliness
$string = preg_replace( '/&(#?x?[0-9a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string );
}

View File

@ -333,6 +333,255 @@ if (!CUSTOM_TAGS) {
// 'u' => array(),
// 'ul' => array(),
);
/*
 * Whitelist of named character references that may pass through unescaped.
 *
 * Names are case-sensitive ('Dagger' and 'dagger' are distinct entries) and
 * are stored without the leading '&' or trailing ';'.
 *
 * NOTE: no name containing a digit (sup1, sup2, sup3, frac14, frac12, frac34,
 * there4) appears in this list.
 */
$allowedentitynames = array(
	// Punctuation, currency and typographic signs.
	'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
	'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
	'deg', 'plusmn', 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
	'raquo', 'iquest',

	// Upper-case accented Latin letters (plus 'times' and 'szlig').
	'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
	'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
	'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
	'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',

	// Lower-case accented Latin letters (plus 'divide').
	'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
	'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
	'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
	'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml',

	// Markup-significant characters.
	'quot', 'amp', 'lt', 'gt', 'apos',

	// Extended Latin letters, spacing, directional marks, dashes and quotes.
	'OElig', 'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde',
	'ensp', 'emsp', 'thinsp', 'zwnj', 'zwj', 'lrm', 'rlm',
	'ndash', 'mdash', 'lsquo', 'rsquo', 'sbquo', 'ldquo', 'rdquo', 'bdquo',
	'dagger', 'Dagger', 'permil', 'lsaquo', 'rsaquo', 'euro',

	'fnof',

	// Upper-case Greek letters.
	'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta', 'Eta', 'Theta',
	'Iota', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Xi', 'Omicron', 'Pi',
	'Rho', 'Sigma', 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

	// Lower-case Greek letters and Greek symbol variants.
	'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'eta', 'theta',
	'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi', 'omicron', 'pi',
	'rho', 'sigmaf', 'sigma', 'tau', 'upsilon', 'phi', 'chi', 'psi',
	'omega', 'thetasym', 'upsih', 'piv',

	// General punctuation and letter-like symbols.
	'bull', 'hellip', 'prime', 'Prime', 'oline', 'frasl',
	'weierp', 'image', 'real', 'trade', 'alefsym',

	// Arrows.
	'larr', 'uarr', 'rarr', 'darr', 'harr', 'crarr',
	'lArr', 'uArr', 'rArr', 'dArr', 'hArr',

	// Mathematical operators.
	'forall', 'part', 'exist', 'empty', 'nabla', 'isin', 'notin', 'ni',
	'prod', 'sum', 'minus', 'lowast', 'radic', 'prop', 'infin', 'ang',
	'and', 'or', 'cap', 'cup', 'int', 'sim', 'cong', 'asymp',
	'ne', 'equiv', 'le', 'ge', 'sub', 'sup', 'nsub', 'sube',
	'supe', 'oplus', 'otimes', 'perp', 'sdot',

	// Brackets, lozenge and card suits.
	'lceil', 'rceil', 'lfloor', 'rfloor', 'lang', 'rang',
	'loz', 'spades', 'clubs', 'hearts', 'diams'
);
}
/**
@ -952,13 +1201,34 @@ function wp_kses_normalize_entities($string) {
# Change back the allowed entities in our entity whitelist
$string = preg_replace('/&([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
$string = preg_replace_callback('/&([A-Za-z]{2,8});/', 'wp_kses_named_entities', $string);
$string = preg_replace_callback('/&#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
$string = preg_replace_callback('/&#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);
return $string;
}
/**
 * Callback for wp_kses_normalize_entities() regular expression.
 *
 * Receives one candidate named entity whose bare name (no '&', no ';') is in
 * capture group 1. The name is compared case-sensitively against the global
 * $allowedentitynames whitelist:
 *
 *  - whitelisted name  -> returned as a live entity reference, "&name;"
 *  - any other name    -> returned with its ampersand escaped, "&amp;name;",
 *                         so it is displayed literally instead of being
 *                         interpreted as an entity.
 *
 * This function only accepts valid named entity references, which are finite,
 * case-sensitive, and highly scrutinized by HTML and XML validators.
 *
 * @since 3.0.0
 *
 * @param array $matches preg_replace_callback() matches array
 * @return string Correctly encoded entity, or '' when no name was captured
 */
function wp_kses_named_entities($matches) {
	global $allowedentitynames;

	if ( empty($matches[1]) )
		return '';

	$i = $matches[1];

	// Fail closed: an unknown name, or a missing/invalid whitelist, must never
	// yield a live entity. Strict comparison avoids loose type juggling.
	if ( ! is_array($allowedentitynames) || ! in_array($i, $allowedentitynames, true) )
		return "&amp;$i;";

	return "&$i;";
}
/**
* Callback for wp_kses_normalize_entities() regular expression.
*
@ -972,7 +1242,7 @@ function wp_kses_normalize_entities($string) {
* @return string Correctly encoded entity
*/
function wp_kses_normalize_entities2($matches) {
if ( ! isset($matches[1]) || empty($matches[1]) )
if ( empty($matches[1]) )
return '';
$i = $matches[1];
@ -991,7 +1261,7 @@ function wp_kses_normalize_entities2($matches) {
* @return string Correctly encoded entity
*/
function wp_kses_normalize_entities3($matches) {
if ( ! isset($matches[2]) || empty($matches[2]) )
if ( empty($matches[2]) )
return '';
$hexchars = $matches[2];