From 0a81feaaebb33a67184240af9cf04483631e4667 Mon Sep 17 00:00:00 2001
From: ryan
Date: Wed, 24 Feb 2010 05:37:20 +0000
Subject: [PATCH] Whitelist entities. Props miqrogroove. see #12284

git-svn-id: http://svn.automattic.com/wordpress/trunk@13358 1a063a9b-81f0-0310-95a4-ce76da25c4cd
---
 wp-includes/formatting.php |   6 +
 wp-includes/kses.php       | 276 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 279 insertions(+), 3 deletions(-)

diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php
index 5a009bae64..2509e04ad2 100644
--- a/wp-includes/formatting.php
+++ b/wp-includes/formatting.php
@@ -334,6 +334,12 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals
 	// Handle double encoding ourselves
 	if ( !$double_encode ) {
 		$string = wp_specialchars_decode( $string, $_quote_style );
+
+		/* Critical */
+		// The previous line decodes &amp;phrase; into &phrase; We must guarantee that &phrase; is valid before proceeding.
+		$string = wp_kses_normalize_entities($string);
+
+		// Now proceed with custom double-encoding silliness
 		$string = preg_replace( '/&(#?x?[0-9a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string );
 	}
 
diff --git a/wp-includes/kses.php b/wp-includes/kses.php
index b2f4748a32..eac8191a64 100644
--- a/wp-includes/kses.php
+++ b/wp-includes/kses.php
@@ -333,6 +333,255 @@ if (!CUSTOM_TAGS) {
 		// 'u' => array(),
 		// 'ul' => array(),
 	);
+
+	$allowedentitynames = array(
+		'nbsp',
+		'iexcl',
+		'cent',
+		'pound',
+		'curren',
+		'yen',
+		'brvbar',
+		'sect',
+		'uml',
+		'copy',
+		'ordf',
+		'laquo',
+		'not',
+		'shy',
+		'reg',
+		'macr',
+		'deg',
+		'plusmn',
+		'acute',
+		'micro',
+		'para',
+		'middot',
+		'cedil',
+		'ordm',
+		'raquo',
+		'iquest',
+		'Agrave',
+		'Aacute',
+		'Acirc',
+		'Atilde',
+		'Auml',
+		'Aring',
+		'AElig',
+		'Ccedil',
+		'Egrave',
+		'Eacute',
+		'Ecirc',
+		'Euml',
+		'Igrave',
+		'Iacute',
+		'Icirc',
+		'Iuml',
+		'ETH',
+		'Ntilde',
+		'Ograve',
+		'Oacute',
+		'Ocirc',
+		'Otilde',
+		'Ouml',
+		'times',
+		'Oslash',
+		'Ugrave',
+		'Uacute',
+		'Ucirc',
+		'Uuml',
+		'Yacute',
+		'THORN',
+		'szlig',
+		'agrave',
+		'aacute',
+		'acirc',
+		'atilde',
+		'auml',
+		'aring',
+		'aelig',
+		'ccedil',
+		'egrave',
+		'eacute',
+		'ecirc',
+		'euml',
+		'igrave',
+		'iacute',
+		'icirc',
+		'iuml',
+		'eth',
+		'ntilde',
+		'ograve',
+		'oacute',
+		'ocirc',
+		'otilde',
+		'ouml',
+		'divide',
+		'oslash',
+		'ugrave',
+		'uacute',
+		'ucirc',
+		'uuml',
+		'yacute',
+		'thorn',
+		'yuml',
+		'quot',
+		'amp',
+		'lt',
+		'gt',
+		'apos',
+		'OElig',
+		'oelig',
+		'Scaron',
+		'scaron',
+		'Yuml',
+		'circ',
+		'tilde',
+		'ensp',
+		'emsp',
+		'thinsp',
+		'zwnj',
+		'zwj',
+		'lrm',
+		'rlm',
+		'ndash',
+		'mdash',
+		'lsquo',
+		'rsquo',
+		'sbquo',
+		'ldquo',
+		'rdquo',
+		'bdquo',
+		'dagger',
+		'Dagger',
+		'permil',
+		'lsaquo',
+		'rsaquo',
+		'euro',
+		'fnof',
+		'Alpha',
+		'Beta',
+		'Gamma',
+		'Delta',
+		'Epsilon',
+		'Zeta',
+		'Eta',
+		'Theta',
+		'Iota',
+		'Kappa',
+		'Lambda',
+		'Mu',
+		'Nu',
+		'Xi',
+		'Omicron',
+		'Pi',
+		'Rho',
+		'Sigma',
+		'Tau',
+		'Upsilon',
+		'Phi',
+		'Chi',
+		'Psi',
+		'Omega',
+		'alpha',
+		'beta',
+		'gamma',
+		'delta',
+		'epsilon',
+		'zeta',
+		'eta',
+		'theta',
+		'iota',
+		'kappa',
+		'lambda',
+		'mu',
+		'nu',
+		'xi',
+		'omicron',
+		'pi',
+		'rho',
+		'sigmaf',
+		'sigma',
+		'tau',
+		'upsilon',
+		'phi',
+		'chi',
+		'psi',
+		'omega',
+		'thetasym',
+		'upsih',
+		'piv',
+		'bull',
+		'hellip',
+		'prime',
+		'Prime',
+		'oline',
+		'frasl',
+		'weierp',
+		'image',
+		'real',
+		'trade',
+		'alefsym',
+		'larr',
+		'uarr',
+		'rarr',
+		'darr',
+		'harr',
+		'crarr',
+		'lArr',
+		'uArr',
+		'rArr',
+		'dArr',
+		'hArr',
+		'forall',
+		'part',
+		'exist',
+		'empty',
+		'nabla',
+		'isin',
+		'notin',
+		'ni',
+		'prod',
+		'sum',
+		'minus',
+		'lowast',
+		'radic',
+		'prop',
+		'infin',
+		'ang',
+		'and',
+		'or',
+		'cap',
+		'cup',
+		'int',
+		'sim',
+		'cong',
+		'asymp',
+		'ne',
+		'equiv',
+		'le',
+		'ge',
+		'sub',
+		'sup',
+		'nsub',
+		'sube',
+		'supe',
+		'oplus',
+		'otimes',
+		'perp',
+		'sdot',
+		'lceil',
+		'rceil',
+		'lfloor',
+		'rfloor',
+		'lang',
+		'rang',
+		'loz',
+		'spades',
+		'clubs',
+		'hearts',
+		'diams'
+	);
 }
 
 /**
@@ -952,13 +1201,34 @@ function wp_kses_normalize_entities($string) {
 
 	# Change back the allowed entities in our entity whitelist
 
-	$string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
+	$string = preg_replace_callback('/&amp;([A-Za-z]{2,8});/', 'wp_kses_named_entities', $string);
 	$string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
 	$string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);
 
 	return $string;
 }
 
+/**
+ * Callback for wp_kses_normalize_entities() regular expression.
+ *
+ * This function only accepts valid named entity references, which are finite,
+ * case-sensitive, and highly scrutinized by HTML and XML validators.
+ *
+ * @since 3.0.0
+ *
+ * @param array $matches preg_replace_callback() matches array
+ * @return string Correctly encoded entity
+ */
+function wp_kses_named_entities($matches) {
+	global $allowedentitynames;
+
+	if ( empty($matches[1]) )
+		return '';
+
+	$i = $matches[1];
+	return ( ( ! in_array($i, $allowedentitynames) ) ? "&amp;$i;" : "&$i;" );
+}
+
 /**
  * Callback for wp_kses_normalize_entities() regular expression.
  *
@@ -972,7 +1242,7 @@ function wp_kses_normalize_entities($string) {
  * @return string Correctly encoded entity
  */
 function wp_kses_normalize_entities2($matches) {
-	if ( ! isset($matches[1]) || empty($matches[1]) )
+	if ( empty($matches[1]) )
 		return '';
 
 	$i = $matches[1];
@@ -991,7 +1261,7 @@ function wp_kses_normalize_entities2($matches) {
  * @return string Correctly encoded entity
  */
 function wp_kses_normalize_entities3($matches) {
-	if ( ! isset($matches[2]) || empty($matches[2]) )
+	if ( empty($matches[2]) )
 		return '';
 
 	$hexchars = $matches[2];