From a6580f16f5dbcf0f6af40f56691b8e97aa8368fa Mon Sep 17 00:00:00 2001 From: ryan Date: Wed, 20 May 2009 21:13:14 +0000 Subject: [PATCH] seems_utf8() cleanup. Props hakre. fixes #9692 git-svn-id: http://svn.automattic.com/wordpress/trunk@11414 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/formatting.php | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 9b44ec5772..f69dc4f0c4 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -172,26 +172,29 @@ function wpautop($pee, $br = 1) { /** * Checks to see if a string is utf8 encoded. * - * @author bmorel at ssi dot fr + * NOTE: This function also accepts 5- and 6-byte sequences, although + * valid UTF-8 byte sequences have a maximum length of 4 bytes. * + * @author bmorel at ssi dot fr (modified) * @since 1.2.1 * - * @param string $Str The string to be checked - * @return bool True if $Str fits a UTF-8 model, false otherwise. 
*/ -function seems_utf8($Str) { # by bmorel at ssi dot fr - $length = strlen($Str); +function seems_utf8(&$str) { + $length = strlen($str); for ($i=0; $i < $length; $i++) { - if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb - elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb - elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb - elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb - elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb - elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b + $c = ord($str[$i]); + if ($c < 0x80) $n = 0; # 0bbbbbbb + elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb + elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb + elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb + elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb + elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b else return false; # Does not match any model for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? - if ((++$i == $length) || ((ord($Str[$i]) & 0xC0) != 0x80)) - return false; + if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80)) + return false; } } return true;