diff --git a/wp-admin/includes/dashboard.php b/wp-admin/includes/dashboard.php index 5ba11a4a17..0287cd2852 100644 --- a/wp-admin/includes/dashboard.php +++ b/wp-admin/includes/dashboard.php @@ -335,10 +335,7 @@ function wp_dashboard_incoming_links_output() { $content = $item['atom_content']; else $content = __( 'something' ); - $content = strip_tags( $content ); - if ( 50 < strlen($content) ) - $content = substr($content, 0, 50) . ' ...'; - $content = wp_specialchars( $content ); + $content = wp_html_excerpt($content, 50) . ' ...'; if ( $link ) $text = _c( '%1$s linked here saying, "%3$s"|feed_display' ); else diff --git a/wp-includes/comment.php b/wp-includes/comment.php index 383b03d1ad..3c7ec2d95d 100644 --- a/wp-includes/comment.php +++ b/wp-includes/comment.php @@ -1046,11 +1046,7 @@ function do_trackbacks($post_id) { else $excerpt = apply_filters('the_excerpt', $post->post_excerpt); $excerpt = str_replace(']]>', ']]>', $excerpt); - $excerpt = strip_tags($excerpt); - if ( function_exists('mb_strcut') ) // For international trackbacks - $excerpt = mb_strcut($excerpt, 0, 252, get_option('blog_charset')) . '...'; - else - $excerpt = substr($excerpt, 0, 252) . '...'; + $excerpt = wp_html_excerpt($excerpt, 252) . '...'; $post_title = apply_filters('the_title', $post->post_title); $post_title = strip_tags($post_title); diff --git a/wp-includes/compat.php b/wp-includes/compat.php index c657f1f32a..9eb18d4db9 100644 --- a/wp-includes/compat.php +++ b/wp-includes/compat.php @@ -77,4 +77,23 @@ function hash_hmac($algo, $data, $key, $raw_output = false) { } endif; +if ( ! 
function_exists('mb_strcut') ): + function mb_strcut( $str, $start, $length=null, $encoding=null ) { + return _mb_strcut($str, $start, $length, $encoding); + } +endif; + +function _mb_strcut( $str, $start, $length=null, $encoding=null ) { + // the solution below works only for UTF-8, so in case of a different + // charset, just use the built-in substr + $charset = get_option( 'blog_charset' ); + if ( !in_array( $charset, array('utf8', 'utf-8', 'UTF8', 'UTF-8') ) ) { + return is_null( $length )? substr( $str, $start ) : substr( $str, $start, $length); + } + // use the regex unicode support to separate the UTF-8 characters into an array + preg_match_all( '/./us', $str, $match ); + $chars = is_null( $length )? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length ); + return implode( '', $chars ); +} + ?> diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index abdd5bd8e0..6934a69bd1 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -1370,4 +1370,26 @@ function wp_sprintf_l($pattern, $args) { return $result . substr($pattern, 2); } +/** + * Safely extracts not more than the first $count characters from html string + * + * UTF-8, tags and entities safe prefix extraction. Entities inside will be + * counted as one character. As a side effect, all entities will be converted to + * their decimal form. 
+ * + * @param string $str String to get the excerpt from + * @param integer $count Maximum number of visible characters to take + * @return string the excerpt + */ +function wp_html_excerpt( $str, $count ) { + $str = strip_tags( $str ); + $str = html_entity_decode( $str, ENT_QUOTES); + $str = mb_strcut( $str, 0, $count ); + // remove part of an entity at the end + $str = preg_replace( '/&[^;\s]{0,6}$/', '', $str ); + // we decoded some entities we should put back + $str = wp_specialchars( $str ); + return $str; +} + ?> diff --git a/wp-settings.php b/wp-settings.php index 74459f93f8..9c6249d347 100644 --- a/wp-settings.php +++ b/wp-settings.php @@ -15,6 +15,7 @@ if ( !defined('WP_MEMORY_LIMIT') ) if ( function_exists('memory_get_usage') && ( (int) @ini_get('memory_limit') < abs(intval(WP_MEMORY_LIMIT)) ) ) @ini_set('memory_limit', WP_MEMORY_LIMIT); + /** * wp_unregister_GLOBALS() - Turn register globals off * @@ -350,6 +351,15 @@ if ( get_option('active_plugins') ) { require (ABSPATH . WPINC . '/pluggable.php'); +/* + * In most cases the default internal encoding is latin1, which is of no use, + * since we want to use the mb_ functions for utf-8 strings + */ +if ( function_exists('mb_internal_encoding') ) + mb_internal_encoding( get_option( 'blog_charset' ) ); + + + if ( defined('WP_CACHE') && function_exists('wp_cache_postload') ) wp_cache_postload(); diff --git a/wp-trackback.php b/wp-trackback.php index 9dfdebe839..2c68acfccf 100644 --- a/wp-trackback.php +++ b/wp-trackback.php @@ -72,15 +72,8 @@ if ( !empty($tb_url) && !empty($title) ) { if ( !pings_open($tb_id) ) trackback_response(1, 'Sorry, trackbacks are closed for this item.'); - $title = wp_specialchars( strip_tags( $title ) ); - $excerpt = strip_tags($excerpt); - if ( function_exists('mb_strcut') ) { // For international trackbacks - $excerpt = mb_strcut($excerpt, 0, 252, get_option('blog_charset')) . '...'; - $title = mb_strcut($title, 0, 250, get_option('blog_charset')) . 
'...'; - } else { - $excerpt = (strlen($excerpt) > 255) ? substr($excerpt, 0, 252) . '...' : $excerpt; - $title = (strlen($title) > 250) ? substr($title, 0, 250) . '...' : $title; - } + $title = wp_html_excerpt( $title, 250 ).'...'; + $excerpt = wp_html_excerpt( $excerpt, 252 ).'...'; $comment_post_ID = (int) $tb_id; $comment_author = $blog_name; @@ -100,4 +93,4 @@ if ( !empty($tb_url) && !empty($title) ) { do_action('trackback_post', $wpdb->insert_id); trackback_response(0); } -?> \ No newline at end of file +?>