From c49fecf3f056e6ef52442996934116da3e37450b Mon Sep 17 00:00:00 2001
From: saxmatt
Date: Mon, 9 Feb 2004 06:57:02 +0000
Subject: [PATCH] Breaking out some of the formatting functions into a
different file.
git-svn-id: http://svn.automattic.com/wordpress/trunk@846 1a063a9b-81f0-0310-95a4-ce76da25c4cd
---
wp-includes/functions-formatting.php | 400 +++++++++++++++++++++++++
wp-includes/functions.php | 428 +--------------------------
2 files changed, 404 insertions(+), 424 deletions(-)
create mode 100644 wp-includes/functions-formatting.php
diff --git a/wp-includes/functions-formatting.php b/wp-includes/functions-formatting.php
new file mode 100644
index 0000000000..e52f038fd6
--- /dev/null
+++ b/wp-includes/functions-formatting.php
@@ -0,0 +1,400 @@
+)/Us", $text, -1, PREG_SPLIT_DELIM_CAPTURE); // capture the tags as well as in between
+ $stop = count($textarr); $next = true; // loop stuff
+ for ($i = 0; $i < $stop; $i++) {
+ $curl = $textarr[$i];
+ if (!strstr($_SERVER['HTTP_USER_AGENT'], 'Gecko')) {
+ $curl = str_replace('', '“', $curl);
+ $curl = str_replace('
', '”', $curl);
+ }
+ if (isset($curl{0}) && '<' != $curl{0} && $next) { // If it's not a tag
+ $curl = str_replace('---', '—', $curl);
+ $curl = str_replace('--', '–', $curl);
+ $curl = str_replace("...", '…', $curl);
+ $curl = str_replace('``', '“', $curl);
+
+ // This is a hack, look at this more later. It works pretty well though.
+ $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round");
+ $cockneyreplace = array("’tain’t","’twere","’twas","’tis","’twill","’til","’bout","’nuff","’round");
+ $curl = str_replace($cockney, $cockneyreplace, $curl);
+
+ $curl = preg_replace("/'s/", "’s", $curl);
+ $curl = preg_replace("/'(\d\d(?:’|')?s)/", "’$1", $curl);
+ $curl = preg_replace('/(\s|\A|")\'/', '$1‘', $curl);
+ $curl = preg_replace("/(\d+)\"/", "$1″", $curl);
+ $curl = preg_replace("/(\d+)'/", "$1′", $curl);
+ $curl = preg_replace("/(\S)'([^'\s])/", "$1’$2", $curl);
+ $curl = preg_replace('/"([\s.,!?;:&\']|\Z)/', '”$1', $curl);
+ $curl = preg_replace('/(\s|\A)"/', '$1“', $curl);
+ $curl = preg_replace("/'([\s.]|\Z)/", '’$1', $curl);
+ $curl = preg_replace("/\(tm\)/i", '™', $curl);
+ $curl = preg_replace("/\(c\)/i", '©', $curl);
+ $curl = preg_replace("/\(r\)/i", '®', $curl);
+ $curl = preg_replace('/&([^#])(?![a-z]{1,8};)/', '&$1', $curl);
+ $curl = str_replace("''", '”', $curl);
+
+ $curl = preg_replace('/(d+)x(\d+)/', "$1×$2", $curl);
+
+ } elseif (strstr($curl, '
\s*
|', "\n\n", $pee);
+ $pee = preg_replace('!(<(?:table|tr|td|th|div|ul|ol|li|pre|select|form|blockquote|p|h[1-6])[^>]*>)!', "\n$1", $pee); // Space things out a little
+ $pee = preg_replace('!((?:table|tr|td|th|div|ul|ol|li|pre|select|form|blockquote|p|h[1-6])>)!', "$1\n", $pee); // Space things out a little
+ $pee = preg_replace("/(\r\n|\r)/", "\n", $pee); // cross-platform newlines
+ $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates
+ $pee = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "\t$1
\n", $pee); // make paragraphs, including one at the end
+ $pee = preg_replace('|\s*?
|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace
+ $pee = preg_replace('!\s*(?(?:table|tr|td|th|div|ul|ol|li|pre|select|form|blockquote|p|h[1-6])[^>]*>)\s*
!', "$1", $pee); // don't pee all over a tag
+ $pee = preg_replace("|(
|", "$1", $pee); // problem with nested lists
+ $pee = preg_replace('|]*)>|i', "", $pee);
+ $pee = str_replace('
', '', $pee);
+ $pee = preg_replace('!\s*(?(?:table|tr|td|th|div|ul|ol|li|pre|select|form|blockquote|p|h[1-6])[^>]*>)!', "$1", $pee);
+ $pee = preg_replace('!(?(?:table|tr|td|th|div|ul|ol|li|pre|select|form|blockquote|p|h[1-6])[^>]*>)\s*
!', "$1", $pee);
+ if ($br) $pee = preg_replace('|(?)\s*\n|', "
\n", $pee); // optionally make line breaks
+ $pee = preg_replace('!(?(?:table|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|p|h[1-6])[^>]*>)\s*
!', "$1", $pee);
+ $pee = preg_replace('!
(\s*?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $pee);
+ $pee = preg_replace('/&([^#])(?![a-z]{1,8};)/', '&$1', $pee);
+
+ return $pee;
+}
+
+/**
+ * Turns a title into a lowercase, hyphen-separated slug.
+ *
+ * Steps, in order: lowercase, drop anything that looks like an HTML entity
+ * (&...;), collapse whitespace runs to a single space, drop every character
+ * that is not a-z, 0-9, space or hyphen, trim, turn spaces into hyphens and
+ * finally collapse runs of hyphens.
+ *
+ * @param string $title Raw title text.
+ * @return string Slug containing only a-z, 0-9 and single hyphens.
+ */
+function sanitize_title($title) {
+	$title = strtolower($title);
+	$title = preg_replace('/&.+?;/', '', $title); // kill entities
+	// Whitespace has to be normalised BEFORE the whitelist below: that
+	// whitelist only allows the plain space, so tabs and newlines would
+	// otherwise be deleted and glue neighbouring words together.
+	$title = preg_replace('/\s+/', ' ', $title);
+	$title = preg_replace('/[^a-z0-9 -]/', '', $title);
+	$title = trim($title);
+	$title = str_replace(' ', '-', $title);
+	// Also absorbs doubled separators left behind by stripped characters.
+	$title = preg_replace('|-+|', '-', $title);
+	return $title;
+}
+
+/**
+ * Prepares content for output: removes <title>/<category> metadata tags,
+ * optionally rewrites stray ampersands and bytes 127-255 as numeric
+ * entities, and turns <br>/<hr> into their self-closing forms.
+ *
+ * Reads the globals $use_htmltrans (switch for the entity conversion) and
+ * $wp_htmltrans / $wp_htmltranswinuni (strtr() replacement tables defined
+ * elsewhere; presumably entity-to-numeric and CP1252-to-Unicode maps).
+ *
+ * @param string $content Text to convert.
+ * @param string $flag    Unused; kept so callers that still pass it keep working.
+ * @return string Converted text.
+ */
+function convert_chars($content, $flag='obsolete attribute left there for backwards compatibility') { // html/unicode entities output
+	global $use_htmltrans, $wp_htmltrans, $wp_htmltranswinuni;
+
+	// removes metadata tags
+	$content = preg_replace('/<title>(.+?)<\/title>/','',$content);
+	$content = preg_replace('/<category>(.+?)<\/category>/','',$content);
+
+	if ($use_htmltrans) {
+		// converts lone & characters into &#38; (a.k.a. &amp;)
+		// A plain back-reference replaces the old /e (eval) replacement: /e no
+		// longer exists in PHP 7+, and it added a stray backslash when the
+		// character after the ampersand was a single quote.
+		$content = preg_replace('/&([^#])(?![a-z]*;)/i', '&#38;$1', $content);
+
+		// converts every byte in the range 127-255 into a numeric entity
+		// (&#NNN;) so the tables below can convert them again later
+		$highbytes = array();
+		for ($code = 127; $code <= 255; $code++) {
+			$highbytes[chr($code)] = '&#' . $code . ';';
+		}
+		$content = strtr($content, $highbytes);
+		$content = strtr($content, $wp_htmltrans);
+
+		// now converting: Windows CP1252 => Unicode (valid HTML)
+		// (if you've ever pasted text from MSWord, you'll understand)
+		$content = strtr($content, $wp_htmltranswinuni);
+	}
+
+	// you can delete these 2 lines if you don't like <br /> and <hr />
+	$content = str_replace("<br>","<br />",$content);
+	$content = str_replace("<hr>","<hr />",$content);
+
+	return $content;
+}
+
+/**
+ ** Sanitise HTML attributes and strip frame/applet tags, so that markup
+ ** such as <b onmouseover="alert(1)">text</b> loses its script payload.
+ **
+ ** Rules run one after another, each on the output of the previous one:
+ **  1-2. on*, style, class and id attributes with a double- or single-quoted
+ **       value keep only the attribute name.
+ **  3.   the same attribute names followed by a bare "=" lose the "=" and the
+ **       spaces/tabs around it (the trailing lazy group matches nothing).
+ **  4-5. any attribute whose quoted value starts with javascript:, vbscript:
+ **       or about: is emptied to name="".
+ **  6.   opening and closing tags whose name is frame or applet, optionally
+ **       preceded by up to two letters (e.g. iframe), are removed.
+ **
+ ** @param string $text HTML fragment to clean.
+ ** @return string The fragment after all rules have been applied.
+ **/
+function sanitise_html_attributes($text) {
+	// Each entry is array(pattern, replacement); order matters.
+	$rules = array(
+		array('#(([\s"\'])on[a-z]{1,}|style|class|id)="(.*?)"#i', '$1'),
+		array('#(([\s"\'])on[a-z]{1,}|style|class|id)=\'(.*?)\'#i', '$1'),
+		array('#(([\s"\'])on[a-z]{1,}|style|class|id)[ \t]*=[ \t]*([^ \t\>]*?)#i', '$1'),
+		array('#([a-z]{1,})="(( |\t)*?)(javascript|vbscript|about):(.*?)"#i', '$1=""'),
+		array('#([a-z]{1,})=\'(( |\t)*?)(javascript|vbscript|about):(.*?)\'#i', '$1=""'),
+		array('#\<(\/{0,1})([a-z]{0,2})(frame|applet)(.*?)\>#i', ''),
+	);
+
+	$cleaned = $text;
+	foreach ($rules as $rule) {
+		$cleaned = preg_replace($rule[0], $rule[1], $cleaned);
+	}
+
+	return $cleaned;
+}
+
+/*
+ balanceTags
+
+ Balances Tags of string using a modified stack.
+
+ @param text Text to be balanced
+ @return Returns balanced text
+ @author Leonard Lin (leonard@acm.org)
+ @version v1.1
+ @date November 4, 2001
+ @license GPL v2.0
+ @notes
+ @changelog
+ 1.2 ***TODO*** Make better - change loop condition to $text
+ 1.1 Fixed handling of append/stack pop order of end text
+ Added Cleaning Hooks
+ 1.0 First Version
+*/
+function balanceTags($text, $is_comment = 0) {
+ global $use_balanceTags;
+
+ if ($is_comment) {
+ $text = sanitise_html_attributes($text);
+ }
+
+ if ($use_balanceTags == 0) {
+ return $text;
+ }
+
+ $tagstack = array(); $stacksize = 0; $tagqueue = ''; $newtext = '';
+
+ # WP bug fix for comments - in case you REALLY meant to type '< !--'
+ $text = str_replace('< !--', '< !--', $text);
+ # WP bug fix for LOVE <3 (and other situations with '<' before a number)
+ $text = preg_replace('#<([0-9]{1})#', '<$1', $text);
+
+ while (preg_match("/<(\/?\w*)\s*([^>]*)>/",$text,$regex)) {
+ $newtext = $newtext . $tagqueue;
+
+ $i = strpos($text,$regex[0]);
+ $l = strlen($tagqueue) + strlen($regex[0]);
+
+ // clear the shifter
+ $tagqueue = '';
+ // Pop or Push
+ if ($regex[1][0] == "/") { // End Tag
+ $tag = strtolower(substr($regex[1],1));
+ // if too many closing tags
+ if($stacksize <= 0) {
+ $tag = '';
+ //or close to be safe $tag = '/' . $tag;
+ }
+ // if stacktop value = tag close value then pop
+ else if ($tagstack[$stacksize - 1] == $tag) { // found closing tag
+ $tag = '' . $tag . '>'; // Close Tag
+ // Pop
+ array_pop ($tagstack);
+ $stacksize--;
+ } else { // closing tag not at top, search for it
+ for ($j=$stacksize-1;$j>=0;$j--) {
+ if ($tagstack[$j] == $tag) {
+ // add tag to tagqueue
+ for ($k=$stacksize-1;$k>=$j;$k--){
+ $tagqueue .= '' . array_pop ($tagstack) . '>';
+ $stacksize--;
+ }
+ break;
+ }
+ }
+ $tag = '';
+ }
+ } else { // Begin Tag
+ $tag = strtolower($regex[1]);
+
+ // Tag Cleaning
+
+ // Push if not img or br or hr
+ if($tag != 'br' && $tag != 'img' && $tag != 'hr') {
+ $stacksize = array_push ($tagstack, $tag);
+ }
+
+ // Attributes
+ // $attributes = $regex[2];
+ $attributes = $regex[2];
+ if($attributes) {
+ $attributes = ' '.$attributes;
+ }
+ $tag = '<'.$tag.$attributes.'>';
+ }
+ $newtext .= substr($text,0,$i) . $tag;
+ $text = substr($text,$i+$l);
+ }
+
+ // Clear Tag Queue
+ $newtext = $newtext . $tagqueue;
+
+ // Add Remaining text
+ $newtext .= $text;
+
+ // Empty Stack
+ while($x = array_pop($tagstack)) {
+ $newtext = $newtext . '' . $x . '>'; // Add remaining tags to close
+ }
+
+ # WP fix for the bug with HTML comments
+ $newtext = str_replace("< !--","