diff --git a/wp-includes/class-snoopy.php b/wp-includes/class-snoopy.php index 9a9ac16104..89c3594bd1 100644 --- a/wp-includes/class-snoopy.php +++ b/wp-includes/class-snoopy.php @@ -5,7 +5,7 @@ Snoopy - the PHP net client Author: Monte Ohrt Copyright (c): 1999-2000 ispi, all rights reserved -Version: 1.0 +Version: 1.01 * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -31,7 +31,7 @@ CTO, ispi Lincoln, NE 68510 The latest version of Snoopy can be obtained from: -http://snoopy.sourceforge.net +http://snoopy.sourceforge.net/ *************************************************/ @@ -46,7 +46,10 @@ class Snoopy var $port = 80; // port we are connecting to var $proxy_host = ""; // proxy host to use var $proxy_port = ""; // proxy port to use - var $agent = "Snoopy v1.0"; // agent we masquerade as + var $proxy_user = ""; // proxy user to use + var $proxy_pass = ""; // proxy password to use + + var $agent = "Snoopy v1.2.3"; // agent we masquerade as var $referer = ""; // referer info to pass var $cookies = array(); // array of cookies to pass // $cookies["username"]="joe"; @@ -59,7 +62,7 @@ class Snoopy var $maxframes = 0; // frame content depth maximum. 0 = disallow var $expandlinks = true; // expand links to fully qualified URLs. // this only applies to fetchlinks() - // or submitlinks() + // submitlinks(), and submittext() var $passcookies = true; // pass set cookies back through redirects // NOTE: this currently does not respect // dates, domains or paths. @@ -81,8 +84,12 @@ class Snoopy // set to 0 to disallow timeouts var $timed_out = false; // if a read operation timed out var $status = 0; // http request status - - var $curl_path = "/usr/bin/curl"; + + var $temp_dir = "/tmp"; // temporary directory that the webserver + // has permission to write to. + // under Windows, this should be C:\temp + + var $curl_path = "/usr/local/bin/curl"; // Snoopy will use cURL for fetching // SSL content if a full system path to // the cURL binary is supplied here. @@ -94,9 +101,6 @@ class Snoopy // as these functions are not stable // as of this Snoopy release. - // send Accept-encoding: gzip? - var $use_gzip = true; - /**** Private variables ****/ var $_maxlinelen = 4096; // max line length (headers) @@ -132,8 +136,12 @@ class Snoopy $this->user = $URI_PARTS["user"]; if (!empty($URI_PARTS["pass"])) $this->pass = $URI_PARTS["pass"]; + if (empty($URI_PARTS["query"])) + $URI_PARTS["query"] = ''; + if (empty($URI_PARTS["path"])) + $URI_PARTS["path"] = ''; - switch($URI_PARTS["scheme"]) + switch(strtolower($URI_PARTS["scheme"])) { case "http": $this->host = $URI_PARTS["host"]; @@ -148,7 +156,7 @@ class Snoopy } else { - $path = $URI_PARTS["path"].(isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : ""); + $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : ""); // no proxy, send only the path $this->_httprequest($path, $fp, $URI, $this->_httpmethod); } @@ -195,10 +203,11 @@ class Snoopy return true; break; case "https": - if(!$this->curl_path || (!is_executable($this->curl_path))) { - $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n"; + if(!$this->curl_path) return false; - } + if(function_exists("is_executable")) + if (!is_executable($this->curl_path)) + return false; $this->host = $URI_PARTS["host"]; if(!empty($URI_PARTS["port"])) $this->port = $URI_PARTS["port"]; @@ -257,7 +266,346 @@ class Snoopy return true; } +/*======================================================================*\ + Function: submit + Purpose: submit an http form + Input: $URI the location to post the data + $formvars the formvars to use. + format: $formvars["var"] = "val"; + $formfiles an array of files to submit + format: $formfiles["var"] = "/dir/filename.ext"; + Output: $this->results the text output from the post +\*======================================================================*/ + function submit($URI, $formvars="", $formfiles="") + { + unset($postdata); + + $postdata = $this->_prepare_post_body($formvars, $formfiles); + + $URI_PARTS = parse_url($URI); + if (!empty($URI_PARTS["user"])) + $this->user = $URI_PARTS["user"]; + if (!empty($URI_PARTS["pass"])) + $this->pass = $URI_PARTS["pass"]; + if (empty($URI_PARTS["query"])) + $URI_PARTS["query"] = ''; + if (empty($URI_PARTS["path"])) + $URI_PARTS["path"] = ''; + + switch(strtolower($URI_PARTS["scheme"])) + { + case "http": + $this->host = $URI_PARTS["host"]; + if(!empty($URI_PARTS["port"])) + $this->port = $URI_PARTS["port"]; + if($this->_connect($fp)) + { + if($this->_isproxy) + { + // using proxy, send entire URI + $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata); + } + else + { + $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : ""); + // no proxy, send only the path + $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata); + } + + $this->_disconnect($fp); + + if($this->_redirectaddr) + { + /* url was redirected, check if we've hit the max depth */ + if($this->maxredirs > $this->_redirectdepth) + { + if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr)) + $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]); + + // only follow redirect if it's on this site, or offsiteok is true + if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) + { + /* follow the redirect */ + $this->_redirectdepth++; + $this->lastredirectaddr=$this->_redirectaddr; + if( strpos( $this->_redirectaddr, "?" ) > 0 ) + $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get + else + $this->submit($this->_redirectaddr,$formvars, $formfiles); + } + } + } + + if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) + { + $frameurls = $this->_frameurls; + $this->_frameurls = array(); + + while(list(,$frameurl) = each($frameurls)) + { + if($this->_framedepth < $this->maxframes) + { + $this->fetch($frameurl); + $this->_framedepth++; + } + else + break; + } + } + + } + else + { + return false; + } + return true; + break; + case "https": + if(!$this->curl_path) + return false; + if(function_exists("is_executable")) + if (!is_executable($this->curl_path)) + return false; + $this->host = $URI_PARTS["host"]; + if(!empty($URI_PARTS["port"])) + $this->port = $URI_PARTS["port"]; + if($this->_isproxy) + { + // using proxy, send entire URI + $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata); + } + else + { + $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : ""); + // no proxy, send only the path + $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata); + } + + if($this->_redirectaddr) + { + /* url was redirected, check if we've hit the max depth */ + if($this->maxredirs > $this->_redirectdepth) + { + if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr)) + $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]); + + // only follow redirect if it's on this site, or offsiteok is true + if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) + { + /* follow the redirect */ + $this->_redirectdepth++; + $this->lastredirectaddr=$this->_redirectaddr; + if( strpos( $this->_redirectaddr, "?" ) > 0 ) + $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get + else + $this->submit($this->_redirectaddr,$formvars, $formfiles); + } + } + } + + if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) + { + $frameurls = $this->_frameurls; + $this->_frameurls = array(); + + while(list(,$frameurl) = each($frameurls)) + { + if($this->_framedepth < $this->maxframes) + { + $this->fetch($frameurl); + $this->_framedepth++; + } + else + break; + } + } + return true; + break; + + default: + // not a valid protocol + $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n'; + return false; + break; + } + return true; + } + +/*======================================================================*\ + Function: fetchlinks + Purpose: fetch the links from a web page + Input: $URI where you are fetching from + Output: $this->results an array of the URLs +\*======================================================================*/ + + function fetchlinks($URI) + { + if ($this->fetch($URI)) + { + if($this->lastredirectaddr) + $URI = $this->lastredirectaddr; + if(is_array($this->results)) + { + for($x=0;$xresults);$x++) + $this->results[$x] = $this->_striplinks($this->results[$x]); + } + else + $this->results = $this->_striplinks($this->results); + + if($this->expandlinks) + $this->results = $this->_expandlinks($this->results, $URI); + return true; + } + else + return false; + } + +/*======================================================================*\ + Function: fetchform + Purpose: fetch the form elements from a web page + Input: $URI where you are fetching from + Output: $this->results the resulting html form +\*======================================================================*/ + + function fetchform($URI) + { + + if ($this->fetch($URI)) + { + + if(is_array($this->results)) + { + for($x=0;$xresults);$x++) + $this->results[$x] = $this->_stripform($this->results[$x]); + } + else + $this->results = $this->_stripform($this->results); + + return true; + } + else + return false; + } + + +/*======================================================================*\ + Function: fetchtext + Purpose: fetch the text from a web page, stripping the links + Input: $URI where you are fetching from + Output: $this->results the text from the web page +\*======================================================================*/ + + function fetchtext($URI) + { + if($this->fetch($URI)) + { + if(is_array($this->results)) + { + for($x=0;$xresults);$x++) + $this->results[$x] = $this->_striptext($this->results[$x]); + } + else + $this->results = $this->_striptext($this->results); + return true; + } + else + return false; + } + +/*======================================================================*\ + Function: submitlinks + Purpose: grab links from a form submission + Input: $URI where you are submitting from + Output: $this->results an array of the links from the post +\*======================================================================*/ + + function submitlinks($URI, $formvars="", $formfiles="") + { + if($this->submit($URI,$formvars, $formfiles)) + { + if($this->lastredirectaddr) + $URI = $this->lastredirectaddr; + if(is_array($this->results)) + { + for($x=0;$xresults);$x++) + { + $this->results[$x] = $this->_striplinks($this->results[$x]); + if($this->expandlinks) + $this->results[$x] = $this->_expandlinks($this->results[$x],$URI); + } + } + else + { + $this->results = $this->_striplinks($this->results); + if($this->expandlinks) + $this->results = $this->_expandlinks($this->results,$URI); + } + return true; + } + else + return false; + } + +/*======================================================================*\ + Function: submittext + Purpose: grab text from a form submission + Input: $URI where you are submitting from + Output: $this->results the text from the web page +\*======================================================================*/ + + function submittext($URI, $formvars = "", $formfiles = "") + { + if($this->submit($URI,$formvars, $formfiles)) + { + if($this->lastredirectaddr) + $URI = $this->lastredirectaddr; + if(is_array($this->results)) + { + for($x=0;$xresults);$x++) + { + $this->results[$x] = $this->_striptext($this->results[$x]); + if($this->expandlinks) + $this->results[$x] = $this->_expandlinks($this->results[$x],$URI); + } + } + else + { + $this->results = $this->_striptext($this->results); + if($this->expandlinks) + $this->results = $this->_expandlinks($this->results,$URI); + } + return true; + } + else + return false; + } + + + +/*======================================================================*\ + Function: set_submit_multipart + Purpose: Set the form submission content type to + multipart/form-data +\*======================================================================*/ + function set_submit_multipart() + { + $this->_submit_type = "multipart/form-data"; + } + + +/*======================================================================*\ + Function: set_submit_normal + Purpose: Set the form submission content type to + application/x-www-form-urlencoded +\*======================================================================*/ + function set_submit_normal() + { + $this->_submit_type = "application/x-www-form-urlencoded"; + } + + + /*======================================================================*\ Private functions @@ -273,7 +621,7 @@ class Snoopy function _striplinks($document) { - preg_match_all("'<\s*a\s+.*href\s*=\s* # find ]+)) # if quote found, match up to next matching # quote, otherwise match up to next space @@ -335,16 +683,27 @@ class Snoopy $search = array("']*?>.*?'si", // strip out javascript "'<[\/\!]*?[^<>]*?>'si", // strip out html tags "'([\r\n])[\s]+'", // strip out white space - "'&(quote|#34);'i", // replace html entities - "'&(amp|#38);'i", - "'&(lt|#60);'i", - "'&(gt|#62);'i", - "'&(nbsp|#160);'i", + "'&(quot|#34|#034|#x22);'i", // replace html entities + "'&(amp|#38|#038|#x26);'i", // added hexadecimal values + "'&(lt|#60|#060|#x3c);'i", + "'&(gt|#62|#062|#x3e);'i", + "'&(nbsp|#160|#xa0);'i", "'&(iexcl|#161);'i", "'&(cent|#162);'i", "'&(pound|#163);'i", - "'&(copy|#169);'i" - ); + "'&(copy|#169);'i", + "'&(reg|#174);'i", + "'&(deg|#176);'i", + "'&(#39|#039|#x27);'", + "'&(euro|#8364);'i", // europe + "'&a(uml|UML);'", // german + "'&o(uml|UML);'", + "'&u(uml|UML);'", + "'&A(uml|UML);'", + "'&O(uml|UML);'", + "'&U(uml|UML);'", + "'ß'i", + ); $replace = array( "", "", "\\1", @@ -356,7 +715,19 @@ class Snoopy chr(161), chr(162), chr(163), - chr(169)); + chr(169), + chr(174), + chr(176), + chr(39), + chr(128), + "ä", + "ö", + "ü", + "Ä", + "Ö", + "Ü", + "ß", + ); $text = preg_replace($search,$replace,$document); @@ -377,14 +748,20 @@ class Snoopy preg_match("/^[^\?]+/",$URI,$match); $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]); + $match = preg_replace("|/$|","",$match); + $match_part = parse_url($match); + $match_root = + $match_part["scheme"]."://".$match_part["host"]; $search = array( "|^http://".preg_quote($this->host)."|i", - "|^(?!http://)(\/)?(?!mailto:)|i", + "|^(\/)|i", + "|^(?!http://)(?!mailto:)|i", "|/\./|", "|/[^\/]+/\.\./|" ); $replace = array( "", + $match_root."/", $match."/", "/", "/" @@ -407,6 +784,7 @@ class Snoopy function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="") { + $cookie_headers = ''; if($this->passcookies && $this->_redirectaddr) $this->setcookies(); @@ -416,25 +794,14 @@ class Snoopy $headers = $http_method." ".$url." ".$this->_httpversion."\r\n"; if(!empty($this->agent)) $headers .= "User-Agent: ".$this->agent."\r\n"; - if(!empty($this->host) && !isset($this->rawheaders['Host'])) - $headers .= "Host: ".$this->host."\r\n"; + if(!empty($this->host) && !isset($this->rawheaders['Host'])) { + $headers .= "Host: ".$this->host; + if(!empty($this->port)) + $headers .= ":".$this->port; + $headers .= "\r\n"; + } if(!empty($this->accept)) $headers .= "Accept: ".$this->accept."\r\n"; - - if($this->use_gzip) { - // make sure PHP was built with --with-zlib - // and we can handle gzipp'ed data - if ( function_exists(gzinflate) ) { - $headers .= "Accept-encoding: gzip\r\n"; - } - else { - trigger_error( - "use_gzip is on, but PHP was built without zlib support.". - " Requesting file(s) without gzip encoding.", - E_USER_NOTICE); - } - } - if(!empty($this->referer)) $headers .= "Referer: ".$this->referer."\r\n"; if(!empty($this->cookies)) @@ -467,7 +834,12 @@ class Snoopy if(!empty($body)) $headers .= "Content-length: ".strlen($body)."\r\n"; if(!empty($this->user) || !empty($this->pass)) - $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n"; + $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n"; + + //add proxy auth headers + if(!empty($this->proxy_user)) + $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n"; + $headers .= "\r\n"; @@ -480,9 +852,6 @@ class Snoopy $this->_redirectaddr = false; unset($this->headers); - - // content was returned gzip encoded? - $is_gzipped = false; while($currentHeader = fgets($fp,$this->_maxlinelen)) { @@ -492,15 +861,14 @@ class Snoopy return false; } - // if($currentHeader == "\r\n") - if(preg_match("/^\r?\n$/", $currentHeader) ) - break; + if($currentHeader == "\r\n") + break; // if a header begins with Location: or URI:, set the redirect if(preg_match("/^(Location:|URI:)/i",$currentHeader)) { // get URL portion of the redirect - preg_match("/^(Location:|URI:)\s+(.*)/",chop($currentHeader),$matches); + preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches); // look for :// in the Location header to see if hostname is included if(!preg_match("|\:\/\/|",$matches[2])) { @@ -524,31 +892,19 @@ class Snoopy } $this->response_code = $currentHeader; } - - if (preg_match("/Content-Encoding: gzip/", $currentHeader) ) { - $is_gzipped = true; - } - + $this->headers[] = $currentHeader; } - # $results = fread($fp, $this->maxlength); - $results = ""; - while ( $data = fread($fp, $this->maxlength) ) { - $results .= $data; - if ( - strlen($results) > $this->maxlength ) { - break; - } - } - - // gunzip - if ( $is_gzipped ) { - // per http://www.php.net/manual/en/function.gzencode.php - $results = substr($results, 10); - $results = gzinflate($results); - } - + $results = ''; + do { + $_data = fread($fp, $this->maxlength); + if (strlen($_data) == 0) { + break; + } + $results .= $_data; + } while(true); + if ($this->read_timeout > 0 && $this->_check_timeout($fp)) { $this->status=-100; @@ -557,7 +913,8 @@ class Snoopy // check if there is a a redirect meta tag - if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) + if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) + { $this->_redirectaddr = $this->_expandlinks($match[1],$URI); } @@ -603,7 +960,10 @@ class Snoopy if(!empty($this->agent)) $headers[] = "User-Agent: ".$this->agent; if(!empty($this->host)) - $headers[] = "Host: ".$this->host; + if(!empty($this->port)) + $headers[] = "Host: ".$this->host.":".$this->port; + else + $headers[] = "Host: ".$this->host; if(!empty($this->accept)) $headers[] = "Accept: ".$this->accept; if(!empty($this->referer)) @@ -640,8 +1000,10 @@ class Snoopy if(!empty($this->user) || !empty($this->pass)) $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass); - for($curr_header = 0; $curr_header < count($headers); $curr_header++) - $cmdline_params .= " -H \"".$headers[$curr_header]."\""; + for($curr_header = 0; $curr_header < count($headers); $curr_header++) { + $safer_header = strtr( $headers[$curr_header], "\"", " " ); + $cmdline_params .= " -H \"".$safer_header."\""; + } if(!empty($body)) $cmdline_params .= " -d \"$body\""; @@ -649,11 +1011,10 @@ class Snoopy if($this->read_timeout > 0) $cmdline_params .= " -m ".$this->read_timeout; - $headerfile = uniqid(time()); - - # accept self-signed certs - $cmdline_params .= " -k"; - exec($this->curl_path." -D \"/tmp/$headerfile\"".$cmdline_params." ".$URI,$results,$return); + $headerfile = tempnam($temp_dir, "sno"); + + $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access + exec($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\"",$results,$return); if($return) { @@ -664,7 +1025,7 @@ class Snoopy $results = implode("\r\n",$results); - $result_headers = file("/tmp/$headerfile"); + $result_headers = file("$headerfile"); $this->_redirectaddr = false; unset($this->headers); @@ -676,7 +1037,7 @@ class Snoopy if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader])) { // get URL portion of the redirect - preg_match("/^(Location: |URI:)(.*)/",chop($result_headers[$currentHeader]),$matches); + preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches); // look for :// in the Location header to see if hostname is included if(!preg_match("|\:\/\/|",$matches[2])) { @@ -693,19 +1054,14 @@ class Snoopy } if(preg_match("|^HTTP/|",$result_headers[$currentHeader])) - { - $this->response_code = $result_headers[$currentHeader]; - if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match)) - { - $this->status= $match[1]; - } - } + $this->response_code = $result_headers[$currentHeader]; + $this->headers[] = $result_headers[$currentHeader]; } // check if there is a a redirect meta tag - if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) + if(preg_match("']*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) { $this->_redirectaddr = $this->_expandlinks($match[1],$URI); } @@ -724,7 +1080,7 @@ class Snoopy else $this->results = $results; - unlink("/tmp/$headerfile"); + unlink("$headerfile"); return true; } @@ -738,8 +1094,8 @@ class Snoopy { for($x=0; $xheaders); $x++) { - if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $this->headers[$x],$match)) - $this->cookies[$match[1]] = $match[2]; + if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match)) + $this->cookies[$match[1]] = urldecode($match[2]); } } @@ -773,6 +1129,7 @@ class Snoopy if(!empty($this->proxy_host) && !empty($this->proxy_port)) { $this->_isproxy = true; + $host = $this->proxy_host; $port = $this->proxy_port; } @@ -838,6 +1195,7 @@ class Snoopy { settype($formvars, "array"); settype($formfiles, "array"); + $postdata = ''; if (count($formvars) == 0 && count($formfiles) == 0) return; @@ -898,4 +1256,4 @@ class Snoopy } endif; -?> \ No newline at end of file +?>