<?php
/**
 * SimplePie
 *
 * A PHP-Based RSS and Atom Feed Framework.
 * Takes the hard work out of managing a complete RSS/Atom solution.
 *
 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 * 	  conditions and the following disclaimer.
 *
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 * 	  provided with the distribution.
 *
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 * 	  to endorse or promote products derived from this software without specific prior
 * 	  written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @package SimplePie
 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
 * @author Ryan Parman
 * @author Sam Sneddon
 * @author Ryan McCue
 * @link http://simplepie.org/ SimplePie
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 */


/**
 * HTTP Response Parser
 *
 * @package SimplePie
 * @subpackage HTTP
 */
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP Version
	 *
	 * @var float
	 */
	public $http_version = 0.0;

	/**
	 * Status code
	 *
	 * @var int
	 */
	public $status_code = 0;

	/**
	 * Reason phrase
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine
	 *
	 * @var string
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() everytime this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the hedaer currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the hedaer currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			$state = $this->state;
			$this->$state();
		}
		$this->data = '';
		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}

		$this->http_version = '';
		$this->status_code = '';
		$this->reason = '';
		$this->headers = array();
		$this->body = '';
		return false;
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return (bool) ($this->position < $this->data_length);
	}

	/**
	 * See if the next character is LWS
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		return (bool) ($this->data[$this->position] === "\x09"
			|| $this->data[$this->position] === "\x20"
			|| ($this->data[$this->position] === "\x0A"
				&& isset($this->data[$this->position + 1])
				&& ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
	}

	/**
	 * Parse the HTTP version
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
		{
			$len = strspn($this->data, '0123456789.', 5);
			$this->http_version = substr($this->data, 5, $len);
			$this->position += 5 + $len;
			if (substr_count($this->http_version, '.') <= 1)
			{
				$this->http_version = (float) $this->http_version;
				$this->position += strspn($this->data, "\x09\x20", $this->position);
				$this->state = 'status';
			}
			else
			{
				$this->state = false;
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the status code
	 */
	protected function status()
	{
		if ($len = strspn($this->data, '0123456789', $this->position))
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';
		if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (isset($this->data[$this->position + $len]))
		{
			if ($this->data[$this->position + $len] === "\x0A")
			{
				$this->position += $len;
				$this->state = 'new_line';
			}
			else
			{
				$this->name = substr($this->data, $this->position, $len);
				$this->position += $len + 1;
				$this->state = 'value';
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());
		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
		}
		else
		{
			switch ($this->data[$this->position])
			{
				case '"':
					// Workaround for ETags: we have to include the quotes as
					// part of the tag.
					if (strtolower($this->name) === 'etag')
					{
						$this->value .= '"';
						$this->position++;
						$this->state = 'value_char';
						break;
					}
					$this->position++;
					$this->state = 'quote';
					break;

				case "\x0A":
					$this->position++;
					$this->state = 'new_line';
					break;

				default:
					$this->state = 'value_char';
					break;
			}
		}
	}

	/**
	 * Parse a header value while outside quotes
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
		}
		else
		{
			switch ($this->data[$this->position])
			{
				case '"':
					$this->position++;
					$this->state = 'value';
					break;

				case "\x0A":
					$this->position++;
					$this->state = 'new_line';
					break;

				case '\\':
					$this->position++;
					$this->state = 'quote_escaped';
					break;

				default:
					$this->state = 'quote_char';
					break;
			}
		}
	}

	/**
	 * Parse a header value while within quotes
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * Parse an escaped character within quotes
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parsed a "Transfer-Encoding: chunked" body
	 */
	protected function chunked()
	{
		if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
		{
			$this->state = 'emit';
			return;
		}

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			$is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
			if (!$is_chunked)
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec(trim($matches[1]));
			if ($length === 0)
			{
				// Ignore trailer headers
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}

			$chunk_length = strlen($matches[0]);
			$decoded .= $part = substr($encoded, $chunk_length, $length);
			$encoded = substr($encoded, $chunk_length + $length + 2);

			if (trim($encoded) === '0' || empty($encoded))
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}

	/**
	 * Prepare headers (take care of proxies headers)
	 *
	 * @param string  $headers Raw headers
	 * @param integer $count   Redirection count. Default to 1.
	 *
	 * @return string
	 */
	static public function prepareHeaders($headers, $count = 1)
	{
		$data = explode("\r\n\r\n", $headers, $count);
		$data = array_pop($data);
		if (false !== stripos($data, "HTTP/1.0 200 Connection established\r\n\r\n")) {
			$data = str_ireplace("HTTP/1.0 200 Connection established\r\n\r\n", '', $data);
		}
		if (false !== stripos($data, "HTTP/1.1 200 Connection established\r\n\r\n")) {
			$data = str_ireplace("HTTP/1.1 200 Connection established\r\n\r\n", '', $data);
		}
		return $data;
	}
}