Http.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. <?php
  2. namespace PhpXmlRpc\Helper;
  3. use PhpXmlRpc\Exception\HttpException;
  4. use PhpXmlRpc\PhpXmlRpc;
  5. use PhpXmlRpc\Traits\LoggerAware;
  /**
   * Helper for low-level HTTP message handling: de-chunking bodies, splitting a raw response into
   * headers/cookies/body, and parsing weighted "Accept-*" style header values.
   */
  class Http
  {
      use LoggerAware;

      /**
       * Decode a string that is encoded with "chunked" transfer encoding as defined in RFC 2068 par. 19.4.6
       * (see also RFC 7230 section 4.1).
       * Code originally taken from the nusoap library by Dietrich Ayala.
       *
       * The decoder is deliberately tolerant: when the data is truncated (e.g. because of a broken connection)
       * whatever could be read is returned instead of raising an error. Chunk extensions (";name=value" after
       * the chunk size) are ignored, and any trailer fields following the last chunk are discarded.
       *
       * @internal this function will become protected in the future
       *
       * @param string $buffer the string to be decoded
       * @return string the decoded data; an empty string if no chunk could be read
       */
      public static function decodeChunked($buffer)
      {
          $decoded = '';
          $bufferLength = strlen($buffer);
          // offset of the chunk-size line which has to be read next
          $cursor = 0;

          while (true) {
              // read chunk-size, chunk-extension (if any) and crlf
              $sizeLineEnd = strpos($buffer, "\r\n", $cursor);
              if ($sizeLineEnd === false) {
                  // no complete chunk-size line: just in case we got a broken connection
                  break;
              }
              $sizeField = substr($buffer, $cursor, $sizeLineEnd - $cursor);
              // drop the chunk extension: its characters must not be interpreted as hex digits
              $extensionStart = strpos($sizeField, ';');
              if ($extensionStart !== false) {
                  $sizeField = substr($sizeField, 0, $extensionStart);
              }
              $sizeField = trim($sizeField);
              if (!preg_match('/^[0-9a-fA-F]+$/', $sizeField)) {
                  // not a chunk size: stop here instead of decoding garbage
                  break;
              }
              $chunkSize = hexdec($sizeField);
              if ($chunkSize == 0) {
                  // last-chunk reached
                  break;
              }

              $chunkStart = $sizeLineEnd + 2;
              // look for the crlf closing the chunk-data, but only if the declared size fits in the buffer
              // (an out-of-range offset makes strpos() throw on php 8 and emit a warning on older versions)
              $chunkEnd = false;
              if ($chunkSize <= $bufferLength - $chunkStart) {
                  $chunkEnd = strpos($buffer, "\r\n", $chunkStart + (int)$chunkSize);
              }
              if ($chunkEnd === false) {
                  // just in case we got a broken connection: keep what was received and stop
                  $decoded .= substr($buffer, $chunkStart);
                  break;
              }

              // append chunk-data to entity-body
              $decoded .= substr($buffer, $chunkStart, $chunkEnd - $chunkStart);
              // move past the crlf closing the chunk-data, to the next chunk-size line
              $cursor = $chunkEnd + 2;
          }

          return $decoded;
      }

      /**
       * Locates the empty line separating the headers of an http message from its body.
       * Both CR/LF and bare LF line endings are accepted (even though the latter is not valid http); when both
       * kinds of separator are present, the one which comes first wins.
       *
       * @param string $data
       * @return int[]|false false when there is no separator, otherwise an array with 2 elements: the offset at
       *                     which the headers end and the offset at which the body starts
       */
      protected static function findHeadersEnd($data)
      {
          $crlfPos = strpos($data, "\r\n\r\n");
          $lfPos = strpos($data, "\n\n");

          if ($crlfPos !== false && ($lfPos === false || $crlfPos < $lfPos)) {
              return array($crlfPos, $crlfPos + 4);
          }
          if ($lfPos !== false) {
              return array($lfPos, $lfPos + 2);
          }

          return false;
      }

      /**
       * Parses an http response's headers and separates them from the body.
       *
       * @param string $data the http response, headers and body. It will be stripped of headers
       * @param bool $headersProcessed when true, we assume that response inflating and dechunking has been already carried out
       * @param int $debug when > 0, logs to screen messages detailing info about the parsed data
       * @return array with keys 'headers', 'cookies', 'raw_data' and 'status_code', plus 'protocol_version' when
       *               the status line could be parsed
       * @throws HttpException when the response status is not 200, or when the body can not be de-chunked or inflated
       *
       * @todo if $debug is < 0, we could avoid populating 'raw_data' in the returned value - but that would
       *       be a weird API... (note that we still need to always have headers parsed for content charset)
       */
      public function parseResponseHeaders(&$data, $headersProcessed = false, $debug = 0)
      {
          $httpResponse = array('raw_data' => $data, 'headers'=> array(), 'cookies' => array(), 'status_code' => null);

          // Support "web-proxy-tunnelling" connections for https through proxies
          if (preg_match('/^HTTP\/1\.[0-1] 200 Connection established/', $data)) {
              $separator = static::findHeadersEnd($data);
              if ($separator === false) {
                  // No separation between response headers and body: fault?
                  $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': HTTPS via proxy error, tunnel connection possibly failed');
                  throw new HttpException(PhpXmlRpc::$xmlrpcstr['http_error'] . ' (HTTPS via proxy error, tunnel connection possibly failed)', PhpXmlRpc::$xmlrpcerr['http_error']);
              }
              // this filters out all http headers from proxy. maybe we could take them into account, too?
              $data = (string)substr($data, $separator[1]);
          }

          // Strip HTTP 1.1 100 Continue header if present
          while (preg_match('/^HTTP\/1\.1 1[0-9]{2} /', $data)) {
              $nextStatusLine = strpos($data, 'HTTP', 12);
              // server sent a Continue header without any (valid) content following...
              // give the client a chance to know it
              if ($nextStatusLine === false) {
                  break;
              }
              $data = substr($data, $nextStatusLine);
          }

          // When using Curl to query servers using Digest Auth, we get back a double set of http headers.
          // Same when following redirects
          // We strip out the 1st...
          /// @todo we should let the caller know that there was a redirect involved
          if ($headersProcessed && preg_match('/^HTTP\/[0-9](?:\.[0-9])? (?:401|30[1278]) /', $data)) {
              if (preg_match('/(\r?\n){2}HTTP\/[0-9](?:\.[0-9])? 200 /', $data)) {
                  $data = preg_replace('/^HTTP\/[0-9](?:\.[0-9])? (?:401|30[1278]) .+?(?:\r?\n){2}(HTTP\/[0-9.]+ 200 )/s', '$1', $data, 1);
              }
          }

          if (preg_match('/^HTTP\/([0-9](?:\.[0-9])?) ([0-9]{3}) /', $data, $matches)) {
              $httpResponse['protocol_version'] = $matches[1];
              $httpResponse['status_code'] = $matches[2];
          }

          if ($httpResponse['status_code'] !== '200') {
              // report the first line of the response, whatever its line ending (or lack thereof)
              $firstLineEnd = strpos($data, "\n");
              $errstr = rtrim($firstLineEnd === false ? $data : substr($data, 0, $firstLineEnd), "\r");
              $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': HTTP error, got response: ' . $errstr);
              throw new HttpException(PhpXmlRpc::$xmlrpcstr['http_error'] . ' (' . $errstr . ')', PhpXmlRpc::$xmlrpcerr['http_error'], null, $httpResponse['status_code']);
          }

          // be tolerant to usage of \n instead of \r\n to separate headers and data (even though it is not valid http)
          $separator = static::findHeadersEnd($data);
          if ($separator === false) {
              // No separation between response headers and body: fault?
              // we could take some action here instead of going on...
              $headerBlock = '';
              $bodyStart = 0;
          } else {
              $headerBlock = substr($data, 0, $separator[0]);
              $bodyStart = $separator[1];
          }

          // be tolerant to line endings, and extra empty lines
          $headerName = null;
          foreach (preg_split("/\r?\n/", trim($headerBlock)) as $line) {
              // take care of (multi-line) headers and cookies
              $arr = explode(':', $line, 2);
              if (count($arr) > 1) {
                  /// @todo according to https://www.rfc-editor.org/rfc/rfc7230#section-3.2.4, we should reject with error
                  ///       400 any messages where a space is present between the header name and colon
                  $headerName = strtolower(trim($arr[0]));
                  if ($headerName == 'set-cookie') {
                      $cookie = $arr[1];
                      // glue together all received cookies, using a comma to separate them (same as php does with getallheaders())
                      if (isset($httpResponse['headers'][$headerName])) {
                          $httpResponse['headers'][$headerName] .= ', ' . trim($cookie);
                      } else {
                          $httpResponse['headers'][$headerName] = trim($cookie);
                      }
                      // parse cookie attributes, in case user wants to correctly honour them
                      // @todo support for server sending multiple time cookie with same name, but using different PATHs
                      foreach (explode(';', $cookie) as $index => $attribute) {
                          $attribute = explode('=', $attribute, 2);
                          $tag = trim($attribute[0]);
                          $val = isset($attribute[1]) ? trim($attribute[1]) : '';
                          if ($index === 0) {
                              // the first name=value pair is the cookie itself
                              $cookieName = $tag;
                              // if present, we have strip leading and trailing " chars from $val
                              if (preg_match('/^"(.*)"$/', $val, $matches)) {
                                  $val = $matches[1];
                              }
                              $httpResponse['cookies'][$cookieName] = array('value' => urldecode($val));
                          } else {
                              $httpResponse['cookies'][$cookieName][$tag] = $val;
                          }
                      }
                  } else {
                      /// @todo some other headers (the ones that allow a CSV list of values) do allow many values to be
                      ///       passed using multiple header lines.
                      ///       We should add content to $xmlrpc->_xh['headers'][$headerName] instead of replacing it for those...
                      $httpResponse['headers'][$headerName] = trim($arr[1]);
                  }
              } elseif ($headerName !== null && trim($line) !== '') {
                  // continuation of the previous header. Empty lines are skipped, to avoid adding trailing whitespace
                  /// @todo improve this: 1. check that the line starts with a space or tab; 2. according to
                  ///       https://www.rfc-editor.org/rfc/rfc7230#section-3.2.4, we should flat out refuse these messages
                  $httpResponse['headers'][$headerName] .= ' ' . trim($line);
              }
          }

          $data = (string)substr($data, $bodyStart);

          if ($debug && count($httpResponse['headers'])) {
              $msg = '';
              foreach ($httpResponse['headers'] as $header => $value) {
                  $msg .= "HEADER: $header: $value\n";
              }
              foreach ($httpResponse['cookies'] as $header => $value) {
                  $msg .= "COOKIE: $header={$value['value']}\n";
              }
              $this->getLogger()->debug($msg);
          }

          // if CURL was used for the call, http headers have been processed, and dechunking + reinflating have been carried out
          if (!$headersProcessed) {

              // Decode chunked encoding sent by http 1.1 servers (coding names are case-insensitive)
              if (isset($httpResponse['headers']['transfer-encoding']) && strtolower($httpResponse['headers']['transfer-encoding']) == 'chunked') {
                  $data = static::decodeChunked($data);
                  if (!$data) {
                      $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': errors occurred when trying to rebuild the chunked data received from server');
                      throw new HttpException(PhpXmlRpc::$xmlrpcstr['dechunk_fail'], PhpXmlRpc::$xmlrpcerr['dechunk_fail'], null, $httpResponse['status_code']);
                  }
              }

              // Decode gzip-compressed stuff
              // code inspired from nusoap library by Dietrich Ayala
              if (isset($httpResponse['headers']['content-encoding'])) {
                  $httpResponse['headers']['content-encoding'] = str_replace('x-', '', $httpResponse['headers']['content-encoding']);
                  // coding names are case-insensitive: compare against a normalized copy of the header value
                  $encoding = str_replace('x-', '', strtolower($httpResponse['headers']['content-encoding']));
                  if ($encoding == 'deflate' || $encoding == 'gzip') {
                      if (!function_exists('gzinflate')) {
                          $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': the server sent deflated data. Your php install must have the Zlib extension compiled in to support this.');
                          throw new HttpException(PhpXmlRpc::$xmlrpcstr['cannot_decompress'], PhpXmlRpc::$xmlrpcerr['cannot_decompress'], null, $httpResponse['status_code']);
                      }

                      // the decoding functions emit warnings on invalid data: silence them, failures are handled below
                      if ($encoding == 'deflate') {
                          $inflated = @gzuncompress($data);
                          if (!$inflated) {
                              // some servers send raw deflate data, without the zlib wrapper
                              $inflated = @gzinflate($data);
                          }
                      } else {
                          // fast path: skip the minimal (10 bytes) gzip header
                          $inflated = @gzinflate(substr($data, 10));
                          if (!$inflated && function_exists('gzdecode')) {
                              // gzip header with optional fields (file name, comment, ...)
                              $inflated = @gzdecode($data);
                          }
                      }

                      if (!$inflated) {
                          $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': errors occurred when trying to decode the deflated data received from server');
                          throw new HttpException(PhpXmlRpc::$xmlrpcstr['decompress_fail'], PhpXmlRpc::$xmlrpcerr['decompress_fail'], null, $httpResponse['status_code']);
                      }

                      $data = $inflated;
                      if ($debug) {
                          $this->getLogger()->debug("---INFLATED RESPONSE---[" . strlen($data) . " chars]---\n$data\n---END---");
                      }
                  }
              }
          } // end of 'if needed, de-chunk, re-inflate response'

          return $httpResponse;
      }

      /**
       * Parses one of the http headers which can have a list of values with quality param.
       * @see https://www.rfc-editor.org/rfc/rfc7231#section-5.3.1
       *
       * The "q" parameter is honoured wherever it appears among the parameters of a value; values without it get
       * a weight of 1. Values with the same weight keep the order they have in the header. Values with a weight
       * of 0 are not removed, they are returned last.
       *
       * @param string $header
       * @return string[] the values found in the header, stripped of parameters, sorted by decreasing weight
       */
      public function parseAcceptHeader($header)
      {
          $accepted = array();
          foreach (explode(',', (string)$header) as $position => $item) {
              $params = explode(';', $item);
              $name = trim(array_shift($params));
              if ($name === '') {
                  // empty header, or stray comma
                  continue;
              }

              $weight = 1.0;
              foreach ($params as $param) {
                  if (preg_match('/^\s*q=([0-9.]+)/i', $param, $matches)) {
                      $weight = (float)$matches[1];
                      break;
                  }
              }

              if (isset($accepted[$name])) {
                  // repeated value: the last weight wins, the position is the one of the first occurrence
                  $accepted[$name]['weight'] = $weight;
              } else {
                  $accepted[$name] = array('weight' => $weight, 'position' => $position);
              }
          }

          // sort by decreasing weight; the position is used as tie-breaker, since sorting is not stable before php 8.0
          uasort($accepted, function ($a, $b) {
              if ($a['weight'] != $b['weight']) {
                  return $a['weight'] > $b['weight'] ? -1 : 1;
              }
              if ($a['position'] == $b['position']) {
                  return 0;
              }
              return $a['position'] < $b['position'] ? -1 : 1;
          });

          return array_keys($accepted);
      }
  }