ffc095b66aca1f1e6ffea01ee5ef34066c7ff19b
[friendica.git/.git] / src / Network / HTTPRequest.php
1 <?php
2 /**
3  * @copyright Copyright (C) 2020, Friendica
4  *
5  * @license GNU AGPL version 3 or any later version
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Affero General Public License as
9  * published by the Free Software Foundation, either version 3 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Affero General Public License for more details.
16  *
17  * You should have received a copy of the GNU Affero General Public License
18  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  */
21
22 namespace Friendica\Network;
23
24 use DOMDocument;
25 use DomXPath;
26 use Friendica\App;
27 use Friendica\Core\Config\IConfig;
28 use Friendica\Core\System;
29 use Friendica\Util\Network;
30 use Friendica\Util\Profiler;
31 use GuzzleHttp\Client;
32 use GuzzleHttp\Exception\RequestException;
33 use Psr\Http\Message\RequestInterface;
34 use Psr\Http\Message\ResponseInterface;
35 use Psr\Http\Message\UriInterface;
36 use Psr\Log\LoggerInterface;
37
38 /**
39  * Performs HTTP requests to a given URL
40  */
41 class HTTPRequest implements IHTTPRequest
42 {
43         /** @var LoggerInterface */
44         private $logger;
45         /** @var Profiler */
46         private $profiler;
47         /** @var IConfig */
48         private $config;
49         /** @var string */
50         private $baseUrl;
51
52         public function __construct(LoggerInterface $logger, Profiler $profiler, IConfig $config, App\BaseURL $baseUrl)
53         {
54                 $this->logger   = $logger;
55                 $this->profiler = $profiler;
56                 $this->config   = $config;
57                 $this->baseUrl  = $baseUrl->get();
58         }
59
60         /**
61          * {@inheritDoc}
62          */
63         public function get(string $url, bool $binary = false, array $opts = [])
64         {
65                 $stamp1 = microtime(true);
66
67                 if (strlen($url) > 1000) {
68                         $this->logger->debug('URL is longer than 1000 characters.', ['url' => $url, 'callstack' => System::callstack(20)]);
69                         return CurlResult::createErrorCurl(substr($url, 0, 200));
70                 }
71
72                 $parts2     = [];
73                 $parts      = parse_url($url);
74                 $path_parts = explode('/', $parts['path'] ?? '');
75                 foreach ($path_parts as $part) {
76                         if (strlen($part) <> mb_strlen($part)) {
77                                 $parts2[] = rawurlencode($part);
78                         } else {
79                                 $parts2[] = $part;
80                         }
81                 }
82                 $parts['path'] = implode('/', $parts2);
83                 $url           = Network::unparseURL($parts);
84
85                 if (Network::isUrlBlocked($url)) {
86                         $this->logger->info('Domain is blocked.', ['url' => $url]);
87                         return CurlResult::createErrorCurl($url);
88                 }
89
90                 $curlOptions = [];
91
92                 $curlOptions[CURLOPT_HEADER] = true;
93
94                 if (!empty($opts['cookiejar'])) {
95                         $curlOptions[CURLOPT_COOKIEJAR] = $opts["cookiejar"];
96                         $curlOptions[CURLOPT_COOKIEFILE] = $opts["cookiejar"];
97                 }
98
99                 // These settings aren't needed. We're following the location already.
100                 //      $curlOptions[CURLOPT_FOLLOWLOCATION] =true;
101                 //      $curlOptions[CURLOPT_MAXREDIRS] = 5;
102
103                 if (!empty($opts['accept_content'])) {
104                         if (empty($curlOptions[CURLOPT_HTTPHEADER])) {
105                                 $curlOptions[CURLOPT_HTTPHEADER] = [];
106                         }
107                         array_push($curlOptions[CURLOPT_HTTPHEADER], 'Accept: ' . $opts['accept_content']);
108                 }
109
110                 if (!empty($opts['header'])) {
111                         if (empty($curlOptions[CURLOPT_HTTPHEADER])) {
112                                 $curlOptions[CURLOPT_HTTPHEADER] = [];
113                         }
114                         $curlOptions[CURLOPT_HTTPHEADER] = array_merge($opts['header'], $curlOptions[CURLOPT_HTTPHEADER]);
115                 }
116
117                 $curlOptions[CURLOPT_RETURNTRANSFER] = true;
118                 $curlOptions[CURLOPT_USERAGENT] = $this->getUserAgent();
119
120                 $range = intval($this->config->get('system', 'curl_range_bytes', 0));
121
122                 if ($range > 0) {
123                         $curlOptions[CURLOPT_RANGE] = '0-' . $range;
124                 }
125
126                 // Without this setting it seems as if some webservers send compressed content
127                 // This seems to confuse curl so that it shows this uncompressed.
128                 /// @todo  We could possibly set this value to "gzip" or something similar
129                 $curlOptions[CURLOPT_ENCODING] = '';
130
131                 if (!empty($opts['headers'])) {
132                         if (empty($curlOptions[CURLOPT_HTTPHEADER])) {
133                                 $curlOptions[CURLOPT_HTTPHEADER] = [];
134                         }
135                         $curlOptions[CURLOPT_HTTPHEADER] = array_merge($opts['headers'], $curlOptions[CURLOPT_HTTPHEADER]);
136                 }
137
138                 if (!empty($opts['nobody'])) {
139                         $curlOptions[CURLOPT_NOBODY] = $opts['nobody'];
140                 }
141
142                 $curlOptions[CURLOPT_CONNECTTIMEOUT] = 10;
143
144                 if (!empty($opts['timeout'])) {
145                         $curlOptions[CURLOPT_TIMEOUT] = $opts['timeout'];
146                 } else {
147                         $curl_time = $this->config->get('system', 'curl_timeout', 60);
148                         $curlOptions[CURLOPT_TIMEOUT] = intval($curl_time);
149                 }
150
151                 // by default we will allow self-signed certs
152                 // but you can override this
153
154                 $check_cert = $this->config->get('system', 'verifyssl');
155                 $curlOptions[CURLOPT_SSL_VERIFYPEER] = ($check_cert) ? true : false;
156
157                 if ($check_cert) {
158                         $curlOptions[CURLOPT_SSL_VERIFYHOST] = 2;
159                 }
160
161                 $proxy = $this->config->get('system', 'proxy');
162
163                 if (!empty($proxy)) {
164                         $curlOptions[CURLOPT_HTTPPROXYTUNNEL] = 1;
165                         $curlOptions[CURLOPT_PROXY] = $proxy;
166                         $proxyuser = $this->config->get('system', 'proxyuser');
167
168                         if (!empty($proxyuser)) {
169                                 $curlOptions[CURLOPT_PROXYUSERPWD] = $proxyuser;
170                         }
171                 }
172
173                 if ($this->config->get('system', 'ipv4_resolve', false)) {
174                         $curlOptions[CURLOPT_IPRESOLVE] = CURL_IPRESOLVE_V4;
175                 }
176
177                 if ($binary) {
178                         $curlOptions[CURLOPT_BINARYTRANSFER] = 1;
179                 }
180
181                 $onRedirect = function(
182                         RequestInterface $request,
183                         ResponseInterface $response,
184                         UriInterface $uri
185                 ) {
186                         $this->logger->notice('Curl redirect.', ['url' => $request->getUri(), 'to' => $uri]);
187                 };
188
189                 $client = new Client([
190                         'allow_redirect' => [
191                                 'max' => 8,
192                                 'on_redirect' => $onRedirect,
193                                 'track_redirect' => true,
194                                 'strict' => true,
195                                 'referer' => true,
196                         ],
197                         'on_headers' => $onHeaders,
198                         'sink' => tempnam(get_temppath(), 'guzzle'),
199                         'curl' => $curlOptions
200                 ]);
201
202                 try {
203                         $response = $client->get($url);
204                         return new GuzzleResponse($response, $url);
205                 } catch (RequestException $exception) {
206                         if ($exception->hasResponse()) {
207                                 return new GuzzleResponse($exception->getResponse(), $url, $exception->getCode(), $exception->getMessage());
208                         } else {
209                                 return new CurlResult($url, '', ['http_code' => $exception->getCode()], $exception->getCode(), $exception->getMessage());
210                         }
211                 } finally {
212                         $this->profiler->saveTimestamp($stamp1, 'network');
213                 }
214         }
215
216         /**
217          * {@inheritDoc}
218          *
219          * @param int $redirects The recursion counter for internal use - default 0
220          *
221          * @throws \Friendica\Network\HTTPException\InternalServerErrorException
222          */
223         public function post(string $url, $params, array $headers = [], int $timeout = 0, int &$redirects = 0)
224         {
225                 $stamp1 = microtime(true);
226
227                 if (Network::isUrlBlocked($url)) {
228                         $this->logger->info('Domain is blocked.' . ['url' => $url]);
229                         return CurlResult::createErrorCurl($url);
230                 }
231
232                 $ch = curl_init($url);
233
234                 if (($redirects > 8) || (!$ch)) {
235                         return CurlResult::createErrorCurl($url);
236                 }
237
238                 $this->logger->debug('Post_url: start.', ['url' => $url]);
239
240                 curl_setopt($ch, CURLOPT_HEADER, true);
241                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
242                 curl_setopt($ch, CURLOPT_POST, 1);
243                 curl_setopt($ch, CURLOPT_POSTFIELDS, $params);
244                 curl_setopt($ch, CURLOPT_USERAGENT, $this->getUserAgent());
245
246                 if ($this->config->get('system', 'ipv4_resolve', false)) {
247                         curl_setopt($ch, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4);
248                 }
249
250                 @curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
251
252                 if (intval($timeout)) {
253                         curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
254                 } else {
255                         $curl_time = $this->config->get('system', 'curl_timeout', 60);
256                         curl_setopt($ch, CURLOPT_TIMEOUT, intval($curl_time));
257                 }
258
259                 if (!empty($headers)) {
260                         curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
261                 }
262
263                 $check_cert = $this->config->get('system', 'verifyssl');
264                 curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (($check_cert) ? true : false));
265
266                 if ($check_cert) {
267                         @curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
268                 }
269
270                 $proxy = $this->config->get('system', 'proxy');
271
272                 if (!empty($proxy)) {
273                         curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, 1);
274                         curl_setopt($ch, CURLOPT_PROXY, $proxy);
275                         $proxyuser = $this->config->get('system', 'proxyuser');
276                         if (!empty($proxyuser)) {
277                                 curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
278                         }
279                 }
280
281                 // don't let curl abort the entire application
282                 // if it throws any errors.
283
284                 $s = @curl_exec($ch);
285
286                 $curl_info = curl_getinfo($ch);
287
288                 $curlResponse = new CurlResult($url, $s, $curl_info, curl_errno($ch), curl_error($ch));
289
290                 if (!Network::isRedirectBlocked($url) && $curlResponse->isRedirectUrl()) {
291                         $redirects++;
292                         $this->logger->info('Post redirect.', ['url' => $url, 'to' => $curlResponse->getRedirectUrl()]);
293                         curl_close($ch);
294                         return $this->post($curlResponse->getRedirectUrl(), $params, $headers, $redirects, $timeout);
295                 }
296
297                 curl_close($ch);
298
299                 $this->profiler->saveTimestamp($stamp1, 'network');
300
301                 // Very old versions of Lighttpd don't like the "Expect" header, so we remove it when needed
302                 if ($curlResponse->getReturnCode() == 417) {
303                         $redirects++;
304
305                         if (empty($headers)) {
306                                 $headers = ['Expect:'];
307                         } else {
308                                 if (!in_array('Expect:', $headers)) {
309                                         array_push($headers, 'Expect:');
310                                 }
311                         }
312                         $this->logger->info('Server responds with 417, applying workaround', ['url' => $url]);
313                         return $this->post($url, $params, $headers, $redirects, $timeout);
314                 }
315
316                 $this->logger->debug('Post_url: End.', ['url' => $url]);
317
318                 return $curlResponse;
319         }
320
321         /**
322          * {@inheritDoc}
323          */
324         public function finalUrl(string $url, int $depth = 1, bool $fetchbody = false)
325         {
326                 if (Network::isUrlBlocked($url)) {
327                         $this->logger->info('Domain is blocked.', ['url' => $url]);
328                         return $url;
329                 }
330
331                 if (Network::isRedirectBlocked($url)) {
332                         $this->logger->info('Domain should not be redirected.', ['url' => $url]);
333                         return $url;
334                 }
335
336                 $url = Network::stripTrackingQueryParams($url);
337
338                 if ($depth > 10) {
339                         return $url;
340                 }
341
342                 $url = trim($url, "'");
343
344                 $stamp1 = microtime(true);
345
346                 $ch = curl_init();
347                 curl_setopt($ch, CURLOPT_URL, $url);
348                 curl_setopt($ch, CURLOPT_HEADER, 1);
349                 curl_setopt($ch, CURLOPT_NOBODY, 1);
350                 curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
351                 curl_setopt($ch, CURLOPT_TIMEOUT, 10);
352                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
353                 curl_setopt($ch, CURLOPT_USERAGENT, $this->getUserAgent());
354
355                 curl_exec($ch);
356                 $curl_info = @curl_getinfo($ch);
357                 $http_code = $curl_info['http_code'];
358                 curl_close($ch);
359
360                 $this->profiler->saveTimestamp($stamp1, "network");
361
362                 if ($http_code == 0) {
363                         return $url;
364                 }
365
366                 if (in_array($http_code, ['301', '302'])) {
367                         if (!empty($curl_info['redirect_url'])) {
368                                 return $this->finalUrl($curl_info['redirect_url'], ++$depth, $fetchbody);
369                         } elseif (!empty($curl_info['location'])) {
370                                 return $this->finalUrl($curl_info['location'], ++$depth, $fetchbody);
371                         }
372                 }
373
374                 // Check for redirects in the meta elements of the body if there are no redirects in the header.
375                 if (!$fetchbody) {
376                         return $this->finalUrl($url, ++$depth, true);
377                 }
378
379                 // if the file is too large then exit
380                 if ($curl_info["download_content_length"] > 1000000) {
381                         return $url;
382                 }
383
384                 // if it isn't a HTML file then exit
385                 if (!empty($curl_info["content_type"]) && !strstr(strtolower($curl_info["content_type"]), "html")) {
386                         return $url;
387                 }
388
389                 $stamp1 = microtime(true);
390
391                 $ch = curl_init();
392                 curl_setopt($ch, CURLOPT_URL, $url);
393                 curl_setopt($ch, CURLOPT_HEADER, 0);
394                 curl_setopt($ch, CURLOPT_NOBODY, 0);
395                 curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
396                 curl_setopt($ch, CURLOPT_TIMEOUT, 10);
397                 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
398                 curl_setopt($ch, CURLOPT_USERAGENT, $this->getUserAgent());
399
400                 $body = curl_exec($ch);
401                 curl_close($ch);
402
403                 $this->profiler->saveTimestamp($stamp1, "network");
404
405                 if (trim($body) == "") {
406                         return $url;
407                 }
408
409                 // Check for redirect in meta elements
410                 $doc = new DOMDocument();
411                 @$doc->loadHTML($body);
412
413                 $xpath = new DomXPath($doc);
414
415                 $list = $xpath->query("//meta[@content]");
416                 foreach ($list as $node) {
417                         $attr = [];
418                         if ($node->attributes->length) {
419                                 foreach ($node->attributes as $attribute) {
420                                         $attr[$attribute->name] = $attribute->value;
421                                 }
422                         }
423
424                         if (@$attr["http-equiv"] == 'refresh') {
425                                 $path = $attr["content"];
426                                 $pathinfo = explode(";", $path);
427                                 foreach ($pathinfo as $value) {
428                                         if (substr(strtolower($value), 0, 4) == "url=") {
429                                                 return $this->finalUrl(substr($value, 4), ++$depth);
430                                         }
431                                 }
432                         }
433                 }
434
435                 return $url;
436         }
437
438         /**
439          * {@inheritDoc}
440          *
441          * @param int $redirects The recursion counter for internal use - default 0
442          *
443          * @throws \Friendica\Network\HTTPException\InternalServerErrorException
444          */
445         public function fetch(string $url, bool $binary = false, int $timeout = 0, string $accept_content = '', string $cookiejar = '', int &$redirects = 0)
446         {
447                 $ret = $this->fetchFull($url, $binary, $timeout, $accept_content, $cookiejar, $redirects);
448
449                 return $ret->getBody();
450         }
451
452         /**
453          * {@inheritDoc}
454          *
455          * @param int $redirects The recursion counter for internal use - default 0
456          *
457          * @throws \Friendica\Network\HTTPException\InternalServerErrorException
458          */
459         public function fetchFull(string $url, bool $binary = false, int $timeout = 0, string $accept_content = '', string $cookiejar = '', int &$redirects = 0)
460         {
461                 return $this->get(
462                         $url,
463                         $binary,
464                         [
465                                 'timeout'        => $timeout,
466                                 'accept_content' => $accept_content,
467                                 'cookiejar'      => $cookiejar
468                         ]
469                 );
470         }
471
472         /**
473          * {@inheritDoc}
474          */
475         public function getUserAgent()
476         {
477                 return
478                         FRIENDICA_PLATFORM . " '" .
479                         FRIENDICA_CODENAME . "' " .
480                         FRIENDICA_VERSION . '-' .
481                         DB_UPDATE_VERSION . '; ' .
482                         $this->baseUrl;
483         }
484 }