4 * This file is part of Crawler Detect - the web crawler detection library.
6 * (c) Mark Beech <m@rkbee.ch>
8 * This source file is subject to the MIT license that is bundled
9 * with this source code in the file LICENSE.
12 namespace Jaybizzle\CrawlerDetect;
14 use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
15 use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
16 use Jaybizzle\CrawlerDetect\Fixtures\Headers;
/**
 * The user agent string that isCrawler() tests by default.
 *
 * @var string|null
 */
protected $userAgent = null;

/**
 * Headers that contain a user agent.
 *
 * Holds the HTTP_-prefixed entries collected by setHttpHeaders().
 *
 * @var array
 */
protected $httpHeaders = array();

/**
 * Store regex matches.
 *
 * @var array
 */
protected $matches = array();

/**
 * Fixture supplying the crawler patterns.
 *
 * Declared explicitly: the constructor assigns it, and assigning an
 * undeclared property is deprecated as of PHP 8.2.
 *
 * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
 */
protected $crawlers;

/**
 * Fixture supplying the exclusion patterns.
 *
 * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
 */
protected $exclusions;

/**
 * Fixture supplying the header keys that may carry a user agent.
 *
 * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
 */
protected $uaHttpHeaders;

/**
 * The compiled regex string.
 *
 * @var string
 */
protected $compiledRegex;

/**
 * The compiled exclusions regex string.
 *
 * @var string
 */
protected $compiledExclusions;
/**
 * Build the detector: load each fixture, compile its patterns, then record
 * the request headers and the user agent.
 *
 * @param array|null  $headers   Header map handed to setHttpHeaders().
 * @param string|null $userAgent User agent handed to setUserAgent().
 */
public function __construct(array $headers = null, $userAgent = null)
{
    // Crawler patterns and their compiled alternation.
    $this->crawlers = new Crawlers();
    $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());

    // Exclusion patterns and their compiled alternation.
    $this->exclusions = new Exclusions();
    $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());

    // Header keys consulted when no explicit user agent is supplied.
    $this->uaHttpHeaders = new Headers();

    // Headers must be stored first: setUserAgent() reads them when $userAgent is null.
    $this->setHttpHeaders($headers);
    $this->setUserAgent($userAgent);
}
/**
 * Compile the regex patterns into one regex string.
 *
 * The patterns are joined with "|" and wrapped in a single pair of
 * parentheses; no delimiters or flags are added here.
 *
 * @param array $patterns
 *
 * @return string
 */
public function compileRegex($patterns)
{
    $alternation = implode('|', $patterns);

    return "({$alternation})";
}
/**
 * Set HTTP headers.
 *
 * Replaces the stored headers with the HTTP_-prefixed entries of the
 * given array, or of $_SERVER when the argument is not a non-empty array.
 *
 * @param array|null $httpHeaders
 */
public function setHttpHeaders($httpHeaders)
{
    // Anything other than a non-empty array means "read from the global _SERVER".
    $source = (is_array($httpHeaders) && count($httpHeaders)) ? $httpHeaders : $_SERVER;

    // Start from an empty set so nothing from a previous call survives.
    $collected = array();

    foreach ($source as $name => $content) {
        // In PHP land, HTTP headers are the _SERVER vars that start with HTTP_.
        if (strpos($name, 'HTTP_') !== 0) {
            continue;
        }

        $collected[$name] = $content;
    }

    $this->httpHeaders = $collected;
}
/**
 * Return user agent headers.
 *
 * @return mixed Whatever the headers fixture's getAll() yields;
 *               setUserAgent() iterates it as a list of header keys.
 */
public function getUaHttpHeaders()
{
    $headerFixture = $this->uaHttpHeaders;

    return $headerFixture->getAll();
}
/**
 * Set the user agent.
 *
 * When null is given, the user agent is assembled from the stored HTTP
 * headers: the value of every known user-agent header that is present is
 * appended, each followed by a single space.
 *
 * @param string|null $userAgent
 *
 * @return string|null The value that was stored.
 */
public function setUserAgent($userAgent)
{
    if ($userAgent === null) {
        foreach ($this->getUaHttpHeaders() as $headerKey) {
            if (! isset($this->httpHeaders[$headerKey])) {
                continue;
            }

            $userAgent .= $this->httpHeaders[$headerKey].' ';
        }
    }

    $this->userAgent = $userAgent;

    return $userAgent;
}
/**
 * Check user agent string against the regex.
 *
 * Exclusion patterns are stripped from the user agent first; what remains
 * is matched case-insensitively against the compiled crawler regex. The
 * matches of this call replace any previously stored matches.
 *
 * @param string|null $userAgent User agent to test; the stored user agent
 *                               is used when this is empty.
 *
 * @return bool
 */
public function isCrawler($userAgent = null)
{
    // Both the argument and the stored value may be null; cast so PCRE never
    // receives a null subject (deprecated since PHP 8.1).
    $candidate = (string) ($userAgent ?: $this->userAgent);

    $stripped = preg_replace("/{$this->compiledExclusions}/i", '', $candidate);

    // preg_replace() returns null on a PCRE failure; the cast turns that
    // into an empty string instead of handing null to trim().
    $agent = trim((string) $stripped);

    // Nothing left to test: report "not a crawler" and clear stale matches.
    if ($agent === '') {
        $this->matches = array();

        return false;
    }

    $result = preg_match("/{$this->compiledRegex}/i", $agent, $matches);

    $this->matches = $matches;

    return (bool) $result;
}
/**
 * Return the matches.
 *
 * @return string|null The first element of the stored matches, or null
 *                     when none is stored.
 */
public function getMatches()
{
    if (! isset($this->matches[0])) {
        return null;
    }

    return $this->matches[0];
}