Crawler takes request options in constructor; update docs

This commit is contained in:
James
2020-02-23 18:23:36 +00:00
parent c3cf29848a
commit 340e78de13
4 changed files with 30 additions and 45 deletions

View File

@@ -16,21 +16,15 @@ class Crawler{
private $observer;
private $crawler;
public function __construct($baseUrl=null){
$this->observer = new CrawlObserver();
$this->crawler = SpatieCrawler::create([
public function __construct($reqOps=[]){
$this->crawler = SpatieCrawler::create(array_merge($reqOps, [
RequestOptions::ALLOW_REDIRECTS => [
'track_redirects' => true,
],
RequestOptions::CONNECT_TIMEOUT => 3,
RequestOptions::TIMEOUT => 3,
])
//->setMaximumDepth(1)
->setCrawlObserver($this->observer)
;
if($baseUrl){
$this->crawler->setCrawlProfile(new CrawlInternalUrls($baseUrl));
}
]));
$this->observer = new CrawlObserver();
$this->crawler->setCrawlObserver($this->observer);
}
public function crawl($url){