From fe45abaf49675f038905a654a30855b39c4ad0e2 Mon Sep 17 00:00:00 2001 From: James Date: Sat, 22 Feb 2020 13:56:31 +0000 Subject: [PATCH] make baseurl (internal links only) optional in crawler constructor --- src/CrawlCommand.php | 4 ++-- src/Crawler.php | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/CrawlCommand.php b/src/CrawlCommand.php index 8a64de1..b7879d8 100644 --- a/src/CrawlCommand.php +++ b/src/CrawlCommand.php @@ -37,9 +37,9 @@ class CrawlCommand extends Command protected function execute(InputInterface $input, OutputInterface $output) { $baseUrl = $input->getArgument('url'); - $crawler=new Crawler($baseUrl); + $crawler=new Crawler(); $crawler->crawl($baseUrl); - + foreach($crawler->getResults() as $url=>$result){ $output->writeln("{$result['code']} {$url}"); if($input->getOption('found-on')){ diff --git a/src/Crawler.php b/src/Crawler.php index e6b3218..6ca04b7 100644 --- a/src/Crawler.php +++ b/src/Crawler.php @@ -16,7 +16,7 @@ class Crawler{ private $observer; private $crawler; - public function __construct($baseUrl){ + public function __construct($baseUrl=null){ $this->observer = new CrawlObserver(); $this->crawler = SpatieCrawler::create([ RequestOptions::ALLOW_REDIRECTS => [ @@ -26,9 +26,11 @@ class Crawler{ RequestOptions::TIMEOUT => 10, ]) //->setMaximumDepth(1) - ->setCrawlProfile(new CrawlInternalUrls($baseUrl)) ->setCrawlObserver($this->observer) ; + if($baseUrl){ + $this->crawler->setCrawlProfile(new CrawlInternalUrls($baseUrl)); + } } public function crawl($url){