Make $baseUrl optional in the Crawler constructor; the internal-links-only crawl profile is applied only when a base URL is given

This commit is contained in:
James 2020-02-22 13:56:31 +00:00
parent ef4ebf7cd0
commit fe45abaf49
2 changed files with 6 additions and 4 deletions

View File

@@ -37,9 +37,9 @@ class CrawlCommand extends Command
protected function execute(InputInterface $input, OutputInterface $output) protected function execute(InputInterface $input, OutputInterface $output)
{ {
$baseUrl = $input->getArgument('url'); $baseUrl = $input->getArgument('url');
$crawler=new Crawler($baseUrl); $crawler=new Crawler();
$crawler->crawl($baseUrl); $crawler->crawl($baseUrl);
foreach($crawler->getResults() as $url=>$result){ foreach($crawler->getResults() as $url=>$result){
$output->writeln("{$result['code']} {$url}"); $output->writeln("{$result['code']} {$url}");
if($input->getOption('found-on')){ if($input->getOption('found-on')){

View File

@@ -16,7 +16,7 @@ class Crawler{
private $observer; private $observer;
private $crawler; private $crawler;
public function __construct($baseUrl){ public function __construct($baseUrl=null){
$this->observer = new CrawlObserver(); $this->observer = new CrawlObserver();
$this->crawler = SpatieCrawler::create([ $this->crawler = SpatieCrawler::create([
RequestOptions::ALLOW_REDIRECTS => [ RequestOptions::ALLOW_REDIRECTS => [
@@ -26,9 +26,11 @@ class Crawler{
RequestOptions::TIMEOUT => 10, RequestOptions::TIMEOUT => 10,
]) ])
//->setMaximumDepth(1) //->setMaximumDepth(1)
->setCrawlProfile(new CrawlInternalUrls($baseUrl))
->setCrawlObserver($this->observer) ->setCrawlObserver($this->observer)
; ;
if($baseUrl){
$this->crawler->setCrawlProfile(new CrawlInternalUrls($baseUrl));
}
} }
public function crawl($url){ public function crawl($url){