make baseurl (internal links only) optional in crawler constructor

James 2020-02-22 13:56:31 +00:00
parent ef4ebf7cd0
commit fe45abaf49
2 changed files with 6 additions and 4 deletions
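
In practice the change means the wrapper can be constructed with or without a base URL: pass one and crawling is limited to that site's internal links, leave it out and every discovered link is followed. A minimal usage sketch (the example URL is illustrative; the class and method names are taken from the diff below):

    $internalOnly = new Crawler('https://example.com'); // internal-links-only profile applied
    $internalOnly->crawl('https://example.com');

    $unrestricted = new Crawler();                       // no profile, external links are followed too
    $unrestricted->crawl('https://example.com');

    foreach ($unrestricted->getResults() as $url => $result) {
        echo "{$result['code']} {$url}\n";
    }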

@@ -37,9 +37,9 @@ class CrawlCommand extends Command
     protected function execute(InputInterface $input, OutputInterface $output)
     {
         $baseUrl = $input->getArgument('url');
-        $crawler=new Crawler($baseUrl);
+        $crawler=new Crawler();
         $crawler->crawl($baseUrl);
         foreach($crawler->getResults() as $url=>$result){
             $output->writeln("{$result['code']} {$url}");
             if($input->getOption('found-on')){

@@ -16,7 +16,7 @@ class Crawler{
     private $observer;
     private $crawler;
-    public function __construct($baseUrl){
+    public function __construct($baseUrl=null){
         $this->observer = new CrawlObserver();
         $this->crawler = SpatieCrawler::create([
             RequestOptions::ALLOW_REDIRECTS => [
@@ -26,9 +26,11 @@ class Crawler{
             RequestOptions::TIMEOUT => 10,
         ])
             //->setMaximumDepth(1)
-            ->setCrawlProfile(new CrawlInternalUrls($baseUrl))
             ->setCrawlObserver($this->observer)
         ;
+        if($baseUrl){
+            $this->crawler->setCrawlProfile(new CrawlInternalUrls($baseUrl));
+        }
     }
     public function crawl($url){
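
For the "(internal links only)" part of the message: spatie/crawler's CrawlInternalUrls profile keeps the crawl on the base URL's host, which is why the profile can only be attached when a base URL is actually supplied. Roughly what such a profile does, as an illustrative stand-in rather than the package's real source:

    use Psr\Http\Message\UriInterface;
    use Spatie\Crawler\CrawlProfile;

    // Illustrative sketch only: crawl a URL only when its host matches the base URL's host.
    class InternalUrlsOnly extends CrawlProfile
    {
        private $host;

        public function __construct($baseUrl)
        {
            $this->host = parse_url($baseUrl, PHP_URL_HOST);
        }

        public function shouldCrawl(UriInterface $url): bool
        {
            return $url->getHost() === $this->host;
        }
    }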