Make the base URL (which restricts crawling to internal links only) optional in the Crawler constructor
This commit is contained in:
parent
ef4ebf7cd0
commit
fe45abaf49
@ -37,9 +37,9 @@ class CrawlCommand extends Command
|
|||||||
protected function execute(InputInterface $input, OutputInterface $output)
|
protected function execute(InputInterface $input, OutputInterface $output)
|
||||||
{
|
{
|
||||||
$baseUrl = $input->getArgument('url');
|
$baseUrl = $input->getArgument('url');
|
||||||
$crawler=new Crawler($baseUrl);
|
$crawler=new Crawler();
|
||||||
$crawler->crawl($baseUrl);
|
$crawler->crawl($baseUrl);
|
||||||
|
|
||||||
foreach($crawler->getResults() as $url=>$result){
|
foreach($crawler->getResults() as $url=>$result){
|
||||||
$output->writeln("{$result['code']} {$url}");
|
$output->writeln("{$result['code']} {$url}");
|
||||||
if($input->getOption('found-on')){
|
if($input->getOption('found-on')){
|
||||||
|
@ -16,7 +16,7 @@ class Crawler{
|
|||||||
private $observer;
|
private $observer;
|
||||||
private $crawler;
|
private $crawler;
|
||||||
|
|
||||||
public function __construct($baseUrl){
|
public function __construct($baseUrl=null){
|
||||||
$this->observer = new CrawlObserver();
|
$this->observer = new CrawlObserver();
|
||||||
$this->crawler = SpatieCrawler::create([
|
$this->crawler = SpatieCrawler::create([
|
||||||
RequestOptions::ALLOW_REDIRECTS => [
|
RequestOptions::ALLOW_REDIRECTS => [
|
||||||
@ -26,9 +26,11 @@ class Crawler{
|
|||||||
RequestOptions::TIMEOUT => 10,
|
RequestOptions::TIMEOUT => 10,
|
||||||
])
|
])
|
||||||
//->setMaximumDepth(1)
|
//->setMaximumDepth(1)
|
||||||
->setCrawlProfile(new CrawlInternalUrls($baseUrl))
|
|
||||||
->setCrawlObserver($this->observer)
|
->setCrawlObserver($this->observer)
|
||||||
;
|
;
|
||||||
|
if($baseUrl){
|
||||||
|
$this->crawler->setCrawlProfile(new CrawlInternalUrls($baseUrl));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public function crawl($url){
|
public function crawl($url){
|
||||||
|
Loading…
Reference in New Issue
Block a user