sitemap/docker/crawl.php
2021-01-16 10:54:14 +00:00

46 lines
953 B
PHP

<?php
require_once(__DIR__.'/vendor/autoload.php');
use \JHodges\Sitemap\Crawler;
// Crawl every URL listed in CRAWL_URL and print a Markdown report:
//   1. a table counting how many crawled URLs returned each result code;
//   2. a fenced listing of URLs (optionally restricted to the codes named in
//      CRAWL_CODE) together with the pages each one was found on.

// CRAWL_URL: comma-separated list of start URLs (required). Entries are
// trimmed and empty ones are dropped.
if ($rawUrls = getenv('CRAWL_URL')) {
    $urls = array_filter(array_map('trim', explode(',', $rawUrls)));
} else {
    die("No env: CRAWL_URL\n");
}

// CRAWL_CODE: optional comma-separated list of result codes to include in the
// detail listing. When unset or empty, every result is listed.
if ($rawCodes = getenv('CRAWL_CODE')) {
    $codes = array_filter(array_map('trim', explode(',', $rawCodes)));
} else {
    $codes = [];
}

$crawler = new Crawler();
foreach ($urls as $startUrl) {
    $crawler->crawl($startUrl);
}

$summary = []; // result code => number of URLs that returned it
$details = ''; // accumulated text for the fenced detail listing

foreach ($crawler->getResults() as $url => $result) {
    $resultCode = $result['code'];

    // Initialise the counter before incrementing: incrementing a missing key
    // raises an "undefined index / array key" diagnostic that would otherwise
    // end up in the report output.
    if (!isset($summary[$resultCode])) {
        $summary[$resultCode] = 0;
    }
    $summary[$resultCode]++;

    // Loose comparison is intentional: the configured codes are strings read
    // from the environment, while the crawler may report codes as integers.
    if (count($codes) == 0 || in_array($resultCode, $codes)) {
        $details .= "{$resultCode} {$url}\n";
        // Use a dedicated variable name so the outer loop's $url is not
        // overwritten by the referrer URLs.
        foreach ($result['foundOn'] as $referrer => $count) {
            $details .= " <- ($count) $referrer\n";
        }
    }
}

// Summary table, ordered by result code.
ksort($summary);
echo '|code|count|'."\n";
echo '|----|-----|'."\n";
foreach ($summary as $code => $count) {
    echo "| $code | $count |\n";
}

// Detail listing, only when at least one result matched the filter.
if ($details) {
    echo "\n\n----\n\n```\n$details\n```\n";
}