sitemap/docker/crawl.php
2021-01-06 17:21:04 +00:00

42 lines
856 B
PHP

<?php
require_once(__DIR__.'/vendor/autoload.php');
use \JHodges\Sitemap\Crawler;
// Crawl the site rooted at CRAWL_URL and print a markdown report:
//   1. a table with the number of crawled URLs per result code, and
//   2. a fenced listing of individual URLs (with the pages they were found
//      on), optionally restricted to the codes listed in CRAWL_CODE.

// The start URL is mandatory; abort when it is unset or empty.
$url = getenv('CRAWL_URL');
if (!$url) {
    die("No env: CRAWL_URL\n");
}

// CRAWL_CODE is an optional comma-separated list of result codes to show in
// the details section. Compare against false explicitly and drop only empty
// entries, so that a filter of "0" is honoured instead of being treated as
// "no filter".
$codes = [];
$rawCodes = getenv('CRAWL_CODE');
if ($rawCodes !== false) {
    foreach (explode(',', $rawCodes) as $rawCode) {
        $rawCode = trim($rawCode);
        if ($rawCode !== '') {
            $codes[] = $rawCode;
        }
    }
}

$crawler = new Crawler();
$crawler->crawl($url);

$summary = []; // result code => number of crawled URLs with that code
$details = ''; // accumulated text of the per-URL listing

// Each result is keyed by the crawled URL and exposes 'code' and 'foundOn'.
// Distinct loop variable names are used so the start URL in $url and the
// outer key are not overwritten by the nested loop.
foreach ($crawler->getResults() as $pageUrl => $result) {
    $resultCode = $result['code'];

    // Initialise the counter before incrementing; incrementing a missing
    // key emits an "undefined array key" diagnostic into the report.
    if (!isset($summary[$resultCode])) {
        $summary[$resultCode] = 0;
    }
    $summary[$resultCode]++;

    // Filter values come from the environment as strings, so compare the
    // code as a string with strict matching to avoid PHP's loose numeric
    // string comparisons (e.g. "2e2" == 200).
    if (count($codes) === 0 || in_array((string) $resultCode, $codes, true)) {
        $details .= "{$resultCode} {$pageUrl}\n";
        // 'foundOn' is iterated as key => count; the key is presumably the
        // referring page URL (verify against the Crawler implementation).
        foreach ($result['foundOn'] as $foundOnUrl => $count) {
            $details .= " <- ($count) $foundOnUrl\n";
        }
    }
}

// Emit the summary table ordered by result code.
ksort($summary);
echo '|code|count|'."\n";
echo '|----|-----|'."\n";
foreach ($summary as $summaryCode => $count) {
    echo "| $summaryCode | $count |\n";
}

// Only append the fenced details section when at least one URL matched.
if ($details !== '') {
    echo "\n\n----\n\n```\n$details\n```\n";
}