Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
ac9e614086 | ||
|
bea673807c | ||
|
35aeeeca72 | ||
|
5f71eeb6a5 | ||
|
2ece7c9e99 | ||
f739042cfd |
@ -5,7 +5,7 @@ name: testsuite
|
|||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: composer install
|
- name: composer install
|
||||||
image: chialab/php
|
image: chialab/php:7.3
|
||||||
commands:
|
commands:
|
||||||
- composer install
|
- composer install
|
||||||
volumes:
|
volumes:
|
||||||
@ -13,21 +13,21 @@ steps:
|
|||||||
path: /root/.composer/cache/
|
path: /root/.composer/cache/
|
||||||
|
|
||||||
- name: wait for test server
|
- name: wait for test server
|
||||||
image: alpine
|
image: alpine:3.12
|
||||||
commands:
|
commands:
|
||||||
- echo "Waiting for server to launch on testserver:8080..."
|
- echo "Waiting for server to launch on testserver:8080..."
|
||||||
- while ! nc -z testserver 8080; do sleep 0.1 ; done
|
- while ! nc -z testserver 8080; do sleep 0.1 ; done
|
||||||
- echo "Ready!"
|
- echo "Ready!"
|
||||||
|
|
||||||
- name: run tests
|
- name: run tests
|
||||||
image: chialab/php
|
image: chialab/php:7.3
|
||||||
commands:
|
commands:
|
||||||
- URL=http://testserver:8080 vendor/bin/phpunit tests --testdox --color=always --no-interaction
|
- URL=http://testserver:8080 vendor/bin/phpunit tests --testdox --color=always --no-interaction
|
||||||
|
|
||||||
services:
|
services:
|
||||||
|
|
||||||
- name: testserver
|
- name: testserver
|
||||||
image: node
|
image: node:15.2
|
||||||
detach: true
|
detach: true
|
||||||
commands:
|
commands:
|
||||||
- cd tests/server/
|
- cd tests/server/
|
||||||
|
@ -3,7 +3,7 @@ Similar project to [spatie/http-status-check](https://github.com/spatie/http-sta
|
|||||||
## Install
|
## Install
|
||||||
|
|
||||||
```plain
|
```plain
|
||||||
composer config repositories.jhodges composer https://git.jhodges.co.uk/composer
|
composer config repositories.jhodges composer https://composer.jhodges.co.uk/
|
||||||
composer require jhodges/sitemap
|
composer require jhodges/sitemap
|
||||||
```
|
```
|
||||||
|
|
||||||
|
17
docker/Dockerfile
Normal file
17
docker/Dockerfile
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Image that crawls the sites listed in the CRAWL_URL environment variable
# with jhodges/sitemap and prints a report (see crawl.php).
FROM markizano/devuan:beowulf-amd64

# Refresh the package index and install in ONE layer, so a cached
# "apt-get update" layer can never be paired with a changed install list.
# The index and downloaded archives are removed in that same layer; cleaning
# them in a later layer would not make the image any smaller.
# wget is listed explicitly because get_composer.sh uses it to fetch the
# installer signature.
RUN apt-get update \
 && apt-get install -y php-cli git unzip php-xml wget \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Install Composer through the checksum-verifying helper, then discard it.
COPY get_composer.sh /root/
RUN bash /root/get_composer.sh && rm /root/get_composer.sh

# WORKDIR creates /app when it does not exist yet, so no mkdir is needed.
WORKDIR /app
RUN composer init \
 && composer config repositories.jhodges composer https://composer.jhodges.co.uk \
 && composer require jhodges/sitemap

COPY crawl.php /app/

# Exec form: PHP is started directly instead of through "/bin/sh -c", so it
# receives the signals sent to the container (e.g. on "docker stop").
ENTRYPOINT ["php", "crawl.php"]
|
45
docker/crawl.php
Normal file
45
docker/crawl.php
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
<?php

declare(strict_types=1);

/**
 * Container entry point: crawls every URL listed in the CRAWL_URL environment
 * variable and prints a Markdown report on stdout.
 *
 * Environment:
 *  - CRAWL_URL  (required) comma-separated list of start URLs.
 *  - CRAWL_CODE (optional) comma-separated list of result codes; when set,
 *    only results carrying one of these codes are listed in the details.
 *
 * Output: a "|code|count|" table covering all results, followed (when there
 * is anything to list) by a fenced block naming each selected URL and the
 * pages it was found on.
 */

require_once __DIR__ . '/vendor/autoload.php';

use JHodges\Sitemap\Crawler;

$url = getenv('CRAWL_URL');
if (!$url) {
    // Complain on stderr and exit non-zero so a misconfigured run is reported
    // as a failure instead of finishing with status 0 like a real crawl.
    fwrite(STDERR, "No env: CRAWL_URL\n");
    exit(1);
}
$urls = array_filter(array_map('trim', explode(',', $url)));

$code = getenv('CRAWL_CODE');
$codes = $code ? array_filter(array_map('trim', explode(',', $code))) : [];

$crawler = new Crawler();
foreach ($urls as $url) {
    $crawler->crawl($url);
}

$summary = [];
$details = '';
foreach ($crawler->getResults() as $url => $result) {
    $resultCode = $result['code'];

    // Start each counter at 0; incrementing a missing key raises an
    // "undefined index/array key" notice or warning.
    $summary[$resultCode] = ($summary[$resultCode] ?? 0) + 1;

    // The filter values arrive from the environment as strings, so compare
    // string to string rather than relying on loose type juggling.
    if ($codes === [] || in_array((string) $resultCode, $codes, true)) {
        $details .= "{$resultCode} {$url}\n";
        // Distinct names keep the outer loop's $url untouched.
        foreach ($result['foundOn'] as $foundOnUrl => $linkCount) {
            $details .= " <- ($linkCount) $foundOnUrl\n";
        }
    }
}

ksort($summary);
echo '|code|count|' . "\n";
echo '|----|-----|' . "\n";
foreach ($summary as $code => $count) {
    echo "| $code | $count |\n";
}

if ($details !== '') {
    echo "\n\n----\n\n```\n$details\n```\n";
}
|
17
docker/get_composer.sh
Normal file
17
docker/get_composer.sh
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/sh
# Downloads the Composer installer, verifies it against the published SHA-384
# signature and installs Composer as /usr/local/bin/composer.
# Exits with 1 when verification fails, otherwise with the installer's status.

EXPECTED_CHECKSUM="$(wget -q -O - https://composer.github.io/installer.sig)"
php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');"
ACTUAL_CHECKSUM="$(php -r "echo hash_file('sha384', 'composer-setup.php');")"

# An empty expected checksum means the signature download failed. Treat that
# as a verification failure: if the installer download failed as well, both
# values would be empty and the comparison alone would pass.
if [ -z "$EXPECTED_CHECKSUM" ] || [ "$EXPECTED_CHECKSUM" != "$ACTUAL_CHECKSUM" ]
then
    >&2 echo 'ERROR: Invalid installer checksum'
    # -f: the file may not exist when the download itself failed.
    rm -f composer-setup.php
    exit 1
fi

# --1 asks the installer for the latest Composer 1.x release.
php composer-setup.php --1 --filename=composer --install-dir=/usr/local/bin
RESULT=$?
rm -f composer-setup.php
exit "$RESULT"
|
@ -10,13 +10,14 @@ use Psr\Http\Message\UriInterface;
|
|||||||
use Spatie\Crawler\Crawler as SpatieCrawler;
|
use Spatie\Crawler\Crawler as SpatieCrawler;
|
||||||
use Spatie\Crawler\CrawlUrl;
|
use Spatie\Crawler\CrawlUrl;
|
||||||
use Spatie\Crawler\CrawlAllUrls;
|
use Spatie\Crawler\CrawlAllUrls;
|
||||||
|
use Spatie\Crawler\CrawlProfile;
|
||||||
|
|
||||||
class Crawler{
|
class Crawler{
|
||||||
|
|
||||||
private $observer;
|
private $observer;
|
||||||
private $crawler;
|
private $crawler;
|
||||||
|
|
||||||
public function __construct($reqOps=[]){
|
public function __construct(array $reqOps=[]){
|
||||||
$this->crawler = SpatieCrawler::create(array_merge($reqOps, [
|
$this->crawler = SpatieCrawler::create(array_merge($reqOps, [
|
||||||
RequestOptions::ALLOW_REDIRECTS => [
|
RequestOptions::ALLOW_REDIRECTS => [
|
||||||
'track_redirects' => true,
|
'track_redirects' => true,
|
||||||
@ -28,15 +29,21 @@ class Crawler{
|
|||||||
$this->crawler->setCrawlProfile(new CrawlAllUrls());
|
$this->crawler->setCrawlProfile(new CrawlAllUrls());
|
||||||
}
|
}
|
||||||
|
|
||||||
public function setUserAgent($agent){
|
public function setUserAgent(String $agent) : self{
|
||||||
$this->crawler->setUserAgent($agent);
|
$this->crawler->setUserAgent($agent);
|
||||||
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function crawl($url){
|
public function setCrawlProfile(CrawlProfile $p) : self{
|
||||||
|
$this->crawler->setCrawlProfile($p);
|
||||||
|
return $this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public function crawl(String $url) : void{
|
||||||
$this->crawler->startCrawling($url);
|
$this->crawler->startCrawling($url);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getResults(){
|
public function getResults() : array{
|
||||||
return $this->observer->results;
|
return $this->observer->results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user