215 lines
8.3 KiB
PHP
215 lines
8.3 KiB
PHP
<?php
|
|
use \PHPUnit\Framework\TestCase;
|
|
|
|
use JHodges\Sitemap\Crawler;
|
|
use GuzzleHttp\RequestOptions;
|
|
|
|
/**
 * End-to-end tests for JHodges\Sitemap\Crawler.
 *
 * Each test crawls a fixture site (http://localhost:8080 by default, or the
 * value of the URL environment variable when set) and checks that the
 * crawler's results contain the expected URLs with the expected codes.
 */
class CrawlerTest extends TestCase
{
    /** @var string Base URL of the fixture site, without a trailing slash. */
    private $url = 'http://localhost:8080';

    /**
     * Applies the optional URL environment override before every test.
     *
     * This is done in setUp() instead of an overridden constructor because
     * PHPUnit instantiates test cases itself and passes constructor arguments
     * (the test name); a zero-argument override would drop them.
     */
    protected function setUp(): void
    {
        parent::setUp();

        $override = getenv('URL');
        if (is_string($override) && $override !== '') {
            // Expected keys are built as $this->url . '/path', so a trailing
            // slash here would produce '//' in every expected URL.
            $this->url = rtrim($override, '/');
        }
    }

    /**
     * Crawls from the site root (3 second connect/request timeouts) and
     * expects every URL listed below to be present with the given code.
     */
    public function testFullSite(): void
    {
        $crawler = new Crawler([
            RequestOptions::CONNECT_TIMEOUT => 3,
            RequestOptions::TIMEOUT => 3,
        ]);
        $crawler->crawl($this->url);
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            'http://example.com/' => ['code' => 200],
            $this->url . '/deeplink1' => ['code' => 200],
            $this->url . '/deeplink2' => ['code' => 200],
            $this->url . '/deeplink3' => ['code' => 200],
            $this->url . '/externalLink' => ['code' => 200],
            $this->url . '/found' => ['code' => 200],
            $this->url . '/interlinked1' => ['code' => 200],
            $this->url . '/interlinked2' => ['code' => 200],
            $this->url . '/interlinked3' => ['code' => 200],
            $this->url . '/internalServerError' => ['code' => 500],
            $this->url . '/invalidStatusCode' => ['code' => '---'],
            $this->url . '/notFound' => ['code' => 404],
            $this->url . '/redirect1' => ['code' => 302],
            $this->url . '/redirect2' => ['code' => 302],
            $this->url . '/redirectLoop' => ['code' => '302'],
            $this->url . '/redirectToFound' => ['code' => 302],
            $this->url . '/redirectToNotFound' => ['code' => 302],
            $this->url . '/redirectToRedirectToNotFound' => ['code' => 302],
            $this->url . '/timeout' => ['code' => '---'],
            $this->url . '/twoRedirectsToSameLocation' => ['code' => 200],
        ], print_r($sitemap, true));
    }

    /** Crawling /found must record it with code 200. */
    public function testFound(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/found');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/found' => ['code' => 200],
        ], print_r($sitemap, true));
    }

    /** Crawling /notFound must record it with code 404. */
    public function testNotFound(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/notFound');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/notFound' => ['code' => 404],
        ], print_r($sitemap, true));
    }

    /**
     * Crawling /externalLink must record both that page and
     * http://example.com/ with code 200.
     */
    public function testExternalLink(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/externalLink');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/externalLink' => ['code' => 200],
            'http://example.com/' => ['code' => 200],
        ], print_r($sitemap, true));
    }

    /** Crawling /deeplink1 must also record /deeplink2 and /deeplink3. */
    public function testDeeplink(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/deeplink1');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/deeplink1' => ['code' => 200],
            $this->url . '/deeplink2' => ['code' => 200],
            $this->url . '/deeplink3' => ['code' => 200],
        ], print_r($sitemap, true));
    }

    /**
     * Crawls /interlinked1 and /interlinked4 with the same crawler and checks
     * the 'foundOn' map recorded for each expected URL.
     */
    public function testInterlinked(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/interlinked1');
        // this ensures the order of results for the URL tracking test PART2
        $crawler->crawl($this->url . '/interlinked4');
        $sitemap = $crawler->getResults();

        // Every expected URL carries the same 'foundOn' map.
        $foundOn = [
            $this->url . '/interlinked1' => 1,
            $this->url . '/interlinked2' => 1,
            $this->url . '/interlinked3' => 1,
            $this->url . '/interlinked4' => 1,
        ];

        $this->assertTreeContains($sitemap, [
            $this->url . '/interlinked1' => ['code' => 200, 'foundOn' => $foundOn],
            $this->url . '/interlinked2' => ['code' => 200, 'foundOn' => $foundOn],
            $this->url . '/interlinked3' => ['code' => 200, 'foundOn' => $foundOn],
            $this->url . '/found' => ['code' => 200, 'foundOn' => $foundOn],
            $this->url . '/redirectToFound' => ['code' => 302, 'foundOn' => $foundOn],
        ], print_r($sitemap, true));
    }

    /** /redirectToFound must be recorded as 302 and /found as 200. */
    public function testRedirectToFound(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/redirectToFound');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/redirectToFound' => ['code' => 302],
            $this->url . '/found' => ['code' => 200],
        ], print_r($sitemap, true));
    }

    /** /redirectToNotFound must be recorded as 302 and /notFound as 404. */
    public function testRedirectToNotFound(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/redirectToNotFound');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/redirectToNotFound' => ['code' => 302],
            $this->url . '/notFound' => ['code' => 404],
        ], print_r($sitemap, true));
    }

    /**
     * /redirectToRedirectToNotFound and /redirectToNotFound must be recorded
     * as 302, and /notFound as 404.
     */
    public function testRedirectToRedirectToNotFound(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/redirectToRedirectToNotFound');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/redirectToRedirectToNotFound' => ['code' => 302],
            $this->url . '/redirectToNotFound' => ['code' => 302],
            $this->url . '/notFound' => ['code' => 404],
        ], print_r($sitemap, true));
    }

    /**
     * /twoRedirectsToSameLocation (200) must lead to /redirect1 and
     * /redirect2 (both 302) and to /found (200).
     */
    public function testTwoRedirectsToSameLocation(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/twoRedirectsToSameLocation');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/twoRedirectsToSameLocation' => ['code' => 200],
            $this->url . '/redirect1' => ['code' => 302],
            $this->url . '/redirect2' => ['code' => 302],
            $this->url . '/found' => ['code' => 200],
        ], print_r($sitemap, true));
    }

    /**
     * With 3 second connect/request timeouts, /timeout must be recorded with
     * the code '---'.
     */
    public function testTimeout(): void
    {
        $crawler = new Crawler([
            RequestOptions::CONNECT_TIMEOUT => 3,
            RequestOptions::TIMEOUT => 3,
        ]);
        $crawler->crawl($this->url . '/timeout');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/timeout' => ['code' => '---'],
        ], print_r($sitemap, true));
    }

    /** /redirectLoop must be recorded with code 302. */
    public function testRedirectLoop(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/redirectLoop');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/redirectLoop' => ['code' => '302'],
        ], print_r($sitemap, true));
    }

    /** /internalServerError must be recorded with code 500. */
    public function testInternalServerError(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/internalServerError');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/internalServerError' => ['code' => 500],
        ], print_r($sitemap, true));
    }

    /** /invalidStatusCode must be recorded with the code '---'. */
    public function testInvalidStatusCode(): void
    {
        $crawler = new Crawler();
        $crawler->crawl($this->url . '/invalidStatusCode');
        $sitemap = $crawler->getResults();

        $this->assertTreeContains($sitemap, [
            $this->url . '/invalidStatusCode' => ['code' => '---'],
        ], print_r($sitemap, true));
    }

    /**
     * Asserts that $haystack contains every key/value pair of $contains,
     * descending recursively into nested arrays. Keys that exist only in
     * $haystack are ignored.
     *
     * @param array|\ArrayAccess $haystack Actual tree to inspect.
     * @param array              $contains Expected subset of the tree.
     * @param string             $crumbs   Failure message prefix; the key path
     *                                     is appended to it on each level.
     */
    public function assertTreeContains($haystack, $contains, $crumbs = ''): void
    {
        // Fail with a readable message instead of handing a scalar/null node
        // to assertArrayHasKey(), which rejects non-array arguments.
        $this->assertTrue(
            is_array($haystack) || $haystack instanceof \ArrayAccess,
            $crumbs
        );

        foreach ($contains as $key => $expected) {
            $this->assertArrayHasKey($key, $haystack, $crumbs);

            $trail = $crumbs . ' => ' . $key;
            if (is_array($expected)) {
                $this->assertTreeContains($haystack[$key], $expected, $trail);
                continue;
            }

            // Loose comparison on purpose: expectations mix 302 and '302'.
            $this->assertEquals($expected, $haystack[$key], $trail);
        }
    }
}
|