test and fix redirect tracking

This commit is contained in:
James
2020-02-22 10:06:18 +00:00
parent d81dc86381
commit 9a84ec204d
3 changed files with 121 additions and 31 deletions

View File

@@ -9,7 +9,7 @@ class CrawlerTest extends TestCase{
$crawler=new Crawler('http://localhost:8080');
$crawler->crawl('http://localhost:8080');
$sitemap=$crawler->getResults();
$this->assertSitemapContains($sitemap,[
$this->assertTreeContains($sitemap,[
'http://localhost:8080/' => ['code' => 200 ],
'http://localhost:8080/link1' => ['code' => 200 ],
'http://localhost:8080/link2' => ['code' => 200 ],
@@ -19,11 +19,60 @@ class CrawlerTest extends TestCase{
]);
}
/**
 * Verifies that every crawled URL records the pages it was found on.
 *
 * The same Crawler instance is started from two entry points (/page1 and
 * /page4). The combined results must contain, for each expected URL, the
 * expected status code and a 'foundOn' map keyed by the referring page.
 * This includes targets such as /notFound2 that the fixture pages do not
 * link to directly.
 */
public function testCollectsAllFoundOnUrls(){
    $crawler=new Crawler('http://localhost:8080');
    $crawler->crawl('http://localhost:8080/page1');
    // Crawling page4 as a second entry point ensures the order of results for the URL tracking test 3PARTS.
    $crawler->crawl('http://localhost:8080/page4');
    $sitemap=$crawler->getResults();
    // No debug dump of $sitemap here: output produced during a test clutters the run
    // and is reported as risky when PHPUnit is strict about output.
    $this->assertTreeContains($sitemap,[
        'http://localhost:8080/page1' => ['code' => 200 , 'foundOn' => [
            'http://localhost:8080/page1' => 1,
            'http://localhost:8080/page2' => 1,
            'http://localhost:8080/page3' => 1,
        ]],
        'http://localhost:8080/page2' => ['code' => 200 , 'foundOn' => [
            'http://localhost:8080/page1' => 1,
            'http://localhost:8080/page2' => 1,
            'http://localhost:8080/page3' => 1,
        ]],
        'http://localhost:8080/page3' => ['code' => 200 , 'foundOn' => [
            'http://localhost:8080/page1' => 1,
            'http://localhost:8080/page2' => 1,
            'http://localhost:8080/page3' => 1,
        ]],
        'http://localhost:8080/notFound1' => ['code' => 404 , 'foundOn' => [
            'http://localhost:8080/page1' => 1,
            'http://localhost:8080/page2' => 1,
            'http://localhost:8080/page3' => 1,
        ]],
        'http://localhost:8080/notFound2' => ['code' => 404 , 'foundOn' => [
            'http://localhost:8080/page1' => 1,
            'http://localhost:8080/page2' => 1,
            'http://localhost:8080/page3' => 1,
            'http://localhost:8080/page4' => 1,
        ]],
        'http://localhost:8080/redirectToNotFound' => ['code' => 302 , 'foundOn' => [
            'http://localhost:8080/page1' => 1,
            'http://localhost:8080/page2' => 1,
            'http://localhost:8080/page3' => 1,
            'http://localhost:8080/page4' => 1,
        ]],
        'http://localhost:8080/redirectToRedirectToNotFound' => ['code' => 302 , 'foundOn' => [
            'http://localhost:8080/page1' => 1,
            'http://localhost:8080/page2' => 1,
            'http://localhost:8080/page3' => 1,
            'http://localhost:8080/page4' => 1,
        ]],
    ]);
}
public function testCanFollowRedirectToFound(){
$crawler=new Crawler('http://localhost:8080');
$crawler->crawl('http://localhost:8080/redirectToFound');
$sitemap=$crawler->getResults();
$this->assertSitemapContains($sitemap,[
$this->assertTreeContains($sitemap,[
'http://localhost:8080/redirectToFound' => ['code' => 302 ],
'http://localhost:8080/' => ['code' => 200 ],
]);
@@ -33,9 +82,9 @@ class CrawlerTest extends TestCase{
$crawler=new Crawler('http://localhost:8080');
$crawler->crawl('http://localhost:8080/redirectToNotFound');
$sitemap=$crawler->getResults();
$this->assertSitemapContains($sitemap,[
$this->assertTreeContains($sitemap,[
'http://localhost:8080/redirectToNotFound' => ['code' => 302 ],
'http://localhost:8080/notExists' => ['code' => 404 ],
'http://localhost:8080/notFound2' => ['code' => 404 ],
]);
}
@@ -43,10 +92,10 @@ class CrawlerTest extends TestCase{
$crawler=new Crawler('http://localhost:8080');
$crawler->crawl('http://localhost:8080/redirectToRedirectToNotFound');
$sitemap=$crawler->getResults();
$this->assertSitemapContains($sitemap,[
$this->assertTreeContains($sitemap,[
'http://localhost:8080/redirectToRedirectToNotFound' => ['code' => 302 ],
'http://localhost:8080/redirectToNotFound' => ['code' => 302 ],
'http://localhost:8080/notExists' => ['code' => 404 ],
'http://localhost:8080/notFound2' => ['code' => 404 ],
]);
}
@@ -54,7 +103,7 @@ class CrawlerTest extends TestCase{
$crawler=new Crawler('http://localhost:8080');
$crawler->crawl('http://localhost:8080/twoRedirectsToSameLocation');
$sitemap=$crawler->getResults();
$this->assertSitemapContains($sitemap,[
$this->assertTreeContains($sitemap,[
'http://localhost:8080/twoRedirectsToSameLocation' => ['code' => 200 ],
'http://localhost:8080/redirect1' => ['code' => 302 ],
'http://localhost:8080/redirect2' => ['code' => 302 ],
@@ -66,8 +115,8 @@ class CrawlerTest extends TestCase{
$crawler=new Crawler('http://localhost:8080');
$crawler->crawl('http://localhost:8080/timeout');
$sitemap=$crawler->getResults();
$this->assertSitemapContains($sitemap,[
'http://localhost:8080/timeout' => ['code' => '???' ],
$this->assertTreeContains($sitemap,[
'http://localhost:8080/timeout' => ['code' => '' ],
]);
}
@@ -75,17 +124,18 @@ class CrawlerTest extends TestCase{
$crawler=new Crawler('http://localhost:8080');
$crawler->crawl('http://localhost:8080/internalServerError');
$sitemap=$crawler->getResults();
$this->assertSitemapContains($sitemap,[
$this->assertTreeContains($sitemap,[
'http://localhost:8080/internalServerError' => ['code' => 500 ],
]);
}
public function assertSitemapContains($sitemap, $contains){
foreach($contains as $url=>$vals){
$this->assertArrayHasKey($url, $sitemap, "$url not found in sitemap");
foreach($vals as $k=>$v){
$this->assertArrayHasKey($k, $sitemap[$url], "$url => $k not found in sitemap");
$this->assertEquals($v, $sitemap[$url][$k], "$url => $k = $v not found in sitemap");
/**
 * Recursively asserts that $haystack contains every key/value pair of $contains.
 *
 * Keys present in $haystack but absent from $contains are ignored, so this is
 * a subset check rather than an equality check.
 *
 * @param array  $haystack The actual (nested) array under test.
 * @param array  $contains The expected subset; array values are descended into,
 *                         any other value is compared with assertEquals.
 * @param string $crumbs   Breadcrumb of the keys walked so far, used as the
 *                         assertion failure message.
 */
public function assertTreeContains($haystack, $contains, $crumbs=''){
    foreach($contains as $key=>$expected){
        // The key itself must exist at this level before its value is inspected.
        $this->assertArrayHasKey($key, $haystack, $crumbs);
        $path=$crumbs.' => '.$key;
        if(!is_array($expected)){
            // Leaf value: compare directly.
            $this->assertEquals($expected, $haystack[$key], $path);
            continue;
        }
        // Nested expectation: descend one level, extending the breadcrumb.
        $this->assertTreeContains($haystack[$key], $expected, $path);
    }
}

View File

@@ -23,7 +23,7 @@ app.get('/link4', function (request, response) {
});
app.get('/redirectToNotFound', function (request, response) {
response.redirect('/notExists');
response.redirect('/notFound2');
});
app.get('/redirectToFound', function (request, response) {
@@ -54,6 +54,18 @@ app.get('/internalServerError', function (request, response) {
response.status(500).end();
});
// Fixture pages for the "found on" tracking test.
// /page1, /page2 and /page3 all serve the same body, which links to the three pages
// themselves, to /notFound1 and to /redirectToRedirectToNotFound.
const crossLinkedPageBody = '<a href="/page1">Page1</a><a href="/page2">Page2</a><a href="/page3">Page3</a><a href="/notFound1">NotFound</a><a href="/redirectToRedirectToNotFound">redirectToRedirectToNotFound</a>';
['/page1', '/page2', '/page3'].forEach(function (route) {
    app.get(route, function (request, response) {
        response.end(crossLinkedPageBody);
    });
});
// /page4 links only to /redirectToRedirectToNotFound.
app.get('/page4', function (request, response) {
    response.end('<a href="/redirectToRedirectToNotFound">redirectToRedirectToNotFound</a>');
});
let server = app.listen(8080, function () {
const host = 'localhost';