16 Commits
v1.0.1 ... v1.1

Author | SHA1       | Message | CI (drone) | Date
James  | 0714a891fe | update deps properly | push: failing | 2020-11-17 08:37:20 +00:00
James  | 20b260233a | fix redirect loop test (now returns 302) may not be good :/ | push: passing | 2020-11-17 08:26:47 +00:00
James  | 0c30a268b8 | fix broken external link test | push: failing | 2020-11-16 19:46:37 +00:00
James  | e8ebef4cc4 | update deps even more :) | push: failing | 2020-11-16 16:01:32 +00:00
James  | f696e28e49 | update deps | push: failing | 2020-11-16 15:47:38 +00:00
James  | 432acb7475 | add setUserAgent support | push: passing, tag: passing | 2020-11-16 12:58:28 +00:00
James  | 6b28eb168e | revert fix redirectloop test | push: passing, tag: passing | 2020-11-15 18:34:28 +00:00
James  | 4e29ca154e | downgrade | push: failing | 2020-11-15 18:33:05 +00:00
James  | fbf6d31c5b | update package version | push: failing | 2020-11-15 18:29:48 +00:00
James  | a4037f2d95 | fix package version | push: failing | 2020-11-15 18:28:20 +00:00
James  | 437d0fbf4e | fix redirect loop test | push: failing | 2020-11-15 14:54:32 +00:00
James  | 418747027b | try older spate crawler | push: failing | 2020-11-15 14:49:47 +00:00
James  | 7b9b125f57 | crawl externals too | push: failing | 2020-11-15 14:43:23 +00:00
James  | e2fe2eedf7 | fix | push: failing | 2020-11-15 14:33:08 +00:00
James  | f46074dfba | better phpunit output | n/a | 2020-11-15 14:31:42 +00:00
James  | 44d07858b5 | update | push: failing | 2020-11-15 14:27:14 +00:00
6 changed files with 800 additions and 383 deletions

@@ -5,7 +5,7 @@ name: testsuite
 steps:
 - name: composer install
-  image: chialab/php
+  image: chialab/php:7.4
   commands:
   - composer install
   volumes:
@@ -13,21 +13,21 @@ steps:
     path: /root/.composer/cache/
 - name: wait for test server
-  image: alpine
+  image: alpine:3.12
   commands:
   - echo "Waiting for server to launch on testserver:8080..."
   - while ! nc -z testserver 8080; do sleep 0.1 ; done
   - echo "Ready!"
 - name: run tests
-  image: chialab/php
+  image: chialab/php:7.4
   commands:
-  - URL=http://testserver:8080 vendor/bin/phpunit tests
+  - URL=http://testserver:8080 vendor/bin/phpunit tests --testdox --color=always --no-interaction
 services:
 - name: testserver
-  image: node
+  image: node:15.2.0-alpine3.12
   detach: true
   commands:
   - cd tests/server/

@@ -3,8 +3,8 @@
     "description": "Generate full sitemap report using spatie/crawler",
     "type": "library",
     "require": {
-        "php": "^7.1",
-        "spatie/crawler": "^4.6",
+        "php": "^7.4",
+        "spatie/crawler": "^5.0",
         "cweagans/composer-patches": "~1.0"
     },
     "require-dev": {
@@ -25,9 +25,6 @@
         "patches": {
             "spatie/crawler": {
                 "add crawled again observer": "https://patch-diff.githubusercontent.com/raw/spatie/crawler/pull/280.patch"
-            },
-            "guzzlehttp/guzzle": {
-                "Status code must be an integer value between 1xx and 5xx": "https://patch-diff.githubusercontent.com/raw/guzzle/guzzle/pull/2591.patch"
             }
         }
     }

composer.lock (generated): 1153 changed lines; file diff suppressed because it is too large

@@ -9,7 +9,7 @@ use Psr\Http\Message\UriInterface;
 use Spatie\Crawler\Crawler as SpatieCrawler;
 use Spatie\Crawler\CrawlUrl;
-use Spatie\Crawler\CrawlInternalUrls;
+use Spatie\Crawler\CrawlAllUrls;
 
 class Crawler{
@@ -25,6 +25,11 @@ class Crawler{
         $this->observer = new CrawlObserver();
         $this->crawler->setCrawlObserver($this->observer);
         $this->crawler->setCrawlProfile(new CrawlAllUrls());
     }
 
+    public function setUserAgent($agent){
+        $this->crawler->setUserAgent($agent);
+    }
+
     public function crawl($url){

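Note on the hunks above: swapping the use statement from CrawlInternalUrls to CrawlAllUrls makes the underlying spatie/crawler follow external links as well (matching the "crawl externals too" commit), and the new setUserAgent() method simply forwards to spatie/crawler. A minimal usage sketch of the wrapper follows; the no-argument constructor, namespace and agent string are illustrative assumptions, while setUserAgent(), crawl() and getResults() are the methods visible in these diffs:

<?php
// Sketch only: constructor and namespace are assumed, not shown in this changeset.
$crawler = new Crawler();
$crawler->setUserAgent('sitemap-report-bot/1.0'); // forwarded to spatie/crawler's setUserAgent()
$crawler->crawl('http://testserver:8080/');       // test-server URL from the CI config above
$results = $crawler->getResults();                // per-URL results, as asserted in CrawlerTest
print_r($results);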

@@ -34,7 +34,7 @@ class CrawlerTest extends TestCase{
             $this->url.'/notFound' => ['code' => 404],
             $this->url.'/redirect1' => ['code' => 302],
             $this->url.'/redirect2' => ['code' => 302],
-            $this->url.'/redirectLoop' => ['code' => '---'],
+            $this->url.'/redirectLoop' => ['code' => '302'],
             $this->url.'/redirectToFound' => ['code' => 302 ],
             $this->url.'/redirectToNotFound' => ['code' => 302 ],
             $this->url.'/redirectToRedirectToNotFound' => ['code' => 302],
@@ -178,7 +178,7 @@ class CrawlerTest extends TestCase{
         $crawler->crawl($this->url.'/redirectLoop');
         $sitemap=$crawler->getResults();
         $this->assertTreeContains($sitemap,[
-            $this->url.'/redirectLoop' => ['code' => '---'],
+            $this->url.'/redirectLoop' => ['code' => '302'],
         ], print_r($sitemap,true));
     }

@@ -25,7 +25,7 @@ app.get('/', function (request, response) {
 });
 app.get('/externalLink', function (request, response) {
-    response.end('<a href="http://example.com/"</a>');
+    response.end('<a href="http://example.com/">ext</a>');
 });
 app.get('/deeplink1', function (request, response) {