16 Commits
v1.0.1 ... v1.1

Author SHA1 Message Date
James
0714a891fe update deps properly
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-17 08:37:20 +00:00
James
20b260233a fix redirect loop test (now returns 302) - may not be good :/
All checks were successful
continuous-integration/drone/push Build is passing
2020-11-17 08:26:47 +00:00
James
0c30a268b8 fix broken external link test
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-16 19:46:37 +00:00
James
e8ebef4cc4 update deps even more :)
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-16 16:01:32 +00:00
James
f696e28e49 update deps
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-16 15:47:38 +00:00
James
432acb7475 add setUserAgent support
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
2020-11-16 12:58:28 +00:00
James
6b28eb168e revert fix redirectloop test
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing
2020-11-15 18:34:28 +00:00
James
4e29ca154e downgrade
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 18:33:05 +00:00
James
fbf6d31c5b update package version
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 18:29:48 +00:00
James
a4037f2d95 fix package version
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 18:28:20 +00:00
James
437d0fbf4e fix redirect loop test
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 14:54:32 +00:00
James
418747027b try older spatie crawler
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 14:49:47 +00:00
James
7b9b125f57 crawl externals too
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 14:43:23 +00:00
James
e2fe2eedf7 fix
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 14:33:08 +00:00
James
f46074dfba better phpunit output
2020-11-15 14:31:42 +00:00
James
44d07858b5 update
Some checks failed
continuous-integration/drone/push Build is failing
2020-11-15 14:27:14 +00:00
6 changed files with 800 additions and 383 deletions

View File

@@ -5,7 +5,7 @@ name: testsuite
steps: steps:
- name: composer install - name: composer install
image: chialab/php image: chialab/php:7.4
commands: commands:
- composer install - composer install
volumes: volumes:
@@ -13,21 +13,21 @@ steps:
path: /root/.composer/cache/ path: /root/.composer/cache/
- name: wait for test server - name: wait for test server
image: alpine image: alpine:3.12
commands: commands:
- echo "Waiting for server to launch on testserver:8080..." - echo "Waiting for server to launch on testserver:8080..."
- while ! nc -z testserver 8080; do sleep 0.1 ; done - while ! nc -z testserver 8080; do sleep 0.1 ; done
- echo "Ready!" - echo "Ready!"
- name: run tests - name: run tests
image: chialab/php image: chialab/php:7.4
commands: commands:
- URL=http://testserver:8080 vendor/bin/phpunit tests - URL=http://testserver:8080 vendor/bin/phpunit tests --testdox --color=always --no-interaction
services: services:
- name: testserver - name: testserver
image: node image: node:15.2.0-alpine3.12
detach: true detach: true
commands: commands:
- cd tests/server/ - cd tests/server/

View File

@@ -3,8 +3,8 @@
"description": "Generate full sitemap report using spatie/crawler", "description": "Generate full sitemap report using spatie/crawler",
"type": "library", "type": "library",
"require": { "require": {
"php": "^7.1", "php": "^7.4",
"spatie/crawler": "^4.6", "spatie/crawler": "^5.0",
"cweagans/composer-patches": "~1.0" "cweagans/composer-patches": "~1.0"
}, },
"require-dev": { "require-dev": {
@@ -25,9 +25,6 @@
"patches": { "patches": {
"spatie/crawler": { "spatie/crawler": {
"add crawled again observer": "https://patch-diff.githubusercontent.com/raw/spatie/crawler/pull/280.patch" "add crawled again observer": "https://patch-diff.githubusercontent.com/raw/spatie/crawler/pull/280.patch"
},
"guzzlehttp/guzzle": {
"Status code must be an integer value between 1xx and 5xx": "https://patch-diff.githubusercontent.com/raw/guzzle/guzzle/pull/2591.patch"
} }
} }
} }

1153
composer.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@ use Psr\Http\Message\UriInterface;
use Spatie\Crawler\Crawler as SpatieCrawler; use Spatie\Crawler\Crawler as SpatieCrawler;
use Spatie\Crawler\CrawlUrl; use Spatie\Crawler\CrawlUrl;
use Spatie\Crawler\CrawlInternalUrls; use Spatie\Crawler\CrawlAllUrls;
class Crawler{ class Crawler{
@@ -25,6 +25,11 @@ class Crawler{
$this->observer = new CrawlObserver(); $this->observer = new CrawlObserver();
$this->crawler->setCrawlObserver($this->observer); $this->crawler->setCrawlObserver($this->observer);
$this->crawler->setCrawlProfile(new CrawlAllUrls());
}
public function setUserAgent($agent){
$this->crawler->setUserAgent($agent);
} }
public function crawl($url){ public function crawl($url){

View File

@@ -34,7 +34,7 @@ class CrawlerTest extends TestCase{
$this->url.'/notFound' => ['code' => 404], $this->url.'/notFound' => ['code' => 404],
$this->url.'/redirect1' => ['code' => 302], $this->url.'/redirect1' => ['code' => 302],
$this->url.'/redirect2' => ['code' => 302], $this->url.'/redirect2' => ['code' => 302],
$this->url.'/redirectLoop' => ['code' => '---'], $this->url.'/redirectLoop' => ['code' => '302'],
$this->url.'/redirectToFound' => ['code' => 302 ], $this->url.'/redirectToFound' => ['code' => 302 ],
$this->url.'/redirectToNotFound' => ['code' => 302 ], $this->url.'/redirectToNotFound' => ['code' => 302 ],
$this->url.'/redirectToRedirectToNotFound' => ['code' => 302], $this->url.'/redirectToRedirectToNotFound' => ['code' => 302],
@@ -178,7 +178,7 @@ class CrawlerTest extends TestCase{
$crawler->crawl($this->url.'/redirectLoop'); $crawler->crawl($this->url.'/redirectLoop');
$sitemap=$crawler->getResults(); $sitemap=$crawler->getResults();
$this->assertTreeContains($sitemap,[ $this->assertTreeContains($sitemap,[
$this->url.'/redirectLoop' => ['code' => '---'], $this->url.'/redirectLoop' => ['code' => '302'],
], print_r($sitemap,true)); ], print_r($sitemap,true));
} }

View File

@@ -25,7 +25,7 @@ app.get('/', function (request, response) {
}); });
app.get('/externalLink', function (request, response) { app.get('/externalLink', function (request, response) {
response.end('<a href="http://example.com/"</a>'); response.end('<a href="http://example.com/">ext</a>');
}); });
app.get('/deeplink1', function (request, response) { app.get('/deeplink1', function (request, response) {