Skip to content

Commit

Permalink
add crawl_profile config to set custom CrawlProfile implementation (#94)
Browse files Browse the repository at this point in the history
* add crawl_profile config to set custom CrawlProfile implementation, fixes #90

* apply style ci fixes

* add tests for custom crawl profile

* apply fixes from style ci

* remove empty line
  • Loading branch information
fetzi authored and freekmurze committed Oct 3, 2017
1 parent 2a5d6f7 commit ab890d2
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 2 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ return [
*/
'chrome_binary_path' => '',

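/*
* The sitemap generator uses a CrawlProfile implementation to determine
* which URLs should be crawled for the sitemap. Set this to the class name
* of your own Spatie\Crawler\CrawlProfile implementation to customize it.
*/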
'crawl_profile' => Profile::class,

];
```

Expand Down
7 changes: 7 additions & 0 deletions config/sitemap.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<?php

use GuzzleHttp\RequestOptions;
use Spatie\Sitemap\Crawler\Profile;

return [

Expand Down Expand Up @@ -47,4 +48,10 @@
*/
'chrome_binary_path' => null,

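/*
* The sitemap generator uses a CrawlProfile implementation to determine
* which URLs should be crawled for the sitemap. Set this to the class name
* of your own Spatie\Crawler\CrawlProfile implementation to customize it.
*/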
'crawl_profile' => Profile::class,

];
7 changes: 5 additions & 2 deletions src/SitemapGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Spatie\Crawler\Crawler;
use Spatie\Sitemap\Tags\Url;
use Spatie\Crawler\CrawlProfile;
use Spatie\Sitemap\Crawler\Profile;
use Spatie\Sitemap\Crawler\Observer;
use Spatie\Crawler\Url as CrawlerUrl;
Expand Down Expand Up @@ -103,7 +104,7 @@ public function writeToFile(string $path)
return $this;
}

protected function getCrawlProfile(): Profile
protected function getCrawlProfile(): CrawlProfile
{
$shouldCrawl = function (CrawlerUrl $url) {
if ($url->host !== CrawlerUrl::create($this->urlToBeCrawled)->host) {
Expand All @@ -117,7 +118,9 @@ protected function getCrawlProfile(): Profile
return ($this->shouldCrawl)($url);
};

return new Profile($shouldCrawl);
$profileClass = config('sitemap.crawl_profile', Profile::class);

return app($profileClass, [$shouldCrawl]);
}

protected function getCrawlObserver(): Observer
Expand Down
5 changes: 5 additions & 0 deletions src/SitemapServiceProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Spatie\Sitemap;

use Spatie\Crawler\Crawler;
use Spatie\Sitemap\Crawler\Profile;
use Illuminate\Support\ServiceProvider;

class SitemapServiceProvider extends ServiceProvider
Expand All @@ -27,6 +28,10 @@ public function boot()
->give(function () {
return Crawler::create(config('sitemap.guzzle_options'));
});

// Register how the default crawl profile is built when it is resolved
// from the container with explicit parameters.
$this->app->bind(Profile::class, function ($app, $params) {
// Only the first resolve-time parameter is forwarded to the constructor.
// NOTE(review): reset() returns false for an empty array, so resolving
// Profile without parameters would pass false here — confirm callers
// always supply the callback.
return new Profile(reset($params));
});
}

/**
Expand Down
58 changes: 58 additions & 0 deletions tests/CrawlProfileTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<?php

namespace Spatie\Sitemap\Test;

use Spatie\Crawler\Crawler;
use Spatie\Sitemap\Sitemap;
use Spatie\Sitemap\Crawler\Profile;
use Spatie\Sitemap\SitemapGenerator;

/**
 * Checks which crawl profile the SitemapGenerator hands to the crawler,
 * depending on the `sitemap.crawl_profile` config value.
 */
class CrawlProfileTest extends TestCase
{
    /**
     * Mocked crawler that is injected into the generator under test.
     *
     * @var Crawler
     */
    private $crawler;

    /**
     * Create the crawler mock and make its fluent setters return the mock
     * itself so the generator can keep chaining calls on it.
     */
    public function setUp()
    {
        parent::setUp();

        $this->crawler = $this->createMock(Crawler::class);

        $this->crawler->method('setCrawlObserver')->willReturn($this->crawler);
        $this->crawler->method('setConcurrency')->willReturn($this->crawler);
    }

    /** @test */
    public function it_should_use_the_default_crawl_profile()
    {
        // expects() makes the test fail when setCrawlProfile is never called;
        // a bare method()->with() stub only checks the argument if a call
        // happens to be made.
        $this->crawler
            ->expects($this->atLeastOnce())
            ->method('setCrawlProfile')
            ->with($this->isInstanceOf(Profile::class))
            ->willReturn($this->crawler);

        $sitemapGenerator = new SitemapGenerator($this->crawler);

        $sitemap = $sitemapGenerator->getSitemap();

        $this->assertInstanceOf(Sitemap::class, $sitemap);
    }

    /** @test */
    public function it_should_use_a_custom_crawl_profile()
    {
        config(['sitemap.crawl_profile' => CustomCrawlProfile::class]);

        // Same as above: require the call, and require that the configured
        // custom profile class is the one being passed.
        $this->crawler
            ->expects($this->atLeastOnce())
            ->method('setCrawlProfile')
            ->with($this->isInstanceOf(CustomCrawlProfile::class))
            ->willReturn($this->crawler);

        $sitemapGenerator = new SitemapGenerator($this->crawler);

        $sitemap = $sitemapGenerator->getSitemap();

        $this->assertInstanceOf(Sitemap::class, $sitemap);
    }
}
21 changes: 21 additions & 0 deletions tests/CustomCrawlProfile.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

namespace Spatie\Sitemap\Test;

use Spatie\Crawler\Url;
use Spatie\Crawler\CrawlProfile;

/**
 * Minimal CrawlProfile implementation used by the tests as a stand-in for
 * a user-configured crawl profile.
 */
class CustomCrawlProfile implements CrawlProfile
{
    /**
     * Approve the given url for crawling; this profile never rejects one.
     *
     * @param \Spatie\Crawler\Url $url
     *
     * @return bool
     */
    public function shouldCrawl(Url $url): bool
    {
        return true;
    }
}

0 comments on commit ab890d2

Please sign in to comment.