From ab890d2cc595a4fac0afef4d1b691c60bb8d515e Mon Sep 17 00:00:00 2001 From: Johannes Pichler Date: Tue, 3 Oct 2017 09:40:58 +0200 Subject: [PATCH] add crawl_profile config to set custom CrawlProfile implementation (#94) * add crawl_profile config to set custom CrawlProfile implementation, fixes #90 * apply style ci fixes * add tests for custom crawl profile * apply fixes from style ci * remove empty line --- README.md | 6 ++++ config/sitemap.php | 7 ++++ src/SitemapGenerator.php | 7 ++-- src/SitemapServiceProvider.php | 5 +++ tests/CrawlProfileTest.php | 58 ++++++++++++++++++++++++++++++++++ tests/CustomCrawlProfile.php | 21 ++++++++++++ 6 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 tests/CrawlProfileTest.php create mode 100644 tests/CustomCrawlProfile.php diff --git a/README.md b/README.md index c100420..2a7ef09 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,12 @@ return [ */ 'chrome_binary_path' => '', + /* + * The sitemap generator uses a CrawlProfile implementation to determine + * which urls should be crawled for the sitemap. + */ + 'crawl_profile' => Profile::class, + ]; ``` diff --git a/config/sitemap.php b/config/sitemap.php index 2ad425a..8ffec69 100644 --- a/config/sitemap.php +++ b/config/sitemap.php @@ -1,6 +1,7 @@ null, + /* + * The sitemap generator uses a CrawlProfile implementation to determine + * which urls should be crawled for the sitemap. + */ + 'crawl_profile' => Profile::class, + ]; diff --git a/src/SitemapGenerator.php b/src/SitemapGenerator.php index 3c4d5a3..b065ad4 100644 --- a/src/SitemapGenerator.php +++ b/src/SitemapGenerator.php @@ -4,6 +4,7 @@ use Spatie\Crawler\Crawler; use Spatie\Sitemap\Tags\Url; +use Spatie\Crawler\CrawlProfile; use Spatie\Sitemap\Crawler\Profile; use Spatie\Sitemap\Crawler\Observer; use Spatie\Crawler\Url as CrawlerUrl; @@ -103,7 +104,7 @@ public function writeToFile(string $path) return $this; } - protected function getCrawlProfile(): Profile + protected function getCrawlProfile(): CrawlProfile { $shouldCrawl = function (CrawlerUrl $url) { if ($url->host !== CrawlerUrl::create($this->urlToBeCrawled)->host) { @@ -117,7 +118,9 @@ protected function getCrawlProfile(): Profile return ($this->shouldCrawl)($url); }; - return new Profile($shouldCrawl); + $profileClass = config('sitemap.crawl_profile', Profile::class); + + return app($profileClass, [$shouldCrawl]); } protected function getCrawlObserver(): Observer diff --git a/src/SitemapServiceProvider.php b/src/SitemapServiceProvider.php index a7204e9..446c745 100644 --- a/src/SitemapServiceProvider.php +++ b/src/SitemapServiceProvider.php @@ -3,6 +3,7 @@ namespace Spatie\Sitemap; use Spatie\Crawler\Crawler; +use Spatie\Sitemap\Crawler\Profile; use Illuminate\Support\ServiceProvider; class SitemapServiceProvider extends ServiceProvider @@ -27,6 +28,10 @@ public function boot() ->give(function () { return Crawler::create(config('sitemap.guzzle_options')); }); + + $this->app->bind(Profile::class, function ($app, $params) { + return new Profile(reset($params)); + }); } /** diff --git a/tests/CrawlProfileTest.php b/tests/CrawlProfileTest.php new file mode 100644 index 0000000..7dcb9b0 --- /dev/null +++ b/tests/CrawlProfileTest.php @@ -0,0 +1,58 @@ +crawler = $this->createMock(Crawler::class); + + $this->crawler->method('setCrawlObserver')->willReturn($this->crawler); + $this->crawler->method('setConcurrency')->willReturn($this->crawler); + } + + /** @test */ + public function it_should_use_the_default_crawl_profile() + { + $this->crawler + ->method('setCrawlProfile') + ->with($this->isInstanceOf(Profile::class)) + ->willReturn($this->crawler); + + $sitemapGenerator = new SitemapGenerator($this->crawler); + + $sitemap = $sitemapGenerator->getSitemap(); + + $this->assertInstanceOf(Sitemap::class, $sitemap); + } + + /** @test */ + public function it_should_use_a_custom_crawl_profile() + { + config(['sitemap.crawl_profile' => CustomCrawlProfile::class]); + + $this->crawler + ->method('setCrawlProfile') + ->with($this->isInstanceOf(CustomCrawlProfile::class)) + ->willReturn($this->crawler); + + $sitemapGenerator = new SitemapGenerator($this->crawler); + + $sitemap = $sitemapGenerator->getSitemap(); + + $this->assertInstanceOf(Sitemap::class, $sitemap); + } +} diff --git a/tests/CustomCrawlProfile.php b/tests/CustomCrawlProfile.php new file mode 100644 index 0000000..a6f1326 --- /dev/null +++ b/tests/CustomCrawlProfile.php @@ -0,0 +1,21 @@ +