From 821d47e1bb8a5e3f14510a7cc6bbc30dd0152e9a Mon Sep 17 00:00:00 2001 From: Art Date: Sun, 3 May 2020 13:24:39 -0700 Subject: [PATCH] Allow passing null userAgent. --- .../webcrawlerverifier/DefaultKnownCrawlerDetector.java | 2 +- .../optimaize/webcrawlerverifier/KnownCrawlerDetector.java | 2 +- .../webcrawlerverifier/bots/KnownHostBotVerifier.java | 2 +- .../webcrawlerverifier/bots/KnownHostBotVerifierImpl.java | 6 ++++-- .../webcrawlerverifier/DefaultKnownCrawlerDetectorTest.java | 1 + 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetector.java b/src/main/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetector.java index d018d9b..24b6be7 100644 --- a/src/main/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetector.java +++ b/src/main/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetector.java @@ -22,7 +22,7 @@ public DefaultKnownCrawlerDetector(@NotNull List verifiers @NotNull @Override - public Optional detect(@NotNull String userAgent, @NotNull String ip) { + public Optional detect(String userAgent, @NotNull String ip) { for (KnownHostBotVerifier verifier : verifiers) { BotCheckerResult check = verifier.check(userAgent, ip); if (check != BotCheckerResult.IS_NOT) { diff --git a/src/main/java/com/optimaize/webcrawlerverifier/KnownCrawlerDetector.java b/src/main/java/com/optimaize/webcrawlerverifier/KnownCrawlerDetector.java index a000f96..a212f7a 100644 --- a/src/main/java/com/optimaize/webcrawlerverifier/KnownCrawlerDetector.java +++ b/src/main/java/com/optimaize/webcrawlerverifier/KnownCrawlerDetector.java @@ -14,6 +14,6 @@ public interface KnownCrawlerDetector { * @return absent if none detected. */ @NotNull - Optional detect(@NotNull String userAgent, @NotNull String ip); + Optional detect(String userAgent, @NotNull String ip); } diff --git a/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifier.java b/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifier.java index d19ad99..581cf77 100644 --- a/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifier.java +++ b/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifier.java @@ -35,6 +35,6 @@ public interface KnownHostBotVerifier { * @return */ @NotNull - BotCheckerResult check(@NotNull String userAgent, @NotNull String ip); + BotCheckerResult check(String userAgent, @NotNull String ip); } diff --git a/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifierImpl.java b/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifierImpl.java index 74d626c..86d748d 100644 --- a/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifierImpl.java +++ b/src/main/java/com/optimaize/webcrawlerverifier/bots/KnownHostBotVerifierImpl.java @@ -8,6 +8,8 @@ import java.util.Set; +import static com.google.common.base.Strings.isNullOrEmpty; + /** * */ @@ -38,8 +40,8 @@ public String getIdentifier() { @Override @NotNull - public BotCheckerResult check(@NotNull String userAgent, @NotNull String ip) { - if (!crawlerData.getUserAgentChecker().apply(userAgent)) { + public BotCheckerResult check(String userAgent, @NotNull String ip) { + if (isNullOrEmpty(userAgent) || !crawlerData.getUserAgentChecker().apply(userAgent)) { return BotCheckerResult.IS_NOT; } else { Set permittedIps = crawlerData.getIps(); diff --git a/src/test/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetectorTest.java b/src/test/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetectorTest.java index e6983d5..3aca71f 100644 --- a/src/test/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetectorTest.java +++ b/src/test/java/com/optimaize/webcrawlerverifier/DefaultKnownCrawlerDetectorTest.java @@ -20,6 +20,7 @@ public class DefaultKnownCrawlerDetectorTest { public void none() throws Exception { DefaultKnownCrawlerDetector detector = all(); assertFalse(detector.detect("", "127.0.0.1").isPresent()); + assertFalse(detector.detect(null, "127.0.0.1").isPresent()); } /**