From a2681a0aef5c7858609915cc020f2e52d705a3cf Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 12 Nov 2025 10:14:09 +0000 Subject: [PATCH] Add binary artifact detection feature This commit adds the ability to detect and flag binary artifacts in git repositories. Changes: - Add isBinaryFile() method in FileUtils.java that detects binary files by checking if the first 16KiB contains zero bytes - Add matchesAnyPattern() method to support pattern-based exclusions - Add binary-exclude configuration option in dictionary format to whitelist known binary files by name patterns - Update RunnableSigsLoader to load binary exclusion patterns from dictionary - Modify RunnableScanner to detect binary files before text scanning and flag them as BINARY_ARTIFACT if not excluded - Update README.md with documentation and examples for the new feature The feature allows users to: 1. Automatically detect binary files in repositories (files with zero bytes in first 16KiB) 2. Exclude known/legitimate binary files using regex patterns 3. Get warnings for unexpected binary artifacts that may have been accidentally committed --- README.md | 8 ++- .../cyberferret/async/RunnableScanner.java | 36 ++++++++++++++ .../cyberferret/async/RunnableSigsLoader.java | 27 ++++++++++ .../cyberferret/fxui/SceneBuilder.java | 1 + .../exadmin/cyberferret/utils/FileUtils.java | 49 +++++++++++++++++++ 5 files changed, 120 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index edc8582..7eca4dd 100644 --- a/README.md +++ b/README.md @@ -41,11 +41,12 @@ Set VM options "--module-path "...\JDKs\javafx-sdk-24.0.1\lib" --add-modules ja # Reserved key 'VERSION' is used for users notifications only, may be skipped VERSION=1.1 -# All key names may be in 3 formats +# All key names may be in 4 formats # KEY_NAME=VALUE - means the ferret will search for VALUE-string case-insensitive, the VALUE-string will be converted to RegExp pattern '\bVALUE\b'. 
Note: all spaces inside will be replaced with '\\s+', all special chars (&, -, +) will be escaped by '\\' # KEY_NAME(regexp)=VALUE - means you have finally defined RegExp pattern, and it will be used as is # KEY_NAME(allowed)=VALUE - means you have defined exact string - which may be found during scanned, but must be treated as allowed. Actually no matter what key name will be used - the value is a global string. # KEY_NAME(exclude-ext)=VALUE1,VALUE2,etc.. - list of file extentions to be ignored for the "KEY_NAME" signature +# BINARY_ARTIFACTS(binary-exclude)=PATTERN1,PATTERN2,etc.. - list of regex patterns for binary files to exclude from detection (e.g., .*\.jar, .*\.png) # Notes: all key names must be unique Examples @@ -69,4 +70,9 @@ PASSW-003=qwerty123 IP-ADDR(regexp)=((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) IP-ADDR-1(allowed)=0.0.0.0 IP-ADDR-2(allowed)=127.0.0.1 + +# Binary Artifacts Detection +# The tool automatically detects binary files (files with zero bytes in first 16KiB) +# Use binary-exclude to whitelist known binary files by their name patterns +BINARY_ARTIFACTS(binary-exclude)=.*\\.jar,.*\\.png,.*\\.jpg,.*\\.gif,.*\\.zip,.*\\.tar,.*\\.gz,.*\\.exe,.*\\.dll,.*\\.so,.*\\.dylib ``` diff --git a/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java b/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java index 4b9553b..7490bc7 100644 --- a/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java +++ b/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java @@ -35,6 +35,7 @@ public class RunnableScanner extends ARunnable { private Map sigMap = null; private Map allowedSigMap = null; private Map> excludeExtMap = null; + private List binaryExcludePatterns = null; public RunnableScanner() { } @@ -51,6 +52,10 @@ public void setExcludeExtMap(Map> excludeExtMap) { this.excludeExtMap = excludeExtMap; } + public void setBinaryExcludePatterns(List 
binaryExcludePatterns) { + this.binaryExcludePatterns = binaryExcludePatterns; + } + public void setDirToScan(String dirToScan) { this.dirToScan = dirToScan; } @@ -212,6 +217,37 @@ private void scan(FoundPathItem pathItem, Path rootDir, ExcludeFileModel exclude if (pathItem.getType() == ItemType.DIRECTORY || pathItem.getType() == ItemType.SIGNATURE) return; Path filePath = pathItem.getFilePath(); + + // Check if file is binary + boolean isBinary = false; + try { + isBinary = FileUtils.isBinaryFile(filePath); + } catch (IOException ex) { + log.warn("Could not determine if file '{}' is binary. Treating as text.", filePath); + } + + // If file is binary, check if it's excluded + if (isBinary) { + boolean isExcluded = FileUtils.matchesAnyPattern(filePath, binaryExcludePatterns); + if (isExcluded) { + log.debug("Binary file '{}' is excluded from detection", filePath); + return; + } + + // Flag the binary artifact + FoundPathItem binaryItem = new FoundPathItem(filePath, ItemType.SIGNATURE, pathItem); + binaryItem.setVisualName("BINARY_ARTIFACT"); + binaryItem.setLineNumber(0); + binaryItem.setDisplayText("Binary file detected (contains zero bytes in first 16KiB)"); + binaryItem.setFoundString(filePath.getFileName().toString()); + + calculateIgnoreFlagState(binaryItem, pathItem, rootDir, excludeFileModel); + foundItemsContainer.addItem(binaryItem); + log.warn("Binary artifact detected: {}", filePath); + return; + } + + // For text files, proceed with normal scanning String fileBody; try { fileBody = FileUtils.readFile(filePath); diff --git a/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java b/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java index ef93a55..c8398cf 100644 --- a/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java +++ b/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java @@ -23,6 +23,7 @@ public class RunnableSigsLoader extends ARunnable { private Map regExpMap; 
// map of signatures private Map allowedSignaturesMap; // effectively the list of exact strings which are allowed when capturing private Map> excludeExtsMap; // signature -> List of file extensions to ignore + private List binaryExcludePatterns; // list of file name patterns to exclude from binary detection private String dictionaryVersion = "undefined"; public void setFileToLoad(Path filePath) { @@ -47,6 +48,10 @@ public Map> getExcludeExtsMap() { return excludeExtsMap; } + public List getBinaryExcludePatterns() { + return binaryExcludePatterns; + } + public boolean isReady() { return isReady.get(); } @@ -68,6 +73,7 @@ public void _run() throws Exception { Map allowedSignaturesTmpMap = new HashMap<>(); Map> includeExt = new HashMap<>(); Map> excludeExtTmpMap = new HashMap<>(); + List binaryExcludePatternsTmp = new ArrayList<>(); for (Object key : properties.keySet()) { String sigId = key.toString(); @@ -93,6 +99,25 @@ public void _run() throws Exception { continue; } + // load binary file exclusion patterns + if (sigId.endsWith("(binary-exclude)")) { + String[] patterns = expression.split(","); + for (String patternStr : patterns) { + patternStr = patternStr.trim(); + if (!patternStr.isEmpty()) { + try { + Pattern pattern = Pattern.compile(patternStr); + binaryExcludePatternsTmp.add(pattern); + log.info("Binary exclusion pattern loaded: '{}'", patternStr); + } catch (PatternSyntaxException pse) { + log.error("Error while compiling binary exclusion pattern '{}'", patternStr, pse); + } + } + } + + continue; + } + if (sigId.endsWith("(allowed)")) { sigId = sigId.substring(0, sigId.length() - 9); @@ -106,9 +131,11 @@ public void _run() throws Exception { regExpMap = Collections.unmodifiableMap(regExpTmpMap); allowedSignaturesMap = Collections.unmodifiableMap(allowedSignaturesTmpMap); excludeExtsMap = Collections.unmodifiableMap(excludeExtTmpMap); + binaryExcludePatterns = Collections.unmodifiableList(binaryExcludePatternsTmp); log.info("Signatures are loaded 
successfully from {}. Number of signatures is {}", signaturesFile, regExpMap.size()); log.info("Number of allowed signatures is {}", allowedSignaturesMap.size()); + log.info("Number of binary exclusion patterns is {}", binaryExcludePatterns.size()); log.info("Dictionary version is {}", dictionaryVersion); isReady.set(true); diff --git a/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java b/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java index 10dab58..94ae361 100644 --- a/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java +++ b/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java @@ -145,6 +145,7 @@ protected TitledPane createOfflineDictionaryPane() { runnableScanner.setSignaturesMap(runnableSigsLoader.getRegExpMap()); runnableScanner.setAllowedSigMap(runnableSigsLoader.getAllowedSignaturesMap()); runnableScanner.setExcludeExtMap(runnableSigsLoader.getExcludeExtsMap()); + runnableScanner.setBinaryExcludePatterns(runnableSigsLoader.getBinaryExcludePatterns()); btnLoadSigs.setDisable(false); }); diff --git a/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java b/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java index 84bba77..ec02c94 100644 --- a/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java +++ b/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java @@ -1,9 +1,12 @@ package com.github.exadmin.cyberferret.utils; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.List; +import java.util.regex.Pattern; public class FileUtils { public static String readFile(String filePath) throws IOException { @@ -30,4 +33,50 @@ public static String getFileExtensionAsString(Path path) { ? fileName.substring(dotIndex + 1) : null; } + + /** + * Detects if a file is binary by checking if the first 16KiB contains any zero bytes. 
+     * <p>A single {@code read()} call may return fewer bytes than requested, so the prefix is
+     * filled with {@code readNBytes}, which only stops short when the end of the file is reached.
+     * @param path the file path to check
+     * @return true if the file is detected as binary, false otherwise
+     * @throws IOException if an I/O error occurs
+     */
+    public static boolean isBinaryFile(Path path) throws IOException {
+        if (!Files.isRegularFile(path)) {
+            return false;
+        }
+
+        byte[] buffer = new byte[16 * 1024];
+        try (InputStream is = Files.newInputStream(path)) {
+            // readNBytes keeps reading until the buffer is full or EOF; it returns 0 (never -1) for an empty file
+            int bytesRead = is.readNBytes(buffer, 0, buffer.length);
+            for (int i = 0; i < bytesRead; i++) {
+                if (buffer[i] == 0) {
+                    return true; // Found zero byte, file is binary
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Checks if a file name matches any of the provided patterns.
+     * @param path the file path to check; null or a path without a name element (a root) never matches
+     * @param patterns list of regex patterns; a pattern must match the whole file name to count
+     * @return true if the file name matches any pattern, false otherwise
+     */
+    public static boolean matchesAnyPattern(Path path, List<Pattern> patterns) {
+        Path namePart = (path == null) ? null : path.getFileName(); // getFileName() is null for a root path
+        if (namePart == null || patterns == null || patterns.isEmpty()) {
+            return false;
+        }
+        String fileName = namePart.toString();
+        for (Pattern pattern : patterns) {
+            if (pattern.matcher(fileName).matches()) {
+                return true;
+            }
+        }
+        return false;
+    }
 }