Skip to content

v0.10.0-filtering #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Upcoming changes...

## [0.10.0] - 2025-02-17
### Added
- Add support to skip rule
- Improve file filtering

## [0.9.0] - 2025-02-03
### Added
Expand Down Expand Up @@ -104,4 +108,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[0.7.1]: https://github.com/scanoss/scanoss.java/compare/v0.7.0...v0.7.1
[0.8.0]: https://github.com/scanoss/scanoss.java/compare/v0.7.1...v0.8.0
[0.8.1]: https://github.com/scanoss/scanoss.java/compare/v0.8.0...v0.8.1
[0.9.0]: https://github.com/scanoss/scanoss.java/compare/v0.8.1...v0.9.0
[0.9.0]: https://github.com/scanoss/scanoss.java/compare/v0.8.1...v0.9.0
[0.10.0]: https://github.com/scanoss/scanoss.java/compare/v0.9.0...v0.10.0
41 changes: 40 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.scanoss</groupId>
<artifactId>scanoss</artifactId>
<version>0.9.0</version>
<version>0.10.0</version>
<packaging>jar</packaging>
<name>scanoss.java</name>
<url>https://github.com/scanoss/scanoss.java</url>
Expand Down Expand Up @@ -114,7 +114,19 @@
<optional>true</optional>
<scope>compile</scope>
</dependency>
<!-- Eclipse JGit, compile scope. NOTE(review): presumably added for the gitignore-style pattern handling
     introduced with the new file filtering; confirm against the filter implementation. -->
<dependency>
<groupId>org.eclipse.jgit</groupId>
<artifactId>org.eclipse.jgit</artifactId>
<version>6.10.0.202406032230-r</version>
<scope>compile</scope>
</dependency>
<!-- Apache Ant, compile scope. NOTE(review): presumably used for path/pattern matching utilities in the
     new filters; confirm that the full Ant artifact is really required at compile scope. -->
<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant</artifactId>
<version>1.10.15</version>
<scope>compile</scope>

</dependency>

<!-- Test dependencies -->
<dependency>
Expand Down Expand Up @@ -301,5 +313,32 @@
</plugins>
</build>
</profile>
<!-- "local" profile: points distribution at a file:// repository under the user's ~/.m2 directory and
     turns off GPG signing, Javadoc generation and source-jar generation via the properties below. -->
<profile>
<id>local</id>
<distributionManagement>
<repository>
<id>local-repo</id>
<name>Local Repository</name>
<url>file://${user.home}/.m2/repository</url>
</repository>
</distributionManagement>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-install-plugin</artifactId>
<!-- NOTE(review): the install plugin's version is read from native.maven.plugin.version, a property
     whose name suggests it belongs to a different plugin; confirm this is intentional. -->
<version>${native.maven.plugin.version}</version>
<configuration>
<!-- Explicitly keep the install step enabled for this profile -->
<skip>false</skip>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<!-- Skip signing, Javadoc and source attachment when this profile is active -->
<gpg.skip>true</gpg.skip>
<maven.javadoc.skip>true</maven.javadoc.skip>
<maven.source.skip>true</maven.source.skip>
</properties>
</profile>
</profiles>
</project>
160 changes: 66 additions & 94 deletions src/main/java/com/scanoss/Scanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,14 @@
import com.scanoss.dto.ScanFileResult;
import com.scanoss.exceptions.ScannerException;
import com.scanoss.exceptions.WinnowingException;
import com.scanoss.processor.FileProcessor;
import com.scanoss.processor.ScanFileProcessor;
import com.scanoss.processor.WfpFileProcessor;
import com.scanoss.filters.FilterConfig;
import com.scanoss.filters.factories.FileFilterFactory;
import com.scanoss.filters.factories.FolderFilterFactory;
import com.scanoss.processor.*;
import com.scanoss.rest.ScanApi;
import com.scanoss.settings.Settings;
import com.scanoss.settings.ScanossSettings;
import com.scanoss.utils.JsonUtils;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import lombok.*;
import lombok.extern.slf4j.Slf4j;

import java.io.File;
Expand All @@ -49,6 +48,7 @@
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Predicate;

import static com.scanoss.ScanossConstants.*;

Expand All @@ -64,36 +64,59 @@
public class Scanner {
@Builder.Default
private Boolean skipSnippets = Boolean.FALSE; // Skip snippet generations

@Builder.Default
private Boolean allExtensions = Boolean.FALSE; // Fingerprint all file extensions

@Builder.Default
private Boolean obfuscate = Boolean.FALSE; // Obfuscate file path

@Builder.Default
private Boolean hpsm = Boolean.FALSE; // Enable High Precision Snippet Matching data collection

@Builder.Default
private Boolean hiddenFilesFolders = Boolean.FALSE; // Enable Scanning of hidden files/folders

@Builder.Default
private Boolean allFolders = Boolean.FALSE; // Enable Scanning of all folders (except hidden)

@Builder.Default
private Integer numThreads = DEFAULT_WORKER_THREADS; // Number of parallel threads to use when processing a folder

@Builder.Default
private Duration timeout = Duration.ofSeconds(DEFAULT_TIMEOUT); // API POST timeout

@Builder.Default
private Integer retryLimit = DEFAULT_HTTP_RETRY_LIMIT; // Retry limit for posting scan requests
private String url; // Alternative scanning URL
private String apiKey; // API key
private String scanFlags; // Scan flags to pass to the API
private String sbomType; // SBOM type (identify/ignore)
private String sbom; // SBOM to supply while scanning
private int snippetLimit; // Size limit for a single line of generated snippet
private String customCert; // Custom certificate
private Proxy proxy; // Proxy
private Winnowing winnowing;
private ScanApi scanApi;
private ScanFileProcessor scanFileProcessor;
private WfpFileProcessor wfpFileProcessor;
private Settings settings;
private ScannerPostProcessor postProcessor;

private final String url; // Alternative scanning URL
private final String apiKey; // API key
private final String scanFlags; // Scan flags to pass to the API
private final String sbomType; // SBOM type (identify/ignore)
private final String sbom; // SBOM to supply while scanning
private final int snippetLimit; // Size limit for a single line of generated snippet
private final String customCert; // Custom certificate
private final Proxy proxy; // Proxy
private final Winnowing winnowing;
private final ScanApi scanApi;
private final ScanFileProcessor scanFileProcessor;
private final WfpFileProcessor wfpFileProcessor; // Built from this.winnowing when not supplied
private final ScanossSettings settings; // Defaults to ScanossSettings.builder().build() when not supplied
private final ScannerPostProcessor postProcessor; // Defaults to ScannerPostProcessor.builder().build() when not supplied
// Filtering options; when not supplied it is built from allFolders, allExtensions, hiddenFilesFolders
// and the scanning ignore patterns taken from the settings
private final FilterConfig filterConfig;
// Returns true for a file that must be skipped; defaults to FileFilterFactory.build(filterConfig)
private Predicate<Path> fileFilter;
// Returns true for a folder whose subtree must be skipped; defaults to FolderFilterFactory.build(filterConfig)
private Predicate<Path> folderFilter;

//TODO: Once this Lombok PR is merged https://github.com/projectlombok/lombok/pull/3723#pullrequestreview-2617412643
// Update Lombok dependency
/**
 * Hand-written part of the builder for {@link Scanner}.
 * <p>
 * Declares private, no-op versions of the <code>folderFilter</code> and <code>fileFilter</code> builder
 * methods: each one ignores its argument and returns the builder unchanged.
 * NOTE(review): presumably these exist so the generated builder does not expose public setters for the
 * two predicates (see the TODO above) - confirm against the Lombok <code>@Builder</code> behaviour.
 * </p>
 */
public static class ScannerBuilder {
// Intentionally discards the supplied predicate; nothing is stored on the builder
private ScannerBuilder folderFilter(Predicate<Path> folderFilter) {
return this;
}
// Intentionally discards the supplied predicate; nothing is stored on the builder
private ScannerBuilder fileFilter(Predicate<Path> fileFilter) {
return this;
}
}

@SuppressWarnings("unused")
private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate, Boolean hpsm,
Expand All @@ -102,7 +125,9 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate,
Integer snippetLimit, String customCert, Proxy proxy,
Winnowing winnowing, ScanApi scanApi,
ScanFileProcessor scanFileProcessor, WfpFileProcessor wfpFileProcessor,
Settings settings, ScannerPostProcessor postProcessor
ScanossSettings settings, ScannerPostProcessor postProcessor, FilterConfig filterConfig,
Predicate<Path> fileFilter,
Predicate<Path> folderFilter
) {
this.skipSnippets = skipSnippets;
this.allExtensions = allExtensions;
Expand Down Expand Up @@ -134,9 +159,20 @@ private Scanner(Boolean skipSnippets, Boolean allExtensions, Boolean obfuscate,
this.wfpFileProcessor = Objects.requireNonNullElseGet(wfpFileProcessor, () -> WfpFileProcessor.builder()
.winnowing(this.winnowing)
.build());
this.settings = Objects.requireNonNullElseGet(settings, () -> Settings.builder().build());
this.settings = Objects.requireNonNullElseGet(settings, () -> ScanossSettings.builder().build());
this.postProcessor = Objects.requireNonNullElseGet(postProcessor, () ->
ScannerPostProcessor.builder().build()); }
ScannerPostProcessor.builder().build());

this.filterConfig = Objects.requireNonNullElseGet(filterConfig, () -> FilterConfig.builder()
.allFolders(allFolders)
.allExtensions(allExtensions)
.hiddenFilesFolders(hiddenFilesFolders)
.gitIgnorePatterns(this.settings.getScanningIgnorePattern())
.build());

this.fileFilter = Objects.requireNonNullElseGet(fileFilter , () -> FileFilterFactory.build(this.filterConfig));
this.folderFilter = Objects.requireNonNullElseGet(folderFilter, () -> FolderFilterFactory.build(this.filterConfig));
}

/**
* Generate a WFP/Fingerprint for the given file
Expand All @@ -157,70 +193,6 @@ public String wfpFile(@NonNull String filename) throws ScannerException, Winnowi
return this.winnowing.wfpForFile(filename, filename);
}

/**
 * Decide whether a folder must be left out of the scan.
 * <p>
 * A folder is skipped when it is hidden (name starts with a dot, other than <code>"."</code> itself) and
 * hidden files/folders are not enabled, or - unless all folders are enabled - when its lower-cased name
 * ends with one of the filtered folder names or filtered folder endings.
 * </p>
 *
 * @param name folder/directory name to review
 * @return <code>true</code> if the folder should be skipped, <code>false</code> otherwise
 */
private Boolean filterFolder(String name) {
    final String lowered = name.toLowerCase();
    final boolean hiddenAndExcluded = !hiddenFilesFolders && name.startsWith(".") && !name.equals(".");
    if (hiddenAndExcluded) {
        log.trace("Skipping hidden folder: {}", name);
        return true;
    }
    if (allFolders) {
        // Every (non-hidden) folder is accepted, so the ending checks are bypassed
        return false;
    }
    // The folder-ending list is only consulted when no filtered folder name matched
    return hasFilteredEnding(name, lowered, ScanossConstants.FILTERED_DIRS)
            || hasFilteredEnding(name, lowered, ScanossConstants.FILTERED_DIR_EXT);
}

/**
 * Check the lower-cased folder name against every entry of the given list, tracing each match.
 *
 * @param name      folder name as supplied by the caller (used for logging only)
 * @param nameLower lower-cased folder name used for the comparison
 * @param endings   endings that mark a folder as filtered
 * @return <code>true</code> if at least one ending matched, <code>false</code> otherwise
 */
private boolean hasFilteredEnding(String name, String nameLower, List<String> endings) {
    boolean matched = false;
    for (String candidate : endings) {
        if (nameLower.endsWith(candidate)) {
            log.trace("Skipping folder due to ending: {} - {}", name, candidate);
            matched = true;
        }
    }
    return matched;
}

/**
 * Decide whether a file must be left out of the scan.
 * <p>
 * Checks run in this order: hidden files (unless hidden files/folders are enabled), the
 * "all extensions" override, the list of specific filtered file names, and finally the list of
 * filtered file endings. Comparisons use the name exactly as supplied.
 * </p>
 *
 * @param name filename to review
 * @return <code>true</code> if the file should be skipped, <code>false</code> otherwise
 */
private Boolean filterFile(String name) {
    // Hidden files are only read when explicitly requested
    if (!hiddenFilesFolders && name.startsWith(".")) {
        log.trace("Skipping hidden file: {}", name);
        return true;
    }
    // With all extensions enabled, the name and ending lists are not consulted
    if (this.allExtensions) {
        log.trace("Processing all file extensions: {}", name);
        return false;
    }
    // Exact matches against the list of specific files
    final boolean listedFile = ScanossConstants.FILTERED_FILES.contains(name);
    if (listedFile) {
        log.trace("Skipping specific file: {}", name);
        return true;
    }
    // First filtered ending (in list order) that the name finishes with, if any
    final String matchedEnding = ScanossConstants.FILTERED_EXTENSIONS.stream()
            .filter(name::endsWith)
            .findFirst()
            .orElse(null);
    if (matchedEnding == null) {
        return false;
    }
    log.trace("Skipping file due to ending: {} - {}", name, matchedEnding);
    return true;
}

/**
* Strip the leading string from the specified path
*
Expand Down Expand Up @@ -262,17 +234,16 @@ public List<String> processFolder(@NonNull String folder, FileProcessor processo
Files.walkFileTree(Paths.get(folder), new SimpleFileVisitor<>() {
@Override
public FileVisitResult preVisitDirectory(Path file, BasicFileAttributes attrs) {
String nameLower = file.getFileName().toString().toLowerCase();
if (attrs.isDirectory() && filterFolder(nameLower)) {
if(folderFilter.test(file)) {
log.debug("Processing file: {}", file.getFileName().toString());
return FileVisitResult.SKIP_SUBTREE; // Skip the rest of this directory tree
}
return FileVisitResult.CONTINUE;
}

@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
String nameLower = file.getFileName().toString().toLowerCase();
if (attrs.isRegularFile() && !filterFile(nameLower) && attrs.size() > 0) {
if (attrs.isRegularFile() && !fileFilter.test(file) && attrs.size() > 0) {
String filename = file.toString();
Future<String> future = executorService.submit(() -> processor.process(filename, stripDirectory(folder, filename)));
futures.add(future);
Expand Down Expand Up @@ -321,7 +292,8 @@ public List<String> processFileList(@NonNull String root, @NonNull List<String>
Path path = Path.of(file);
boolean skipDir = false;
for (Path p : path) {
if (filterFolder(p.toString().toLowerCase())) { // should we skip this folder or not
// should we skip this folder or not
if (this.folderFilter.test(p)) { // should we skip this folder or not
skipDir = true;
break;
}
Expand All @@ -330,7 +302,7 @@ public List<String> processFileList(@NonNull String root, @NonNull List<String>
continue; // skip this file as the folder is not allowed
}
String nameLower = path.getFileName().toString().toLowerCase();
if (!filterFile(nameLower)) {
if (!this.fileFilter.test(path)) {
Path fullPath = Path.of(root, file);
File f = fullPath.toFile();
if (f.exists() && f.isFile() && f.length() > 0 && ! Files.isSymbolicLink(fullPath)) {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/scanoss/ScannerPostProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ private ScanFileDetails createUpdatedResultDetails(ScanFileDetails existingCompo
.file(existingComponent.getFile())
.fileHash(existingComponent.getFileHash())
.fileUrl(existingComponent.getFileUrl())
.lines(existingComponent.getLines())
.purls(new String[]{newPurl.toString()})
.component(newPurl.getName())
.vendor(newPurl.getNamespace())
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/com/scanoss/ScanossConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public class ScanossConstants {
static final int MAX_LONG_LINE_CHARS = 1000; // Maximum length of a single source line to be considered source code

// File extensions to ignore snippets for
static final List<String> SKIP_SNIPPET_EXT = Arrays.asList(
public static final List<String> SKIP_SNIPPET_EXT = Arrays.asList(
".exe", ".zip", ".tar", ".tgz", ".gz", ".7z", ".rar", ".jar", ".war", ".ear", ".class", ".pyc",
".o", ".a", ".so", ".obj", ".dll", ".lib", ".out", ".app", ".bin",
".lst", ".dat", ".json", ".htm", ".html", ".xml", ".md", ".txt",
Expand All @@ -62,17 +62,17 @@ public class ScanossConstants {
);

// Folders to skip
static final List<String> FILTERED_DIRS = Arrays.asList(
public static final List<String> FILTERED_DIRS = Arrays.asList(
"nbproject", "nbbuild", "nbdist", "__pycache__", "venv", "_yardoc", "eggs", "wheels", "htmlcov",
"__pypackages__", "target"
);

// Folder endings to skip
static final List<String> FILTERED_DIR_EXT = List.of(".egg-info");
public static final List<String> FILTERED_DIR_EXT = List.of(".egg-info");


// File extensions to skip
static final List<String> FILTERED_EXTENSIONS = Arrays.asList(
public static final List<String> FILTERED_EXTENSIONS = Arrays.asList(
".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".ac", ".adoc", ".am",
".asciidoc", ".bmp", ".build", ".cfg", ".chm", ".class", ".cmake", ".cnf",
".conf", ".config", ".contributors", ".copying", ".crt", ".csproj", ".css",
Expand All @@ -95,7 +95,7 @@ public class ScanossConstants {
);

// Files to skip
static final List<String> FILTERED_FILES = Arrays.asList(
public static final List<String> FILTERED_FILES = Arrays.asList(
"gradlew", "gradlew.bat", "mvnw", "mvnw.cmd", "gradle-wrapper.jar", "maven-wrapper.jar",
"thumbs.db", "babel.config.js", "license.txt", "license.md", "copying.lib", "makefile"
);
Expand Down
Loading