diff --git a/api/maven-api-core/src/main/java/org/apache/maven/api/SourceRoot.java b/api/maven-api-core/src/main/java/org/apache/maven/api/SourceRoot.java index 0b77dbcec513..b99767523657 100644 --- a/api/maven-api-core/src/main/java/org/apache/maven/api/SourceRoot.java +++ b/api/maven-api-core/src/main/java/org/apache/maven/api/SourceRoot.java @@ -20,6 +20,7 @@ import java.nio.file.Path; import java.nio.file.PathMatcher; +import java.util.Collection; import java.util.List; import java.util.Optional; @@ -46,23 +47,47 @@ default Path directory() { } /** - * {@return the list of pattern matchers for the files to include}. + * {@return the list of patterns for the files to include}. + * The path separator is {@code /} on all platforms, including Windows. + * The prefix before the {@code :} character, if present and longer than 1 character, is the syntax. + * If no syntax is specified, or if its length is 1 character (interpreted as a Windows drive), + * the default is a Maven-specific variation of the {@code "glob"} pattern. + * + *

* The default implementation returns an empty list, which means to apply a language-dependent pattern. * For example, for the Java language, the pattern includes all files with the {@code .java} suffix. + * + * @see java.nio.file.FileSystem#getPathMatcher(String) */ - default List includes() { + default List includes() { return List.of(); } /** - * {@return the list of pattern matchers for the files to exclude}. + * {@return the list of patterns for the files to exclude}. * The exclusions are applied after the inclusions. * The default implementation returns an empty list. */ - default List excludes() { + default List excludes() { return List.of(); } + /** + * {@return a matcher combining the include and exclude patterns}. + * If the user did not specified any includes, the given {@code defaultIncludes} are used. + * These defaults depend on the plugin. + * For example, the default include of the Java compiler plugin is "**/*.java". + * + *

If the user did not specify any excludes, the default can be files generated + * by Source Code Management (SCM) software or by the operating system. + * Examples: "**/.gitignore", "**/.DS_Store".

+ * + * @param defaultIncludes the default includes if unspecified by the user + * @param useDefaultExcludes whether to add the default set of patterns to exclude, + * mostly Source Code Management (SCM) files + */ + PathMatcher matcher(Collection defaultIncludes, boolean useDefaultExcludes); + /** * {@return in which context the source files will be used}. * Not to be confused with dependency scope. diff --git a/impl/maven-core/src/main/java/org/apache/maven/project/MavenProject.java b/impl/maven-core/src/main/java/org/apache/maven/project/MavenProject.java index 5e7e48059fc3..758b9936e8c0 100644 --- a/impl/maven-core/src/main/java/org/apache/maven/project/MavenProject.java +++ b/impl/maven-core/src/main/java/org/apache/maven/project/MavenProject.java @@ -822,8 +822,8 @@ public boolean add(Resource resource) { private static Resource toResource(SourceRoot sourceRoot) { return new Resource(org.apache.maven.api.model.Resource.newBuilder() .directory(sourceRoot.directory().toString()) - .includes(sourceRoot.includes().stream().map(Object::toString).toList()) - .excludes(sourceRoot.excludes().stream().map(Object::toString).toList()) + .includes(sourceRoot.includes()) + .excludes(sourceRoot.excludes()) .filtering(Boolean.toString(sourceRoot.stringFiltering())) .build()); } diff --git a/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultSourceRoot.java b/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultSourceRoot.java index cc24ee6089d6..376c579e2880 100644 --- a/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultSourceRoot.java +++ b/impl/maven-impl/src/main/java/org/apache/maven/impl/DefaultSourceRoot.java @@ -18,9 +18,9 @@ */ package org.apache.maven.impl; -import java.nio.file.FileSystem; import java.nio.file.Path; import java.nio.file.PathMatcher; +import java.util.Collection; import java.util.List; import java.util.Objects; import java.util.Optional; @@ -39,9 +39,9 @@ public final class DefaultSourceRoot implements SourceRoot { private 
final Path directory; - private final List includes; + private final List includes; - private final List excludes; + private final List excludes; private final ProjectScope scope; @@ -65,9 +65,8 @@ public final class DefaultSourceRoot implements SourceRoot { * @param source a source element from the model */ public DefaultSourceRoot(final Session session, final Path baseDir, final Source source) { - FileSystem fs = baseDir.getFileSystem(); - includes = matchers(fs, source.getIncludes()); - excludes = matchers(fs, source.getExcludes()); + includes = source.getIncludes(); + excludes = source.getExcludes(); stringFiltering = source.isStringFiltering(); enabled = source.isEnabled(); moduleName = nonBlank(source.getModule()); @@ -106,9 +105,8 @@ public DefaultSourceRoot(final Path baseDir, ProjectScope scope, Resource resour throw new IllegalArgumentException("Source declaration without directory value."); } directory = baseDir.resolve(value).normalize(); - FileSystem fs = directory.getFileSystem(); - includes = matchers(fs, resource.getIncludes()); - excludes = matchers(fs, resource.getExcludes()); + includes = resource.getIncludes(); + excludes = resource.getExcludes(); stringFiltering = Boolean.parseBoolean(resource.getFiltering()); enabled = true; moduleName = null; @@ -144,13 +142,15 @@ public DefaultSourceRoot(final ProjectScope scope, final Language language, fina * @param scope scope of source code (main or test) * @param language language of the source code * @param directory directory of the source code + * @param includes patterns for the files to include, or {@code null} or empty if unspecified + * @param excludes patterns for the files to exclude, or {@code null} or empty if nothing to exclude */ public DefaultSourceRoot( final ProjectScope scope, final Language language, final Path directory, - List includes, - List excludes) { + List includes, + List excludes) { this.scope = Objects.requireNonNull(scope); this.language = language; this.directory = 
Objects.requireNonNull(directory); @@ -176,38 +176,6 @@ private static String nonBlank(String value) { return value; } - /** - * Creates a path matcher for each pattern. - * The path separator is {@code /} on all platforms, including Windows. - * The prefix before the {@code :} character is the syntax. - * If no syntax is specified, {@code "glob"} is assumed. - * - * @param fs the file system of the root directory - * @param patterns the patterns for which to create path matcher - * @return a path matcher for each pattern - */ - private static List matchers(FileSystem fs, List patterns) { - final var matchers = new PathMatcher[patterns.size()]; - for (int i = 0; i < matchers.length; i++) { - String rawPattern = patterns.get(i); - String pattern = rawPattern.contains(":") ? rawPattern : "glob:" + rawPattern; - matchers[i] = new PathMatcher() { - final PathMatcher delegate = fs.getPathMatcher(pattern); - - @Override - public boolean matches(Path path) { - return delegate.matches(path); - } - - @Override - public String toString() { - return rawPattern; - } - }; - } - return List.of(matchers); - } - /** * {@return the root directory where the sources are stored}. */ @@ -217,23 +185,39 @@ public Path directory() { } /** - * {@return the list of pattern matchers for the files to include}. + * {@return the patterns for the files to include}. */ @Override @SuppressWarnings("ReturnOfCollectionOrArrayField") // Safe because unmodifiable - public List includes() { + public List includes() { return includes; } /** - * {@return the list of pattern matchers for the files to exclude}. + * {@return the patterns for the files to exclude}. */ @Override @SuppressWarnings("ReturnOfCollectionOrArrayField") // Safe because unmodifiable - public List excludes() { + public List excludes() { return excludes; } + /** + * {@return a matcher combining the include and exclude patterns}. 
+ * + * @param defaultIncludes the default includes if unspecified by the user + * @param useDefaultExcludes whether to add the default set of patterns to exclude, + * mostly Source Code Management (SCM) files + */ + @Override + public PathMatcher matcher(Collection defaultIncludes, boolean useDefaultExcludes) { + Collection actual = includes(); + if (actual == null || actual.isEmpty()) { + actual = defaultIncludes; + } + return new PathSelector(directory(), actual, excludes(), useDefaultExcludes).simplify(); + } + /** * {@return in which context the source files will be used}. */ diff --git a/impl/maven-impl/src/main/java/org/apache/maven/impl/PathSelector.java b/impl/maven-impl/src/main/java/org/apache/maven/impl/PathSelector.java new file mode 100644 index 000000000000..7e2ff5cd5536 --- /dev/null +++ b/impl/maven-impl/src/main/java/org/apache/maven/impl/PathSelector.java @@ -0,0 +1,622 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.maven.impl; + +import java.io.File; +import java.nio.file.FileSystem; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +/** + * Determines whether a path is selected according to include/exclude patterns. + * The pathnames used for method parameters will be relative to some base directory + * and use {@code '/'} as separator, regardless of the hosting operating system. + * + *

Syntax

+ * If a pattern contains the {@code ':'} character and the prefix before is longer than 1 character, + * then that pattern is given verbatim to {@link FileSystem#getPathMatcher(String)}, which interprets + * the part before {@code ':'} as the syntax (usually {@code "glob"} or {@code "regex"}). + * If a pattern does not contain the {@code ':'} character, or if the prefix is one character long + * (interpreted as a Windows drive), then the syntax defaults to a reproduction of the Maven 3 behavior. + * This is implemented as the {@code "glob"} syntax with the following modifications: + * + *
    + *
  • The platform-specific separator ({@code '\\'} on Windows) is replaced by {@code '/'}. + * Note that it means that the backslash cannot be used for escaping characters.
  • + *
  • Trailing {@code "/"} is completed as {@code "/**"}.
  • + *
  • The {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories". + * This is implemented by adding variants of the pattern without the {@code "**"} wildcard.
  • + *
  • Bracket characters [ ] and { } are escaped.
  • + *
  • On Unix only, the escape character {@code '\\'} is itself escaped.
  • + *
+ * + * If above changes are not desired, put an explicit {@code "glob:"} prefix before the pattern. + * Note that putting such a prefix is recommended anyway for better performances. + * + * @author Benjamin Bentmann + * @author Martin Desruisseaux + * + * @see java.nio.file.FileSystem#getPathMatcher(String) + */ +public class PathSelector implements PathMatcher { + /** + * Patterns which should be excluded by default, like SCM files. + * + *

Source: this list is copied from {@code plexus-utils-4.0.2} (released on + * September 23, 2024), class {@code org.codehaus.plexus.util.AbstractScanner}.

+ */ + private static final List DEFAULT_EXCLUDES = List.of( + // Miscellaneous typical temporary files + "**/*~", + "**/#*#", + "**/.#*", + "**/%*%", + "**/._*", + + // CVS + "**/CVS", + "**/CVS/**", + "**/.cvsignore", + + // RCS + "**/RCS", + "**/RCS/**", + + // SCCS + "**/SCCS", + "**/SCCS/**", + + // Visual SourceSafe + "**/vssver.scc", + + // MKS + "**/project.pj", + + // Subversion + "**/.svn", + "**/.svn/**", + + // Arch + "**/.arch-ids", + "**/.arch-ids/**", + + // Bazaar + "**/.bzr", + "**/.bzr/**", + + // SurroundSCM + "**/.MySCMServerInfo", + + // Mac + "**/.DS_Store", + + // Serena Dimensions Version 10 + "**/.metadata", + "**/.metadata/**", + + // Mercurial + "**/.hg", + "**/.hg/**", + + // git + "**/.git", + "**/.git/**", + "**/.gitignore", + + // BitKeeper + "**/BitKeeper", + "**/BitKeeper/**", + "**/ChangeSet", + "**/ChangeSet/**", + + // darcs + "**/_darcs", + "**/_darcs/**", + "**/.darcsrepo", + "**/.darcsrepo/**", + "**/-darcs-backup*", + "**/.darcs-temp-mail"); + + /** + * Maximum number of characters of the prefix before {@code ':'} for handling as a Maven syntax. + */ + private static final int MAVEN_SYNTAX_THRESHOLD = 1; + + /** + * The default syntax to use if none was specified. Note that when this default syntax is applied, + * the user-provided pattern get some changes as documented in class Javadoc. + */ + private static final String DEFAULT_SYNTAX = "glob:"; + + /** + * Characters having a special meaning in the glob syntax. + * + * @see FileSystem#getPathMatcher(String) + */ + private static final String SPECIAL_CHARACTERS = "*?[]{}\\"; + + /** + * A path matcher which accepts all files. + * + * @see #simplify() + */ + private static final PathMatcher INCLUDES_ALL = (path) -> true; + + /** + * String representations of the normalized include filters. + * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default. 
+ * + * @see #toString() + */ + private final String[] includePatterns; + + /** + * String representations of the normalized exclude filters. + * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default. + * This array may be longer or shorter than the user-supplied excludes, depending on whether + * default excludes have been added and whether some unnecessary excludes have been omitted. + * + * @see #toString() + */ + private final String[] excludePatterns; + + /** + * The matcher for includes. The length of this array is equal to {@link #includePatterns} array length. + */ + private final PathMatcher[] includes; + + /** + * The matcher for excludes. The length of this array is equal to {@link #excludePatterns} array length. + */ + private final PathMatcher[] excludes; + + /** + * The matcher for all directories to include. This array includes the parents of all those directories, + * because they need to be accepted before we can walk to the sub-directories. + * This is an optimization for skipping whole directories when possible. + */ + private final PathMatcher[] dirIncludes; + + /** + * The matcher for directories to exclude. This array does not include the parent directories, + * because they may contain other sub-trees that need to be included. + * This is an optimization for skipping whole directories when possible. + */ + private final PathMatcher[] dirExcludes; + + /** + * The base directory. All files will be relativized to that directory before to be matched. + */ + private final Path baseDirectory; + + /** + * Creates a new selector from the given includes and excludes. 
+ * + * @param directory the base directory of the files to filter + * @param includes the patterns of the files to include, or null or empty for including all files + * @param excludes the patterns of the files to exclude, or null or empty for no exclusion + * @param useDefaultExcludes whether to augment the excludes with a default set of SCM patterns + */ + public PathSelector( + Path directory, Collection includes, Collection excludes, boolean useDefaultExcludes) { + includePatterns = normalizePatterns(includes, false); + excludePatterns = normalizePatterns(effectiveExcludes(excludes, includePatterns, useDefaultExcludes), true); + baseDirectory = directory; + FileSystem system = directory.getFileSystem(); + this.includes = matchers(system, includePatterns); + this.excludes = matchers(system, excludePatterns); + dirIncludes = matchers(system, directoryPatterns(includePatterns, false)); + dirExcludes = matchers(system, directoryPatterns(excludePatterns, true)); + } + + /** + * Returns the given array of excludes, optionally expanded with a default set of excludes, + * then with unnecessary excludes omitted. An unnecessary exclude is an exclude which will never + * match a file because there is no include which would accept a file that could match the exclude. + * For example, if the only include is {@code "*.java"}, then the "**/project.pj", + * "**/.DS_Store" and other excludes will never match a file and can be omitted. + * Because the list of {@linkplain #DEFAULT_EXCLUDES default excludes} contains many elements, + * removing unnecessary excludes can reduce a lot the number of matches tested on each source file. + * + *

Implementation note

+ * The removal of unnecessary excludes is done on a best effort basis. The current implementation + * compares only the prefixes and suffixes of each pattern, keeping the pattern in case of doubt. + * This is not bad, but it does not remove all unnecessary patterns. It would be possible to do + * better in the future if benchmarking suggests that it would be worth the effort. + * + * @param excludes the user-specified excludes, potentially not yet converted to glob syntax + * @param includes the include patterns converted to glob syntax + * @param useDefaultExcludes whether to expand user exclude with the set of default excludes + * @return the potentially expanded or reduced set of excludes to use + */ + private static Collection effectiveExcludes( + Collection excludes, final String[] includes, final boolean useDefaultExcludes) { + if (excludes == null || excludes.isEmpty()) { + if (useDefaultExcludes) { + excludes = new ArrayList<>(DEFAULT_EXCLUDES); + } else { + return List.of(); + } + } else { + excludes = new ArrayList<>(excludes); + if (useDefaultExcludes) { + excludes.addAll(DEFAULT_EXCLUDES); + } + } + /* + * Get the prefixes and suffixes of all includes, stopping at the first special character. + * Redundant prefixes and suffixes are omitted. + */ + var prefixes = new String[includes.length]; + var suffixes = new String[includes.length]; + for (int i = 0; i < includes.length; i++) { + String include = includes[i]; + if (!include.startsWith(DEFAULT_SYNTAX)) { + return excludes; // Do not filter if at least one pattern is too complicated. + } + include = include.substring(DEFAULT_SYNTAX.length()); + prefixes[i] = prefixOrSuffix(include, false); + suffixes[i] = prefixOrSuffix(include, true); + } + prefixes = sortByLength(prefixes, false); + suffixes = sortByLength(suffixes, true); + /* + * Keep only the exclude which start with one of the prefixes and end with one of the suffixes. 
+ * Note that a prefix or suffix may be the empty string, which match everything. + */ + final Iterator it = excludes.iterator(); + nextExclude: + while (it.hasNext()) { + final String exclude = it.next(); + final int s = exclude.indexOf(':'); + if (s <= MAVEN_SYNTAX_THRESHOLD || exclude.startsWith(DEFAULT_SYNTAX)) { + if (cannotMatch(exclude, prefixes, false) || cannotMatch(exclude, suffixes, true)) { + it.remove(); + } + } + } + return excludes; + } + + /** + * Returns the maximal amount of ordinary characters at the beginning or end of the given pattern. + * The prefix or suffix stops at the first {@linkplain #SPECIAL_CHARACTERS special character}. + * + * @param include the pattern for which to get a prefix or suffix without special character + * @param suffix {@code false} if a prefix is desired, or {@code true} if a suffix is desired + */ + private static String prefixOrSuffix(final String include, boolean suffix) { + int s = suffix ? -1 : include.length(); + for (int i = SPECIAL_CHARACTERS.length(); --i >= 0; ) { + char c = SPECIAL_CHARACTERS.charAt(i); + if (suffix) { + s = Math.max(s, include.lastIndexOf(c)); + } else { + int p = include.indexOf(c); + if (p >= 0 && p < s) { + s = p; + } + } + } + return suffix ? include.substring(s + 1) : include.substring(0, s); + } + + /** + * Returns {@code true} if the given exclude cannot match any include patterns. + * In case of doubt, returns {@code false}. 
+ * + * @param exclude the exclude pattern to test + * @param fragments the prefixes or suffixes (fragments without special characters) of the includes + * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes + * @return {@code true} if it is certain that the exclude pattern cannot match, or {@code false} in case of doubt + */ + private static boolean cannotMatch(String exclude, final String[] fragments, final boolean suffix) { + exclude = prefixOrSuffix(exclude, suffix); + for (String fragment : fragments) { + int fg = fragment.length(); + int ex = exclude.length(); + int length = Math.min(fg, ex); + if (suffix) { + fg -= length; + ex -= length; + } else { + fg = 0; + ex = 0; + } + if (exclude.regionMatches(ex, fragment, fg, length)) { + return false; + } + } + return true; + } + + /** + * Sorts the given patterns by their length. The main intent is to have the empty string first, + * while will cause the loops testing for prefixes and suffixes to stop almost immediately. + * Short prefixes or suffixes are also more likely to be matched. + * + * @param fragments the fragments to sort in-place + * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes + * @return the given array, or a smaller array if some fragments were discarded because redundant + */ + private static String[] sortByLength(final String[] fragments, final boolean suffix) { + Arrays.sort(fragments, (s1, s2) -> s1.length() - s2.length()); + int count = 0; + /* + * Simplify the array of prefixes or suffixes by removing all redundant elements. + * An element is redundant if there is a shorter prefix or suffix with the same characters. + */ + nextBase: + for (String fragment : fragments) { + for (int i = count; --i >= 0; ) { + String base = fragments[i]; + if (suffix ? 
fragment.endsWith(base) : fragment.startsWith(base)) { + continue nextBase; // Skip this fragment + } + } + fragments[count++] = fragment; + } + return (fragments.length == count) ? fragments : Arrays.copyOf(fragments, count); + } + + /** + * Returns the given array of patterns with path separator normalized to {@code '/'}. + * Null or empty patterns are ignored, and duplications are removed. + * + * @param patterns the patterns to normalize + * @param excludes whether the patterns are exclude patterns + * @return normalized patterns without null, empty or duplicated patterns + */ + private static String[] normalizePatterns(final Collection patterns, final boolean excludes) { + if (patterns == null || patterns.isEmpty()) { + return new String[0]; + } + // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. + final var normalized = new LinkedHashSet(patterns.size()); + for (String pattern : patterns) { + if (pattern != null && !pattern.isEmpty()) { + if (pattern.indexOf(':') <= MAVEN_SYNTAX_THRESHOLD) { + pattern = pattern.replace(File.separatorChar, '/'); + if (pattern.endsWith("/")) { + pattern += "**"; + } + // Following are okay only when "**" means "0 or more directories". + while (pattern.endsWith("/**/**")) { + pattern = pattern.substring(0, pattern.length() - 3); + } + while (pattern.startsWith("**/**/")) { + pattern = pattern.substring(3); + } + pattern = pattern.replace("/**/**/", "/**/"); + pattern = pattern.replace("\\", "\\\\") + .replace("[", "\\[") + .replace("]", "\\]") + .replace("{", "\\{") + .replace("}", "\\}"); + normalized.add(DEFAULT_SYNTAX + pattern); + /* + * If the pattern starts or ends with "**", Java GLOB expects a directory level at + * that location while Maven seems to consider that "**" can mean "no directory". + * Add another pattern for reproducing this effect. 
+ */ + addPatternsWithOneDirRemoved(normalized, pattern, 0); + } else { + normalized.add(pattern); + } + } + } + return simplify(normalized, excludes); + } + + /** + * Adds all variants of the given pattern with {@code **} removed. + * This is used for simulating the Maven behavior where {@code "**} may match zero directory. + * Tests suggest that we need an explicit GLOB pattern with no {@code "**"} for matching an absence of directory. + * + * @param patterns where to add the derived patterns + * @param pattern the pattern for which to add derived forms, without the "glob:" syntax prefix + * @param end should be 0 (reserved for recursive invocations of this method) + */ + private static void addPatternsWithOneDirRemoved(final Set patterns, final String pattern, int end) { + final int length = pattern.length(); + int start; + while ((start = pattern.indexOf("**", end)) >= 0) { + end = start + 2; // 2 is the length of "**". + if (end < length) { + if (pattern.charAt(end) != '/') { + continue; + } + if (start == 0) { + end++; // Ommit the leading slash if there is nothing before it. + } + } + if (start > 0) { + if (pattern.charAt(--start) != '/') { + continue; + } + } + String reduced = pattern.substring(0, start) + pattern.substring(end); + patterns.add(DEFAULT_SYNTAX + reduced); + addPatternsWithOneDirRemoved(patterns, reduced, start); + } + } + + /** + * Applies some heuristic rules for simplifying the set of patterns, + * then returns the patterns as an array. + * + * @param patterns the patterns to simplify and return asarray + * @param excludes whether the patterns are exclude patterns + * @return the set content as an array, after simplification + */ + private static String[] simplify(Set patterns, boolean excludes) { + /* + * If the "**" pattern is present, it makes all other patterns useless. + * In the case of include patterns, an empty set means to include everything. 
+ */ + if (patterns.remove("**")) { + patterns.clear(); + if (excludes) { + patterns.add("**"); + } + } + return patterns.toArray(String[]::new); + } + + /** + * Eventually adds the parent directory of the given patterns, without duplicated values. + * The patterns given to this method should have been normalized. + * + * @param patterns the normalized include or exclude patterns + * @param excludes whether the patterns are exclude patterns + * @return pattens of directories to include or exclude + */ + private static String[] directoryPatterns(final String[] patterns, final boolean excludes) { + // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. + final var directories = new LinkedHashSet(patterns.length); + for (String pattern : patterns) { + if (pattern.startsWith(DEFAULT_SYNTAX)) { + if (excludes) { + if (pattern.endsWith("/**")) { + directories.add(pattern.substring(0, pattern.length() - 3)); + } + } else { + int s = pattern.indexOf(':'); + if (pattern.regionMatches(++s, "**/", 0, 3)) { + s = pattern.indexOf('/', s + 3); + if (s < 0) { + return new String[0]; // Pattern is "**", so we need to accept everything. + } + directories.add(pattern.substring(0, s)); + } + } + } + } + return simplify(directories, excludes); + } + + /** + * Creates the path matchers for the given patterns. + * The syntax (usually {@value #DEFAULT_SYNTAX}) must be specified for each pattern. + */ + private static PathMatcher[] matchers(final FileSystem fs, final String[] patterns) { + final var matchers = new PathMatcher[patterns.length]; + for (int i = 0; i < patterns.length; i++) { + matchers[i] = fs.getPathMatcher(patterns[i]); + } + return matchers; + } + + /** + * {@return a potentially simpler matcher equivalent to this matcher}. 
+ */ + @SuppressWarnings("checkstyle:MissingSwitchDefault") + public PathMatcher simplify() { + if (excludes.length == 0) { + switch (includes.length) { + case 0: + return INCLUDES_ALL; + case 1: + return includes[0]; + } + } + return this; + } + + /** + * Determines whether a path is selected. + * This is true if the given file matches an include pattern and no exclude pattern. + * + * @param path the pathname to test, must not be {@code null} + * @return {@code true} if the given path is selected, {@code false} otherwise + */ + @Override + public boolean matches(Path path) { + path = baseDirectory.relativize(path); + return (includes.length == 0 || isMatched(path, includes)) + && (excludes.length == 0 || !isMatched(path, excludes)); + } + + /** + * {@return whether the given file matches according to one of the given matchers}. + */ + private static boolean isMatched(Path path, PathMatcher[] matchers) { + for (PathMatcher matcher : matchers) { + if (matcher.matches(path)) { + return true; + } + } + return false; + } + + /** + * Determines whether a directory could contain selected paths. + * + * @param directory the directory pathname to test, must not be {@code null} + * @return {@code true} if the given directory might contain selected paths, {@code false} if the + * directory will definitively not contain selected paths + */ + public boolean couldHoldSelected(Path directory) { + if (baseDirectory.equals(directory)) { + return true; + } + directory = baseDirectory.relativize(directory); + return (dirIncludes.length == 0 || isMatched(directory, dirIncludes)) + && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes)); + } + + /** + * Appends the elements of the given array in the given buffer. + * This is a helper method for {@link #toString()} implementations. 
+ * + * @param buffer the buffer to add the elements to + * @param label label identifying the array of elements to add + * @param patterns the elements to append, or {@code null} if none + */ + private static void append(StringBuilder buffer, String label, String[] patterns) { + buffer.append(label).append(": ["); + if (patterns != null) { + for (int i = 0; i < patterns.length; i++) { + if (i != 0) { + buffer.append(", "); + } + buffer.append(patterns[i]); + } + } + buffer.append(']'); + } + + /** + * {@return a string representation for logging purposes}. + */ + @Override + public String toString() { + var buffer = new StringBuilder(); + append(buffer, "includes", includePatterns); + append(buffer.append(", "), "excludes", excludePatterns); + return buffer.toString(); + } +} diff --git a/impl/maven-impl/src/test/java/org/apache/maven/impl/PathSelectorTest.java b/impl/maven-impl/src/test/java/org/apache/maven/impl/PathSelectorTest.java new file mode 100644 index 000000000000..81a971371993 --- /dev/null +++ b/impl/maven-impl/src/test/java/org/apache/maven/impl/PathSelectorTest.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.maven.impl; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class PathSelectorTest { + /** + * The temporary directory containing the files to test. + */ + private Path directory; + + /** + * The filtered set of paths. Created by {@link #filter()}. + */ + private Set filtered; + + /** + * Creates a temporary directory and checks its list of content based on patterns. + * + * @param tempDir temporary directory where to create a tree + * @throws IOException if an error occurred while creating a temporary file or directory + */ + @Test + public void testTree(final @TempDir Path tempDir) throws IOException { + directory = tempDir; + Path foo = Files.createDirectory(tempDir.resolve("foo")); + Path bar = Files.createDirectory(foo.resolve("bar")); + Path biz = Files.createDirectory(tempDir.resolve("biz")); + Files.createFile(tempDir.resolve("root.txt")); + Files.createFile(bar.resolve("leaf.txt")); + Files.createFile(biz.resolve("excluded.txt")); + + filter(""); + assertFilteredFilesContains("root.txt"); + assertFilteredFilesContains("foo/bar/leaf.txt"); + assertTrue(filtered.isEmpty(), filtered.toString()); + + filter("glob:"); + assertFilteredFilesContains("foo/bar/leaf.txt"); + assertTrue(filtered.isEmpty(), filtered.toString()); + } + + /** + * Creates the filtered paths in a modifiable set. + * The result is assigned to {@link #filtered}. + * + * @param syntax syntax to test, either an empty string of {@code "glob:"} + * @throws IOException if an error occurred while listing the files. 
+ */ + private void filter(final String syntax) throws IOException { + var includes = List.of(syntax + "**/*.txt"); + var excludes = List.of(syntax + "biz/**"); + var matcher = new PathSelector(directory, includes, excludes, false); + filtered = new HashSet<>(Files.walk(directory).filter(matcher::matches).toList()); + } + + /** + * Asserts that the filtered set of paths contains the given item. + * If present, the path is removed from the collection of filtered files. + * It allows caller to verify that there are no unexpected elements remaining + * after all expected elements have been removed. + * + * @param path the path to test + */ + private void assertFilteredFilesContains(String path) { + assertTrue(filtered.remove(directory.resolve(path)), path); + } + + /** + * Tests the omission of unnecessary excludes. + * + * Note: at the time of writing this test (April 2025), the list of excludes go down from 40 to 17 elements. + * This is not bad, but we could do better with, for example, a special treatment of the excludes that are + * for excluding an entire directory. + */ + @Test + public void testExcludeOmission() { + directory = Path.of("dummy"); + var includes = List.of("**/*.java"); + var excludes = List.of("biz/**"); + var matcher = new PathSelector(directory, includes, excludes, true); + String s = matcher.toString(); + assertTrue(s.contains("glob:**/*.java")); + assertFalse(s.contains("project.pj")); // Unnecessary exclusion should have been omitted. + assertFalse(s.contains(".DS_Store")); + } +}