Skip to content

Commit

Permalink
Add logs to MvnCLI and use dictionaries to improve perf on large repos (
Browse files Browse the repository at this point in the history
#1213)

* Add logs to MvnCLI and use dictionaries to improve perf on large repos
* Add cancellation token to MvnCLI command
  • Loading branch information
grvillic authored Jul 30, 2024
1 parent 80cff26 commit 80146ce
Show file tree
Hide file tree
Showing 15 changed files with 265 additions and 52 deletions.
4 changes: 4 additions & 0 deletions docs/detectors/maven.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,7 @@ Full dependency graph generation is supported.
## Known limitations

Maven detection will not run if `mvn` is unavailable.

## Environment Variables

The environment variable `MvnCLIFileLevelTimeoutSeconds` is used to control the max execution time Mvn CLI is allowed to take per each `pom.xml` file. Default value, unbounded. This will restrict any spikes in scanning time caused by Mvn CLI during package restore. We suggest to restore the Maven packages beforehand, so that no network calls happen when executing "mvn dependency:tree" and the graph is captured quickly.
4 changes: 4 additions & 0 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,7 @@ the given configurations are considered development dependencies.
If an entire lockfile will contain only dev dependencies, see `CD_GRADLE_DEV_LOCKFILES` above.

[1]: https://go.dev/ref/mod#go-mod-graph

## `MvnCLIFileLevelTimeoutSeconds`

When set to any positive integer value, it controls the max execution time Mvn CLI is allowed to take per each `pom.xml` file. Default behavior is unbounded.
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Microsoft.ComponentDetection.Common;

using System.Collections.Generic;
using System.Diagnostics;

[DebuggerDisplay("{Name}")]
public class DirectoryItemFacadeOptimized
{
public string Name { get; set; }

public HashSet<string> FileNames { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ private async Task<IndividualDetectorScanResult> ProcessAsync(IObservable<Proces
MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1,
});

var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs);
var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs, cancellationToken);

await preprocessedObserbable.ForEachAsync(processRequest => processor.Post(processRequest));

Expand All @@ -135,7 +135,7 @@ private async Task<IndividualDetectorScanResult> ProcessAsync(IObservable<Proces
};
}

protected virtual Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
protected virtual Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
return Task.FromResult(processRequests);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ public GoComponentDetector(
public override int Version => 7;

protected override Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(
IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
IObservable<ProcessRequest> processRequests,
IDictionary<string, string> detectorArgs,
CancellationToken cancellationToken = default)
{
// Filter out any go.sum process requests if the adjacent go.mod file is present and has a go version >= 1.17
var goModProcessRequests = processRequests.Where(processRequest =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ public GoComponentWithReplaceDetector(
public override int Version => 1;

protected override Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(
IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
IObservable<ProcessRequest> processRequests,
IDictionary<string, string> detectorArgs,
CancellationToken cancellationToken = default)
{
// Filter out any go.sum process requests if the adjacent go.mod file is present and has a go version >= 1.17
var goModProcessRequests = processRequests.Where(processRequest =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ public IvyDetector(

public override IEnumerable<string> Categories => new[] { Enum.GetName(typeof(DetectorClass), DetectorClass.Maven) };

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(
IObservable<ProcessRequest> processRequests,
IDictionary<string, string> detectorArgs,
CancellationToken cancellationToken = default)
{
if (await this.IsAntLocallyAvailableAsync())
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
namespace Microsoft.ComponentDetection.Detectors.Maven;
namespace Microsoft.ComponentDetection.Detectors.Maven;

using System.Threading;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts.Internal;

Expand All @@ -9,7 +10,7 @@ public interface IMavenCommandService

Task<bool> MavenCLIExistsAsync();

Task GenerateDependenciesFileAsync(ProcessRequest processRequest);
Task GenerateDependenciesFileAsync(ProcessRequest processRequest, CancellationToken cancellationToken = default);

void ParseDependenciesFile(ProcessRequest processRequest);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ namespace Microsoft.ComponentDetection.Detectors.Maven;

using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Contracts.Internal;
using Microsoft.Extensions.Logging;

public class MavenCommandService : IMavenCommandService
{
private const string DetectorLogPrefix = "MvnCli detector";
internal const string MvnCLIFileLevelTimeoutSecondsEnvVar = "MvnCLIFileLevelTimeoutSeconds";
internal const string PrimaryCommand = "mvn";

internal const string MvnVersionArgument = "--version";
Expand All @@ -17,15 +20,18 @@ public class MavenCommandService : IMavenCommandService

private readonly ICommandLineInvocationService commandLineInvocationService;
private readonly IMavenStyleDependencyGraphParserService parserService;
private readonly IEnvironmentVariableService envVarService;
private readonly ILogger<MavenCommandService> logger;

public MavenCommandService(
ICommandLineInvocationService commandLineInvocationService,
IMavenStyleDependencyGraphParserService parserService,
IEnvironmentVariableService envVarService,
ILogger<MavenCommandService> logger)
{
this.commandLineInvocationService = commandLineInvocationService;
this.parserService = parserService;
this.envVarService = envVarService;
this.logger = logger;
}

Expand All @@ -36,16 +42,45 @@ public async Task<bool> MavenCLIExistsAsync()
return await this.commandLineInvocationService.CanCommandBeLocatedAsync(PrimaryCommand, AdditionalValidCommands, MvnVersionArgument);
}

public async Task GenerateDependenciesFileAsync(ProcessRequest processRequest)
public async Task GenerateDependenciesFileAsync(ProcessRequest processRequest, CancellationToken cancellationToken = default)
{
var cliFileTimeout = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
var timeoutSeconds = -1;
if (this.envVarService.DoesEnvironmentVariableExist(MvnCLIFileLevelTimeoutSecondsEnvVar)
&& int.TryParse(this.envVarService.GetEnvironmentVariable(MvnCLIFileLevelTimeoutSecondsEnvVar), out timeoutSeconds)
&& timeoutSeconds >= 0)
{
cliFileTimeout.CancelAfter(TimeSpan.FromSeconds(timeoutSeconds));
this.logger.LogInformation("{DetectorPrefix}: {TimeoutVar} var was set to {TimeoutSeconds} seconds.", DetectorLogPrefix, MvnCLIFileLevelTimeoutSecondsEnvVar, timeoutSeconds);
}

var pomFile = processRequest.ComponentStream;
this.logger.LogDebug("{DetectorPrefix}: Running \"dependency:tree\" on {PomFileLocation}", DetectorLogPrefix, pomFile.Location);

var cliParameters = new[] { "dependency:tree", "-B", $"-DoutputFile={this.BcdeMvnDependencyFileName}", "-DoutputType=text", $"-f{pomFile.Location}" };
var result = await this.commandLineInvocationService.ExecuteCommandAsync(PrimaryCommand, AdditionalValidCommands, cliParameters);

var result = await this.commandLineInvocationService.ExecuteCommandAsync(PrimaryCommand, AdditionalValidCommands, cancellationToken: cliFileTimeout.Token, cliParameters);

if (result.ExitCode != 0)
{
this.logger.LogDebug("Mvn execution failed for pom file: {PomFileLocation}", pomFile.Location);
this.logger.LogError("Mvn output: {MvnStdErr}", string.IsNullOrEmpty(result.StdErr) ? result.StdOut : result.StdErr);
processRequest.SingleFileComponentRecorder.RegisterPackageParseFailure(pomFile.Location);
this.logger.LogDebug("{DetectorPrefix}: execution failed for pom file: {PomFileLocation}", DetectorLogPrefix, pomFile.Location);
var errorMessage = string.IsNullOrWhiteSpace(result.StdErr) ? result.StdOut : result.StdErr;
var isErrorMessagePopulated = !string.IsNullOrWhiteSpace(errorMessage);

if (isErrorMessagePopulated)
{
this.logger.LogError("Mvn output: {MvnStdErr}", errorMessage);
processRequest.SingleFileComponentRecorder.RegisterPackageParseFailure(pomFile.Location);
}

if (timeoutSeconds != -1 && cliFileTimeout.IsCancellationRequested)
{
this.logger.LogWarning("{DetectorPrefix}: There was a timeout in {PomFileLocation} file. Increase it using {TimeoutVar} environment variable.", DetectorLogPrefix, pomFile.Location, MvnCLIFileLevelTimeoutSecondsEnvVar);
}
}
else
{
this.logger.LogDebug("{DetectorPrefix}: Execution of \"dependency:tree\" on {PomFileLocation} completed successfully", DetectorLogPrefix, pomFile.Location);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
namespace Microsoft.ComponentDetection.Detectors.Maven;

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
Expand All @@ -17,6 +18,8 @@ namespace Microsoft.ComponentDetection.Detectors.Maven;

public class MvnCliComponentDetector : FileComponentDetector
{
private const string MavenManifest = "pom.xml";

private readonly IMavenCommandService mavenCommandService;

public MvnCliComponentDetector(
Expand All @@ -33,23 +36,28 @@ public MvnCliComponentDetector(

public override string Id => "MvnCli";

public override IList<string> SearchPatterns => new List<string> { "pom.xml" };
public override IList<string> SearchPatterns => new List<string> { MavenManifest };

public override IEnumerable<ComponentType> SupportedComponentTypes => new[] { ComponentType.Maven };

public override int Version => 3;
public override int Version => 4;

public override IEnumerable<string> Categories => new[] { Enum.GetName(typeof(DetectorClass), DetectorClass.Maven) };

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
private void LogDebugWithId(string message)
{
this.Logger.LogDebug("{DetectorId} detector: {Message}", this.Id, message);
}

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
if (!await this.mavenCommandService.MavenCLIExistsAsync())
{
this.Logger.LogDebug("Skipping maven detection as maven is not available in the local PATH.");
this.LogDebugWithId("Skipping maven detection as maven is not available in the local PATH.");
return Enumerable.Empty<ProcessRequest>().ToObservable();
}

var processPomFile = new ActionBlock<ProcessRequest>(this.mavenCommandService.GenerateDependenciesFileAsync);
var processPomFile = new ActionBlock<ProcessRequest>(x => this.mavenCommandService.GenerateDependenciesFileAsync(x, cancellationToken));

await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest =>
{
Expand All @@ -60,6 +68,8 @@ await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest =>

await processPomFile.Completion;

this.LogDebugWithId($"Nested {MavenManifest} files processed successfully, retrieving generated dependency graphs.");

return this.ComponentStreamEnumerableFactory.GetComponentStreams(this.CurrentScanRequest.SourceDirectory, new[] { this.mavenCommandService.BcdeMvnDependencyFileName }, this.CurrentScanRequest.DirectoryExclusionPredicate)
.Select(componentStream =>
{
Expand All @@ -76,7 +86,7 @@ await this.RemoveNestedPomXmls(processRequests).ForEachAsync(processRequest =>
Pattern = componentStream.Pattern,
},
SingleFileComponentRecorder = this.ComponentRecorder.CreateSingleFileComponentRecorder(
Path.Combine(Path.GetDirectoryName(componentStream.Location), "pom.xml")),
Path.Combine(Path.GetDirectoryName(componentStream.Location), MavenManifest)),
};
})
.ToObservable();
Expand All @@ -93,68 +103,57 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID

private IObservable<ProcessRequest> RemoveNestedPomXmls(IObservable<ProcessRequest> componentStreams)
{
var directoryItemFacades = new List<DirectoryItemFacade>();
var directoryItemFacadesByPath = new Dictionary<string, DirectoryItemFacade>();
var directoryItemFacades = new ConcurrentDictionary<string, DirectoryItemFacadeOptimized>(StringComparer.OrdinalIgnoreCase);
var topLevelDirectories = new ConcurrentDictionary<string, DirectoryItemFacadeOptimized>(StringComparer.OrdinalIgnoreCase);

return Observable.Create<ProcessRequest>(s =>
{
return componentStreams.Subscribe(
processRequest =>
{
var item = processRequest.ComponentStream;
var currentDir = item.Location;
DirectoryItemFacade last = null;
do
DirectoryItemFacadeOptimized last = null;
while (!string.IsNullOrWhiteSpace(currentDir))
{
currentDir = Path.GetDirectoryName(currentDir);

// We've reached the top / root
if (currentDir == null)
if (string.IsNullOrWhiteSpace(currentDir))
{
// If our last directory isn't in our list of top level nodes, it should be added. This happens for the first processed item and then subsequent times we have a new root (edge cases with multiple hard drives, for example)
if (!directoryItemFacades.Contains(last))
if (last != null && !topLevelDirectories.ContainsKey(last.Name))
{
directoryItemFacades.Add(last);
topLevelDirectories.TryAdd(last.Name, last);
}

this.LogDebugWithId($"Discovered {item.Location}.");

// If we got to the top without finding a directory that had a pom.xml on the way, we yield.
s.OnNext(processRequest);
break;
}

var directoryExisted = directoryItemFacadesByPath.TryGetValue(currentDir, out var current);
if (!directoryExisted)
var current = directoryItemFacades.GetOrAdd(currentDir, _ => new DirectoryItemFacadeOptimized
{
directoryItemFacadesByPath[currentDir] = current = new DirectoryItemFacade
{
Name = currentDir,
Files = new List<IComponentStream>(),
Directories = new List<DirectoryItemFacade>(),
};
}

// If we came from a directory, we add it to our graph.
if (last != null)
{
current.Directories.Add(last);
}
Name = currentDir,
FileNames = new HashSet<string>(),
});

// If we didn't come from a directory, it's because we're just getting started. Our current directory should include the file that led to it showing up in the graph.
else
if (last == null)
{
current.Files.Add(item);
current.FileNames.Add(Path.GetFileName(item.Location));
}

if (last != null && current.Files.FirstOrDefault(x => string.Equals(Path.GetFileName(x.Location), "pom.xml", StringComparison.OrdinalIgnoreCase)) != null)
if (last != null && current.FileNames.Contains(MavenManifest))
{
this.Logger.LogDebug("Ignoring pom.xml at {ChildPomXmlLocation}, as it has a parent pom.xml that will be processed at {ParentDirName}\\pom.xml .", item.Location, current.Name);
this.LogDebugWithId($"Ignoring {MavenManifest} at {item.Location}, as it has a parent {MavenManifest} that will be processed at {current.Name}\\{MavenManifest} .");
break;
}

last = current;
}

// Go all the way up
while (currentDir != null);
},
s.OnCompleted);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ protected abstract bool TryEnqueueFirstLevelDependencies(
TypedComponent parentComponent = null,
bool skipValidation = false);

protected override Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs) =>
protected override Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(
IObservable<ProcessRequest> processRequests,
IDictionary<string, string> detectorArgs,
CancellationToken cancellationToken = default) =>
Task.FromResult(this.RemoveNodeModuleNestedFiles(processRequests)
.Where(pr =>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ public PipComponentDetector(

public override int Version { get; } = 12;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(
IObservable<ProcessRequest> processRequests,
IDictionary<string, string> detectorArgs,
CancellationToken cancellationToken = default)
{
this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
if (!await this.pythonCommandService.PythonExistsAsync(pythonExePath))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ private enum PipReportOverrideBehavior

protected override bool EnableParallelism { get; set; } = true;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PipExePath", out var pipExePath);
if (!await this.pipCommandService.PipExistsAsync(pipExePath))
Expand Down
Loading

0 comments on commit 80146ce

Please sign in to comment.