Skip to content

Commit 024e2a5

Browse files
authored
PipReport back to experimental, add pre-generated PipReport parsing (#1201)
* revert experiment graduation, bump threads, and enable fast deps * put reqs back * add ability for pip to detect pregenerated reports with a specific naming scheme * better directory handling * improve logging
1 parent dd3f531 commit 024e2a5

File tree

17 files changed

+10159
-36
lines changed

17 files changed

+10159
-36
lines changed

docs/detectors/pip.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ Serialization specifications:
1919
- https://peps.python.org/pep-0508/
2020
- https://peps.python.org/pep-0301/
2121

22+
The detector can also pick up installation reports that have already been generated in the same directory as the `setup.py` or `requirements.txt` files,
23+
as long as the report file is named `component-detection-pip-report.json` or matches `*.component-detection-pip-report.json`. When such a report is found, it is parsed instead of invoking pip to generate a new one.
24+
2225
### Legacy Detection (PipDetector, SimplePipDetector)
2326

2427
Pip detection is performed by running the following code snippet on every *setup.py*:
@@ -60,3 +63,7 @@ The environment variable `PipReportOverrideBehavior` is used to override pip rep
6063
- `SourceCodeScan`: Scan `setup.py` and `requirements.txt` files, and record components explicitly from the package files without hitting a remote feed. Does not compile a dependency graph.
6164

6265
The environment variable `PipReportSkipFallbackOnFailure` is used to skip the default fallback behavior if pip report fails. The default fallback scans `setup.py` and `requirements.txt` files and records components explicitly from the package files without hitting a remote feed. It does not compile a dependency graph.
66+
67+
The environment variable `PipReportFileLevelTimeoutSeconds` sets the timeout, in seconds, for generating the pip report for an individual file. If it is unset or is not a valid integer, only the overall timeout applies.
68+
69+
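The environment variable `PipReportDisableFastDeps` is used to disable the fast deps feature in PipReport. By default the detector passes `--use-feature=fast-deps` to pip; set this variable to `true` to omit that flag.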

src/Microsoft.ComponentDetection.Contracts/ScanRequest.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class ScanRequest
1919
/// <param name="imagesToScan">Container images to scan.</param>
2020
/// <param name="componentRecorder">Detector component recorder.</param>
2121
/// <param name="maxThreads">Max number of threads to use for detection.</param>
22-
public ScanRequest(DirectoryInfo sourceDirectory, ExcludeDirectoryPredicate directoryExclusionPredicate, ILogger logger, IDictionary<string, string> detectorArgs, IEnumerable<string> imagesToScan, IComponentRecorder componentRecorder, int maxThreads = 3)
22+
public ScanRequest(DirectoryInfo sourceDirectory, ExcludeDirectoryPredicate directoryExclusionPredicate, ILogger logger, IDictionary<string, string> detectorArgs, IEnumerable<string> imagesToScan, IComponentRecorder componentRecorder, int maxThreads = 5)
2323
{
2424
this.SourceDirectory = sourceDirectory;
2525
this.DirectoryExclusionPredicate = directoryExclusionPredicate;

src/Microsoft.ComponentDetection.Detectors/pip/PipCommandService.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
1313

1414
public class PipCommandService : IPipCommandService
1515
{
16+
private const string PipReportDisableFastDepsEnvVar = "PipReportDisableFastDeps";
17+
1618
private readonly ICommandLineInvocationService commandLineInvocationService;
1719
private readonly IPathUtilityService pathUtilityService;
1820
private readonly IFileUtilityService fileUtilityService;
@@ -127,6 +129,11 @@ private async Task<bool> CanCommandBeLocatedAsync(string pipPath)
127129
pipReportCommand += $" --index-url {this.environmentService.GetEnvironmentVariable("PIP_INDEX_URL")}";
128130
}
129131

132+
if (!this.environmentService.DoesEnvironmentVariableExist(PipReportDisableFastDepsEnvVar) || !this.environmentService.IsEnvironmentVariableValueTrue(PipReportDisableFastDepsEnvVar))
133+
{
134+
pipReportCommand += $" --use-feature=fast-deps";
135+
}
136+
130137
this.logger.LogDebug("PipReport: Generating pip installation report for {Path} with command: {Command}", formattedPath, pipReportCommand.RemoveSensitiveInformation());
131138
command = await this.commandLineInvocationService.ExecuteCommandAsync(
132139
pipExecutable,

src/Microsoft.ComponentDetection.Detectors/pip/PipComponentDetector.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
1111
using Microsoft.ComponentDetection.Contracts.TypedComponent;
1212
using Microsoft.Extensions.Logging;
1313

14-
public class PipComponentDetector : FileComponentDetector, IDefaultOffComponentDetector
14+
public class PipComponentDetector : FileComponentDetector
1515
{
1616
private readonly IPythonCommandService pythonCommandService;
1717
private readonly IPythonResolver pythonResolver;
@@ -38,7 +38,7 @@ public PipComponentDetector(
3838

3939
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };
4040

41-
public override int Version { get; } = 11;
41+
public override int Version { get; } = 12;
4242

4343
protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
4444
{

src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs

Lines changed: 102 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,16 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
1313
using Microsoft.ComponentDetection.Contracts.Internal;
1414
using Microsoft.ComponentDetection.Contracts.TypedComponent;
1515
using Microsoft.Extensions.Logging;
16+
using Newtonsoft.Json;
1617

17-
public class PipReportComponentDetector : FileComponentDetector
18+
public class PipReportComponentDetector : FileComponentDetector, IExperimentalDetector
1819
{
20+
// environment variables
1921
private const string PipReportOverrideBehaviorEnvVar = "PipReportOverrideBehavior";
2022
private const string PipReportSkipFallbackOnFailureEnvVar = "PipReportSkipFallbackOnFailure";
23+
private const string PipReportFileLevelTimeoutSecondsEnvVar = "PipReportFileLevelTimeoutSeconds";
24+
25+
private static readonly IList<string> PipReportPreGeneratedFilePatterns = new List<string> { "*.component-detection-pip-report.json", "component-detection-pip-report.json" };
2126

2227
/// <summary>
2328
/// The maximum version of the report specification that this detector can handle.
@@ -33,6 +38,7 @@ public class PipReportComponentDetector : FileComponentDetector
3338
private readonly IEnvironmentVariableService envVarService;
3439
private readonly IPythonCommandService pythonCommandService;
3540
private readonly IPythonResolver pythonResolver;
41+
private readonly IFileUtilityService fileUtilityService;
3642

3743
public PipReportComponentDetector(
3844
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
@@ -41,6 +47,7 @@ public PipReportComponentDetector(
4147
IEnvironmentVariableService envVarService,
4248
IPythonCommandService pythonCommandService,
4349
IPythonResolver pythonResolver,
50+
IFileUtilityService fileUtilityService,
4451
ILogger<PipReportComponentDetector> logger)
4552
{
4653
this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
@@ -49,6 +56,7 @@ public PipReportComponentDetector(
4956
this.envVarService = envVarService;
5057
this.pythonCommandService = pythonCommandService;
5158
this.pythonResolver = pythonResolver;
59+
this.fileUtilityService = fileUtilityService;
5260
this.Logger = logger;
5361
}
5462

@@ -67,7 +75,7 @@ private enum PipReportOverrideBehavior
6775

6876
public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };
6977

70-
public override int Version { get; } = 4;
78+
public override int Version { get; } = 5;
7179

7280
protected override bool EnableParallelism { get; set; } = true;
7381

@@ -117,7 +125,7 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
117125
var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
118126
var file = processRequest.ComponentStream;
119127

120-
FileInfo reportFile = null;
128+
List<FileInfo> reportFiles = new();
121129
try
122130
{
123131
var pipOverride = this.GetPipReportOverrideBehavior();
@@ -150,44 +158,103 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
150158
}
151159

152160
var stopwatch = Stopwatch.StartNew();
153-
this.Logger.LogInformation("PipReport: Generating pip installation report for {File}", file.Location);
154161

155-
// Call pip executable to generate the installation report of a given project file.
156-
(var report, reportFile) = await this.pipCommandService.GenerateInstallationReportAsync(file.Location, pipExePath, cancellationToken);
162+
// Search for a pre-generated pip report file in the same directory as the file being scanned.
163+
var fileParentDirectory = Path.GetDirectoryName(file.Location);
164+
if (fileParentDirectory is null)
165+
{
166+
this.Logger.LogWarning("PipReport: Unable to determine parent directory for {File}.", file.Location);
167+
return;
168+
}
169+
170+
var fileParentDirectoryInfo = Directory.Exists(fileParentDirectory)
171+
? new DirectoryInfo(fileParentDirectory)
172+
: null;
157173

158-
// The report version is used to determine how to parse the report. If it is greater
159-
// than the maximum supported version, there may be new fields and the parsing will fail.
160-
if (!int.TryParse(report.Version, out var reportVersion) || reportVersion > MaxReportVersion.Major)
174+
List<FileInfo> preGeneratedReportFiles = null;
175+
if (fileParentDirectoryInfo is not null)
161176
{
162-
this.Logger.LogWarning(
163-
"PipReport: The pip installation report version {ReportVersion} is not supported. The maximum supported version is {MaxVersion}.",
164-
report.Version,
165-
MaxReportVersion);
177+
preGeneratedReportFiles = PipReportPreGeneratedFilePatterns
178+
.SelectMany(pattern => fileParentDirectoryInfo.GetFiles(pattern))
179+
.Where(file => File.Exists(file.FullName))
180+
.ToList();
181+
}
182+
183+
List<PipInstallationReport> reports = new();
184+
if (preGeneratedReportFiles is not null && preGeneratedReportFiles.Any())
185+
{
186+
this.Logger.LogInformation("PipReport: Found pre-generated pip report(s) for {File}.", file.Location);
166187

167-
using var versionRecord = new InvalidParseVersionTelemetryRecord
188+
foreach (var existingReport in preGeneratedReportFiles)
168189
{
169-
DetectorId = this.Id,
170-
FilePath = file.Location,
171-
Version = report.Version,
172-
MaxVersion = MaxReportVersion.ToString(),
173-
};
190+
this.Logger.LogInformation("PipReport: Using pre-generated pip report '{ReportFile}' for package file '{File}'.", existingReport.FullName, file.Location);
191+
var reportOutput = await this.fileUtilityService.ReadAllTextAsync(existingReport);
192+
var report = JsonConvert.DeserializeObject<PipInstallationReport>(reportOutput);
193+
reports.Add(report);
194+
}
195+
}
196+
else
197+
{
198+
this.Logger.LogInformation("PipReport: Generating pip installation report for {File}", file.Location);
174199

175-
return;
200+
// create linked cancellation token that will cancel if the file level timeout is reached, or if the parent token is cancelled.
201+
// default to only using parent token if the env var is not set or is invalid
202+
var childCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
203+
if (this.envVarService.DoesEnvironmentVariableExist(PipReportFileLevelTimeoutSecondsEnvVar)
204+
&& int.TryParse(this.envVarService.GetEnvironmentVariable(PipReportFileLevelTimeoutSecondsEnvVar), out var timeoutSeconds))
205+
{
206+
childCts.CancelAfter(TimeSpan.FromSeconds(timeoutSeconds));
207+
}
208+
209+
// Call pip executable to generate the installation report of a given project file.
210+
(var report, var reportFile) = await this.pipCommandService.GenerateInstallationReportAsync(file.Location, pipExePath, childCts.Token);
211+
reports.Add(report);
212+
reportFiles.Add(reportFile);
176213
}
177214

178-
stopwatch.Stop();
179-
this.Logger.LogInformation(
180-
"PipReport: Generating pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages.",
181-
file.Location,
182-
stopwatch.ElapsedMilliseconds / 1000.0,
183-
report.InstallItems?.Length ?? 0);
215+
if (!reports.Any())
216+
{
217+
this.Logger.LogWarning("PipReport: Failed to generate or find pip installation report for {File}.", file.Location);
218+
return;
219+
}
184220

185-
// Now that all installed packages are known, we can build a graph of the dependencies.
186-
if (report.InstallItems is not null)
221+
foreach (var report in reports)
187222
{
188-
var graph = this.BuildGraphFromInstallationReport(report);
189-
this.RecordComponents(singleFileComponentRecorder, graph);
223+
// The report version is used to determine how to parse the report. If it is greater
224+
// than the maximum supported version, there may be new fields and the parsing will fail.
225+
if (!int.TryParse(report.Version, out var reportVersion) || reportVersion > MaxReportVersion.Major)
226+
{
227+
this.Logger.LogWarning(
228+
"PipReport: The pip installation report version {ReportVersion} is not supported. The maximum supported version is {MaxVersion}.",
229+
report.Version,
230+
MaxReportVersion);
231+
232+
using var versionRecord = new InvalidParseVersionTelemetryRecord
233+
{
234+
DetectorId = this.Id,
235+
FilePath = file.Location,
236+
Version = report.Version,
237+
MaxVersion = MaxReportVersion.ToString(),
238+
};
239+
240+
return;
241+
}
242+
243+
this.Logger.LogInformation(
244+
"PipReport: Pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages.",
245+
file.Location,
246+
stopwatch.ElapsedMilliseconds / 1000.0,
247+
report.InstallItems?.Length ?? 0);
248+
249+
// Now that all installed packages are known, we can build a graph of the dependencies.
250+
if (report.InstallItems is not null)
251+
{
252+
var graph = this.BuildGraphFromInstallationReport(report);
253+
this.RecordComponents(singleFileComponentRecorder, graph);
254+
}
190255
}
256+
257+
stopwatch.Stop();
191258
}
192259
catch (Exception e)
193260
{
@@ -211,9 +278,12 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
211278
finally
212279
{
213280
// Clean up the report output JSON file so it isn't left on the machine.
214-
if (reportFile is not null && reportFile.Exists)
281+
foreach (var reportFile in reportFiles)
215282
{
216-
reportFile.Delete();
283+
if (reportFile is not null && reportFile.Exists)
284+
{
285+
reportFile.Delete();
286+
}
217287
}
218288
}
219289
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
namespace Microsoft.ComponentDetection.Orchestrator.Experiments.Configs;
2+
3+
using Microsoft.ComponentDetection.Contracts;
4+
using Microsoft.ComponentDetection.Detectors.Pip;
5+
6+
/// <summary>
7+
/// Experiment configuration that compares the <see cref="PipComponentDetector"/> (control group) against the <see cref="PipReportComponentDetector"/> (experiment group); <c>ShouldRecord</c> always returns true.
8+
/// </summary>
9+
public class PipReportExperiment : IExperimentConfiguration
10+
{
11+
public string Name => "PipReport";
12+
13+
public bool IsInControlGroup(IComponentDetector componentDetector) => componentDetector is PipComponentDetector;
14+
15+
public bool IsInExperimentGroup(IComponentDetector componentDetector) => componentDetector is PipReportComponentDetector;
16+
17+
public bool ShouldRecord(IComponentDetector componentDetector, int numComponents) => true;
18+
}

src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
6464
services.AddSingleton<IExperimentConfiguration, RustCliDetectorExperiment>();
6565
services.AddSingleton<IExperimentConfiguration, VcpkgExperiment>();
6666
services.AddSingleton<IExperimentConfiguration, GoDetectorReplaceExperiment>();
67+
services.AddSingleton<IExperimentConfiguration, PipReportExperiment>();
6768

6869
// Detectors
6970
// CocoaPods

src/Microsoft.ComponentDetection.Orchestrator/Services/DetectorProcessingService.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ namespace Microsoft.ComponentDetection.Orchestrator.Services;
2323

2424
public class DetectorProcessingService : IDetectorProcessingService
2525
{
26-
private const int DefaultMaxDetectionThreads = 3;
26+
private const int DefaultMaxDetectionThreads = 5;
2727
private const int ExperimentalTimeoutSeconds = 240; // 4 minutes
2828
private const int ProcessTimeoutBufferSeconds = 5;
2929

test/Microsoft.ComponentDetection.Detectors.Tests/Microsoft.ComponentDetection.Detectors.Tests.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
<None Update="Mocks\pip_report_single_pkg_bad_version.json">
4747
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
4848
</None>
49+
<None Update="Mocks\test.component-detection-pip-report.json">
50+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
51+
</None>
4952
</ItemGroup>
5053

5154
</Project>

0 commit comments

Comments
 (0)