From fb9423e93c792422485720923fd71294245f71e7 Mon Sep 17 00:00:00 2001 From: Paul Dorsch <107068277+pauld-msft@users.noreply.github.com> Date: Mon, 8 Jul 2024 12:39:27 -0400 Subject: [PATCH] PipReport option to fallback and parse roots from source (#1190) * add fallback logic to pipreport for cases where we shouldn't reach out to remote feed, and should parse source instead * add the manual detection as fallback if pip report fails * add option to skip or fallback to a source code scan * add docs and fix tests * remove fallback * add fallback back, and env var to allow for skipping fallback --- docs/detectors/pip.md | 6 + .../Contracts/PipDependencySpecification.cs | 33 +++++- .../pip/PipReportComponentDetector.cs | 97 ++++++++++++++- .../pip/PythonVersionUtilities.cs | 35 +++--- .../PipReportComponentDetectorTests.cs | 110 ++++++++++++++---- .../resources/pip/fallback/requirements.txt | 2 + 6 files changed, 235 insertions(+), 48 deletions(-) create mode 100644 test/Microsoft.ComponentDetection.VerificationTests/resources/pip/fallback/requirements.txt diff --git a/docs/detectors/pip.md b/docs/detectors/pip.md index af1585335..ad68a6fc7 100644 --- a/docs/detectors/pip.md +++ b/docs/detectors/pip.md @@ -54,3 +54,9 @@ The default value is 4096. The enviroment variable `PIP_INDEX_URL` is used to determine what package feed should be used for `pip install --report` detection. The default value will use the PyPi index unless pip defaults have been configured globally. + +The environment variable `PipReportOverrideBehavior` is used to override pip report with one of the following detection strategies. +- `Skip`: Will not run pip detection +- `SourceCodeScan`: Scan `setup.py` and `requirements.txt` files, and record components explicitly from the package files without hitting a remote feed. Does not compile a dependency graph. + +The environment variable `PipReportSkipFallbackOnFailure` is used to skip the default fallback behavior if pip report fails. 
Default behavior scans `setup.py` and `requirements.txt` files, and records components explicitly from the package files without hitting a remote feed. Does not compile a dependency graph. diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/Contracts/PipDependencySpecification.cs b/src/Microsoft.ComponentDetection.Detectors/pip/Contracts/PipDependencySpecification.cs index eab2f86bc..878b631bc 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/Contracts/PipDependencySpecification.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/Contracts/PipDependencySpecification.cs @@ -160,7 +160,7 @@ public bool PackageConditionsMet(Dictionary pythonEnvironmentVar continue; // If the variable isn't in the environment, we can't evaluate it. } - if (string.Equals(conditionalVar, "python_version", System.StringComparison.OrdinalIgnoreCase)) + if (string.Equals(conditionalVar, "python_version", StringComparison.OrdinalIgnoreCase)) { var pythonVersion = PythonVersion.Create(conditionalValue); if (pythonVersion.Valid) @@ -173,10 +173,10 @@ public bool PackageConditionsMet(Dictionary pythonEnvironmentVar conditionMet = pythonEnvironmentVariables[conditionalVar] == conditionalValue; } } - else if (string.Equals(conditionalVar, "sys_platform", System.StringComparison.OrdinalIgnoreCase)) + else if (string.Equals(conditionalVar, "sys_platform", StringComparison.OrdinalIgnoreCase)) { // if the platform is not windows or linux (empty string in env var), allow the package to be added. 
Otherwise, ensure it matches the python condition - conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase); + conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, StringComparison.OrdinalIgnoreCase); } else { @@ -197,6 +197,33 @@ public bool PackageConditionsMet(Dictionary pythonEnvironmentVar return conditionsMet; } + /// + /// Iterates through the package versions that are explicitly stated, and returns + /// the highest version that adheres to the version requirements. + /// + /// + /// DependencySpecifiers: (>=1.2.3, !=1.2.4, <2.0.0) + /// Result: 1.2.3 + /// Explanation: Even though 2.0.0 and 1.2.4 are higher, they do not adhere to the dep specifier requirements. + /// + /// Highest explicitly stated version. + public string GetHighestExplicitPackageVersion() + { + var versions = this.DependencySpecifiers + .Select(x => PythonVersionUtilities.ParseSpec(x).Version.Trim()) + .Where(x => !string.IsNullOrEmpty(x)) + .ToList(); + + var topVersion = versions + .Where(x => PythonVersionUtilities.VersionValidForSpec(x, this.DependencySpecifiers)) + .Select(x => (Version: x, PythonVersion: PythonVersion.Create(x))) + .Where(x => x.PythonVersion.Valid) + .OrderByDescending(x => x.PythonVersion) + .FirstOrDefault(); + + return topVersion.Version; + } + /// /// Common method that can be used to determine whether this package is a valid parent /// package of another package. 
Note that this logic is not perfect, it does not diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs index 15b5c2133..17bec5f6b 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs @@ -16,7 +16,8 @@ namespace Microsoft.ComponentDetection.Detectors.Pip; public class PipReportComponentDetector : FileComponentDetector, IExperimentalDetector { - private const string DisablePipReportScanEnvVar = "DisablePipReportScan"; + private const string PipReportOverrideBehaviorEnvVar = "PipReportOverrideBehavior"; + private const string PipReportSkipFallbackOnFailureEnvVar = "PipReportSkipFallbackOnFailure"; /// /// The maximum version of the report specification that this detector can handle. @@ -51,6 +52,13 @@ public PipReportComponentDetector( this.Logger = logger; } + private enum PipReportOverrideBehavior + { + None, // do not override pip report + Skip, // skip pip report altogether + SourceCodeScan, // scan source code files, and record components explicitly from the package files without hitting a remote feed + } + public override string Id => "PipReport"; public override IList SearchPatterns => new List { "setup.py", "requirements.txt" }; @@ -105,18 +113,35 @@ protected override async Task> OnPrepareDetectionAsy protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary detectorArgs, CancellationToken cancellationToken = default) { this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PipExePath", out var pipExePath); + this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath); var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder; var file = processRequest.ComponentStream; FileInfo reportFile = null; try { - if (this.IsPipReportManuallyDisabled()) + 
var pipOverride = this.GetPipReportOverrideBehavior(); + if (pipOverride == PipReportOverrideBehavior.SourceCodeScan) { - this.Logger.LogWarning("PipReport: Found {DisablePipReportScanEnvVar} environment variable equal to true. Skipping pip report.", DisablePipReportScanEnvVar); + this.Logger.LogInformation( + "PipReport: Found {PipReportOverrideBehaviorEnvVar} environment variable set to {Override}. Manually compiling" + + " dependency list for '{File}' without reaching out to a remote feed.", + PipReportOverrideBehaviorEnvVar, + PipReportOverrideBehavior.SourceCodeScan.ToString(), + file.Location); + + await this.RegisterExplicitComponentsInFileAsync(singleFileComponentRecorder, file.Location, pythonExePath); + return; + } + else if (pipOverride == PipReportOverrideBehavior.Skip) + { + var skipReason = $"PipReport: Found {PipReportOverrideBehaviorEnvVar} environment variable set " + + $"to {PipReportOverrideBehavior.Skip}. Skipping pip detection for '{file.Location}'."; + + this.Logger.LogInformation("{Message}", skipReason); using var skipReportRecord = new PipReportSkipTelemetryRecord { - SkipReason = $"PipReport: Found {DisablePipReportScanEnvVar} environment variable equal to true. 
Skipping pip report.", + SkipReason = skipReason, DetectorId = this.Id, DetectorVersion = this.Version, }; @@ -175,6 +200,13 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID ExceptionMessage = e.Message, StackTrace = e.StackTrace, }; + + // if pipreport fails, try to at least list the dependencies that are found in the source files + if (this.GetPipReportOverrideBehavior() != PipReportOverrideBehavior.SourceCodeScan && !this.PipReportSkipFallbackOnFailure()) + { + this.Logger.LogInformation("PipReport: Trying to Manually compile dependency list for '{File}' without reaching out to a remote feed.", file.Location); + await this.RegisterExplicitComponentsInFileAsync(singleFileComponentRecorder, file.Location, pythonExePath); + } } finally { @@ -308,6 +340,59 @@ private void RecordComponents( } } - private bool IsPipReportManuallyDisabled() - => this.envVarService.IsEnvironmentVariableValueTrue(DisablePipReportScanEnvVar); + private async Task RegisterExplicitComponentsInFileAsync( + ISingleFileComponentRecorder recorder, + string filePath, + string pythonPath = null) + { + var initialPackages = await this.pythonCommandService.ParseFileAsync(filePath, pythonPath); + if (initialPackages == null) + { + return; + } + + var listedPackage = initialPackages.Where(tuple => tuple.PackageString != null) + .Select(tuple => tuple.PackageString) + .Where(x => !string.IsNullOrWhiteSpace(x)) + .Select(x => new PipDependencySpecification(x)) + .Where(x => !x.PackageIsUnsafe()) + .Where(x => x.PackageConditionsMet(this.pythonResolver.GetPythonEnvironmentVariables())) + .ToList(); + + listedPackage.Select(x => (x.Name, Version: x.GetHighestExplicitPackageVersion())) + .Where(x => !string.IsNullOrEmpty(x.Version)) + .Select(x => new PipComponent(x.Name, x.Version)) + .Select(x => new DetectedComponent(x)) + .ToList() + .ForEach(pipComponent => recorder.RegisterUsage(pipComponent, isExplicitReferencedDependency: true)); + + initialPackages.Where(tuple 
=> tuple.Component != null) + .Select(tuple => new DetectedComponent(tuple.Component)) + .ToList() + .ForEach(gitComponent => recorder.RegisterUsage(gitComponent, isExplicitReferencedDependency: true)); + } + + private PipReportOverrideBehavior GetPipReportOverrideBehavior() + { + if (!this.envVarService.DoesEnvironmentVariableExist(PipReportOverrideBehaviorEnvVar)) + { + return PipReportOverrideBehavior.None; + } + + if (string.Equals(this.envVarService.GetEnvironmentVariable(PipReportOverrideBehaviorEnvVar), PipReportOverrideBehavior.SourceCodeScan.ToString(), StringComparison.OrdinalIgnoreCase)) + { + return PipReportOverrideBehavior.SourceCodeScan; + } + else if (string.Equals(this.envVarService.GetEnvironmentVariable(PipReportOverrideBehaviorEnvVar), PipReportOverrideBehavior.Skip.ToString(), StringComparison.OrdinalIgnoreCase)) + { + return PipReportOverrideBehavior.Skip; + } + + return PipReportOverrideBehavior.None; + } + + private bool PipReportSkipFallbackOnFailure() + { + return this.envVarService.IsEnvironmentVariableValueTrue(PipReportSkipFallbackOnFailureEnvVar); + } } diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/PythonVersionUtilities.cs b/src/Microsoft.ComponentDetection.Detectors/pip/PythonVersionUtilities.cs index 46d9533c8..020b1225f 100644 --- a/src/Microsoft.ComponentDetection.Detectors/pip/PythonVersionUtilities.cs +++ b/src/Microsoft.ComponentDetection.Detectors/pip/PythonVersionUtilities.cs @@ -85,20 +85,10 @@ public static bool CheckEquality(string version, string specVer, bool fuzzy = fa private static bool VersionValidForSpec(string version, string spec) { - var opChars = new char[] { '=', '<', '>', '~', '!' 
}; - var specArray = spec.ToCharArray(); - - var i = 0; - while (i < spec.Length && i < 3 && opChars.Contains(specArray[i])) - { - i++; - } - - var op = spec[..i]; - var specVerSection = spec[i..].Trim(); + (var op, var specVersion) = ParseSpec(spec); var targetVer = PythonVersion.Create(version); - var specVer = PythonVersion.Create(specVerSection); + var specVer = PythonVersion.Create(specVersion); if (!targetVer.Valid) { @@ -107,7 +97,7 @@ private static bool VersionValidForSpec(string version, string spec) if (!specVer.Valid) { - throw new ArgumentException($"The version specification {specVerSection} is not a valid python version"); + throw new ArgumentException($"The version specification {specVersion} is not a valid python version"); } return op switch @@ -119,8 +109,25 @@ private static bool VersionValidForSpec(string version, string spec) "<=" => specVer >= targetVer, ">=" => targetVer >= specVer, "!=" => targetVer.CompareTo(specVer) != 0, - "~=" => CheckEquality(version, spec[i..], true), + "~=" => CheckEquality(version, specVersion, true), _ => false, }; } + + public static (string Operator, string Version) ParseSpec(string spec) + { + var opChars = new char[] { '=', '<', '>', '~', '!' 
}; + var specArray = spec.ToCharArray(); + + var i = 0; + while (i < spec.Length && i < 3 && opChars.Contains(specArray[i])) + { + i++; + } + + var op = spec[..i]; + var specVerSection = spec[i..].Trim(); + + return (op, specVerSection); + } } diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/PipReportComponentDetectorTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/PipReportComponentDetectorTests.cs index 32552d5b4..303e53a37 100644 --- a/test/Microsoft.ComponentDetection.Detectors.Tests/PipReportComponentDetectorTests.cs +++ b/test/Microsoft.ComponentDetection.Detectors.Tests/PipReportComponentDetectorTests.cs @@ -2,6 +2,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests; using System; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Threading; using System.Threading.Tasks; @@ -249,31 +250,6 @@ public async Task TestPipReportDetector_SimpleExtrasAsync() requestsComponent.Version.Should().Be("2.32.3"); } - [TestMethod] - public async Task TestPipReportDetector_SkipAsync() - { - this.mockEnvVarService.Setup(x => x.IsEnvironmentVariableValueTrue("DisablePipReportScan")).Returns(true); - - this.pipCommandService.Setup(x => x.GenerateInstallationReportAsync(It.IsAny(), It.IsAny(), It.IsAny())) - .ReturnsAsync((this.simpleExtrasReport, null)); - - var (result, componentRecorder) = await this.DetectorTestUtility - .WithFile("requirements.txt", string.Empty) - .ExecuteDetectorAsync(); - - result.ResultCode.Should().Be(ProcessingResultCode.Success); - - this.mockLogger.Verify(x => x.Log( - LogLevel.Warning, - It.IsAny(), - It.Is((o, t) => o.ToString().StartsWith("PipReport: Found DisablePipReportScan environment variable equal to true")), - It.IsAny(), - (Func)It.IsAny())); - - var detectedComponents = componentRecorder.GetDetectedComponents(); - detectedComponents.Should().BeEmpty(); - } - [TestMethod] public async Task TestPipReportDetector_MultiComponentAsync() { @@ -470,4 +446,88 @@ public async Task 
TestPipReportDetector_SingleRoot_ComplexGraph_ComponentRecorde "tinycss2 1.3.0 - pip", new[] { "jupyterlab 4.2.0 - pip" }); } + + [TestMethod] + public async Task TestPipReportDetector_OverrideSourceCodeScanAsync() + { + this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny())).ReturnsAsync(true); + this.mockEnvVarService.Setup(x => x.DoesEnvironmentVariableExist("PipReportOverrideBehavior")).Returns(true); + this.mockEnvVarService.Setup(x => x.GetEnvironmentVariable("PipReportOverrideBehavior")).Returns("sourcecodescan"); + + var baseSetupPyDependencies = this.ToGitTuple(new List { "a==1.0", "b>=2.0,!=2.1,<3.0.0", "c!=1.1" }); + var baseRequirementsTextDependencies = this.ToGitTuple(new List { "d~=1.0", "e<=2.0", "f===1.1", "g<3.0", "h>=1.0,<=3.0,!=2.0,!=4.0" }); + baseRequirementsTextDependencies.Add((null, new GitComponent(new Uri("https://github.com/example/example"), "deadbee"))); + + this.pythonCommandService.Setup(x => x.ParseFileAsync(Path.Join(Path.GetTempPath(), "setup.py"), null)).ReturnsAsync(baseSetupPyDependencies); + this.pythonCommandService.Setup(x => x.ParseFileAsync(Path.Join(Path.GetTempPath(), "requirements.txt"), null)).ReturnsAsync(baseRequirementsTextDependencies); + + var (result, componentRecorder) = await this.DetectorTestUtility + .WithFile("setup.py", string.Empty) + .WithFile("requirements.txt", string.Empty) + .ExecuteDetectorAsync(); + + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().HaveCount(7); + + var pipComponents = detectedComponents.Where(detectedComponent => detectedComponent.Component.Id.Contains("pip")).ToList(); + ((PipComponent)pipComponents.Single(x => ((PipComponent)x.Component).Name == "a").Component).Version.Should().Be("1.0"); + ((PipComponent)pipComponents.Single(x => ((PipComponent)x.Component).Name == "b").Component).Version.Should().Be("2.0"); + ((PipComponent)pipComponents.Single(x => 
((PipComponent)x.Component).Name == "d").Component).Version.Should().Be("1.0"); + ((PipComponent)pipComponents.Single(x => ((PipComponent)x.Component).Name == "e").Component).Version.Should().Be("2.0"); + ((PipComponent)pipComponents.Single(x => ((PipComponent)x.Component).Name == "f").Component).Version.Should().Be("1.1"); + ((PipComponent)pipComponents.Single(x => ((PipComponent)x.Component).Name == "h").Component).Version.Should().Be("3.0"); + + var gitComponents = detectedComponents.Where(detectedComponent => detectedComponent.Component.Type == ComponentType.Git); + gitComponents.Should().ContainSingle(); + var gitComponent = (GitComponent)gitComponents.Single().Component; + + gitComponent.RepositoryUrl.Should().Be("https://github.com/example/example"); + gitComponent.CommitHash.Should().Be("deadbee"); + + this.mockLogger.Verify(x => x.Log( + LogLevel.Information, + It.IsAny(), + It.Is((o, t) => o.ToString().StartsWith("PipReport: Found PipReportOverrideBehavior environment variable set to SourceCodeScan.")), + It.IsAny(), + (Func)It.IsAny())); + } + + [TestMethod] + public async Task TestPipReportDetector_OverrideSkipAsync() + { + this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny())).ReturnsAsync(true); + this.mockEnvVarService.Setup(x => x.DoesEnvironmentVariableExist("PipReportOverrideBehavior")).Returns(true); + this.mockEnvVarService.Setup(x => x.GetEnvironmentVariable("PipReportOverrideBehavior")).Returns("skip"); + + var baseSetupPyDependencies = this.ToGitTuple(new List { "a==1.0", "b>=2.0,!=2.1,<3.0.0", "c!=1.1" }); + var baseRequirementsTextDependencies = this.ToGitTuple(new List { "d~=1.0", "e<=2.0", "f===1.1", "g<3.0", "h>=1.0,<=3.0,!=2.0,!=4.0" }); + baseRequirementsTextDependencies.Add((null, new GitComponent(new Uri("https://github.com/example/example"), "deadbee"))); + + this.pythonCommandService.Setup(x => x.ParseFileAsync(Path.Join(Path.GetTempPath(), "setup.py"), null)).ReturnsAsync(baseSetupPyDependencies); + 
this.pythonCommandService.Setup(x => x.ParseFileAsync(Path.Join(Path.GetTempPath(), "requirements.txt"), null)).ReturnsAsync(baseRequirementsTextDependencies); + + var (result, componentRecorder) = await this.DetectorTestUtility + .WithFile("setup.py", string.Empty) + .WithFile("requirements.txt", string.Empty) + .ExecuteDetectorAsync(); + + result.ResultCode.Should().Be(ProcessingResultCode.Success); + + var detectedComponents = componentRecorder.GetDetectedComponents(); + detectedComponents.Should().BeEmpty(); + + this.mockLogger.Verify(x => x.Log( + LogLevel.Information, + It.IsAny(), + It.Is((o, t) => o.ToString().StartsWith("PipReport: Found PipReportOverrideBehavior environment variable set to Skip.")), + It.IsAny(), + (Func)It.IsAny())); + } + + private List<(string PackageString, GitComponent Component)> ToGitTuple(IList components) + { + return components.Select(dep => (dep, null)).ToList(); + } } diff --git a/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/fallback/requirements.txt b/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/fallback/requirements.txt new file mode 100644 index 000000000..8e7116747 --- /dev/null +++ b/test/Microsoft.ComponentDetection.VerificationTests/resources/pip/fallback/requirements.txt @@ -0,0 +1,2 @@ +fakepythonpackage==1.2.3 +zipp>=3.2.0,<4.0