Skip to content

Commit

Permalink
PipReport option to fallback and parse roots from source (#1190)
Browse files Browse the repository at this point in the history
* add fallback logic to pipreport for cases where we shouldn't reach out to remote feed, and should parse source instead

* add the manual detection as fallback if pip report fails

* add option to skip or fallback to a source code scan

* add docs and fix tests

* remove fallback

* add fallback back, and env var to allow for skipping fallback
  • Loading branch information
pauld-msft authored Jul 8, 2024
1 parent 52daf67 commit fb9423e
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 48 deletions.
6 changes: 6 additions & 0 deletions docs/detectors/pip.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,9 @@ The default value is 4096.

The environment variable `PIP_INDEX_URL` is used to determine what package feed should be used for `pip install --report` detection.
The default value will use the PyPi index unless pip defaults have been configured globally.

The environment variable `PipReportOverrideBehavior` is used to override pip report with one of the following detection strategies.
- `Skip`: Will not run pip detection
- `SourceCodeScan`: Scan `setup.py` and `requirements.txt` files, and record components explicitly from the package files without hitting a remote feed. Does not compile a dependency graph.

The environment variable `PipReportSkipFallbackOnFailure` is used to skip the default fallback behavior if pip report fails. The default fallback scans `setup.py` and `requirements.txt` files and records components explicitly from the package files without hitting a remote feed. It does not compile a dependency graph.
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar
continue; // If the variable isn't in the environment, we can't evaluate it.
}

if (string.Equals(conditionalVar, "python_version", System.StringComparison.OrdinalIgnoreCase))
if (string.Equals(conditionalVar, "python_version", StringComparison.OrdinalIgnoreCase))
{
var pythonVersion = PythonVersion.Create(conditionalValue);
if (pythonVersion.Valid)
Expand All @@ -173,10 +173,10 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar
conditionMet = pythonEnvironmentVariables[conditionalVar] == conditionalValue;
}
}
else if (string.Equals(conditionalVar, "sys_platform", System.StringComparison.OrdinalIgnoreCase))
else if (string.Equals(conditionalVar, "sys_platform", StringComparison.OrdinalIgnoreCase))
{
// if the platform is not windows or linux (empty string in env var), allow the package to be added. Otherwise, ensure it matches the python condition
conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, StringComparison.OrdinalIgnoreCase);
}
else
{
Expand All @@ -197,6 +197,33 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar
return conditionsMet;
}

/// <summary>
/// Looks only at the versions that are literally written in the dependency specifiers and
/// returns the highest of them that the full specifier list still accepts.
/// </summary>
/// <example>
/// DependencySpecifiers: (&gt;=1.2.3, !=1.2.4, &lt;2.0.0)
/// Result: 1.2.3
/// Explanation: Even though 2.0.0 and 1.2.4 are higher, they do not adhere to the dependency specifier requirements.
/// </example>
/// <returns>
/// The highest explicitly stated version that passes validation, or <c>null</c> when no
/// stated version qualifies.
/// </returns>
public string GetHighestExplicitPackageVersion()
{
    // Stage 1: pull the version text out of every specifier, dropping specifiers without one.
    var statedVersions = new List<string>();
    foreach (var specifier in this.DependencySpecifiers)
    {
        var versionText = PythonVersionUtilities.ParseSpec(specifier).Version.Trim();
        if (!string.IsNullOrEmpty(versionText))
        {
            statedVersions.Add(versionText);
        }
    }

    // Stage 2: keep the candidates accepted by the whole specifier list, parse them,
    // and take the greatest parsable one.
    var best = statedVersions
        .Where(candidate => PythonVersionUtilities.VersionValidForSpec(candidate, this.DependencySpecifiers))
        .Select(candidate => (Version: candidate, PythonVersion: PythonVersion.Create(candidate)))
        .Where(pair => pair.PythonVersion.Valid)
        .OrderByDescending(pair => pair.PythonVersion)
        .FirstOrDefault();

    // When nothing qualified, FirstOrDefault yields the default tuple, whose Version is null.
    return best.Version;
}

/// <summary>
/// Common method that can be used to determine whether this package is a valid parent
/// package of another package. Note that this logic is not perfect, it does not
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;

public class PipReportComponentDetector : FileComponentDetector, IExperimentalDetector
{
private const string DisablePipReportScanEnvVar = "DisablePipReportScan";
private const string PipReportOverrideBehaviorEnvVar = "PipReportOverrideBehavior";
private const string PipReportSkipFallbackOnFailureEnvVar = "PipReportSkipFallbackOnFailure";

/// <summary>
/// The maximum version of the report specification that this detector can handle.
Expand Down Expand Up @@ -51,6 +52,13 @@ public PipReportComponentDetector(
this.Logger = logger;
}

/// <summary>
/// Detection strategies that can replace the normal pip report run. The member names are
/// compared (case-insensitively) against the value of the PipReportOverrideBehavior
/// environment variable.
/// </summary>
private enum PipReportOverrideBehavior
{
None, // do not override pip report
Skip, // skip pip report altogether
SourceCodeScan, // scan source code files, and record components explicitly from the package files without hitting a remote feed
}

/// <summary>Identifier of this detector; always the literal "PipReport".</summary>
public override string Id => "PipReport";

/// <summary>
/// File names this detector declares as its search patterns: setup.py and requirements.txt.
/// A new list instance is created on every access.
/// </summary>
public override IList<string> SearchPatterns => new List<string> { "setup.py", "requirements.txt" };
Expand Down Expand Up @@ -105,18 +113,35 @@ protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsy
protected override async Task OnFileFoundAsync(ProcessRequest processRequest, IDictionary<string, string> detectorArgs, CancellationToken cancellationToken = default)
{
this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PipExePath", out var pipExePath);
this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
var singleFileComponentRecorder = processRequest.SingleFileComponentRecorder;
var file = processRequest.ComponentStream;

FileInfo reportFile = null;
try
{
if (this.IsPipReportManuallyDisabled())
var pipOverride = this.GetPipReportOverrideBehavior();
if (pipOverride == PipReportOverrideBehavior.SourceCodeScan)
{
this.Logger.LogWarning("PipReport: Found {DisablePipReportScanEnvVar} environment variable equal to true. Skipping pip report.", DisablePipReportScanEnvVar);
this.Logger.LogInformation(
"PipReport: Found {PipReportOverrideBehaviorEnvVar} environment variable set to {Override}. Manually compiling" +
" dependency list for '{File}' without reaching out to a remote feed.",
PipReportOverrideBehaviorEnvVar,
PipReportOverrideBehavior.SourceCodeScan.ToString(),
file.Location);

await this.RegisterExplicitComponentsInFileAsync(singleFileComponentRecorder, file.Location, pythonExePath);
return;
}
else if (pipOverride == PipReportOverrideBehavior.Skip)
{
var skipReason = $"PipReport: Found {PipReportOverrideBehaviorEnvVar} environment variable set " +
$"to {PipReportOverrideBehavior.Skip}. Skipping pip detection for '{file.Location}'.";

this.Logger.LogInformation("{Message}", skipReason);
using var skipReportRecord = new PipReportSkipTelemetryRecord
{
SkipReason = $"PipReport: Found {DisablePipReportScanEnvVar} environment variable equal to true. Skipping pip report.",
SkipReason = skipReason,
DetectorId = this.Id,
DetectorVersion = this.Version,
};
Expand Down Expand Up @@ -175,6 +200,13 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
ExceptionMessage = e.Message,
StackTrace = e.StackTrace,
};

// if pipreport fails, try to at least list the dependencies that are found in the source files
if (this.GetPipReportOverrideBehavior() != PipReportOverrideBehavior.SourceCodeScan && !this.PipReportSkipFallbackOnFailure())
{
this.Logger.LogInformation("PipReport: Trying to Manually compile dependency list for '{File}' without reaching out to a remote feed.", file.Location);
await this.RegisterExplicitComponentsInFileAsync(singleFileComponentRecorder, file.Location, pythonExePath);
}
}
finally
{
Expand Down Expand Up @@ -308,6 +340,59 @@ private void RecordComponents(
}
}

private bool IsPipReportManuallyDisabled()
=> this.envVarService.IsEnvironmentVariableValueTrue(DisablePipReportScanEnvVar);
/// <summary>
/// Parses a package file through the python command service and registers what it lists as
/// explicitly referenced components: pip packages (pinned to the highest version explicitly
/// stated in their specifiers) and any git components returned by the parser.
/// </summary>
/// <param name="recorder">Recorder that receives the detected components.</param>
/// <param name="filePath">Path of the file handed to <c>ParseFileAsync</c>.</param>
/// <param name="pythonPath">Optional python executable path forwarded to <c>ParseFileAsync</c>.</param>
/// <returns>A task that completes once all components have been registered.</returns>
private async Task RegisterExplicitComponentsInFileAsync(
    ISingleFileComponentRecorder recorder,
    string filePath,
    string pythonPath = null)
{
    var parsedEntries = await this.pythonCommandService.ParseFileAsync(filePath, pythonPath);
    if (parsedEntries == null)
    {
        // Nothing could be parsed from the file, so there is nothing to register.
        return;
    }

    // Build specifications for every non-blank package string, dropping unsafe packages and
    // packages whose environment conditions are not met. Materialized before any version
    // resolution happens.
    var specifications = parsedEntries
        .Select(entry => entry.PackageString)
        .Where(packageString => !string.IsNullOrWhiteSpace(packageString))
        .Select(packageString => new PipDependencySpecification(packageString))
        .Where(spec => !spec.PackageIsUnsafe()
            && spec.PackageConditionsMet(this.pythonResolver.GetPythonEnvironmentVariables()))
        .ToList();

    // Resolve all versions first so that nothing is registered if resolution throws part-way.
    var pipComponents = specifications
        .Select(spec => (spec.Name, Version: spec.GetHighestExplicitPackageVersion()))
        .Where(package => !string.IsNullOrEmpty(package.Version))
        .Select(package => new DetectedComponent(new PipComponent(package.Name, package.Version)))
        .ToList();

    foreach (var pipComponent in pipComponents)
    {
        recorder.RegisterUsage(pipComponent, isExplicitReferencedDependency: true);
    }

    // Entries that carry a git component are registered as-is.
    var gitComponents = parsedEntries
        .Where(entry => entry.Component != null)
        .Select(entry => new DetectedComponent(entry.Component))
        .ToList();

    foreach (var gitComponent in gitComponents)
    {
        recorder.RegisterUsage(gitComponent, isExplicitReferencedDependency: true);
    }
}

/// <summary>
/// Reads the PipReportOverrideBehavior environment variable and maps it to an override
/// strategy. The comparison is case-insensitive; a missing variable or an unrecognized
/// value yields <see cref="PipReportOverrideBehavior.None"/>.
/// </summary>
/// <returns>The requested override behavior.</returns>
private PipReportOverrideBehavior GetPipReportOverrideBehavior()
{
    if (!this.envVarService.DoesEnvironmentVariableExist(PipReportOverrideBehaviorEnvVar))
    {
        return PipReportOverrideBehavior.None;
    }

    if (IsSetTo(PipReportOverrideBehavior.SourceCodeScan))
    {
        return PipReportOverrideBehavior.SourceCodeScan;
    }

    return IsSetTo(PipReportOverrideBehavior.Skip)
        ? PipReportOverrideBehavior.Skip
        : PipReportOverrideBehavior.None;

    // True when the environment variable's current value names the given enum member.
    bool IsSetTo(PipReportOverrideBehavior candidate) =>
        string.Equals(
            this.envVarService.GetEnvironmentVariable(PipReportOverrideBehaviorEnvVar),
            candidate.ToString(),
            StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Tells whether the PipReportSkipFallbackOnFailure environment variable is set to true,
/// as reported by the environment variable service.
/// </summary>
/// <returns><c>true</c> when the variable's value is true; otherwise <c>false</c>.</returns>
private bool PipReportSkipFallbackOnFailure()
    => this.envVarService.IsEnvironmentVariableValueTrue(PipReportSkipFallbackOnFailureEnvVar);
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,20 +85,10 @@ public static bool CheckEquality(string version, string specVer, bool fuzzy = fa

private static bool VersionValidForSpec(string version, string spec)
{
var opChars = new char[] { '=', '<', '>', '~', '!' };
var specArray = spec.ToCharArray();

var i = 0;
while (i < spec.Length && i < 3 && opChars.Contains(specArray[i]))
{
i++;
}

var op = spec[..i];
var specVerSection = spec[i..].Trim();
(var op, var specVersion) = ParseSpec(spec);

var targetVer = PythonVersion.Create(version);
var specVer = PythonVersion.Create(specVerSection);
var specVer = PythonVersion.Create(specVersion);

if (!targetVer.Valid)
{
Expand All @@ -107,7 +97,7 @@ private static bool VersionValidForSpec(string version, string spec)

if (!specVer.Valid)
{
throw new ArgumentException($"The version specification {specVerSection} is not a valid python version");
throw new ArgumentException($"The version specification {specVersion} is not a valid python version");
}

return op switch
Expand All @@ -119,8 +109,25 @@ private static bool VersionValidForSpec(string version, string spec)
"<=" => specVer >= targetVer,
">=" => targetVer >= specVer,
"!=" => targetVer.CompareTo(specVer) != 0,
"~=" => CheckEquality(version, spec[i..], true),
"~=" => CheckEquality(version, specVersion, true),
_ => false,
};
}

/// <summary>
/// Splits a single version specifier such as <c>&gt;=1.2.3</c> into its comparison operator
/// and its version text.
/// </summary>
/// <remarks>
/// The operator is the leading run of at most three characters drawn from
/// <c>= &lt; &gt; ~ !</c> (three characters covers <c>===</c>). Whitespace surrounding the
/// specifier, and between the operator and the version, is ignored, so <c>" &gt;= 1.0 "</c>
/// parses the same as <c>"&gt;=1.0"</c>.
/// </remarks>
/// <param name="spec">The specifier text to split. Must not be null.</param>
/// <returns>
/// The operator (empty when the specifier does not start with an operator character) and
/// the trimmed remainder as the version.
/// </returns>
public static (string Operator, string Version) ParseSpec(string spec)
{
    var operatorChars = "=<>~!";
    var maxOperatorLength = 3;

    // Leading whitespace would otherwise stop operator detection at index 0 and push the
    // operator into the version text.
    var trimmed = spec.Trim();

    var operatorLength = 0;
    while (operatorLength < trimmed.Length
        && operatorLength < maxOperatorLength
        && operatorChars.IndexOf(trimmed[operatorLength]) >= 0)
    {
        operatorLength++;
    }

    return (trimmed[..operatorLength], trimmed[operatorLength..].Trim());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ namespace Microsoft.ComponentDetection.Detectors.Tests;

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -249,31 +250,6 @@ public async Task TestPipReportDetector_SimpleExtrasAsync()
requestsComponent.Version.Should().Be("2.32.3");
}

// NOTE(review): this test appears on the removed side of the diff; it targets the
// DisablePipReportScan environment variable, which this commit replaces.
// Verifies that when DisablePipReportScan is reported as true the detector succeeds,
// logs a warning, and records no components.
[TestMethod]
public async Task TestPipReportDetector_SkipAsync()
{
// Arrange: the environment variable service reports the disable flag as true.
this.mockEnvVarService.Setup(x => x.IsEnvironmentVariableValueTrue("DisablePipReportScan")).Returns(true);

// A report is still made available so that an empty result can only come from skipping.
this.pipCommandService.Setup(x => x.GenerateInstallationReportAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<CancellationToken>()))
.ReturnsAsync((this.simpleExtrasReport, null));

// Act: run the detector over a single empty requirements.txt.
var (result, componentRecorder) = await this.DetectorTestUtility
.WithFile("requirements.txt", string.Empty)
.ExecuteDetectorAsync();

// Assert: the scan succeeds.
result.ResultCode.Should().Be(ProcessingResultCode.Success);

// Assert: a warning starting with the expected skip message was logged.
this.mockLogger.Verify(x => x.Log(
LogLevel.Warning,
It.IsAny<EventId>(),
It.Is<It.IsAnyType>((o, t) => o.ToString().StartsWith("PipReport: Found DisablePipReportScan environment variable equal to true")),
It.IsAny<Exception>(),
(Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));

// Assert: nothing was recorded.
var detectedComponents = componentRecorder.GetDetectedComponents();
detectedComponents.Should().BeEmpty();
}

[TestMethod]
public async Task TestPipReportDetector_MultiComponentAsync()
{
Expand Down Expand Up @@ -470,4 +446,88 @@ public async Task TestPipReportDetector_SingleRoot_ComplexGraph_ComponentRecorde
"tinycss2 1.3.0 - pip",
new[] { "jupyterlab 4.2.0 - pip" });
}

// With PipReportOverrideBehavior set to "sourcecodescan", the detector must register only
// what the package files state explicitly (highest stated version per package, plus git
// components) and log the override message.
[TestMethod]
public async Task TestPipReportDetector_OverrideSourceCodeScanAsync()
{
    // Arrange: python is available and the override variable requests a source code scan.
    this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny<string>())).ReturnsAsync(true);
    this.mockEnvVarService.Setup(x => x.DoesEnvironmentVariableExist("PipReportOverrideBehavior")).Returns(true);
    this.mockEnvVarService.Setup(x => x.GetEnvironmentVariable("PipReportOverrideBehavior")).Returns("sourcecodescan");

    var setupPyPath = Path.Join(Path.GetTempPath(), "setup.py");
    var requirementsPath = Path.Join(Path.GetTempPath(), "requirements.txt");

    var setupPyPackages = this.ToGitTuple(new List<string> { "a==1.0", "b>=2.0,!=2.1,<3.0.0", "c!=1.1" });
    var requirementsPackages = this.ToGitTuple(new List<string> { "d~=1.0", "e<=2.0", "f===1.1", "g<3.0", "h>=1.0,<=3.0,!=2.0,!=4.0" });
    requirementsPackages.Add((null, new GitComponent(new Uri("https://github.com/example/example"), "deadbee")));

    this.pythonCommandService.Setup(x => x.ParseFileAsync(setupPyPath, null)).ReturnsAsync(setupPyPackages);
    this.pythonCommandService.Setup(x => x.ParseFileAsync(requirementsPath, null)).ReturnsAsync(requirementsPackages);

    // Act
    var (result, componentRecorder) = await this.DetectorTestUtility
        .WithFile("setup.py", string.Empty)
        .WithFile("requirements.txt", string.Empty)
        .ExecuteDetectorAsync();

    // Assert: six pip packages with a usable stated version plus one git component.
    result.ResultCode.Should().Be(ProcessingResultCode.Success);

    var detected = componentRecorder.GetDetectedComponents();
    detected.Should().HaveCount(7);

    var pipComponents = detected.Where(component => component.Component.Id.Contains("pip")).ToList();

    string VersionOf(string packageName) =>
        ((PipComponent)pipComponents.Single(x => ((PipComponent)x.Component).Name == packageName).Component).Version;

    VersionOf("a").Should().Be("1.0");
    VersionOf("b").Should().Be("2.0");
    VersionOf("d").Should().Be("1.0");
    VersionOf("e").Should().Be("2.0");
    VersionOf("f").Should().Be("1.1");
    VersionOf("h").Should().Be("3.0");

    var gitComponents = detected.Where(component => component.Component.Type == ComponentType.Git);
    gitComponents.Should().ContainSingle();

    var git = (GitComponent)gitComponents.Single().Component;
    git.RepositoryUrl.Should().Be("https://github.com/example/example");
    git.CommitHash.Should().Be("deadbee");

    // Assert: the override was announced at information level.
    this.mockLogger.Verify(x => x.Log(
        LogLevel.Information,
        It.IsAny<EventId>(),
        It.Is<It.IsAnyType>((o, t) => o.ToString().StartsWith("PipReport: Found PipReportOverrideBehavior environment variable set to SourceCodeScan.")),
        It.IsAny<Exception>(),
        (Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));
}

// With PipReportOverrideBehavior set to "skip", the detector must succeed, record nothing
// even though parsable package files are available, and log the skip message.
[TestMethod]
public async Task TestPipReportDetector_OverrideSkipAsync()
{
    // Arrange: python is available and the override variable requests skipping detection.
    this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny<string>())).ReturnsAsync(true);
    this.mockEnvVarService.Setup(x => x.DoesEnvironmentVariableExist("PipReportOverrideBehavior")).Returns(true);
    this.mockEnvVarService.Setup(x => x.GetEnvironmentVariable("PipReportOverrideBehavior")).Returns("skip");

    var setupPyPath = Path.Join(Path.GetTempPath(), "setup.py");
    var requirementsPath = Path.Join(Path.GetTempPath(), "requirements.txt");

    // Parse results are wired up so an empty outcome can only be explained by the skip.
    var setupPyPackages = this.ToGitTuple(new List<string> { "a==1.0", "b>=2.0,!=2.1,<3.0.0", "c!=1.1" });
    var requirementsPackages = this.ToGitTuple(new List<string> { "d~=1.0", "e<=2.0", "f===1.1", "g<3.0", "h>=1.0,<=3.0,!=2.0,!=4.0" });
    requirementsPackages.Add((null, new GitComponent(new Uri("https://github.com/example/example"), "deadbee")));

    this.pythonCommandService.Setup(x => x.ParseFileAsync(setupPyPath, null)).ReturnsAsync(setupPyPackages);
    this.pythonCommandService.Setup(x => x.ParseFileAsync(requirementsPath, null)).ReturnsAsync(requirementsPackages);

    // Act
    var (result, componentRecorder) = await this.DetectorTestUtility
        .WithFile("setup.py", string.Empty)
        .WithFile("requirements.txt", string.Empty)
        .ExecuteDetectorAsync();

    // Assert
    result.ResultCode.Should().Be(ProcessingResultCode.Success);
    componentRecorder.GetDetectedComponents().Should().BeEmpty();

    this.mockLogger.Verify(x => x.Log(
        LogLevel.Information,
        It.IsAny<EventId>(),
        It.Is<It.IsAnyType>((o, t) => o.ToString().StartsWith("PipReport: Found PipReportOverrideBehavior environment variable set to Skip.")),
        It.IsAny<Exception>(),
        (Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));
}

// Wraps each package string in the (PackageString, Component) tuple shape used by the
// ParseFileAsync mocks, leaving the git component null.
private List<(string PackageString, GitComponent Component)> ToGitTuple(IList<string> components)
{
    var tuples = new List<(string PackageString, GitComponent Component)>();
    foreach (var packageString in components)
    {
        tuples.Add((packageString, null));
    }

    return tuples;
}
}
Loading

0 comments on commit fb9423e

Please sign in to comment.