-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce a dependency extractor abstraction (#5092)
* Dependency extractor abstraction This commit introduces a new dependency extractor abstraction in a new deps package. This allows us to easily swap the implementation minder uses to extract dependencies. Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]> * Move scalibr to extractor implementation This commit rebuilds the osv-scalibr scanner as the first dependency extractor. It also moves the scanfs test to the new package. Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]> * deps ingest: use extractor We now modify the dependency ingester to use the new deps.Extractor abstraction. It removes all the scalibr logic from the ingester but defaults to the new scalibr implementation introduced in the previous commit. Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]> --------- Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]>
- Loading branch information
Showing
5 changed files
with
237 additions
and
155 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
// Package deps abstracts a dependency extractor | ||
package deps | ||
|
||
import ( | ||
"context" | ||
"io/fs" | ||
|
||
"github.com/protobom/protobom/pkg/sbom" | ||
|
||
"github.com/mindersec/minder/internal/deps/scalibr" | ||
) | ||
|
||
var _ Extractor = (*scalibr.Extractor)(nil) | ||
|
||
// Extractor is the object that groups the dependency extractor. It shields the | ||
// implementations that Minder uses behinf a common interface to extract depencies | ||
// from filesystems. | ||
type Extractor interface { | ||
ScanFilesystem(context.Context, fs.FS) (*sbom.NodeList, error) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
// Package scalibr implements a dependency extractor using the osv-scalibr | ||
// library. | ||
package scalibr | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"io/fs" | ||
|
||
scalibr "github.com/google/osv-scalibr" | ||
"github.com/google/osv-scalibr/extractor/filesystem/list" | ||
scalibr_fs "github.com/google/osv-scalibr/fs" | ||
scalibr_plugin "github.com/google/osv-scalibr/plugin" | ||
"github.com/google/uuid" | ||
"github.com/protobom/protobom/pkg/sbom" | ||
) | ||
|
||
// Extractor implements dependency extraction on top of the osv-scalibr
// library. It has no fields.
type Extractor struct{}
|
||
// NewExtractor creates a new scalibr dependency extractor | ||
func NewExtractor() *Extractor { | ||
return &Extractor{} | ||
} | ||
|
||
// ScanFilesystem takes | ||
func (*Extractor) ScanFilesystem(ctx context.Context, iofs fs.FS) (*sbom.NodeList, error) { | ||
return scanFilesystem(ctx, iofs) | ||
} | ||
|
||
func scanFilesystem(ctx context.Context, iofs fs.FS) (*sbom.NodeList, error) { | ||
if iofs == nil { | ||
return nil, errors.New("unable to scan dependencies, no active defined") | ||
} | ||
// have to down-cast here, because scalibr needs multiple io/fs types | ||
wrapped, ok := iofs.(scalibr_fs.FS) | ||
if !ok { | ||
return nil, fmt.Errorf("error converting filesystem to ReadDirFS") | ||
} | ||
|
||
desiredCaps := scalibr_plugin.Capabilities{ | ||
OS: scalibr_plugin.OSLinux, | ||
Network: true, | ||
DirectFS: false, | ||
RunningSystem: false, | ||
} | ||
|
||
scalibrFs := scalibr_fs.ScanRoot{FS: wrapped} | ||
scanConfig := scalibr.ScanConfig{ | ||
ScanRoots: []*scalibr_fs.ScanRoot{&scalibrFs}, | ||
// All includes Ruby, Dotnet which we're not ready to test yet, so use the more limited Default set. | ||
FilesystemExtractors: list.FilterByCapabilities(list.Default, &desiredCaps), | ||
Capabilities: &desiredCaps, | ||
} | ||
|
||
scanner := scalibr.New() | ||
scanResults := scanner.Scan(ctx, &scanConfig) | ||
|
||
if scanResults == nil || scanResults.Status == nil { | ||
return nil, fmt.Errorf("error scanning files: no results") | ||
} | ||
if scanResults.Status.Status != scalibr_plugin.ScanStatusSucceeded { | ||
return nil, fmt.Errorf("error scanning files: %s", scanResults.Status) | ||
} | ||
|
||
res := sbom.NewNodeList() | ||
for _, inv := range scanResults.Inventories { | ||
node := &sbom.Node{ | ||
Type: sbom.Node_PACKAGE, | ||
Id: uuid.New().String(), | ||
Name: inv.Name, | ||
Version: inv.Version, | ||
Identifiers: map[int32]string{ | ||
int32(sbom.SoftwareIdentifierType_PURL): inv.Extractor.ToPURL(inv).String(), | ||
// TODO: scalibr returns a _list_ of CPEs, but protobom will store one. | ||
// use the first? | ||
// int32(sbom.SoftwareIdentifierType_CPE23): inv.Extractor.ToCPEs(inv), | ||
}, | ||
} | ||
for _, l := range inv.Locations { | ||
node.Properties = append(node.Properties, &sbom.Property{ | ||
Name: "sourceFile", | ||
Data: l, | ||
}) | ||
} | ||
res.AddNode(node) | ||
} | ||
|
||
return res, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package scalibr | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"io/fs" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/go-git/go-billy/v5/helper/iofs" | ||
"github.com/go-git/go-billy/v5/memfs" | ||
"github.com/protobom/protobom/pkg/sbom" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestScanFilesystem(t *testing.T) { | ||
t.Parallel() | ||
for _, tc := range []struct { | ||
name string | ||
makeFs func() fs.FS | ||
mustErr bool | ||
expect *sbom.NodeList | ||
expectedLen int | ||
}{ | ||
{ | ||
name: "python-reqs-txt", | ||
makeFs: func() fs.FS { | ||
t.Helper() | ||
memFS := memfs.New() | ||
f, err := memFS.Create("requirements.txt") | ||
require.NoError(t, err) | ||
_, err = f.Write([]byte("Flask>=1\nrequestts>=1\n")) | ||
require.NoError(t, err) | ||
require.NoError(t, f.Close()) | ||
return iofs.New(memFS) | ||
}, | ||
expectedLen: 2, | ||
expect: &sbom.NodeList{ | ||
Nodes: []*sbom.Node{ | ||
{ | ||
Id: "0000000000", | ||
Type: sbom.Node_PACKAGE, | ||
Name: "Flask", | ||
Version: "1", | ||
Identifiers: map[int32]string{ | ||
1: "pkg:pypi/flask@1", | ||
}, | ||
Properties: []*sbom.Property{ | ||
{ | ||
Name: "sourceFile", | ||
Data: "requirements.txt", | ||
}, | ||
}, | ||
}, | ||
{ | ||
Id: "1111111111", | ||
Type: sbom.Node_PACKAGE, | ||
Name: "requestts", | ||
Version: "1", | ||
Identifiers: map[int32]string{ | ||
1: "pkg:pypi/requestts@1", | ||
}, | ||
Properties: []*sbom.Property{ | ||
{ | ||
Name: "sourceFile", | ||
Data: "requirements.txt", | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
{ | ||
name: "bad-fs", | ||
makeFs: func() fs.FS { | ||
return nil | ||
}, | ||
mustErr: true, | ||
}, | ||
} { | ||
t.Run(tc.name, func(t *testing.T) { | ||
t.Parallel() | ||
fs := tc.makeFs() | ||
nodelist, err := scanFilesystem(context.Background(), fs) | ||
if tc.mustErr { | ||
require.Error(t, err) | ||
return | ||
} | ||
require.NoError(t, err) | ||
require.Len(t, nodelist.Nodes, tc.expectedLen) | ||
|
||
// Compare the nodes, make sure they are equal | ||
for i := range nodelist.Nodes { | ||
nodelist.Nodes[i].Id = strings.Repeat(fmt.Sprintf("%d", i), 10) | ||
require.Equal(t, tc.expect.Nodes[i].Checksum(), nodelist.Nodes[i].Checksum()) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.