Introduce a dependency extractor abstraction (#5092)
* Dependency extractor abstraction

This commit introduces a new dependency extractor abstraction in
a new deps package. This allows us to easily swap the implementation
Minder uses to extract dependencies.

Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]>

* Move scalibr to extractor implementation

This commit rebuilds the osv-scalibr scanner as the first dependency extractor.
It also moves the scanfs test to the new package.

Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]>

* deps ingest: use extractor

This commit modifies the dependency ingester to use the new deps.Extractor abstraction.
It removes all the scalibr logic from the ingester and defaults to the new scalibr
implementation introduced in the previous commit.

Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]>

---------

Signed-off-by: Adolfo García Veytia (Puerco) <[email protected]>
puerco authored Nov 29, 2024
1 parent 2853910 commit b84d504
Showing 5 changed files with 237 additions and 155 deletions.
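To make the abstraction concrete, here is a minimal sketch of a caller that depends only on the deps.Extractor interface introduced below. The countDependencies and useScalibr helpers are hypothetical and not part of this change, and since both packages are internal, code like this would have to live inside the Minder module.

package deps_test

import (
    "context"
    "io/fs"

    "github.com/mindersec/minder/internal/deps"
    "github.com/mindersec/minder/internal/deps/scalibr"
)

// countDependencies is a hypothetical consumer: it only knows about the
// deps.Extractor interface, so the extractor backend can be swapped freely.
func countDependencies(ctx context.Context, x deps.Extractor, fsys fs.FS) (int, error) {
    nl, err := x.ScanFilesystem(ctx, fsys)
    if err != nil {
        return 0, err
    }
    return len(nl.Nodes), nil
}

// useScalibr wires in the scalibr-backed implementation, currently the only one.
func useScalibr(ctx context.Context, fsys fs.FS) (int, error) {
    return countDependencies(ctx, scalibr.NewExtractor(), fsys)
}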
23 changes: 23 additions & 0 deletions internal/deps/extractor.go
@@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package deps abstracts a dependency extractor
package deps

import (
"context"
"io/fs"

"github.com/protobom/protobom/pkg/sbom"

"github.com/mindersec/minder/internal/deps/scalibr"
)

var _ Extractor = (*scalibr.Extractor)(nil)

// Extractor is the interface that dependency extractors implement. It shields the
// implementations that Minder uses behind a common interface to extract dependencies
// from filesystems.
type Extractor interface {
ScanFilesystem(context.Context, fs.FS) (*sbom.NodeList, error)
}
95 changes: 95 additions & 0 deletions internal/deps/scalibr/scalibr.go
@@ -0,0 +1,95 @@
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package scalibr implements a dependency extractor using the osv-scalibr
// library.
package scalibr

import (
"context"
"errors"
"fmt"
"io/fs"

scalibr "github.com/google/osv-scalibr"
"github.com/google/osv-scalibr/extractor/filesystem/list"
scalibr_fs "github.com/google/osv-scalibr/fs"
scalibr_plugin "github.com/google/osv-scalibr/plugin"
"github.com/google/uuid"
"github.com/protobom/protobom/pkg/sbom"
)

// Extractor is a dependency extractor based on osv-scalibr.
type Extractor struct {
}

// NewExtractor creates a new scalibr dependency extractor
func NewExtractor() *Extractor {
return &Extractor{}
}

// ScanFilesystem scans an io/fs filesystem and returns the dependencies found
// in it as a protobom NodeList.
func (*Extractor) ScanFilesystem(ctx context.Context, iofs fs.FS) (*sbom.NodeList, error) {
return scanFilesystem(ctx, iofs)
}

func scanFilesystem(ctx context.Context, iofs fs.FS) (*sbom.NodeList, error) {
if iofs == nil {
return nil, errors.New("unable to scan dependencies, no active defined")
}
// have to down-cast here, because scalibr needs multiple io/fs types
wrapped, ok := iofs.(scalibr_fs.FS)
if !ok {
return nil, fmt.Errorf("error converting filesystem to ReadDirFS")
}

desiredCaps := scalibr_plugin.Capabilities{
OS: scalibr_plugin.OSLinux,
Network: true,
DirectFS: false,
RunningSystem: false,
}

scalibrFs := scalibr_fs.ScanRoot{FS: wrapped}
scanConfig := scalibr.ScanConfig{
ScanRoots: []*scalibr_fs.ScanRoot{&scalibrFs},
// All includes Ruby, Dotnet which we're not ready to test yet, so use the more limited Default set.
FilesystemExtractors: list.FilterByCapabilities(list.Default, &desiredCaps),
Capabilities: &desiredCaps,
}

scanner := scalibr.New()
scanResults := scanner.Scan(ctx, &scanConfig)

if scanResults == nil || scanResults.Status == nil {
return nil, fmt.Errorf("error scanning files: no results")
}
if scanResults.Status.Status != scalibr_plugin.ScanStatusSucceeded {
return nil, fmt.Errorf("error scanning files: %s", scanResults.Status)
}

res := sbom.NewNodeList()
for _, inv := range scanResults.Inventories {
node := &sbom.Node{
Type: sbom.Node_PACKAGE,
Id: uuid.New().String(),
Name: inv.Name,
Version: inv.Version,
Identifiers: map[int32]string{
int32(sbom.SoftwareIdentifierType_PURL): inv.Extractor.ToPURL(inv).String(),
// TODO: scalibr returns a _list_ of CPEs, but protobom will store one.
// use the first?
// int32(sbom.SoftwareIdentifierType_CPE23): inv.Extractor.ToCPEs(inv),
},
}
for _, l := range inv.Locations {
node.Properties = append(node.Properties, &sbom.Property{
Name: "sourceFile",
Data: l,
})
}
res.AddNode(node)
}

return res, nil
}
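As a usage sketch, the extractor can be driven with the same go-billy in-memory filesystem approach the package test below uses; the scanRequirements helper is hypothetical and only illustrates the call shape.

package scalibr_test

import (
    "context"
    "fmt"

    "github.com/go-git/go-billy/v5/helper/iofs"
    "github.com/go-git/go-billy/v5/memfs"

    "github.com/mindersec/minder/internal/deps/scalibr"
)

// scanRequirements builds an in-memory filesystem holding a requirements.txt
// and runs the scalibr-backed extractor over it.
func scanRequirements(ctx context.Context) error {
    memFS := memfs.New()
    f, err := memFS.Create("requirements.txt")
    if err != nil {
        return err
    }
    if _, err := f.Write([]byte("Flask>=1\n")); err != nil {
        return err
    }
    if err := f.Close(); err != nil {
        return err
    }

    // iofs.New wraps the billy filesystem into an fs.FS that also satisfies
    // the ReadDirFS requirement the extractor down-casts to internally.
    nodelist, err := scalibr.NewExtractor().ScanFilesystem(ctx, iofs.New(memFS))
    if err != nil {
        return err
    }
    for _, node := range nodelist.Nodes {
        fmt.Println(node.Name, node.Version)
    }
    return nil
}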
102 changes: 102 additions & 0 deletions internal/deps/scalibr/scalibr_test.go
@@ -0,0 +1,102 @@
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

package scalibr

import (
"context"
"fmt"
"io/fs"
"strings"
"testing"

"github.com/go-git/go-billy/v5/helper/iofs"
"github.com/go-git/go-billy/v5/memfs"
"github.com/protobom/protobom/pkg/sbom"
"github.com/stretchr/testify/require"
)

func TestScanFilesystem(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
name string
makeFs func() fs.FS
mustErr bool
expect *sbom.NodeList
expectedLen int
}{
{
name: "python-reqs-txt",
makeFs: func() fs.FS {
t.Helper()
memFS := memfs.New()
f, err := memFS.Create("requirements.txt")
require.NoError(t, err)
_, err = f.Write([]byte("Flask>=1\nrequestts>=1\n"))
require.NoError(t, err)
require.NoError(t, f.Close())
return iofs.New(memFS)
},
expectedLen: 2,
expect: &sbom.NodeList{
Nodes: []*sbom.Node{
{
Id: "0000000000",
Type: sbom.Node_PACKAGE,
Name: "Flask",
Version: "1",
Identifiers: map[int32]string{
1: "pkg:pypi/flask@1",
},
Properties: []*sbom.Property{
{
Name: "sourceFile",
Data: "requirements.txt",
},
},
},
{
Id: "1111111111",
Type: sbom.Node_PACKAGE,
Name: "requestts",
Version: "1",
Identifiers: map[int32]string{
1: "pkg:pypi/requestts@1",
},
Properties: []*sbom.Property{
{
Name: "sourceFile",
Data: "requirements.txt",
},
},
},
},
},
},
{
name: "bad-fs",
makeFs: func() fs.FS {
return nil
},
mustErr: true,
},
} {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
fs := tc.makeFs()
nodelist, err := scanFilesystem(context.Background(), fs)
if tc.mustErr {
require.Error(t, err)
return
}
require.NoError(t, err)
require.Len(t, nodelist.Nodes, tc.expectedLen)

// Compare the nodes, make sure they are equal
for i := range nodelist.Nodes {
nodelist.Nodes[i].Id = strings.Repeat(fmt.Sprintf("%d", i), 10)
require.Equal(t, tc.expect.Nodes[i].Checksum(), nodelist.Nodes[i].Checksum())
}
})
}
}
81 changes: 17 additions & 64 deletions internal/engine/ingester/deps/deps.go
@@ -12,15 +12,12 @@ import (
"github.com/go-git/go-billy/v5"
"github.com/go-git/go-billy/v5/helper/iofs"
"github.com/go-viper/mapstructure/v2"
scalibr "github.com/google/osv-scalibr"
"github.com/google/osv-scalibr/extractor/filesystem/list"
scalibr_fs "github.com/google/osv-scalibr/fs"
scalibr_plugin "github.com/google/osv-scalibr/plugin"
"github.com/google/uuid"
"github.com/protobom/protobom/pkg/sbom"
"github.com/rs/zerolog"
"google.golang.org/protobuf/reflect/protoreflect"

mdeps "github.com/mindersec/minder/internal/deps"
"github.com/mindersec/minder/internal/deps/scalibr"
engerrors "github.com/mindersec/minder/internal/engine/errors"
pb "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
"github.com/mindersec/minder/pkg/engine/v1/interfaces"
@@ -36,8 +33,9 @@ const (

// Deps is the engine for a rule type that uses deps data ingest
type Deps struct {
cfg *pb.DepsType
gitprov provifv1.Git
cfg *pb.DepsType
gitprov provifv1.Git
extractor mdeps.Extractor
}

// Config is the set of parameters to the deps rule data ingest engine
@@ -56,8 +54,9 @@ func NewDepsIngester(cfg *pb.DepsType, gitprov provifv1.Git) (*Deps, error) {
}

return &Deps{
cfg: cfg,
gitprov: gitprov,
cfg: cfg,
gitprov: gitprov,
extractor: scalibr.NewExtractor(),
}, nil
}

@@ -72,7 +71,7 @@ func (gi *Deps) GetConfig() protoreflect.ProtoMessage {
}

// Ingest does the actual data ingestion for a rule type by cloning a git repo,
// and scanning it for dependencies with scalibr.
// and scanning it for dependencies with a dependency extractor
func (gi *Deps) Ingest(ctx context.Context, ent protoreflect.ProtoMessage, params map[string]any) (*interfaces.Result, error) {
switch entity := ent.(type) {
case *pb.Repository:
@@ -81,6 +80,7 @@ func (gi *Deps) Ingest(ctx context.Context, ent protoreflect.ProtoMessage, param
return nil, fmt.Errorf("deps is only supported for repositories")
}
}

func (gi *Deps) ingestRepository(ctx context.Context, repo *pb.Repository, params map[string]any) (*interfaces.Result, error) {
var logger = zerolog.Ctx(ctx)
userCfg := &Config{
@@ -117,7 +117,7 @@ func (gi *Deps) ingestRepository(ctx context.Context, repo *pb.Repository, param
return nil, fmt.Errorf("could not get worktree: %w", err)
}

deps, err := scanFs(ctx, wt.Filesystem)
deps, err := gi.scanMemFs(ctx, wt.Filesystem)
if err != nil {
return nil, fmt.Errorf("could not scan filesystem: %w", err)
}
@@ -163,63 +163,16 @@ func (gi *Deps) getBranch(repo *pb.Repository, branch string) string {
return defaultBranch
}

func scanFs(ctx context.Context, memFS billy.Filesystem) (*sbom.NodeList, error) {
// scanMemFs scans a billy memory filesystem for software dependencies.
func (gi *Deps) scanMemFs(ctx context.Context, memFS billy.Filesystem) (*sbom.NodeList, error) {
if memFS == nil {
return nil, fmt.Errorf("unable to scan dependencies, no active defined")
}
// have to down-cast here, because scalibr needs multiple io/fs types
wrapped, ok := iofs.New(memFS).(scalibr_fs.FS)
if !ok {
return nil, fmt.Errorf("error converting filesystem to ReadDirFS")
}

desiredCaps := scalibr_plugin.Capabilities{
OS: scalibr_plugin.OSLinux,
Network: true,
DirectFS: false,
RunningSystem: false,
}

scalibrFs := scalibr_fs.ScanRoot{FS: wrapped}
scanConfig := scalibr.ScanConfig{
ScanRoots: []*scalibr_fs.ScanRoot{&scalibrFs},
// All includes Ruby, Dotnet which we're not ready to test yet, so use the more limited Default set.
FilesystemExtractors: list.FilterByCapabilities(list.Default, &desiredCaps),
Capabilities: &desiredCaps,
}

scanner := scalibr.New()
scanResults := scanner.Scan(ctx, &scanConfig)

if scanResults == nil || scanResults.Status == nil {
return nil, fmt.Errorf("error scanning files: no results")
}
if scanResults.Status.Status != scalibr_plugin.ScanStatusSucceeded {
return nil, fmt.Errorf("error scanning files: %s", scanResults.Status)
}

res := sbom.NewNodeList()
for _, inv := range scanResults.Inventories {
node := &sbom.Node{
Type: sbom.Node_PACKAGE,
Id: uuid.New().String(),
Name: inv.Name,
Version: inv.Version,
Identifiers: map[int32]string{
int32(sbom.SoftwareIdentifierType_PURL): inv.Extractor.ToPURL(inv).String(),
// TODO: scalibr returns a _list_ of CPEs, but protobom will store one.
// use the first?
// int32(sbom.SoftwareIdentifierType_CPE23): inv.Extractor.ToCPEs(inv),
},
}
for _, l := range inv.Locations {
node.Properties = append(node.Properties, &sbom.Property{
Name: "sourceFile",
Data: l,
})
}
res.AddNode(node)
nl, err := gi.extractor.ScanFilesystem(ctx, iofs.New(memFS))
if err != nil {
return nil, fmt.Errorf("%T extractor: %w", gi.extractor, err)
}

return res, nil
return nl, err
}
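Since the ingester now holds its extractor behind the mdeps.Extractor interface, a white-box test could plausibly swap in a stub and exercise scanMemFs without invoking osv-scalibr at all. A rough sketch under that assumption; the stubExtractor type, the direct &Deps{...} construction, and the test itself are hypothetical and not part of this commit.

package deps

import (
    "context"
    "io/fs"
    "testing"

    "github.com/go-git/go-billy/v5/memfs"
    "github.com/protobom/protobom/pkg/sbom"
    "github.com/stretchr/testify/require"
)

// stubExtractor is a hypothetical mdeps.Extractor that returns a fixed node
// list, standing in for the scalibr implementation.
type stubExtractor struct{}

func (*stubExtractor) ScanFilesystem(_ context.Context, _ fs.FS) (*sbom.NodeList, error) {
    nl := sbom.NewNodeList()
    nl.AddNode(&sbom.Node{
        Type: sbom.Node_PACKAGE,
        Id:   "stub-node",
        Name: "example-dep",
    })
    return nl, nil
}

func TestScanMemFsWithStub(t *testing.T) {
    t.Parallel()
    // Inject the stub instead of the default scalibr-backed extractor.
    gi := &Deps{extractor: &stubExtractor{}}
    nl, err := gi.scanMemFs(context.Background(), memfs.New())
    require.NoError(t, err)
    require.Len(t, nl.Nodes, 1)
}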