Skip to content

Commit d40f795

Browse files
committed
feat: add merge sboms feature
sbomtool merge feature allows multiple sboms of the same type to be merged into a single sbom. This change allows us to merge the sboms of multiple individual packages into a large sbom for an image. Signed-off-by: Richard Kelly <[email protected]>
1 parent bcc15cd commit d40f795

File tree

7 files changed

+804
-55
lines changed

7 files changed

+804
-55
lines changed

sbomtool/README.md

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ A Software Bill of Materials (SBOM) generation tool for the Bottlerocket SDK.
1212
- Generate SBOM files in multiple formats:
1313
- SPDX 2.3 (JSON)
1414
- CycloneDX 1.6 (JSON)
15-
- Future support for merging multiple SBOM files
15+
- Merge multiple SBOM files, deduplicating packages by CPE (falling back to name + version + type)
16+
- Filter SBOM files based on buildroot contents
1617

1718
## Installation
1819

@@ -46,18 +47,25 @@ Options:
4647
- `--spdx`: Generate an SPDX SBOM
4748
- `--cyclonedx`: Generate a CycloneDX SBOM
4849

49-
#### Merge (Future Feature)
50+
#### Merge
5051

51-
Merge multiple SBOM files:
52+
Merge multiple SBOM files into a single comprehensive SBOM:
5253

5354
```
5455
sbomtool merge [options] file1 file2 [file3...]
5556
```
5657

5758
Options:
58-
- `--level int`: Merge level (default 0)
59+
- `--output string`: Output file path for merged SBOM (required)
60+
- `--level int`: Merge level (reserved for future use) (default 0)
5961

60-
Note: This feature is not yet implemented.
62+
The merge command combines multiple SBOM files while:
63+
- Deduplicating packages using CPE-based matching
64+
- Preserving all dependency relationships
65+
- Maintaining SBOM format integrity
66+
- Providing comprehensive merge statistics
67+
68+
All input files must be in the same format (SPDX or CycloneDX).
6169

6270
### Examples
6371

@@ -76,12 +84,38 @@ Generate both SPDX and CycloneDX SBOMs:
7684
sbomtool generate --name mypackage --build-dir ./build --out-dir ./sbom --spdx --cyclonedx
7785
```
7886

87+
Merge multiple SPDX SBOMs:
88+
```
89+
sbomtool merge --output merged.json app1-spdx.json app2-spdx.json lib1-spdx.json
90+
```
91+
92+
Merge with debug logging:
93+
```
94+
sbomtool --log-level debug merge --output final.json app1.json app2.json app3.json
95+
```
96+
7997
## Output
8098

8199
The tool generates SBOM files in the specified output directory:
82100
- `{name}-spdx.json`: SPDX format SBOM
83101
- `{name}-cyclonedx.json`: CycloneDX format SBOM
84102

103+
## Deduplication Behavior
104+
105+
The merge command deduplicates packages that appear in more than one input SBOM:
106+
107+
### CPE-Based Deduplication
108+
- **Primary Strategy**: Uses CPE as the canonical identifier
109+
- **Fallback Strategy**: Uses name + version + type for packages without CPE
110+
- **Metadata Merging**: Combines licenses, files, and other metadata from duplicate packages
111+
- **Relationship Preservation**: Updates all dependency relationships to reference canonical packages
112+
113+
### Deduplication Process
114+
1. **Package Identity**: Generates canonical keys using CPE or fallback strategy
115+
2. **Conflict Resolution**: First occurrence with CPE becomes canonical
116+
3. **Metadata Consolidation**: Merges all metadata from duplicate packages
117+
4. **Relationship Updates**: Updates all relationships to use canonical package IDs
118+
85119
## Implementation Details
86120

87121
`sbomtool` uses the [Anchore Syft](https://github.com/anchore/syft) library for SBOM generation, which provides comprehensive package detection across various ecosystems.

sbomtool/cmd/sbomtool/main.go

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ and includes CPE-based package deduplication for both merge and filter operation
9797
return rootCmd
9898
}
9999

100-
// createGenerateCommand creates and configures the generate subcommand.
100+
// createGenerateCommand creates the generate subcommand for SBOM file creation.
101101
func createGenerateCommand() *cobra.Command {
102102
generateCmd := &cobra.Command{
103103
Use: "generate",
@@ -447,32 +447,71 @@ func createMergeCommand() *cobra.Command {
447447
Short: "Merge multiple SBOM files",
448448
Long: `Merge multiple SBOM files into a single SBOM.
449449
450-
This feature is not yet implemented and will return an error.
451-
Future versions will support merging SBOM files with configurable merge levels.`,
450+
DESCRIPTION:
451+
The merge command combines multiple SBOM files while:
452+
- Deduplicating packages that appear in multiple inputs
453+
- Preserving all dependency relationships
454+
- Maintaining SBOM format integrity
455+
- Providing comprehensive merge statistics
456+
457+
DEDUPLICATION:
458+
Packages are deduplicated based on CPE when available, with fallback to name+version+type.
459+
When duplicates are found:
460+
- First occurrence becomes the canonical package
461+
- File lists and metadata are merged from all duplicates
462+
- All relationships are updated to reference canonical packages
463+
464+
SUPPORTED FORMATS:
465+
- SPDX 2.3 (JSON)
466+
- CycloneDX 1.6 (JSON)
467+
All input files must be the same format.`,
468+
469+
Example: ` # Merge multiple SPDX SBOMs
470+
sbomtool merge --output merged.json app1-spdx.json app2-spdx.json lib1-spdx.json
471+
472+
# Merge with debug logging
473+
sbomtool --log-level debug merge --output final.json app1.json app2.json app3.json`,
474+
452475
Args: cobra.MinimumNArgs(2),
453476
RunE: runMerge,
454477
}
455478

456-
mergeCmd.Flags().Int("level", 0, "Merge level")
479+
mergeCmd.Flags().String("output", "", "Output file path for merged SBOM (required)")
480+
mergeCmd.Flags().Int("level", 0, "Merge level (reserved for future use)")
481+
if err := mergeCmd.MarkFlagRequired("output"); err != nil {
482+
slog.Error("Failed to mark output flag as required", "error", err)
483+
os.Exit(1)
484+
}
457485

458486
return mergeCmd
459487
}
460488

461489
// runMerge executes the SBOM merge process.
462-
//
463-
// Currently returns ErrNotImplemented as the merge functionality is planned for future implementation.
464490
func runMerge(cmd *cobra.Command, args []string) error {
491+
outputPath, _ := cmd.Flags().GetString("output")
465492
level, _ := cmd.Flags().GetInt("level")
466493

467-
slog.Debug("Starting sbomtool merge",
468-
"level", level,
469-
"file_count", len(args))
494+
config := merge.MergeConfig{
495+
OutputPath: outputPath,
496+
Level: level,
497+
}
470498

471-
_, err := merge.Merge(level, args)
499+
slog.Info("Starting SBOM merge process",
500+
"input_files", len(args),
501+
"output_path", outputPath)
502+
503+
result, err := merge.Merge(config, args)
472504
if err != nil {
473-
return fmt.Errorf("SBOM merge failed: %w", err)
505+
return fmt.Errorf("sbom merge failed: %w", err)
474506
}
475507

508+
slog.Info("SBOM merge completed successfully",
509+
"input_sboms", result.Statistics.InputSBOMs,
510+
"input_packages", result.Statistics.TotalInputPackages,
511+
"output_packages", result.Statistics.OutputPackages,
512+
"deduplicated", result.Statistics.DeduplicatedPackages,
513+
"processing_time", result.Statistics.ProcessingTime)
514+
476515
return nil
477516
}
478517

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"os"
6+
"path/filepath"
7+
"testing"
8+
9+
"github.com/anchore/syft/syft/pkg"
10+
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
12+
13+
"github.com/bottlerocket-os/bottlerocket-sdk/sbomtool/go/internal/commands/merge"
14+
)
15+
16+
func TestMergeIntegration(t *testing.T) {
17+
// GIVEN: Two SPDX SBOM files with overlapping packages
18+
// WHEN: Merge command is executed
19+
// THEN: Merged SBOM should be created with proper deduplication
20+
21+
tempDir := t.TempDir()
22+
23+
// Create test SPDX SBOMs
24+
sbom1 := createTestSPDXSBOM("app1", []TestPackage{
25+
{Name: "pkg1", Version: "1.0.0"},
26+
{Name: "pkg2", Version: "2.0.0"},
27+
})
28+
sbom2 := createTestSPDXSBOM("app2", []TestPackage{
29+
{Name: "pkg1", Version: "1.0.0"}, // Duplicate
30+
{Name: "pkg3", Version: "3.0.0"},
31+
})
32+
33+
// Write test files
34+
file1 := filepath.Join(tempDir, "sbom1.json")
35+
file2 := filepath.Join(tempDir, "sbom2.json")
36+
outputFile := filepath.Join(tempDir, "merged.json")
37+
38+
require.NoError(t, writeJSONFile(file1, sbom1))
39+
require.NoError(t, writeJSONFile(file2, sbom2))
40+
41+
// Execute merge command
42+
rootCmd := createRootCommand()
43+
rootCmd.SetArgs([]string{
44+
"merge",
45+
"--output", outputFile,
46+
file1, file2,
47+
})
48+
49+
err := rootCmd.Execute()
50+
require.NoError(t, err, "Merge command should execute successfully")
51+
52+
// Verify output file exists
53+
assert.FileExists(t, outputFile, "Merged SBOM file should be created")
54+
55+
// Test the in-memory SBOM directly (like deduplication tests)
56+
// Re-run the merge logic to get the in-memory result
57+
config := merge.MergeConfig{
58+
OutputPath: outputFile,
59+
Level: 0,
60+
}
61+
result, err := merge.Merge(config, []string{file1, file2})
62+
require.NoError(t, err, "Direct merge should work")
63+
64+
// Test the in-memory SBOM packages
65+
packages := make([]pkg.Package, 0)
66+
for p := range result.MergedSBOM.Artifacts.Packages.Enumerate() {
67+
packages = append(packages, p)
68+
}
69+
70+
// Basic structure validation on in-memory SBOM
71+
assert.Greater(t, len(packages), 0, "Should have packages after merge")
72+
assert.Equal(t, 3, len(packages), "Should have exactly 3 packages after deduplication (pkg1, pkg2, pkg3)")
73+
74+
// Verify specific packages exist
75+
packageNames := make([]string, len(packages))
76+
for i, p := range packages {
77+
packageNames[i] = p.Name
78+
}
79+
assert.Contains(t, packageNames, "pkg1")
80+
assert.Contains(t, packageNames, "pkg2")
81+
assert.Contains(t, packageNames, "pkg3")
82+
}
83+
84+
func TestMergeIntegrationCycloneDX(t *testing.T) {
85+
// GIVEN: Two CycloneDX SBOM files with overlapping packages
86+
// WHEN: Merge command is executed
87+
// THEN: Merged SBOM should be created with proper deduplication
88+
89+
tempDir := t.TempDir()
90+
91+
// Create test CycloneDX SBOMs
92+
sbom1 := createTestCycloneDXSBOM("app1", []TestPackage{
93+
{Name: "pkg1", Version: "1.0.0"},
94+
{Name: "pkg2", Version: "2.0.0"},
95+
})
96+
sbom2 := createTestCycloneDXSBOM("app2", []TestPackage{
97+
{Name: "pkg1", Version: "1.0.0"}, // Duplicate
98+
{Name: "pkg3", Version: "3.0.0"},
99+
})
100+
101+
// Write test files
102+
file1 := filepath.Join(tempDir, "sbom1.json")
103+
file2 := filepath.Join(tempDir, "sbom2.json")
104+
outputFile := filepath.Join(tempDir, "merged.json")
105+
106+
require.NoError(t, writeJSONFile(file1, sbom1))
107+
require.NoError(t, writeJSONFile(file2, sbom2))
108+
109+
// Execute merge command
110+
rootCmd := createRootCommand()
111+
rootCmd.SetArgs([]string{
112+
"merge",
113+
"--output", outputFile,
114+
file1, file2,
115+
})
116+
117+
err := rootCmd.Execute()
118+
require.NoError(t, err, "Merge command should execute successfully")
119+
120+
// Verify output file exists
121+
assert.FileExists(t, outputFile, "Merged SBOM file should be created")
122+
123+
// Test the in-memory SBOM directly (like deduplication tests)
124+
// Re-run the merge logic to get the in-memory result
125+
config := merge.MergeConfig{
126+
OutputPath: outputFile,
127+
Level: 0,
128+
}
129+
result, err := merge.Merge(config, []string{file1, file2})
130+
require.NoError(t, err, "Direct merge should work")
131+
132+
// Test the in-memory SBOM packages
133+
packages := make([]pkg.Package, 0)
134+
for p := range result.MergedSBOM.Artifacts.Packages.Enumerate() {
135+
packages = append(packages, p)
136+
}
137+
138+
// Basic structure validation on in-memory SBOM
139+
assert.Greater(t, len(packages), 0, "Should have packages after merge")
140+
// CycloneDX includes metadata components (app1, app2) plus deduplicated libraries (pkg1, pkg2, pkg3)
141+
assert.Equal(t, 5, len(packages), "Should have 5 packages: 2 app components + 3 deduplicated libraries")
142+
143+
// Verify specific library packages exist (the actual dependencies)
144+
packageNames := make([]string, len(packages))
145+
for i, p := range packages {
146+
packageNames[i] = p.Name
147+
}
148+
assert.Contains(t, packageNames, "pkg1")
149+
assert.Contains(t, packageNames, "pkg2")
150+
assert.Contains(t, packageNames, "pkg3")
151+
assert.Contains(t, packageNames, "app1") // Metadata component
152+
assert.Contains(t, packageNames, "app2") // Metadata component
153+
}
154+
155+
// TestPackage is the minimal package description the SBOM fixture builders
// in this file accept: a package name and its version string.
type TestPackage struct {
	Name, Version string
}
160+
161+
// createTestSPDXSBOM creates a minimal SPDX SBOM for testing
162+
func createTestSPDXSBOM(name string, packages []TestPackage) map[string]interface{} {
163+
spdxPackages := make([]map[string]interface{}, len(packages))
164+
for i, pkg := range packages {
165+
spdxPackages[i] = map[string]interface{}{
166+
"SPDXID": "SPDXRef-Package-" + pkg.Name,
167+
"name": pkg.Name,
168+
"versionInfo": pkg.Version,
169+
}
170+
}
171+
172+
return map[string]interface{}{
173+
"spdxVersion": "SPDX-2.3",
174+
"dataLicense": "CC0-1.0",
175+
"SPDXID": "SPDXRef-DOCUMENT",
176+
"name": name,
177+
"documentNamespace": "https://example.com/" + name,
178+
"packages": spdxPackages,
179+
}
180+
}
181+
182+
// createTestCycloneDXSBOM creates a minimal CycloneDX SBOM for testing
183+
func createTestCycloneDXSBOM(name string, packages []TestPackage) map[string]interface{} {
184+
components := make([]map[string]interface{}, len(packages))
185+
for i, pkg := range packages {
186+
components[i] = map[string]interface{}{
187+
"type": "library",
188+
"name": pkg.Name,
189+
"version": pkg.Version,
190+
}
191+
}
192+
193+
return map[string]interface{}{
194+
"bomFormat": "CycloneDX",
195+
"specVersion": "1.6",
196+
"version": 1,
197+
"metadata": map[string]interface{}{
198+
"component": map[string]interface{}{
199+
"type": "application",
200+
"name": name,
201+
},
202+
},
203+
"components": components,
204+
}
205+
}
206+
207+
// writeJSONFile serializes data as indented JSON and stores it at path with
// mode 0644. It returns the marshalling error if data cannot be encoded,
// otherwise whatever os.WriteFile reports.
func writeJSONFile(path string, data interface{}) error {
	encoded, marshalErr := json.MarshalIndent(data, "", " ")
	if marshalErr == nil {
		return os.WriteFile(path, encoded, 0644)
	}
	return marshalErr
}

0 commit comments

Comments
 (0)