Skip to content

Commit 5596a7d

Browse files
committed
Improve the CycloneDX SBOM pre-validation fixes #1230
Signed-off-by: tdruez <[email protected]>
1 parent 3df1a0b commit 5596a7d

File tree

5 files changed

+125
-32
lines changed

5 files changed

+125
-32
lines changed

CHANGELOG.rst

+3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ v34.5.0 (unreleased)
1717

1818
- Work around an issue with the cyclonedx-python-lib that does not allow loading
1919
SBOMs that contain properties with no values.
20+
Also, a few pre-validation fixes are applied before deserializing the SBOM for
21+
maximum compatibility.
2022
https://github.com/nexB/scancode.io/issues/1185
23+
https://github.com/nexB/scancode.io/issues/1230
2124

2225
- Add a new `CollectTreeSitterSymbolsAndStrings` pipeline (addon) for collecting source
2326
symbols and strings using tree-sitter.

scanpipe/pipes/cyclonedx.py

+50-20
Original file line numberDiff line numberDiff line change
@@ -171,43 +171,73 @@ def cyclonedx_component_to_package_data(cdx_component):
171171

172172

173173
def get_components(bom):
174-
"""Return list of components from CycloneDX BOM."""
175-
return list(bom._get_all_components())
174+
"""Return components from CycloneDX BOM except for the metadata.component."""
175+
for component in bom.components:
176+
yield from component.get_all_nested_components(include_self=True)
176177

177178

178-
def delete_tools(cyclonedx_document_json):
179+
def delete_ignored_root_properties(cyclonedx_document_json):
179180
"""
180-
Remove the ``tools`` section, if defined, from the SBOM as it can
181-
be in the way of loading a SBOM that is valid regarding the spec, but fails the
182-
deserialization.
181+
Remove root properties from the CycloneDX document that are irrelevant
182+
when loading SBOM component data as packages.
183183
184-
The ``metadata.tools`` as an array was deprecated in 1.5 and replaced by an
185-
object structure where you can define a list of ``components`` and ``services``.
184+
This function aims to maximize compatibility by excluding unsupported SPEC
185+
definitions while utilizing the cyclonedx-python-lib library.
186186
187-
The new structure is not yet supported by the cyclonedx-python-lib, neither for
188-
serialization (output) nor deserialization (input).
189-
https://github.com/CycloneDX/cyclonedx-python-lib/issues/578
187+
The data contained in these properties is unnecessary for loading components
188+
from the SBOM and can be safely disregarded.
190189
191-
The tools are not used anyway in the context of loading the SBOM component data as
192-
packages.
190+
https://github.com/CycloneDX/cyclonedx-python-lib/issues/578
193191
"""
194-
if "tools" in cyclonedx_document_json.get("metadata", {}):
195-
del cyclonedx_document_json["metadata"]["tools"]
192+
ignored_root_properties = [
193+
"metadata",
194+
"services",
195+
"externalReferences",
196+
"compositions",
197+
"vulnerabilities",
198+
"annotations",
199+
"formulation",
200+
"declarations",
201+
"definitions",
202+
"properties",
203+
]
204+
205+
cleaned_document = {
206+
key: value
207+
for key, value in cyclonedx_document_json.items()
208+
if key not in ignored_root_properties
209+
}
196210

197-
return cyclonedx_document_json
211+
return cleaned_document
198212

199213

200-
def delete_empty_properties(cyclonedx_document_json):
214+
def cleanup_components_properties(cyclonedx_document_json):
201215
"""
202216
Remove entries for which no values are set, such as ``{"name": ""}`` or
203217
``"licenses":[{}]``.
204218
219+
Also remove the properties that are not used in the context of loading packages
220+
from SBOM and that may be unsupported by the cyclonedx-python-lib library.
221+
205222
Classes like cyclonedx.model.contact.OrganizationalEntity raise a
206223
NoPropertiesProvidedException while it is not enforced in the spec.
207224
208225
See https://github.com/CycloneDX/cyclonedx-python-lib/issues/600
209226
"""
210227
entries_to_delete = []
228+
ignored_properties = [
229+
"evidence",
230+
"omniborId",
231+
"swhid",
232+
"swid",
233+
"modified",
234+
"pedigree",
235+
"releaseNotes",
236+
"modelCard",
237+
"data",
238+
"cryptoProperties",
239+
"signature",
240+
]
211241

212242
def is_empty(value):
213243
if isinstance(value, dict) and not any(value.values()):
@@ -217,7 +247,7 @@ def is_empty(value):
217247

218248
for component in cyclonedx_document_json["components"]:
219249
for property_name, property_value in component.items():
220-
if is_empty(property_value):
250+
if is_empty(property_value) or property_name in ignored_properties:
221251
entries_to_delete.append((component, property_name))
222252

223253
# Delete the keys outside the main check loop
@@ -240,8 +270,8 @@ def resolve_cyclonedx_packages(input_location):
240270
cyclonedx_document = json.loads(document_data)
241271

242272
# Apply a few fixes pre-validation for maximum compatibility
243-
cyclonedx_document = delete_tools(cyclonedx_document)
244-
cyclonedx_document = delete_empty_properties(cyclonedx_document)
273+
cyclonedx_document = delete_ignored_root_properties(cyclonedx_document)
274+
cyclonedx_document = cleanup_components_properties(cyclonedx_document)
245275

246276
if errors := validate_document(cyclonedx_document):
247277
error_msg = (
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json",
3+
"bomFormat": "CycloneDX",
4+
"specVersion": "1.6",
5+
"serialNumber": "urn:uuid:b74fe5df-e965-415e-ba65-f38421a0695d",
6+
"version": 1,
7+
"metadata": {
8+
"lifecycles": [
9+
{
10+
"phase": "build"
11+
}
12+
]
13+
},
14+
"components": [
15+
{
16+
"bom-ref": "pkg:pypi/[email protected]",
17+
"name": "asgiref",
18+
"type": "library",
19+
"supplier": {
20+
"name": ""
21+
},
22+
"licenses": [
23+
{}
24+
],
25+
"evidence": {
26+
"identity": [
27+
{
28+
"field": "purl",
29+
"confidence": 1
30+
}
31+
]
32+
}
33+
}
34+
]
35+
}

scanpipe/tests/pipes/test_cyclonedx.py

+34-9
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ def test_scanpipe_cyclonedx_is_cyclonedx_bom(self):
6262

6363
def test_scanpipe_cyclonedx_get_components(self):
6464
empty_bom = Bom()
65-
self.assertEqual([], cyclonedx.get_components(empty_bom))
65+
self.assertEqual([], list(cyclonedx.get_components(empty_bom)))
6666

67-
components = cyclonedx.get_components(self.bom)
67+
components = list(cyclonedx.get_components(self.bom))
6868
self.assertEqual(3, len(components))
6969

7070
purls = [component.bom_ref.value for component in components]
@@ -198,29 +198,36 @@ def test_scanpipe_cyclonedx_resolve_cyclonedx_packages(self):
198198
# JSON v1.2
199199
input_location = self.data_location / "laravel-7.12.0" / "bom.1.2.json"
200200
packages = cyclonedx.resolve_cyclonedx_packages(input_location)
201-
self.assertEqual(63, len(packages))
201+
self.assertEqual(62, len(packages))
202202

203203
# JSON v1.3
204204
input_location = self.data_location / "laravel-7.12.0" / "bom.1.3.json"
205205
packages = cyclonedx.resolve_cyclonedx_packages(input_location)
206-
self.assertEqual(63, len(packages))
206+
self.assertEqual(62, len(packages))
207207

208208
# JSON v1.4
209209
input_location = self.data_location / "laravel-7.12.0" / "bom.1.4.json"
210210
packages = cyclonedx.resolve_cyclonedx_packages(input_location)
211-
self.assertEqual(63, len(packages))
211+
self.assertEqual(62, len(packages))
212212

213213
# JSON v1.5 (this file is generated by the to_cyclonedx)
214214
input_location = self.data_location / "asgiref-3.3.0.cdx.json"
215215
packages = cyclonedx.resolve_cyclonedx_packages(input_location)
216-
self.assertEqual(3, len(packages))
216+
self.assertEqual(1, len(packages))
217217

218218
# XML v1.4
219219
input_location = self.data_location / "laravel-7.12.0" / "bom.1.4.xml"
220220
packages = cyclonedx.resolve_cyclonedx_packages(input_location)
221-
self.assertEqual(63, len(packages))
221+
self.assertEqual(62, len(packages))
222+
223+
def test_scanpipe_cyclonedx_resolve_cyclonedx_packages_pre_validation(self):
224+
# This SBOM includes multiple deserialization issues that are "fixed"
225+
# by the pre-validation cleanup.
226+
input_location = self.data_location / "broken_sbom.json"
227+
package_data = cyclonedx.resolve_cyclonedx_packages(input_location)
228+
self.assertEqual([{"name": "asgiref"}], package_data)
222229

223-
def test_scanpipe_cyclonedx_delete_empty_properties(self):
230+
def test_scanpipe_cyclonedx_cleanup_components_properties(self):
224231
cyclonedx_document_json = {
225232
"components": [
226233
{
@@ -231,6 +238,24 @@ def test_scanpipe_cyclonedx_delete_empty_properties(self):
231238
}
232239
]
233240
}
234-
results = cyclonedx.delete_empty_properties(cyclonedx_document_json)
241+
results = cyclonedx.cleanup_components_properties(cyclonedx_document_json)
235242
expected = {"components": [{"bom-ref": "pkg:type/name"}]}
236243
self.assertEqual(expected, results)
244+
245+
def test_scanpipe_cyclonedx_delete_ignored_root_properties(self):
246+
cyclonedx_document_json = {
247+
"$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json",
248+
"bomFormat": "CycloneDX",
249+
"specVersion": "1.6",
250+
"serialNumber": "urn:uuid:b74fe5df-e965-415e-ba65-f38421a0695d",
251+
"version": 1,
252+
"metadata": {
253+
"component": {
254+
"bom-ref": "8d3058f3-ec1f-487d-8c5f-b2d3b26cda3e",
255+
},
256+
},
257+
"components": [{"bom-ref": "pkg:type/name"}],
258+
}
259+
results = cyclonedx.delete_ignored_root_properties(cyclonedx_document_json)
260+
self.assertIn("components", results)
261+
self.assertNotIn("metadata", results)

setup.cfg

+3-3
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ install_requires =
5252
importlib-metadata==7.1.0
5353
setuptools==69.5.1
5454
# Django related
55-
Django==5.0.4
55+
Django==5.0.6
5656
django-environ==0.11.2
5757
django-crispy-forms==2.1
5858
crispy-bootstrap3==2024.1
5959
django-filter==24.2
6060
djangorestframework==3.15.1
6161
django-taggit==5.0.1
6262
# Database
63-
psycopg[binary]==3.1.18
63+
psycopg[binary]==3.1.19
6464
# wait_for_database Django management command
6565
django-probes==1.7.0
6666
# Task queue
@@ -91,7 +91,7 @@ install_requires =
9191
# Profiling
9292
pyinstrument==4.6.2
9393
# CycloneDX
94-
cyclonedx-python-lib==7.3.2
94+
cyclonedx-python-lib==7.3.4
9595
jsonschema==4.22.0
9696
# Font Awesome
9797
fontawesomefree==6.5.1

0 commit comments

Comments
 (0)