Skip to content

Commit 8719906

Browse files
committed
Improve XLSX output for Vulnerabilities #1519
Signed-off-by: tdruez <[email protected]>
1 parent 07a0022 commit 8719906

File tree

4 files changed

+108
-23
lines changed

4 files changed

+108
-23
lines changed

CHANGELOG.rst

+5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ v34.9.4 (unreleased)
3838
- Add a "TODOS" sheet containing only REQUIRES_REVIEW resources in XLSX.
3939
https://github.com/aboutcode-org/scancode.io/issues/1524
4040

41+
- Improve XLSX output for Vulnerabilities.
42+
Replace the ``affected_by_vulnerabilities`` field in the PACKAGES and DEPENDENCIES
43+
sheets with a dedicated VULNERABILITIES sheet.
44+
https://github.com/aboutcode-org/scancode.io/issues/1519
45+
4146
v34.9.3 (2024-12-31)
4247
--------------------
4348

scanpipe/models.py

+17-8
Original file line numberDiff line numberDiff line change
@@ -3071,9 +3071,22 @@ def vulnerable(self):
30713071
return self.filter(~Q(affected_by_vulnerabilities__in=EMPTY_VALUES))
30723072

30733073

3074+
class OnlyPackageURLFieldsQuerySetMixin:
    """QuerySet mixin limiting the selected fields to the Package URL ones."""

    def only_package_url_fields(self, extra=None):
        """
        Restrict the selected fields to the primary key and the PURL fields,
        the minimum required to render a Package link in the UI.

        Field names provided in ``extra`` are selected as well.
        """
        extra_fields = extra if extra else []
        return self.only("pk", *PACKAGE_URL_FIELDS, *extra_fields)
3084+
3085+
30743086
class DiscoveredPackageQuerySet(
30753087
VulnerabilityQuerySetMixin,
30763088
PackageURLQuerySetMixin,
3089+
OnlyPackageURLFieldsQuerySetMixin,
30773090
ComplianceAlertQuerySetMixin,
30783091
ProjectRelatedQuerySet,
30793092
):
@@ -3086,13 +3099,6 @@ def with_resources_count(self):
30863099
)
30873100
return self.annotate(resources_count=count_subquery)
30883101

3089-
def only_package_url_fields(self):
3090-
"""
3091-
Only select and return the UUID and PURL fields.
3092-
Minimum requirements to render a Package link in the UI.
3093-
"""
3094-
return self.only("uuid", *PACKAGE_URL_FIELDS)
3095-
30963102
def filter(self, *args, **kwargs):
30973103
"""Add support for using ``package_url`` as a field lookup."""
30983104
if purl_str := kwargs.pop("package_url", None):
@@ -3671,7 +3677,10 @@ def as_cyclonedx(self):
36713677

36723678

36733679
class DiscoveredDependencyQuerySet(
3674-
PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
3680+
PackageURLQuerySetMixin,
3681+
OnlyPackageURLFieldsQuerySetMixin,
3682+
VulnerabilityQuerySetMixin,
3683+
ProjectRelatedQuerySet,
36753684
):
36763685
def prefetch_for_serializer(self):
36773686
"""

scanpipe/pipes/output.py

+80-9
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def get_queryset(project, model_name):
101101

102102
queryset = querysets.get(model_name)
103103
if project:
104-
queryset = queryset.filter(project=project)
104+
queryset = queryset.project(project)
105105

106106
return queryset
107107

@@ -303,6 +303,14 @@ def to_json(project):
303303
"projectmessage": "MESSAGES",
304304
}
305305

306+
# Maps a model ``_meta.model_name`` to the short label written in the
# ``object_type`` column of the XLSX output.
model_name_to_object_type = dict(
    discoveredpackage="package",
    discovereddependency="dependency",
    codebaseresource="resource",
    codebaserelation="relation",
    projectmessage="message",
)
313+
306314

307315
def queryset_to_xlsx_worksheet(
308316
queryset,
@@ -333,15 +341,15 @@ def queryset_to_xlsx_worksheet(
333341
if prepend_fields:
334342
fields = prepend_fields + fields
335343

336-
return _add_xlsx_worksheet(
344+
return add_xlsx_worksheet(
337345
workbook=workbook,
338346
worksheet_name=worksheet_name,
339347
rows=queryset,
340348
fields=fields,
341349
)
342350

343351

344-
def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
352+
def add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
345353
"""
346354
Add a new ``worksheet_name`` worksheet to the ``workbook``
347355
``xlsxwriter.Workbook``. Write the iterable of ``rows`` objects using their
@@ -478,6 +486,7 @@ def to_xlsx(project):
478486
"license_detections",
479487
"other_license_detections",
480488
"license_clues",
489+
"affected_by_vulnerabilities",
481490
]
482491

483492
if not project.policies_enabled:
@@ -497,17 +506,79 @@ def to_xlsx(project):
497506
queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields)
498507

499508
if layers_data := docker.get_layers_data(project):
500-
_add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
509+
add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
501510

502-
todos_queryset = get_queryset(project, "todos")
503-
if todos_queryset:
504-
queryset_to_xlsx_worksheet(
505-
todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
506-
)
511+
add_vulnerabilities_sheet(workbook, project)
512+
add_todos_sheet(workbook, project, exclude_fields)
507513

508514
return output_file
509515

510516

517+
def add_vulnerabilities_sheet(workbook, project):
    """
    Add a "VULNERABILITIES" worksheet to the ``workbook`` with one row for each
    vulnerability entry found on the vulnerable packages and dependencies of
    the ``project``.

    The worksheet is not created when there are no rows to write.
    """
    vulnerability_fields = [
        "vulnerability_id",
        "aliases",
        "summary",
        "risk_score",
        "exploitability",
        "weighted_severity",
        "resource_url",
    ]
    sheet_fields = ["object_type", "package_url", *vulnerability_fields]

    rows = []
    # Packages are listed first, followed by the dependencies.
    for model_class in (DiscoveredPackage, DiscoveredDependency):
        vulnerable_objects = (
            model_class.objects.project(project)
            .vulnerable()
            .only_package_url_fields(extra=["affected_by_vulnerabilities"])
            .order_by_package_url()
        )
        object_type = model_name_to_object_type.get(model_class._meta.model_name)

        for vulnerable_object in vulnerable_objects:
            purl = vulnerable_object.package_url

            for vulnerability in vulnerable_object.affected_by_vulnerabilities:
                entry = {"object_type": object_type, "package_url": purl}
                # Missing vulnerability values are written as empty strings.
                entry.update(
                    (field_name, vulnerability.get(field_name, ""))
                    for field_name in vulnerability_fields
                )
                rows.append(entry)

    if not rows:
        return

    add_xlsx_worksheet(
        workbook=workbook,
        worksheet_name="VULNERABILITIES",
        rows=rows,
        fields=sheet_fields,
    )
572+
573+
574+
def add_todos_sheet(workbook, project, exclude_fields):
    """
    Add a "TODOS" worksheet to the ``workbook`` from the "todos" queryset of
    the ``project``, leaving out the ``exclude_fields``.

    The worksheet is not created when that queryset is empty.
    """
    todos = get_queryset(project, "todos")
    if not todos:
        return

    queryset_to_xlsx_worksheet(
        todos, workbook, exclude_fields, worksheet_name="TODOS"
    )
580+
581+
511582
def _get_spdx_extracted_licenses(license_expressions):
512583
"""
513584
Generate and return the SPDX `extracted_licenses` from provided

scanpipe/tests/pipes/test_output.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -217,13 +217,13 @@ def test_scanpipe_pipes_outputs_to_xlsx(self):
217217
project=project, path="path/file1.ext", status=flag.REQUIRES_REVIEW
218218
)
219219

220-
output_file = output.to_xlsx(project=project)
220+
with self.assertNumQueries(12):
221+
output_file = output.to_xlsx(project=project)
221222
self.assertIn(output_file.name, project.output_root)
222223

223224
# Make sure the output can be generated even if the work_directory was wiped
224225
shutil.rmtree(project.work_directory)
225-
with self.assertNumQueries(10):
226-
output_file = output.to_xlsx(project=project)
226+
output_file = output.to_xlsx(project=project)
227227
self.assertIn(output_file.name, project.output_root)
228228

229229
workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
@@ -499,7 +499,7 @@ def test_scanpipe_pipes_outputs_to_attribution(self):
499499

500500

501501
class ScanPipeXLSXOutputPipesTest(TestCase):
502-
def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
502+
def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
503503
# This test verifies that we do not truncate long text silently
504504

505505
test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
@@ -532,7 +532,7 @@ def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
532532
if r != x:
533533
self.assertEqual(r[-50:], x)
534534

535-
def test__add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
535+
def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
536536
# This test verifies that we do not truncate long text silently
537537

538538
test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
@@ -736,7 +736,7 @@ def __init__(self, foo):
736736

737737
output_file = test_dir / workbook_name
738738
with xlsxwriter.Workbook(str(output_file)) as workbook:
739-
output._add_xlsx_worksheet(
739+
output.add_xlsx_worksheet(
740740
workbook=workbook,
741741
worksheet_name="packages",
742742
rows=rows,

0 commit comments

Comments
 (0)