Skip to content

Commit

Permalink
Improve XLSX output for Vulnerabilities aboutcode-org#1519 (aboutcod…
Browse files Browse the repository at this point in the history
…e-org#1531)

Signed-off-by: tdruez <[email protected]>
  • Loading branch information
tdruez authored Jan 13, 2025
1 parent 07a0022 commit 1691bd1
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 20 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ v34.9.4 (unreleased)
- Add a "TODOS" sheet containing only REQUIRES_REVIEW resources in XLSX.
https://github.com/aboutcode-org/scancode.io/issues/1524

- Improve XLSX output for Vulnerabilities.
Replace the ``affected_by_vulnerabilities`` field in the PACKAGES and DEPENDENCIES
sheets with a dedicated VULNERABILITIES sheet.
https://github.com/aboutcode-org/scancode.io/issues/1519

v34.9.3 (2024-12-31)
--------------------

Expand Down
21 changes: 18 additions & 3 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3086,12 +3086,15 @@ def with_resources_count(self):
)
return self.annotate(resources_count=count_subquery)

def only_package_url_fields(self):
def only_package_url_fields(self, extra=None):
"""
Only select and return the UUID and PURL fields.
Minimum requirements to render a Package link in the UI.
"""
return self.only("uuid", *PACKAGE_URL_FIELDS)
if not extra:
extra = []

return self.only("uuid", *PACKAGE_URL_FIELDS, *extra)

def filter(self, *args, **kwargs):
"""Add support for using ``package_url`` as a field lookup."""
Expand Down Expand Up @@ -3671,7 +3674,9 @@ def as_cyclonedx(self):


class DiscoveredDependencyQuerySet(
PackageURLQuerySetMixin, VulnerabilityQuerySetMixin, ProjectRelatedQuerySet
PackageURLQuerySetMixin,
VulnerabilityQuerySetMixin,
ProjectRelatedQuerySet,
):
def prefetch_for_serializer(self):
"""
Expand All @@ -3692,6 +3697,16 @@ def prefetch_for_serializer(self):
),
)

def only_package_url_fields(self, extra=None):
    """
    Restrict the selected fields to ``dependency_uid`` and the PURL fields
    listed in ``PACKAGE_URL_FIELDS``.
    Additional field names can be selected by providing them as the
    ``extra`` iterable.
    """
    # A missing or empty ``extra`` contributes no additional field.
    selected_fields = ["dependency_uid", *PACKAGE_URL_FIELDS, *(extra or [])]
    return self.only(*selected_fields)


class DiscoveredDependency(
ProjectRelatedModel,
Expand Down
89 changes: 80 additions & 9 deletions scanpipe/pipes/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def get_queryset(project, model_name):

queryset = querysets.get(model_name)
if project:
queryset = queryset.filter(project=project)
queryset = queryset.project(project)

return queryset

Expand Down Expand Up @@ -303,6 +303,14 @@ def to_json(project):
"projectmessage": "MESSAGES",
}

# Map a model's ``_meta.model_name`` to the short label written in the
# ``object_type`` column of the VULNERABILITIES worksheet rows.
model_name_to_object_type = {
"discoveredpackage": "package",
"discovereddependency": "dependency",
"codebaseresource": "resource",
"codebaserelation": "relation",
"projectmessage": "message",
}


def queryset_to_xlsx_worksheet(
queryset,
Expand Down Expand Up @@ -333,15 +341,15 @@ def queryset_to_xlsx_worksheet(
if prepend_fields:
fields = prepend_fields + fields

return _add_xlsx_worksheet(
return add_xlsx_worksheet(
workbook=workbook,
worksheet_name=worksheet_name,
rows=queryset,
fields=fields,
)


def _add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
def add_xlsx_worksheet(workbook, worksheet_name, rows, fields):
"""
Add a new ``worksheet_name`` worksheet to the ``workbook``
``xlsxwriter.Workbook``. Write the iterable of ``rows`` objects using their
Expand Down Expand Up @@ -478,6 +486,7 @@ def to_xlsx(project):
"license_detections",
"other_license_detections",
"license_clues",
"affected_by_vulnerabilities",
]

if not project.policies_enabled:
Expand All @@ -497,17 +506,79 @@ def to_xlsx(project):
queryset_to_xlsx_worksheet(queryset, workbook, exclude_fields)

if layers_data := docker.get_layers_data(project):
_add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)
add_xlsx_worksheet(workbook, "LAYERS", layers_data, docker.layer_fields)

todos_queryset = get_queryset(project, "todos")
if todos_queryset:
queryset_to_xlsx_worksheet(
todos_queryset, workbook, exclude_fields, worksheet_name="TODOS"
)
add_vulnerabilities_sheet(workbook, project)
add_todos_sheet(workbook, project, exclude_fields)

return output_file


def add_vulnerabilities_sheet(workbook, project):
    """
    Add a "VULNERABILITIES" worksheet to the ``workbook``.

    One row is written for each entry found in the
    ``affected_by_vulnerabilities`` value of the ``project`` packages and
    dependencies returned by their ``vulnerable()`` QuerySet method.
    Each row starts with the ``object_type`` and ``package_url`` of the
    affected object, followed by the vulnerability fields.
    The worksheet is not added when there are no rows to write.
    """
    vulnerability_fields = [
        "vulnerability_id",
        "aliases",
        "summary",
        "risk_score",
        "exploitability",
        "weighted_severity",
        "resource_url",
    ]
    sheet_fields = ["object_type", "package_url", *vulnerability_fields]

    rows = []
    # Packages first, then dependencies, to keep the rows grouped by type.
    for model_class in (DiscoveredPackage, DiscoveredDependency):
        vulnerable_objects = (
            model_class.objects.project(project)
            .vulnerable()
            .only_package_url_fields(extra=["affected_by_vulnerabilities"])
            .order_by_package_url()
        )
        model_name = vulnerable_objects.model._meta.model_name
        object_type = model_name_to_object_type.get(model_name)

        for vulnerable_object in vulnerable_objects:
            purl = vulnerable_object.package_url

            for vulnerability in vulnerable_object.affected_by_vulnerabilities:
                entry = {"object_type": object_type, "package_url": purl}
                # Fields missing from the vulnerability data are written as
                # empty strings.
                entry.update(
                    (name, vulnerability.get(name, ""))
                    for name in vulnerability_fields
                )
                rows.append(entry)

    if not rows:
        return

    add_xlsx_worksheet(
        workbook=workbook,
        worksheet_name="VULNERABILITIES",
        rows=rows,
        fields=sheet_fields,
    )


def add_todos_sheet(workbook, project, exclude_fields):
    """
    Add a "TODOS" worksheet to the ``workbook`` from the "todos" queryset of
    the ``project``, leaving out the ``exclude_fields``.
    The worksheet is not added when that queryset is empty.
    """
    todos = get_queryset(project, "todos")
    # NOTE(review): presumably a Django QuerySet, in which case this
    # truthiness check evaluates it -- confirm before switching to exists().
    if not todos:
        return

    queryset_to_xlsx_worksheet(
        todos,
        workbook,
        exclude_fields,
        worksheet_name="TODOS",
    )


def _get_spdx_extracted_licenses(license_expressions):
"""
Generate and return the SPDX `extracted_licenses` from provided
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
<span style="vertical-align: middle">
{{ paginator.count|intcomma }} relations
</span>
<a href="{% url 'project_resources' project.slug %}?tag=to&relation_map_type=none&status=_EMPTY_" target="_blank" class="button is-small is-info is-outlined">
<a href="{% url 'project_resources' project.slug %}?tag=to&relation_map_type=none" target="_blank" class="button is-small is-info is-outlined">
<span>Un-mapped <strong>to/</strong> resources</span>
<span class="icon">
<i class="fa-solid fa-external-link-alt"></i>
</span>
</a>
<a href="{% url 'project_resources' project.slug %}?tag=from&relation_map_type=none&status=_EMPTY_" target="_blank" class="button is-small is-info is-outlined">
<a href="{% url 'project_resources' project.slug %}?tag=from&relation_map_type=none" target="_blank" class="button is-small is-info is-outlined">
<span>Un-mapped <strong>from/</strong> resources</span>
<span class="icon">
<i class="fa-solid fa-external-link-alt"></i>
Expand Down
12 changes: 6 additions & 6 deletions scanpipe/tests/pipes/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,13 @@ def test_scanpipe_pipes_outputs_to_xlsx(self):
project=project, path="path/file1.ext", status=flag.REQUIRES_REVIEW
)

output_file = output.to_xlsx(project=project)
with self.assertNumQueries(12):
output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)

# Make sure the output can be generated even if the work_directory was wiped
shutil.rmtree(project.work_directory)
with self.assertNumQueries(10):
output_file = output.to_xlsx(project=project)
output_file = output.to_xlsx(project=project)
self.assertIn(output_file.name, project.output_root)

workbook = openpyxl.load_workbook(output_file, read_only=True, data_only=True)
Expand Down Expand Up @@ -499,7 +499,7 @@ def test_scanpipe_pipes_outputs_to_attribution(self):


class ScanPipeXLSXOutputPipesTest(TestCase):
def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
def test_add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
# This test verifies that we do not truncate long text silently

test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
Expand Down Expand Up @@ -532,7 +532,7 @@ def test__add_xlsx_worksheet_does_truncates_long_strings_over_max_len(self):
if r != x:
self.assertEqual(r[-50:], x)

def test__add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
def test_add_xlsx_worksheet_does_not_munge_long_strings_of_over_1024_lines(self):
# This test verifies that we do not truncate long text silently

test_dir = Path(tempfile.mkdtemp(prefix="scancode-io-test"))
Expand Down Expand Up @@ -736,7 +736,7 @@ def __init__(self, foo):

output_file = test_dir / workbook_name
with xlsxwriter.Workbook(str(output_file)) as workbook:
output._add_xlsx_worksheet(
output.add_xlsx_worksheet(
workbook=workbook,
worksheet_name="packages",
rows=rows,
Expand Down

0 comments on commit 1691bd1

Please sign in to comment.