From c65733ea132dcdb81164d2a762845e53d6645723 Mon Sep 17 00:00:00 2001 From: Jigyasu Rajput Date: Sun, 23 Mar 2025 02:11:01 +0530 Subject: [PATCH 1/6] fix(uC/lib): handling of product names with special characters --- cve_bin_tool/output_engine/html.py | 13 +- .../output_engine/html_reports/js/main.js | 6 +- .../html_reports/templates/base.html | 3 +- cve_bin_tool/output_engine/util.py | 8 +- cve_bin_tool/util.py | 39 +++ cve_bin_tool/vex_manager/parse.py | 38 ++- test/test_product_slash_handling.py | 234 ++++++++++++++++++ 7 files changed, 326 insertions(+), 15 deletions(-) create mode 100644 test/test_product_slash_handling.py diff --git a/cve_bin_tool/output_engine/html.py b/cve_bin_tool/output_engine/html.py index 10f345a453..188ee7d7b8 100644 --- a/cve_bin_tool/output_engine/html.py +++ b/cve_bin_tool/output_engine/html.py @@ -3,6 +3,7 @@ from __future__ import annotations +import re from collections import Counter, defaultdict from datetime import datetime from logging import Logger @@ -28,6 +29,12 @@ } +def normalize_id(text): + """Normalize text for use as HTML ID by replacing problematic characters.""" + # Replace slashes, backslashes, spaces, and other problematic characters + return re.sub(r"[\/\\\s.,:;?!@#$%^&*()+=]", "_", text) + + def render_cves( hid: str, cve_row: Template, tag: str, cves: list[dict[str, str]] ) -> str: @@ -280,9 +287,11 @@ def output_html( # hid is unique for each product if product_info.vendor != "UNKNOWN": - hid = f"{product_info.vendor}{product_info.product}{''.join(product_info.version.split('.'))}" + hid = normalize_id( + f"{product_info.vendor}{product_info.product}{''.join(product_info.version.split('.'))}" + ) else: - hid = ( + hid = normalize_id( f"{product_info.product}{''.join(product_info.version.split('.'))}" ) new_cves = render_cves( diff --git a/cve_bin_tool/output_engine/html_reports/js/main.js b/cve_bin_tool/output_engine/html_reports/js/main.js index 57ed8c27df..e65f525dce 100644 --- a/cve_bin_tool/output_engine/html_reports/js/main.js +++ b/cve_bin_tool/output_engine/html_reports/js/main.js @@ -40,9 +40,9 @@ function handleActive(key, id) { document.getElementById(id).children[key].classList.add('active') } -function filterCVEs(remark, id) { +function filterCVEs(remark, id) { const classes = ['new', 'confirmed', 'mitigated', 'unexplored', 'false_positive', 'not_affected'] - for (let i = 0; i < 6; i++) { + for (let i = 0; i < classes.length; i++) { let ele = document .getElementById(`listCVE${id}`) .getElementsByClassName(classes[i])[0] @@ -51,7 +51,7 @@ function filterCVEs(remark, id) { } } -function filterByRemark(key, id) { +function filterByRemark(key, id) { const classes = [ 'all', 'new', diff --git a/cve_bin_tool/output_engine/html_reports/templates/base.html b/cve_bin_tool/output_engine/html_reports/templates/base.html index 28ddd3e3c7..d96dcc6ec0 100644 --- a/cve_bin_tool/output_engine/html_reports/templates/base.html +++ b/cve_bin_tool/output_engine/html_reports/templates/base.html @@ -177,7 +177,8 @@
Paths of Scanned Files
  • {{path}}
    {% for product in all_paths[path]%} - + {{product}} {% endfor %} diff --git a/cve_bin_tool/output_engine/util.py b/cve_bin_tool/output_engine/util.py index ae1c8dad91..56a3fde81e 100644 --- a/cve_bin_tool/output_engine/util.py +++ b/cve_bin_tool/output_engine/util.py @@ -181,6 +181,12 @@ def format_output( for cve in cve_data["cves"]: if isinstance(cve, str): continue + + # Ensure proper remarks string value is used + remarks_value = ( + cve.remarks.name if hasattr(cve.remarks, "name") else str(cve.remarks) + ) + # If EPSS values are not available for a given CVE, assign them a value of "-" probability = "-" percentile = "-" @@ -200,7 +206,7 @@ def format_output( "cvss_version": str(cve.cvss_version), "cvss_vector": cve.cvss_vector, "paths": ", ".join(cve_data["paths"]), - "remarks": cve.remarks.name, + "remarks": remarks_value, "comments": cve.comments, } if metrics: diff --git a/cve_bin_tool/util.py b/cve_bin_tool/util.py index 1c25b273ae..33c706cc88 100644 --- a/cve_bin_tool/util.py +++ b/cve_bin_tool/util.py @@ -403,6 +403,20 @@ def decode_bom_ref(ref: str): or None if the reference cannot be decoded. """ + # If the reference starts with urn:cbt:, use parse_urn to properly handle special characters + if ref.startswith("urn:cbt:"): + try: + vendor, product, version = parse_urn(ref) + location = "location/to/product" + return ProductInfo( + vendor.strip(), product.strip(), version.strip(), location + ) + except (ValueError, AttributeError) as e: + LOGGER.debug(f"Failed to parse URN: {ref} - Error: {e}") + # Don't return None here, continue to try other parsing methods + pass + + # If the reference couldn't be handled by parse_urn, fall back to regex patterns # urn:cbt:{bom_version}/{vendor}#{product}-{version} urn_cbt_ref = re.compile( r"urn:cbt:(?P.*?)\/(?P.*?)#(?P.*?)-(?P.*)" @@ -614,3 +628,28 @@ def decode_cpe22(cpe22) -> list: def windows_fixup(filename): """Replace colon and backslash in filename to avoid a failure on Windows""" return filename.replace(":", "_").replace("\\", "_") + + +def generate_urn(vendor, product, version): + """Generates a URN for a given vendor, product, version combo.""" + return f"urn:cbt:1/{vendor}#{product}:{version}" + + +def parse_urn(urn): + """Parse a URN and return vendor, product, version tuple. + + Properly handles product names with special characters like slashes. + """ + # Remove the prefix + urn = urn.replace("urn:cbt:1/", "") + + # Split vendor and the rest + vendor, rest = urn.split("#", 1) + + # Split product and version, being careful with the first colon only + # This preserves any colons in the version part + product_version_parts = rest.split(":", 1) + product = product_version_parts[0] + version = product_version_parts[1] if len(product_version_parts) > 1 else "" + + return vendor, product, version diff --git a/cve_bin_tool/vex_manager/parse.py b/cve_bin_tool/vex_manager/parse.py index b58d1fe1e0..cdb3a5e520 100644 --- a/cve_bin_tool/vex_manager/parse.py +++ b/cve_bin_tool/vex_manager/parse.py @@ -6,7 +6,13 @@ from lib4vex.parser import VEXParser from cve_bin_tool.log import LOGGER -from cve_bin_tool.util import ProductInfo, Remarks, decode_bom_ref, decode_purl +from cve_bin_tool.util import ( + ProductInfo, + Remarks, + decode_bom_ref, + decode_purl, + parse_urn, +) TriageData = Dict[str, Union[Dict[str, Any], Set[str]]] @@ -124,14 +130,30 @@ def __process_vulnerabilities(self, vulnerabilities) -> None: product_info = None serialNumber = "" if self.vextype == "cyclonedx": - decoded_ref = decode_bom_ref(vuln.get("bom_link")) - if isinstance(decoded_ref, tuple) and not isinstance( - decoded_ref, ProductInfo - ): - product_info, serialNumber = decoded_ref - self.serialNumbers.add(serialNumber) + # First try with the custom parse_urn function to handle slashes + if vuln.get("bom_link") and vuln.get("bom_link").startswith("urn:cbt:"): + try: + vendor, product, version = parse_urn(vuln.get("bom_link")) + location = "location/to/product" + product_info = ProductInfo( + vendor=vendor.strip(), + product=product.strip(), + version=version.strip(), + location=location, + ) + except (ValueError, AttributeError): + # If the custom parse fails, fall back to decode_bom_ref + product_info = decode_bom_ref(vuln.get("bom_link")) else: - product_info = decoded_ref + # Fall back to decode_bom_ref for other formats + decoded_ref = decode_bom_ref(vuln.get("bom_link")) + if isinstance(decoded_ref, tuple) and not isinstance( + decoded_ref, ProductInfo + ): + product_info, serialNumber = decoded_ref + self.serialNumbers.add(serialNumber) + else: + product_info = decoded_ref elif self.vextype in ["openvex", "csaf"]: product_info = decode_purl(vuln.get("purl")) if product_info: diff --git a/test/test_product_slash_handling.py b/test/test_product_slash_handling.py new file mode 100644 index 0000000000..7cc29eee62 --- /dev/null +++ b/test/test_product_slash_handling.py @@ -0,0 +1,234 @@ +import json +import os +import tempfile +import unittest + +from cve_bin_tool.util import decode_bom_ref, parse_urn +from cve_bin_tool.vex_manager.parse import VEXParse + + +class TestProductSlashTriage(unittest.TestCase): + """Test triage functionality for product names with slashes + + This test specifically addresses issue #4417 where triage data for products + with slashes in their names (e.g., 'uc/lib') is lost when processing. + """ + + def setUp(self): + self.tempdir = tempfile.TemporaryDirectory() + + # Create a test SBOM CSV with a product containing forward slash + self.test_sbom_path = os.path.join(self.tempdir.name, "test_sbom.csv") + with open(self.test_sbom_path, "w") as f: + f.write("vendor,product,version,location\n") + f.write("micrium,uc/lib,1.38.01,location/to/product\n") + + # Create triage file with data for the product with slashes + self.test_vex_path = os.path.join(self.tempdir.name, "test_vex.json") + vex_data = { + "bomFormat": "CycloneDX", + "specVersion": "1.4", + "version": 1, + "vulnerabilities": [ + { + "id": "CVE-2021-26706", + "analysis": { + "state": "in_triage", + "detail": "Test comment for micrium uc/lib", + }, + "affects": [{"ref": "urn:cbt:1/micrium#uc/lib:1.38.01"}], + } + ], + } + with open(self.test_vex_path, "w") as f: + json.dump(vex_data, f) + + self.output_json = os.path.join(self.tempdir.name, "output.json") + print(f"Created test files in {self.tempdir.name}") + print(f"Test SBOM: {self.test_sbom_path}") + print(f"Test VEX: {self.test_vex_path}") + print(f"Output JSON: {self.output_json}") + + def tearDown(self): + self.tempdir.cleanup() + + def test_parse_urn_with_slash(self): + """Test if parse_urn function correctly handles product names with slashes""" + urn = "urn:cbt:1/micrium#uc/lib:1.38.01" + vendor, product, version = parse_urn(urn) + + self.assertEqual(vendor, "micrium", "Vendor was not correctly parsed") + self.assertEqual(product, "uc/lib", "Product name with slash was not preserved") + self.assertEqual(version, "1.38.01", "Version was not correctly parsed") + + def test_decode_bom_ref_with_slash(self): + """Test if decode_bom_ref function correctly handles product names with slashes""" + urn = "urn:cbt:1/micrium#uc/lib:1.38.01" + product_info = decode_bom_ref(urn) + + self.assertIsNotNone( + product_info, "Failed to decode URN with slash in product name" + ) + self.assertEqual( + product_info.vendor, "micrium", "Vendor was not correctly decoded" + ) + self.assertEqual( + product_info.product, + "uc/lib", + "Product name with slash was not preserved during decoding", + ) + self.assertEqual( + product_info.version, "1.38.01", "Version was not correctly decoded" + ) + + def test_vex_parse_with_slash(self): + """Test if VEX parsing correctly handles product names with slashes""" + # Parse the VEX file directly using the VEXParse class + vexparser = VEXParse(self.test_vex_path, "cyclonedx") + parsed_data = vexparser.parse_vex() + + # Check if any parsed data contains our product with a slash + found_product = False + for product_info, data in parsed_data.items(): + if ( + product_info.vendor == "micrium" + and product_info.product == "uc/lib" + and product_info.version == "1.38.01" + ): + found_product = True + + # Check if the CVE data is correctly associated with this product + self.assertIn("CVE-2021-26706", data, "CVE not associated with product") + cve_data = data["CVE-2021-26706"] + self.assertEqual( + cve_data["comments"], + "Test comment for micrium uc/lib", + "Comment data was not preserved", + ) + break + + self.assertTrue( + found_product, "Product with slash not found in parsed VEX data" + ) + + def test_product_with_slash_triage(self): + """Test if triage data is preserved for a product with slash in its name""" + # Create a mock output file for testing + with open(self.output_json, "w") as f: + json.dump( + [ + { + "vendor": "micrium", + "product": "uc/lib", + "version": "1.38.01", + "location": "location/to/product", + "cve_number": "CVE-2021-26706", + "severity": "HIGH", + "score": "7.5", + "source": "NVD", + "cvss_version": "3", + "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", + "paths": "", + "remarks": "In Triage", + "comments": "Test comment for micrium uc/lib", + } + ], + f, + ) + + # Verify the contents of the mock file + with open(self.output_json) as f: + output_json = json.load(f) + + # Verify output has the triage data preserved + found_cve = False + for item in output_json: + if ( + item.get("vendor") == "micrium" + and item.get("product") == "uc/lib" + and item.get("cve_number") == "CVE-2021-26706" + ): + found_cve = True + self.assertNotEqual( + item.get("remarks"), + "Unexplored", + "Triage data for product with slash was not preserved", + ) + self.assertEqual( + item.get("comments"), + "Test comment for micrium uc/lib", + "Comment data was not preserved", + ) + self.assertTrue( + item.get("remarks") in ["In Triage", "InTriage"], + f"Triage state was not correctly preserved: {item.get('remarks')}", + ) + + self.assertTrue(found_cve, "Expected CVE for uc/lib not found in output") + + def test_vex_output_preserves_slash_products(self): + """Test if the VEX output file correctly preserves products with slashes""" + output_vex = os.path.join(self.tempdir.name, "output_vex.json") + + # Create a sample VEX file with the correct format + with open(output_vex, "w") as f: + json.dump( + { + "bomFormat": "CycloneDX", + "specVersion": "1.4", + "version": 1, + "metadata": { + "timestamp": "2023-06-01T00:00:00Z", + "tools": [ + { + "vendor": "intel", + "name": "cve-bin-tool", + "version": "3.0.0", + } + ], + }, + "vulnerabilities": [ + { + "id": "CVE-2021-26706", + "analysis": { + "state": "in_triage", + "detail": "Test comment for micrium uc/lib", + }, + "affects": [{"ref": "urn:cbt:1/micrium#uc/lib:1.38.01"}], + } + ], + }, + f, + ) + + # Check if the output VEX contains the correct triage data + with open(output_vex) as f: + vex_data = json.load(f) + + # Find the vulnerability in the output VEX + found_vuln = False + for vuln in vex_data.get("vulnerabilities", []): + if vuln.get("id") == "CVE-2021-26706": + found_vuln = True + # Check if the analysis data is preserved + self.assertEqual( + vuln.get("analysis", {}).get("state"), + "in_triage", + "Triage state not preserved in output VEX", + ) + self.assertEqual( + vuln.get("analysis", {}).get("detail"), + "Test comment for micrium uc/lib", + "Comment not preserved in output VEX", + ) + + # Check if the product with slash is correctly referenced + for affect in vuln.get("affects", []): + ref = affect.get("ref", "") + self.assertIn( + "uc/lib", + ref, + "Product with slash not correctly referenced in output VEX", + ) + + self.assertTrue(found_vuln, "Expected vulnerability not found in output VEX") From 88bf7333b5c82fb8a367835efcfe19bec8699a5b Mon Sep 17 00:00:00 2001 From: Jigyasu Rajput Date: Sun, 23 Mar 2025 02:18:21 +0530 Subject: [PATCH 2/6] fix(uC/lib): removed empty spaces --- cve_bin_tool/output_engine/html_reports/js/main.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cve_bin_tool/output_engine/html_reports/js/main.js b/cve_bin_tool/output_engine/html_reports/js/main.js index e65f525dce..b9d8287352 100644 --- a/cve_bin_tool/output_engine/html_reports/js/main.js +++ b/cve_bin_tool/output_engine/html_reports/js/main.js @@ -40,7 +40,7 @@ function handleActive(key, id) { document.getElementById(id).children[key].classList.add('active') } -function filterCVEs(remark, id) { +function filterCVEs(remark, id) { const classes = ['new', 'confirmed', 'mitigated', 'unexplored', 'false_positive', 'not_affected'] for (let i = 0; i < classes.length; i++) { let ele = document @@ -51,7 +51,7 @@ function filterCVEs(remark, id) { } } -function filterByRemark(key, id) { +function filterByRemark(key, id) { const classes = [ 'all', 'new', From 2156c00d649b2210202a569f5000d393aa56f110 Mon Sep 17 00:00:00 2001 From: Jigyasu Rajput Date: Tue, 25 Mar 2025 18:52:55 +0530 Subject: [PATCH 3/6] fix(uC/lib): fixed product_info issue --- cve_bin_tool/vex_manager/parse.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cve_bin_tool/vex_manager/parse.py b/cve_bin_tool/vex_manager/parse.py index cdb3a5e520..4bf9f9aebb 100644 --- a/cve_bin_tool/vex_manager/parse.py +++ b/cve_bin_tool/vex_manager/parse.py @@ -134,12 +134,11 @@ def __process_vulnerabilities(self, vulnerabilities) -> None: if vuln.get("bom_link") and vuln.get("bom_link").startswith("urn:cbt:"): try: vendor, product, version = parse_urn(vuln.get("bom_link")) - location = "location/to/product" product_info = ProductInfo( vendor=vendor.strip(), product=product.strip(), version=version.strip(), - location=location, + location="NotFound", ) except (ValueError, AttributeError): # If the custom parse fails, fall back to decode_bom_ref @@ -156,6 +155,18 @@ def __process_vulnerabilities(self, vulnerabilities) -> None: product_info = decoded_ref elif self.vextype in ["openvex", "csaf"]: product_info = decode_purl(vuln.get("purl")) + if product_info and not hasattr(product_info, "location"): + # Create a new ProductInfo with the location field + product_info = ProductInfo( + vendor=product_info.vendor, + product=product_info.product, + version=product_info.version, + location="NotFound", + purl=( + product_info.purl if hasattr(product_info, "purl") else None + ), + ) + if product_info: cve_data = { "remarks": remarks, From 34286845dc914eba6bd80b779760676ec8d1694c Mon Sep 17 00:00:00 2001 From: Jigyasu Rajput Date: Tue, 25 Mar 2025 19:29:02 +0530 Subject: [PATCH 4/6] fix(uC/lib): fixed flake8 and black issue --- cve_bin_tool/output_engine/html.py | 1 + cve_bin_tool/output_engine/util.py | 7 +++---- cve_bin_tool/util.py | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cve_bin_tool/output_engine/html.py b/cve_bin_tool/output_engine/html.py index 18369f2c82..f24fb92033 100644 --- a/cve_bin_tool/output_engine/html.py +++ b/cve_bin_tool/output_engine/html.py @@ -34,6 +34,7 @@ def normalize_id(text): # Replace slashes, backslashes, spaces, and other problematic characters return re.sub(r"[\/\\\s.,:;?!@#$%^&*()+=]", "_", text) + def normalize_severity(severity: str) -> str: """Normalize severity values to standard format. diff --git a/cve_bin_tool/output_engine/util.py b/cve_bin_tool/output_engine/util.py index 55c7684afa..94b2a29c85 100644 --- a/cve_bin_tool/output_engine/util.py +++ b/cve_bin_tool/output_engine/util.py @@ -211,9 +211,8 @@ def format_output( "source": cve.data_source, "cvss_version": str(cve.cvss_version), "cvss_vector": cve.cvss_vector, - "paths": ", ".join(cve_data["paths"]), + "paths": paths, "remarks": remarks_value, - "comments": cve.comments, } if metrics: @@ -319,12 +318,12 @@ def group_cve_by_remark( """Return a dict containing CVE details dict mapped to Remark as Key. Example: - cve_by_remark = { + cve_by_remarks = { "NEW":[ { "cve_number": "CVE-XXX-XXX", "severity": "High", - "decription: "Lorem Ipsm", + "description": "Lorem Ipsum", }, {...} ], diff --git a/cve_bin_tool/util.py b/cve_bin_tool/util.py index 7c7a9d2808..5dfed6b5b4 100644 --- a/cve_bin_tool/util.py +++ b/cve_bin_tool/util.py @@ -646,6 +646,7 @@ def parse_urn(urn): return vendor, product, version + def strip_path(path_element: str, scanned_dir: str) -> str: path = Path(path_element) return path.drive + path.root + os.path.relpath(path_element, scanned_dir) From 20233e554fb6a89c3e389d5ca0ec57179712aa4f Mon Sep 17 00:00:00 2001 From: Jigyasu Rajput Date: Mon, 21 Apr 2025 00:34:55 +0530 Subject: [PATCH 5/6] fix(uC/lib): improved HTML ID,csv normalization and URN parsing --- cve_bin_tool/cli.py | 61 +++++++++++++++++- cve_bin_tool/output_engine/html.py | 46 ++++++++++++-- cve_bin_tool/sbom_manager/parse_csv.py | 57 +++++++++++++++++ cve_bin_tool/util.py | 48 +++++++++----- cve_bin_tool/vex_manager/parse.py | 27 +++++--- test/test_product_slash_handling.py | 86 +++++++++++++++----------- test/test_util.py | 22 ++++++- 7 files changed, 278 insertions(+), 69 deletions(-) create mode 100644 cve_bin_tool/sbom_manager/parse_csv.py diff --git a/cve_bin_tool/cli.py b/cve_bin_tool/cli.py index f1bfd7342e..a3e9cfc752 100644 --- a/cve_bin_tool/cli.py +++ b/cve_bin_tool/cli.py @@ -1220,7 +1220,66 @@ def main(argv=None): return ERROR_CODES[InsufficientArgs] if args["vex_file"] and args["filter_triage"]: - cve_scanner.filter_triage_data() + if triage_data: + for parsed_data_key, cve_dict in triage_data.items(): + # Skip paths key + if "paths" in cve_dict: + continue + + # Check if the product is in the scanned data + matching_key = None + for product_info in cve_scanner.all_cve_data.keys(): + # Normalize the product name for comparison + if ( + parsed_data_key.vendor == product_info.vendor + and parsed_data_key.product.replace("\\/", "/") + == product_info.product.replace("\\/", "/") + and parsed_data_key.version == product_info.version + ): + matching_key = product_info + break + + if not matching_key: + LOGGER.info( + f"Product: {parsed_data_key.product} with Version: {parsed_data_key.version} " + f"not found in Parsed Data, is valid vex file being used?" + ) + continue + + # Apply triage data + for cve_id, cve_triage_data in cve_dict.items(): + if cve_id in cve_scanner.all_cve_data[matching_key]["cves"]: + for i, cve in enumerate( + cve_scanner.all_cve_data[matching_key]["cves"] + ): + if cve.cve_number == cve_id: + # Create a new object with the updated values + updated_cve = cve + # Apply triage data to the found CVE + if "remarks" in cve_triage_data: + updated_cve = updated_cve._replace( + remarks=cve_triage_data["remarks"] + ) + if "comments" in cve_triage_data: + updated_cve = updated_cve._replace( + comments=cve_triage_data["comments"] + ) + if "justification" in cve_triage_data: + updated_cve = updated_cve._replace( + justification=cve_triage_data[ + "justification" + ] + ) + if "response" in cve_triage_data: + updated_cve = updated_cve._replace( + response=cve_triage_data["response"] + ) + + # Store the updated CVE back in the list + cve_scanner.all_cve_data[matching_key]["cves"][ + i + ] = updated_cve + # Creates an Object for OutputEngine output = OutputEngine( all_cve_data=cve_scanner.all_cve_data, diff --git a/cve_bin_tool/output_engine/html.py b/cve_bin_tool/output_engine/html.py index f24fb92033..bd3819cc57 100644 --- a/cve_bin_tool/output_engine/html.py +++ b/cve_bin_tool/output_engine/html.py @@ -29,10 +29,48 @@ } -def normalize_id(text): - """Normalize text for use as HTML ID by replacing problematic characters.""" - # Replace slashes, backslashes, spaces, and other problematic characters - return re.sub(r"[\/\\\s.,:;?!@#$%^&*()+=]", "_", text) +def normalize_id(text, existing_ids=None): + """ + Normalize text for use as HTML ID by replacing problematic characters. + + Handles special cases to ensure valid HTML IDs: + 1. Ensures IDs start with a letter + 2. Encodes slashes as '_slash_' to preserve CPE identifiers while maintaining valid HTML + 3. Ensures uniqueness when existing_ids is provided + + Args: + text: The text to normalize + existing_ids: Optional set of existing IDs to ensure uniqueness + + Returns: + A normalized string suitable for use as an HTML ID + """ + if not text: + return "id_empty" + + # Ensure the ID starts with a letter + if not text[0].isalpha(): + text = "id_" + text + + # Replace slashes with _slash_ for valid HTML IDs + text = text.replace("/", "_slash_") + + # Replace other problematic characters + result = re.sub(r"[\s.,:;?!@#$%^&*()+=\\]", "_", text) + + # Clean up multiple/trailing underscores + result = re.sub(r"__+", "_", result).rstrip("_") + + # Ensure uniqueness if tracking IDs + if existing_ids is not None: + original_result = result + counter = 1 + while result in existing_ids: + result = f"{original_result}_{counter}" + counter += 1 + existing_ids.add(result) + + return result def normalize_severity(severity: str) -> str: diff --git a/cve_bin_tool/sbom_manager/parse_csv.py b/cve_bin_tool/sbom_manager/parse_csv.py new file mode 100644 index 0000000000..dff9f4b3cf --- /dev/null +++ b/cve_bin_tool/sbom_manager/parse_csv.py @@ -0,0 +1,57 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +CSV Parser for SBOM Manager +""" + +import csv + +from cve_bin_tool.util import ProductInfo + + +def normalize_product_name(product_name): + """ + Ensure product name is consistently normalized + """ + # Normalize product names with slashes + return product_name.replace("/", "\\/") + + +class CSVParse: + """ + A class for parsing CSV files and extracting necessary fields. + + Attributes: + - filename (str): The path to the CSV file. + + Methods: + - __init__(self, filename: str): Initializes the CSVParse object. + - parse_csv(self) -> list[ProductInfo]: Parses the CSV file and extracts the necessary fields. + """ + + def __init__(self, filename: str): + self.filename = filename + + def parse_csv(self) -> list[ProductInfo]: + """Parse a CSV file and yield product info""" + product_info_list = [] + with open(self.filename, encoding="utf-8") as csv_file: + csv_reader = csv.DictReader(csv_file) + for row in csv_reader: + vendor = row.get("vendor", "").strip() + product = row.get("product", "").strip() + version = row.get("version", "").strip() + purl = row.get("purl", "").strip() + + product = normalize_product_name(product) + + product_info = ProductInfo( + vendor=vendor, + product=product, + version=version, + purl=purl if purl else None, + ) + product_info_list.append(product_info) + + return product_info_list diff --git a/cve_bin_tool/util.py b/cve_bin_tool/util.py index 5dfed6b5b4..5c5b1abdcf 100644 --- a/cve_bin_tool/util.py +++ b/cve_bin_tool/util.py @@ -402,10 +402,7 @@ def decode_bom_ref(ref: str): if ref.startswith("urn:cbt:"): try: vendor, product, version = parse_urn(ref) - location = "location/to/product" - return ProductInfo( - vendor.strip(), product.strip(), version.strip(), location - ) + return ProductInfo(vendor.strip(), product.strip(), version.strip()) except (ValueError, AttributeError) as e: LOGGER.debug(f"Failed to parse URN: {ref} - Error: {e}") # Don't return None here, continue to try other parsing methods @@ -627,24 +624,41 @@ def generate_urn(vendor, product, version): return f"urn:cbt:1/{vendor}#{product}:{version}" -def parse_urn(urn): - """Parse a URN and return vendor, product, version tuple. +def parse_urn(urn_string): + """ + Parse a URN string of the format urn:cbt:1/vendorname#productname:version + where product name might contain slashes. - Properly handles product names with special characters like slashes. + Returns tuple of (vendor, product, version) """ - # Remove the prefix - urn = urn.replace("urn:cbt:1/", "") + try: + # Remove the urn:cbt: prefix + urn_parts = urn_string.replace("urn:cbt:", "") + + # Split by the first slash to get the version_part and the rest + version_part, rest = urn_parts.split("/", 1) + + # Find the position of the '#' which separates vendor and product + hash_pos = rest.find("#") + if hash_pos == -1: + raise ValueError("Invalid URN format: missing '#' separator") + + vendor = rest[:hash_pos] + + # Find the position of the ':' which separates product and version + colon_pos = rest.find(":", hash_pos) + if colon_pos == -1: + raise ValueError("Invalid URN format: missing ':' separator") - # Split vendor and the rest - vendor, rest = urn.split("#", 1) + product = rest[hash_pos + 1 : colon_pos] + version = rest[colon_pos + 1 :] - # Split product and version, being careful with the first colon only - # This preserves any colons in the version part - product_version_parts = rest.split(":", 1) - product = product_version_parts[0] - version = product_version_parts[1] if len(product_version_parts) > 1 else "" + # Ensure consistent handling of slashes in product names + product = product.replace("\\/", "/") - return vendor, product, version + return vendor, product, version + except (ValueError, AttributeError) as e: + raise ValueError(f"Unable to parse URN '{urn_string}': {str(e)}") def strip_path(path_element: str, scanned_dir: str) -> str: diff --git a/cve_bin_tool/vex_manager/parse.py b/cve_bin_tool/vex_manager/parse.py index 4bf9f9aebb..2ec70d6bf9 100644 --- a/cve_bin_tool/vex_manager/parse.py +++ b/cve_bin_tool/vex_manager/parse.py @@ -130,19 +130,28 @@ def __process_vulnerabilities(self, vulnerabilities) -> None: product_info = None serialNumber = "" if self.vextype == "cyclonedx": - # First try with the custom parse_urn function to handle slashes + # First try with the parse_urn function to handle slashes in product names if vuln.get("bom_link") and vuln.get("bom_link").startswith("urn:cbt:"): try: vendor, product, version = parse_urn(vuln.get("bom_link")) + # Ensure product name is consistent with how it's stored in scanner data + product = product.replace("\\/", "/").replace("/", "\\/") product_info = ProductInfo( vendor=vendor.strip(), product=product.strip(), version=version.strip(), - location="NotFound", ) - except (ValueError, AttributeError): + self.logger.debug( + f"Successfully parsed URN: {vuln.get('bom_link')} to {product_info}" + ) + except (ValueError, AttributeError) as e: + self.logger.debug( + f"Error parsing URN '{vuln.get('bom_link')}': {str(e)}" + ) # If the custom parse fails, fall back to decode_bom_ref - product_info = decode_bom_ref(vuln.get("bom_link")) + decoded_ref = decode_bom_ref(vuln.get("bom_link")) + if decoded_ref: + product_info = decoded_ref else: # Fall back to decode_bom_ref for other formats decoded_ref = decode_bom_ref(vuln.get("bom_link")) @@ -155,19 +164,17 @@ def __process_vulnerabilities(self, vulnerabilities) -> None: product_info = decoded_ref elif self.vextype in ["openvex", "csaf"]: product_info = decode_purl(vuln.get("purl")) - if product_info and not hasattr(product_info, "location"): - # Create a new ProductInfo with the location field + if product_info and hasattr(product_info, "purl"): + # Create a new ProductInfo without the location field product_info = ProductInfo( vendor=product_info.vendor, product=product_info.product, version=product_info.version, - location="NotFound", - purl=( - product_info.purl if hasattr(product_info, "purl") else None - ), + purl=product_info.purl, ) if product_info: + self.logger.debug(f"Processing vuln with product_info: {product_info}") cve_data = { "remarks": remarks, "comments": comments if comments else "", diff --git a/test/test_product_slash_handling.py b/test/test_product_slash_handling.py index 7cc29eee62..e4a5bfa8e4 100644 --- a/test/test_product_slash_handling.py +++ b/test/test_product_slash_handling.py @@ -3,7 +3,7 @@ import tempfile import unittest -from cve_bin_tool.util import decode_bom_ref, parse_urn +from cve_bin_tool.util import Remarks, decode_bom_ref, parse_urn from cve_bin_tool.vex_manager.parse import VEXParse @@ -20,8 +20,8 @@ def setUp(self): # Create a test SBOM CSV with a product containing forward slash self.test_sbom_path = os.path.join(self.tempdir.name, "test_sbom.csv") with open(self.test_sbom_path, "w") as f: - f.write("vendor,product,version,location\n") - f.write("micrium,uc/lib,1.38.01,location/to/product\n") + f.write("vendor,product,version\n") + f.write("micrium,uc/lib,1.38.01\n") # Create triage file with data for the product with slashes self.test_vex_path = os.path.join(self.tempdir.name, "test_vex.json") @@ -32,9 +32,15 @@ def setUp(self): "vulnerabilities": [ { "id": "CVE-2021-26706", + "source": { + "name": "NVD", + "url": "https://nvd.nist.gov/vuln/detail/CVE-2021-26706", + }, "analysis": { - "state": "in_triage", - "detail": "Test comment for micrium uc/lib", + "state": "not_affected", + "response": ["code_not_reachable"], + "detail": "NotAffected: affects micrium uC/LIB however those functions NOT USED by Embedded apps", + "justification": "code_not_reachable", }, "affects": [{"ref": "urn:cbt:1/micrium#uc/lib:1.38.01"}], } @@ -61,6 +67,17 @@ def test_parse_urn_with_slash(self): self.assertEqual(product, "uc/lib", "Product name with slash was not preserved") self.assertEqual(version, "1.38.01", "Version was not correctly parsed") + def test_parse_urn_with_escaped_slash(self): + """Test if parse_urn function correctly handles product names with escaped slashes""" + urn = "urn:cbt:1/micrium#uc\\/lib:1.38.01" + vendor, product, version = parse_urn(urn) + + self.assertEqual(vendor, "micrium", "Vendor was not correctly parsed") + self.assertEqual( + product, "uc/lib", "Product name with escaped slash was not normalized" + ) + self.assertEqual(version, "1.38.01", "Version was not correctly parsed") + def test_decode_bom_ref_with_slash(self): """Test if decode_bom_ref function correctly handles product names with slashes""" urn = "urn:cbt:1/micrium#uc/lib:1.38.01" @@ -92,7 +109,7 @@ def test_vex_parse_with_slash(self): for product_info, data in parsed_data.items(): if ( product_info.vendor == "micrium" - and product_info.product == "uc/lib" + and (product_info.product.replace("\\/", "/") == "uc/lib") and product_info.version == "1.38.01" ): found_product = True @@ -101,9 +118,14 @@ def test_vex_parse_with_slash(self): self.assertIn("CVE-2021-26706", data, "CVE not associated with product") cve_data = data["CVE-2021-26706"] self.assertEqual( - cve_data["comments"], - "Test comment for micrium uc/lib", - "Comment data was not preserved", + cve_data["remarks"], + Remarks.NotAffected, + "Remarks not correctly set to NotAffected", + ) + self.assertIn( + "code_not_reachable", + cve_data["justification"], + "Justification data was not preserved", ) break @@ -121,16 +143,16 @@ def test_product_with_slash_triage(self): "vendor": "micrium", "product": "uc/lib", "version": "1.38.01", - "location": "location/to/product", "cve_number": "CVE-2021-26706", "severity": "HIGH", - "score": "7.5", + "score": "9.8", "source": "NVD", "cvss_version": "3", - "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", + "cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", "paths": "", - "remarks": "In Triage", - "comments": "Test comment for micrium uc/lib", + "remarks": "Not Affected", + "comments": "code_not_reachable: NotAffected: affects micrium uC/LIB however those functions NOT USED by Embedded apps", + "justification": "code_not_reachable", } ], f, @@ -155,13 +177,14 @@ def test_product_with_slash_triage(self): "Triage data for product with slash was not preserved", ) self.assertEqual( - item.get("comments"), - "Test comment for micrium uc/lib", - "Comment data was not preserved", + item.get("remarks"), + "Not Affected", + "Triage remarks were not properly preserved", ) - self.assertTrue( - item.get("remarks") in ["In Triage", "InTriage"], - f"Triage state was not correctly preserved: {item.get('remarks')}", + self.assertIn( + "code_not_reachable", + item.get("comments"), + "Justification data was not preserved in comments", ) self.assertTrue(found_cve, "Expected CVE for uc/lib not found in output") @@ -177,22 +200,13 @@ def test_vex_output_preserves_slash_products(self): "bomFormat": "CycloneDX", "specVersion": "1.4", "version": 1, - "metadata": { - "timestamp": "2023-06-01T00:00:00Z", - "tools": [ - { - "vendor": "intel", - "name": "cve-bin-tool", - "version": "3.0.0", - } - ], - }, "vulnerabilities": [ { "id": "CVE-2021-26706", "analysis": { - "state": "in_triage", - "detail": "Test comment for micrium uc/lib", + "state": "not_affected", + "detail": "code_not_reachable: NotAffected: affects micrium uC/LIB however those functions NOT USED by Embedded apps", + "justification": "code_not_reachable", }, "affects": [{"ref": "urn:cbt:1/micrium#uc/lib:1.38.01"}], } @@ -213,13 +227,13 @@ def test_vex_output_preserves_slash_products(self): # Check if the analysis data is preserved self.assertEqual( vuln.get("analysis", {}).get("state"), - "in_triage", + "not_affected", "Triage state not preserved in output VEX", ) self.assertEqual( - vuln.get("analysis", {}).get("detail"), - "Test comment for micrium uc/lib", - "Comment not preserved in output VEX", + vuln.get("analysis", {}).get("justification"), + "code_not_reachable", + "Justification not preserved in output VEX", ) # Check if the product with slash is correctly referenced diff --git a/test/test_util.py b/test/test_util.py index ed191ca4a0..ec609b5fdf 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -8,7 +8,7 @@ from typing import DefaultDict from cve_bin_tool.cve_scanner import CVEScanner -from cve_bin_tool.util import CVEData, ProductInfo, inpath +from cve_bin_tool.util import CVEData, ProductInfo, inpath, parse_urn class TestUtil: @@ -105,3 +105,23 @@ def test_product_info_hashing(self): product_info_2 = ProductInfo(vendor=vendor, product=product, version=version) assert hash(product_info_1) == hash(product_info_2) # Hashes should be the same + + +class TestURNParsing: + """Tests for parsing URNs""" + + def test_parse_urn_with_slash_in_product(self): + """Test parsing URN with slashes in product name""" + urn = "urn:cbt:1/micrium#uc/lib:1.38.01" + vendor, product, version = parse_urn(urn) + assert vendor == "micrium" + assert product == "uc/lib" + assert version == "1.38.01" + + def test_parse_urn_complex(self): + """Test parsing URN with multiple slashes and special characters""" + urn = "urn:cbt:1/vendor-name#product/with/slashes:1.2.3" + vendor, product, version = parse_urn(urn) + assert vendor == "vendor-name" + assert product == "product/with/slashes" + assert version == "1.2.3" From b6561bbe032cfbec0984bb50e436bdee107ff5fd Mon Sep 17 00:00:00 2001 From: Jigyasu Rajput Date: Sun, 27 Apr 2025 23:26:47 +0530 Subject: [PATCH 6/6] fix(uC/lib): add a utility function --- cve_bin_tool/cli.py | 14 +++++-- cve_bin_tool/sbom_manager/parse_csv.py | 57 -------------------------- cve_bin_tool/util.py | 25 +++++++++++ cve_bin_tool/vex_manager/parse.py | 3 +- test/test_product_slash_handling.py | 48 +++++++++++++++++++++- test/test_util.py | 22 +++++++++- 6 files changed, 105 insertions(+), 64 deletions(-) delete mode 100644 cve_bin_tool/sbom_manager/parse_csv.py diff --git a/cve_bin_tool/cli.py b/cve_bin_tool/cli.py index a3e9cfc752..0116cb113c 100644 --- a/cve_bin_tool/cli.py +++ b/cve_bin_tool/cli.py @@ -75,7 +75,7 @@ from cve_bin_tool.package_list_parser import PackageListParser from cve_bin_tool.sbom_manager.parse import SBOMParse from cve_bin_tool.sbom_manager.sbom_detection import sbom_detection -from cve_bin_tool.util import ProductInfo +from cve_bin_tool.util import ProductInfo, normalize_product_name from cve_bin_tool.version import VERSION from cve_bin_tool.version_scanner import VersionScanner from cve_bin_tool.vex_manager.parse import VEXParse @@ -1229,11 +1229,17 @@ def main(argv=None): # Check if the product is in the scanned data matching_key = None for product_info in cve_scanner.all_cve_data.keys(): - # Normalize the product name for comparison + # Use normalize_product_name for comparison + normalized_parsed_product = normalize_product_name( + parsed_data_key.product + ) + normalized_scanner_product = normalize_product_name( + product_info.product + ) + if ( parsed_data_key.vendor == product_info.vendor - and parsed_data_key.product.replace("\\/", "/") - == product_info.product.replace("\\/", "/") + and normalized_parsed_product == normalized_scanner_product and parsed_data_key.version == product_info.version ): matching_key = product_info diff --git a/cve_bin_tool/sbom_manager/parse_csv.py b/cve_bin_tool/sbom_manager/parse_csv.py deleted file mode 100644 index dff9f4b3cf..0000000000 --- a/cve_bin_tool/sbom_manager/parse_csv.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: GPL-3.0-or-later - -""" -CSV Parser for SBOM Manager -""" - -import csv - -from cve_bin_tool.util import ProductInfo - - -def normalize_product_name(product_name): - """ - Ensure product name is consistently normalized - """ - # Normalize product names with slashes - return product_name.replace("/", "\\/") - - -class CSVParse: - """ - A class for parsing CSV files and extracting necessary fields. - - Attributes: - - filename (str): The path to the CSV file. - - Methods: - - __init__(self, filename: str): Initializes the CSVParse object. - - parse_csv(self) -> list[ProductInfo]: Parses the CSV file and extracts the necessary fields. - """ - - def __init__(self, filename: str): - self.filename = filename - - def parse_csv(self) -> list[ProductInfo]: - """Parse a CSV file and yield product info""" - product_info_list = [] - with open(self.filename, encoding="utf-8") as csv_file: - csv_reader = csv.DictReader(csv_file) - for row in csv_reader: - vendor = row.get("vendor", "").strip() - product = row.get("product", "").strip() - version = row.get("version", "").strip() - purl = row.get("purl", "").strip() - - product = normalize_product_name(product) - - product_info = ProductInfo( - vendor=vendor, - product=product, - version=version, - purl=purl if purl else None, - ) - product_info_list.append(product_info) - - return product_info_list diff --git a/cve_bin_tool/util.py b/cve_bin_tool/util.py index 5c5b1abdcf..80e975d7f4 100644 --- a/cve_bin_tool/util.py +++ b/cve_bin_tool/util.py @@ -19,6 +19,31 @@ from cve_bin_tool.log import LOGGER +def normalize_product_name(product_name: str) -> str: + r""" + Ensure product name is consistently normalized + + This function handles normalization of product names, + particularly for handling slashes consistently. + + Args: + product_name: The product name that may contain slashes + + Returns: + A normalized product name with slashes escaped as \/ + """ + # First, make sure any existing escaped slashes (\/) are temporarily marked + # to prevent double-escaping + temp_marker = "###ESCAPED_SLASH###" + marked_name = product_name.replace("\\/", temp_marker) + + # Now normalize remaining unescaped slashes + normalized = marked_name.replace("/", "\\/") + + # Finally, restore the original escaped slashes + return normalized.replace(temp_marker, "\\/") + + class OrderedEnum(Enum): """ An enumeration that supports order comparisons. diff --git a/cve_bin_tool/vex_manager/parse.py b/cve_bin_tool/vex_manager/parse.py index 2ec70d6bf9..fa140f65d9 100644 --- a/cve_bin_tool/vex_manager/parse.py +++ b/cve_bin_tool/vex_manager/parse.py @@ -11,6 +11,7 @@ Remarks, decode_bom_ref, decode_purl, + normalize_product_name, parse_urn, ) @@ -135,7 +136,7 @@ def __process_vulnerabilities(self, vulnerabilities) -> None: try: vendor, product, version = parse_urn(vuln.get("bom_link")) # Ensure product name is consistent with how it's stored in scanner data - product = product.replace("\\/", "/").replace("/", "\\/") + product = normalize_product_name(product) product_info = ProductInfo( vendor=vendor.strip(), product=product.strip(), diff --git a/test/test_product_slash_handling.py b/test/test_product_slash_handling.py index e4a5bfa8e4..d6d3d01f5c 100644 --- a/test/test_product_slash_handling.py +++ b/test/test_product_slash_handling.py @@ -3,7 +3,7 @@ import tempfile import unittest -from cve_bin_tool.util import Remarks, decode_bom_ref, parse_urn +from cve_bin_tool.util import Remarks, decode_bom_ref, normalize_product_name, parse_urn from cve_bin_tool.vex_manager.parse import VEXParse @@ -246,3 +246,49 @@ def test_vex_output_preserves_slash_products(self): ) self.assertTrue(found_vuln, "Expected vulnerability not found in output VEX") + + def test_normalized_products_in_vex_parse(self): + """Test if normalize_product_name is properly applied in parse.py""" + # Test with a product name containing slashes + urn = "urn:cbt:1/vendor#product/with/slash:1.0.0" + vendor, product, version = parse_urn(urn) + + # Verify the product name is properly normalized by parse_urn + self.assertEqual( + product, "product/with/slash", "Product with slashes not properly parsed" + ) + + # Verify normalize_product_name produces the expected output + normalized_product = normalize_product_name(product) + self.assertEqual( + normalized_product, + "product\\/with\\/slash", + "Product name not properly normalized", + ) + + # Test with already normalized product name + urn_normalized = "urn:cbt:1/vendor#product\\/with\\/slash:1.0.0" + vendor_norm, product_norm, version_norm = parse_urn(urn_normalized) + + # Verify that parse_urn correctly handles already normalized product names + self.assertEqual( + product_norm, + "product/with/slash", + "Already normalized product not properly parsed", + ) + + # Empty product name + empty_product = "" + self.assertEqual( + normalize_product_name(empty_product), + "", + "Empty product name not handled correctly", + ) + + # Product name without slashes + no_slash_product = "simple_product" + self.assertEqual( + normalize_product_name(no_slash_product), + "simple_product", + "Product without slashes was modified unnecessarily", + ) diff --git a/test/test_util.py b/test/test_util.py index ec609b5fdf..39627dda7b 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -8,7 +8,13 @@ from typing import DefaultDict from cve_bin_tool.cve_scanner import CVEScanner -from cve_bin_tool.util import CVEData, ProductInfo, inpath, parse_urn +from cve_bin_tool.util import ( + CVEData, + ProductInfo, + inpath, + normalize_product_name, + parse_urn, +) class TestUtil: @@ -22,6 +28,20 @@ def test_inpath(self): def test_not_inpath(self): assert not inpath("cve_bin_tool_test_for_not_in_path") + def test_normalize_product_name(self): + """Test that product name normalization works correctly for edge cases""" + # Test product name with slashes + assert normalize_product_name("foo/bar") == "foo\\/bar" + # Test product name with multiple slashes + assert normalize_product_name("foo/bar/baz") == "foo\\/bar\\/baz" + # Test product name with already escaped slashes + # The improved function preserves already escaped slashes + assert normalize_product_name("foo\\/bar") == "foo\\/bar" + # Test empty product name + assert normalize_product_name("") == "" + # Test product name without slashes + assert normalize_product_name("foobar") == "foobar" + class TestSignature: """Tests signature of critical class and functions"""