Skip to content

Migrate Xen, Curl, Istio and OSS-Fuzz importer #1946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vulnerabilities/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from typing import Optional
from typing import Set
from typing import Tuple
from typing import Union

import pytz
from dateutil import parser as dateparser
Expand Down Expand Up @@ -361,6 +362,7 @@ class AdvisoryData:
weaknesses: List[int] = dataclasses.field(default_factory=list)
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
url: Optional[str] = None
original_advisory_text: Optional[str] = None

def __post_init__(self):
if self.date_published and not self.date_published.tzinfo:
Expand Down
16 changes: 14 additions & 2 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,29 +42,41 @@
from vulnerabilities.pipelines import pypa_importer
from vulnerabilities.pipelines import pysec_importer
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
from vulnerabilities.pipelines.v2_importers import (
elixir_security_importer as elixir_security_importer_v2,
)
from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
from vulnerabilities.pipelines.v2_importers import mozilla_importer as mozilla_importer_v2
from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2
from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2
from vulnerabilities.pipelines.v2_importers import oss_fuzz as oss_fuzz_v2
from vulnerabilities.pipelines.v2_importers import postgresql_importer as postgresql_importer_v2
from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2
from vulnerabilities.pipelines.v2_importers import xen_importer as xen_importer_v2
from vulnerabilities.utils import create_registry

IMPORTERS_REGISTRY = create_registry(
[
nvd_importer_v2.NVDImporterPipeline,
elixir_security_importer_v2.ElixirSecurityImporterPipeline,
github_importer_v2.GitHubAPIImporterPipeline,
npm_importer_v2.NpmImporterPipeline,
vulnrichment_importer_v2.VulnrichImporterPipeline,
apache_httpd_v2.ApacheHTTPDImporterPipeline,
pypa_importer_v2.PyPaImporterPipeline,
gitlab_importer_v2.GitLabImporterPipeline,
pysec_importer_v2.PyPIImporterPipeline,
xen_importer_v2.XenImporterPipeline,
curl_importer_v2.CurlImporterPipeline,
oss_fuzz_v2.OSSFuzzImporterPipeline,
istio_importer_v2.IstioImporterPipeline,
postgresql_importer_v2.PostgreSQLImporterPipeline,
mozilla_importer_v2.MozillaImporterPipeline,
github_osv_importer_v2.GithubOSVImporterPipeline,
nvd_importer.NVDImporterPipeline,
github_importer.GitHubAPIImporterPipeline,
gitlab_importer.GitLabImporterPipeline,
Expand Down
2 changes: 1 addition & 1 deletion vulnerabilities/importers/curl.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
... ]
... }
>>> parse_advisory_data(raw_data)
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json')
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
"""

affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []
Expand Down
4 changes: 3 additions & 1 deletion vulnerabilities/importers/osv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import logging
from typing import Iterable
from typing import List
Expand Down Expand Up @@ -109,7 +110,7 @@ def parse_advisory_data(


def parse_advisory_data_v2(
raw_data: dict, supported_ecosystems, advisory_url: str
raw_data: dict, supported_ecosystems, advisory_url: str, advisory_text: str
) -> Optional[AdvisoryData]:
"""
Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and
Expand Down Expand Up @@ -173,6 +174,7 @@ def parse_advisory_data_v2(
date_published=date_published,
weaknesses=weaknesses,
url=advisory_url,
original_advisory_text=advisory_text or json.dumps(raw_data, indent=2, ensure_ascii=False),
)


Expand Down
4 changes: 0 additions & 4 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from vulnerabilities.improvers import valid_versions
from vulnerabilities.improvers import vulnerability_status
from vulnerabilities.pipelines import add_cvss31_to_CVEs
from vulnerabilities.pipelines import collect_commits
from vulnerabilities.pipelines import compute_advisory_todo
from vulnerabilities.pipelines import compute_package_risk
from vulnerabilities.pipelines import compute_package_version_rank
Expand All @@ -20,7 +19,6 @@
from vulnerabilities.pipelines import flag_ghost_packages
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
from vulnerabilities.pipelines import remove_duplicate_advisories
from vulnerabilities.pipelines.v2_improvers import collect_commits as collect_commits_v2
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
from vulnerabilities.pipelines.v2_improvers import (
computer_package_version_rank as compute_version_rank_v2,
Expand Down Expand Up @@ -58,7 +56,6 @@
enhance_with_exploitdb.ExploitDBImproverPipeline,
compute_package_risk.ComputePackageRiskPipeline,
compute_package_version_rank.ComputeVersionRankPipeline,
collect_commits.CollectFixCommitsPipeline,
add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline,
remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline,
populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline,
Expand All @@ -68,7 +65,6 @@
enhance_with_metasploit_v2.MetasploitImproverPipeline,
compute_package_risk_v2.ComputePackageRiskPipeline,
compute_version_rank_v2.ComputeVersionRankPipeline,
collect_commits_v2.CollectFixCommitsPipeline,
compute_advisory_todo.ComputeToDo,
]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.22 on 2025-07-16 08:39

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds AdvisoryV2.original_advisory_text so the raw upstream advisory
    # payload is retained alongside the parsed/normalized fields.

    dependencies = [
        ("vulnerabilities", "0098_alter_advisory_options_alter_advisoryalias_options_and_more"),
    ]

    operations = [
        # Nullable and blank so existing AdvisoryV2 rows need no backfill
        # when this migration is applied.
        migrations.AddField(
            model_name="advisoryv2",
            name="original_advisory_text",
            field=models.TextField(
                blank=True,
                help_text="Raw advisory data as collected from the upstream datasource.",
                null=True,
            ),
        ),
    ]
6 changes: 6 additions & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2744,6 +2744,12 @@ class AdvisoryV2(models.Model):
blank=True, null=True, help_text="UTC Date on which the advisory was imported"
)

original_advisory_text = models.TextField(
blank=True,
null=True,
help_text="Raw advisory data as collected from the upstream datasource.",
)

affecting_packages = models.ManyToManyField(
"PackageV2",
related_name="affected_by_advisories",
Expand Down
21 changes: 14 additions & 7 deletions vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,13 +307,20 @@ def collect_and_store_advisories(self):
if advisory is None:
self.log("Advisory is None, skipping")
continue
if _obj := insert_advisory_v2(
advisory=advisory,
pipeline_id=self.pipeline_id,
get_advisory_packages=self.get_advisory_packages,
logger=self.log,
):
collected_advisory_count += 1
try:
if _obj := insert_advisory_v2(
advisory=advisory,
pipeline_id=self.pipeline_id,
get_advisory_packages=self.get_advisory_packages,
logger=self.log,
):
collected_advisory_count += 1
except Exception as e:
self.log(
f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}",
level=logging.ERROR,
)
continue

self.log(f"Successfully collected {collected_advisory_count:,d} advisories")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import logging
import re
import urllib.parse
from typing import Iterable

import requests
from bs4 import BeautifulSoup
from dateutil import parser as date_parser
from packageurl import PackageURL
from univers.version_constraint import VersionConstraint
from univers.version_range import ApacheVersionRange
Expand Down Expand Up @@ -272,8 +274,11 @@ def to_advisory(self, data):
versions_data.append(version_data)

fixed_versions = []
date_published = None
for timeline_object in data.get("timeline") or []:
timeline_value = timeline_object.get("value")
if timeline_value == "public":
date_published = timeline_object.get("time")
if "release" in timeline_value:
split_timeline_value = timeline_value.split(" ")
if "never" in timeline_value:
Expand Down Expand Up @@ -307,6 +312,8 @@ def to_advisory(self, data):
weaknesses=weaknesses,
url=reference.url,
severities=severities,
original_advisory_text=json.dumps(data, indent=2, ensure_ascii=False),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need indent.

date_published=date_parser.parse(date_published) if date_published else None,
)

def to_version_ranges(self, versions_data, fixed_versions):
Expand Down
156 changes: 156 additions & 0 deletions vulnerabilities/pipelines/v2_importers/curl_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#

import json
import logging
from datetime import datetime
from datetime import timezone
from typing import Iterable

from cwe2.database import Database
from packageurl import PackageURL
from univers.version_range import GenericVersionRange
from univers.versions import SemverVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import ReferenceV2
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerabilities.severity_systems import SCORING_SYSTEMS
from vulnerabilities.utils import fetch_response
from vulnerabilities.utils import get_cwe_id
from vulnerabilities.utils import get_item

logger = logging.getLogger(__name__)


class CurlImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline-based importer for curl advisories from curl.se.

    Downloads the aggregated ``vuln.json`` feed and yields one AdvisoryData
    per entry that carries a valid CVE alias.
    """

    pipeline_id = "curl_importer_v2"
    spdx_license_expression = "curl"
    license_url = "https://curl.se/docs/copyright.html"
    repo_url = "https://github.com/curl/curl-www/"
    url = "https://curl.se/docs/vuln.json"
    unfurl_version_ranges = True

    @classmethod
    def steps(cls):
        return (cls.collect_and_store_advisories,)

    def fetch_data(self):
        """
        Return the parsed vuln.json feed, fetching it at most once.

        Both ``advisories_count`` and ``collect_advisories`` call this; the
        cache avoids downloading and re-parsing the feed once per call.
        """
        if not hasattr(self, "_vuln_data"):
            self._vuln_data = fetch_response(self.url).json()
        return self._vuln_data

    def advisories_count(self) -> int:
        # One feed entry == one advisory.
        return len(self.fetch_data())

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield AdvisoryData for each feed entry with a valid CVE alias."""
        for entry in self.fetch_data():
            aliases = entry.get("aliases") or []
            cve_id = aliases[0] if aliases else None
            # Skip (and log) entries whose first alias is not a CVE id.
            if not cve_id or not cve_id.startswith("CVE"):
                package = get_item(entry, "database_specific", "package")
                logger.error(f"Invalid CVE ID: {cve_id} in package {package}")
                continue
            yield parse_curl_advisory(entry)


def parse_curl_advisory(raw_data) -> AdvisoryData:
    """
    Parse advisory data from raw JSON data and return an AdvisoryData object.

    Args:
        raw_data (dict): Raw JSON data containing advisory information.

    Returns:
        AdvisoryData: Parsed advisory data as an AdvisoryData object.
    """
    # Tolerate missing keys: get_item returns None when a key is absent, so
    # normalize to empty containers before indexing. Use {} (not []) for the
    # empty fallbacks so the later .get() calls cannot raise AttributeError.
    affected_entries = get_item(raw_data, "affected") or []
    affected = affected_entries[0] if affected_entries else {}

    range_entries = get_item(affected, "ranges") or []
    ranges = range_entries[0] if range_entries else {}
    event_entries = get_item(ranges, "events") or []
    # The second event conventionally carries the "fixed" version.
    events = event_entries[1] if len(event_entries) > 1 else {}
    version_type = get_item(ranges, "type") or ""
    fixed_version = events.get("fixed")
    if version_type == "SEMVER" and fixed_version:
        fixed_version = SemverVersion(fixed_version)

    purl = PackageURL(type="generic", namespace="curl.se", name="curl")
    versions = affected.get("versions") or []
    affected_version_range = GenericVersionRange.from_versions(versions)

    affected_package = AffectedPackage(
        package=purl,
        affected_version_range=affected_version_range,
        fixed_version=fixed_version,
    )

    database_specific = raw_data.get("database_specific") or {}

    references = []
    www_url = database_specific.get("www")
    issue_url = database_specific.get("issue")
    json_url = database_specific.get("URL")

    if www_url:
        references.append(ReferenceV2(url=www_url))
    if issue_url:
        references.append(ReferenceV2(url=issue_url))
    severity = VulnerabilitySeverity(
        system=SCORING_SYSTEMS["cvssv3.1"], value=database_specific.get("severity", ""), url=www_url
    )

    published = raw_data.get("published", "")
    date_published = (
        datetime.strptime(published, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
        if published
        else None
    )

    weaknesses = get_cwe_from_curl_advisory(raw_data)

    # Copy before mutating: removing the advisory id from the raw aliases
    # list would otherwise alter the caller's raw_data in place.
    aliases = list(raw_data.get("aliases") or [])
    advisory_id = raw_data.get("id") or ""
    if advisory_id in aliases:
        aliases.remove(advisory_id)

    return AdvisoryData(
        advisory_id=advisory_id,
        aliases=aliases,
        summary=raw_data.get("summary") or "",
        affected_packages=[affected_package],
        references_v2=references,
        date_published=date_published,
        weaknesses=weaknesses,
        url=json_url,
        severities=[severity],
        original_advisory_text=json.dumps(raw_data, indent=2, ensure_ascii=False),
    )


def get_cwe_from_curl_advisory(raw_data):
    """
    Extracts CWE IDs from the given raw_data and returns a list of CWE IDs.

    >>> get_cwe_from_curl_advisory({"database_specific": {"CWE": {"id": "CWE-333"}}})
    [333]
    >>> get_cwe_from_curl_advisory({"database_specific": {"CWE": {"id": ""}}})
    []
    """
    cwe_string = get_item(raw_data, "database_specific", "CWE", "id") or ""
    if not cwe_string:
        return []

    try:
        cwe_id = get_cwe_id(cwe_string)
        # Look the id up in the CWE database; an unknown id raises and is
        # reported below instead of being propagated.
        Database().get(cwe_id)
    except Exception:
        logger.error(f"Invalid CWE id: {cwe_string}")
        return []
    return [cwe_id]
Loading