From f5eedc1cd93fd852543ae08f291483cc47a6f4a8 Mon Sep 17 00:00:00 2001 From: Steve Springett Date: Mon, 24 Feb 2025 22:14:27 -0600 Subject: [PATCH] Initial commit Signed-off-by: Steve Springett --- insights/openapi.json | 178 ++++++++++++++++++++++++++++++++++++++++++ insights/readme.md | 112 ++++++++++++++++++++++++++ 2 files changed, 290 insertions(+) create mode 100644 insights/openapi.json create mode 100644 insights/readme.md diff --git a/insights/openapi.json b/insights/openapi.json new file mode 100644 index 0000000..f4282a8 --- /dev/null +++ b/insights/openapi.json @@ -0,0 +1,178 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Insights API", + "version": "1.0.0" + }, + "servers": [ + { + "url": "https://api.example.com", + "description": "Production server" + } + ], + "paths": { + "/insights/static": { + "post": { + "summary": "Static Insights Query (CEL)", + "description": "Use a CEL (Common Expression Language) expression to query internal data and return a CycloneDX fragment. Server-side validation ensures the expression is valid CEL. If the expression fails validation, a 400 error is returned. Implementations may also limit or omit data if the query requests restricted information.", + "operationId": "postStaticInsights", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "expression" + ], + "properties": { + "expression": { + "type": "string", + "description": "A valid CEL expression. The server will return 400 if invalid. If the expression requests restricted data, a 403 may be returned or the data may be omitted." + } + } + } + } + } + }, + "responses": { + "200": { + "description": "A CycloneDX fragment based on the static (CEL) query. 
Sensitive data may be omitted if the client is not authorized to view it.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CycloneDXBom" + } + } + } + }, + "400": { + "description": "Invalid CEL expression or request error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string" + } + }, + "example": { + "error": "Invalid CEL expression syntax" + } + } + } + } + }, + "403": { + "description": "Forbidden - the requested data is restricted for this client", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "description": "A message indicating that this query is not allowed or contains restricted data." + } + }, + "example": { + "error": "You are not permitted to access this information." + } + } + } + } + } + } + } + }, + "/insights/dynamic": { + "post": { + "summary": "Dynamic Insights Query (AI)", + "description": "Accept a natural-language prompt to be used by an AI system for querying or generating insights, returning a CycloneDX fragment. Implementations may also limit or omit data if the query requests restricted information.", + "operationId": "postDynamicInsights", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "prompt" + ], + "properties": { + "prompt": { + "type": "string", + "description": "The user-provided natural-language prompt or question." + }, + "systemPrompt": { + "type": "string", + "description": "An optional higher-level instruction (e.g., a system persona)." + }, + "modelSettings": { + "type": "object", + "description": "Optional model tuning parameters or flags. Implementation specific.", + "additionalProperties": true + } + } + } + } + } + }, + "responses": { + "200": { + "description": "A CycloneDX fragment derived from the AI interpretation of the prompt. 
Sensitive data may be omitted if the client is not authorized to view it.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CycloneDXBom" + } + } + } + }, + "400": { + "description": "Invalid request data", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string" + } + } + } + } + } + }, + "403": { + "description": "Forbidden - the requested data is restricted for this client", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "error": { + "type": "string", + "description": "A message indicating that this query is not allowed or contains restricted data." + } + }, + "example": { + "error": "You are not permitted to access this information." + } + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "CycloneDXBom": { + "$ref": "https://raw.githubusercontent.com/CycloneDX/specification/master/schema/bom-1.6.schema.json" + } + } + } +} diff --git a/insights/readme.md b/insights/readme.md new file mode 100644 index 0000000..4c1e0c2 --- /dev/null +++ b/insights/readme.md @@ -0,0 +1,112 @@ +# Insights API + +Much of the focus on Software Transparency from the U.S. Government (and others) centers around the concept of “full transparency.” In practice, this often means **consumers** need to ingest, process, and analyze SBOMs or VEX documents just to answer **simple questions**, such as: + +- *Do any of my licensed products from Vendor A use Apache Log4J?* +- *Are any of my licensed products from Vendor A vulnerable to log4shell, and is there any action I need to take?* + +However, “full transparency” can be cumbersome for consumers, requiring them to parse potentially large and complex SBOM or VEX data. + +## Overview + +**Insights** provides a more streamlined approach: **“limited transparency.”** It enables consumers to: + +1. 
Ask specific, outcome-driven queries in an **expression language** (CEL) or via **natural-language prompts** (AI/LLM). +2. Receive just the **essential information** needed—without having to manage BOM conversions themselves. + +Under the hood, Insights uses an **object model derived from CycloneDX** to generate responses. However, the **actual BOM consumption and processing** on the implementation side is **not** format-specific—implementers can support **CycloneDX, SPDX, Syft, or any other** current or future BOM format. Insights simply **normalizes** data into a CycloneDX-based response format for consistency and interoperability. + +## Endpoints + +Insights provides two primary endpoints to query the system: + +### 1. **Static Insights** (`POST /insights/static`) + +- **Method**: `POST` +- **Request Body**: + - `expression` (string, **required**) – A [CEL](https://github.com/google/cel-spec) (Common Expression Language) expression describing the data you want to retrieve from the underlying model. +- **Response**: Returns a **CycloneDX** fragment (JSON) containing the filtered results. + +Use this when you have a **precise, automatable condition** (e.g., `"component.name == 'Apache Struts'"`). + +### 2. **Dynamic Insights** (`POST /insights/dynamic`) + +- **Method**: `POST` +- **Request Body**: + - `prompt` (string, **required**) – The main user-provided natural-language query. + - `systemPrompt` (string, *optional*) – A higher-level or “system” instruction guiding the AI’s persona or context (e.g., “You are a security expert.”). + - `modelSettings` (object, *optional*) – Additional parameters/flags for fine-tuning the AI model (e.g., `temperature`, `maxTokens`). +- **Response**: Returns a **CycloneDX** fragment (JSON) based on the AI’s interpretation of your prompt. + +Use this when you want to **ask in natural language** without necessarily writing a formal query expression. + +## Why “Limited Transparency”? 
+ +Instead of requiring **full ingestion and parsing** of entire SBOMs or VEX documents by the consumer, Insights: + +- Lets you **ask specifically** for what you need. +- Returns **only** the relevant data. +- Offloads the complexity of BOM handling and format conversions to the **server** side, **regardless of the original BOM format**. + +## Example Queries & Use Cases + +Below are **typical questions** you might ask using either the static **(CEL)** or dynamic **(AI)** approach: + +1. **List all third-party dependencies for a given product** + - Static example (`expression`): `"productId == 'XYZ' && component.type == 'third-party'"` + - Dynamic example (`prompt`): “Which third-party dependencies does product XYZ use?” + +2. **Show only the open-source dependencies for a given product** + - Static example: `"productId == 'XYZ' && component.licenseType == 'Open Source'"` + - Dynamic example: “List all open-source dependencies in product XYZ.” + +3. **List all vulnerabilities in a given product** + - Static example: `"productId == 'XYZ' && component.vulnerabilities.size() > 0"` + - Dynamic example: “What vulnerabilities exist in product XYZ?” + +4. **Does the product use log4j and, if so, is it vulnerable to log4shell? If it is, what actions are needed to minimize risk?** + - Static example: `"productId == 'XYZ' && component.name == 'log4j' && 'log4shell' in component.vulnerabilities"` + - Dynamic example: “Does product XYZ use log4j? If so, is it impacted by log4shell and how can I mitigate the risk?” + +5. **Which cryptographic algorithms does a product use?** + - Static example: `"productId == 'XYZ' && component.cryptography != null"` + - Dynamic example: “Identify all cryptographic algorithms in product XYZ.” + +6. 
**Provide an SSDF, BSIMM, or OWASP SAMM attestation on how a product was developed** + - Static example: `"productId == 'XYZ' && product.processAttestation.type == 'OWASP SAMM'"` + - Dynamic example: “Generate an attestation for product XYZ based on OWASP SAMM (or SSDF/BSIMM).” + +Each of these examples demonstrates how you can **quickly retrieve insights** from product data that’s anchored in CycloneDX—either using structured CEL queries or direct natural-language questions. + +## Architecture Highlights + +- **CycloneDX-Derived Model** + While the **response** is always a **CycloneDX** structure (to maintain consistency), the underlying engine can **ingest data from any BOM format**—CycloneDX, SPDX, Syft, etc.—and normalize it for output. + +- **Expression-Language Queries (CEL)** + A concise, powerful way to specify filtering and logic—ideal for CI pipelines or automated workflows. + +- **AI/Natural Language Queries** + Enables a more conversational style, letting you “speak” to the system in everyday language while still retrieving detailed SBOM or VEX insights. + +- **Implementation Flexibility** + - `systemPrompt`: Optional top-level context (e.g., “You are a security guru”). + - `modelSettings`: An open-ended object for advanced model parameters (e.g., `temperature`, `maxTokens`). + +## Error Handling + +- **400 Bad Request**: Returned if the CEL expression is invalid or if the request body is malformed. +- **403 Forbidden**: Returned if the requested data is restricted for this client. Other 4xx/5xx codes could be used for authentication issues, server errors, etc., depending on your deployment environment. + +## Future Enhancements + +- **Session/Conversation Support**: To enable multi-turn conversations with context retained across requests. +- **Streaming Responses**: For long or incremental AI responses, possibly via WebSockets or Server-Sent Events. +- **Extended Model Parameters**: For more sophisticated control over AI behavior. 
+ +## Conclusion + +**Insights** streamlines software transparency by letting consumers query or ask questions **on demand**—without the burden of parsing raw SBOM or VEX formats. **Regardless of which BOM format** you use—CycloneDX, SPDX, Syft, or another—Insights can unify the data and produce a consistent, CycloneDX-based response. By leveraging **CEL** and **AI** queries, it accommodates both automated and human-friendly workflows. + +For more details on the endpoints and schema definitions, refer to the [OpenAPI specification](./openapi.json) in this repository. If you have further questions, please open an issue or reach out to the maintainers. +