diff --git a/docs.json b/docs.json index 10cd1ce..f973461 100644 --- a/docs.json +++ b/docs.json @@ -89,6 +89,14 @@ { "href": "https://docs.phala.network/dstack/design-documents", "label": "Design Documents" + }, + { + "href": "https://docs.phala.network/dstack-cloud/overview", + "label": "Cloud Overview" + }, + { + "href": "https://docs.phala.network/dstack-cloud/get-started", + "label": "Cloud Quick Start" } ] }, @@ -774,6 +782,59 @@ ], "tab": "dstack" }, + { + "icon": "server", + "pages": [ + "/dstack-cloud/overview", + "/dstack-cloud/get-started", + { + "group": "Concepts", + "pages": [ + "/dstack-cloud/kms-and-key-delivery", + "/dstack-cloud/nitro-enclave", + "/dstack-cloud/attestation-integration", + "/dstack-cloud/governance", + "/dstack-cloud/security-model" + ] + }, + { + "group": "How-to Guides", + "pages": [ + "/dstack-cloud/run-on-gcp", + "/dstack-cloud/run-on-nitro", + "/dstack-cloud/run-kms-on-gcp", + "/dstack-cloud/deploy-onchain-kms", + "/dstack-cloud/register-enclave-measurement", + "/dstack-cloud/manage-governance" + ] + }, + { + "group": "Operations", + "pages": [ + "/dstack-cloud/monitoring-alerting", + "/dstack-cloud/upgrade", + "/dstack-cloud/runbook" + ] + }, + { + "group": "Reference", + "pages": [ + "/dstack-cloud/api-reference", + "/dstack-cloud/configuration", + "/dstack-cloud/glossary" + ] + }, + { + "group": "Appendix", + "pages": [ + "/dstack-cloud/code-walkthrough", + "/dstack-cloud/e2e-test-report", + "/dstack-cloud/release-notes" + ] + } + ], + "tab": "dstack Cloud" + }, { "icon": "network", "pages": [ @@ -2042,4 +2103,4 @@ "thumbnails": { "background": "/images/phala-docs-og.png" } -} +} \ No newline at end of file diff --git a/dstack-cloud/api-reference.mdx b/dstack-cloud/api-reference.mdx new file mode 100644 index 0000000..3132c15 --- /dev/null +++ b/dstack-cloud/api-reference.mdx @@ -0,0 +1,195 @@ +--- +title: API Reference +description: API reference for Guest Agent (Unix Socket), KMS Onboard API, and management endpoints. 
+--- + +# API Reference + +dstack-cloud exposes three API surfaces: the Guest Agent's Unix socket (for apps inside the CVM), the KMS RPC (for key delivery over RA-TLS), and the Onboard HTTP endpoints (for first-time KMS bootstrap). + +## Guest Agent API (Unix Socket) + +The Guest Agent runs inside each dstack CVM and provides local APIs via a Unix socket at `/var/run/dstack.sock`. + +### Get TDX Quote + +Obtain a TDX attestation quote from the hardware. + +**GCP (TDX) only.** + +```bash +curl --unix-socket /var/run/dstack.sock \ + "http://localhost/GetQuote?report_data=0x1234deadbeef" +``` + +**Parameters:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `report_data` | string (hex) | Optional challenge value (up to 64 bytes). Used to prevent replay attacks. | + +**Response:** + +```json +{ + "quote": "", + "rtmr0": "...", + "rtmr1": "...", + "rtmr2": "...", + "rtmr3": "..." +} +``` + +### Get NSM Attestation + +Obtain a Nitro Attestation Document from the NSM. + +**AWS Nitro only.** + +```bash +curl --unix-socket /var/run/dstack.sock \ + "http://localhost/GetAttestation?user_data=0x1234deadbeef" +``` + +**Parameters:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `user_data` | string (hex) | Optional challenge value. | + +**Response:** + +```json +{ + "document": "" +} +``` + +### Get Attestation (HTTP) + +External attestation endpoint, accessible via HTTPS. + +```bash +curl https://your-app.example.com/attestation +``` + +**Response:** Full attestation data (Quote or Document) for external verification. + +--- + +## KMS API + +The KMS exposes an RPC interface for key management. All communication uses RA-TLS — the KMS verifies the workload's attestation before processing any request. + +### getKey(name) + +Request a key from the KMS. 
+ +```bash +# Called from within the application via dstack SDK +# Not directly callable via curl (requires RA-TLS handshake) +``` + +**Parameters:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `name` | string | The name of the requested key. Keys are scoped per application. | + +**Returns:** The requested key as raw bytes. + +**Error Codes:** + +| Error | Description | +|-------|-------------| +| `UNAUTHORIZED` | Workload attestation verification failed. The workload is not running in a verified TEE. | +| `MEASUREMENT_NOT_FOUND` | The workload's measurement is not registered on-chain. | +| `KEY_NOT_FOUND` | No key exists for the requested name. | +| `INTERNAL_ERROR` | KMS encountered an internal error. | + +### How Applications Use getKey + +Applications retrieve keys through the dstack SDK, which handles attestation and the RA-TLS connection automatically: + +**Python:** +```python +from dstack import DstackClient + +client = DstackClient() +key = client.get_key("my-api-key") +``` + +**TypeScript:** +```typescript +import { DstackClient } from '@dstack/sdk'; + +const client = new DstackClient(); +const key = await client.getKey("my-api-key"); +``` + +**Rust:** +```rust +use dstack_sdk::DstackClient; + +let client = DstackClient::new()?; +let key = client.get_key("my-api-key")?; +``` + +--- + +## KMS Onboard API (HTTP, Bootstrap Only) + +These endpoints are only available during the first-time bootstrap (Onboard mode). After bootstrap is completed, KMS switches to RA-TLS-only mode. + +### Onboard.Bootstrap + +Generate the KMS key pair and obtain attestation information. + +```bash +curl -s "http://<KMS_HOST>:12001/prpc/Onboard.Bootstrap?json" \ + -d '{"domain": "<KMS_DOMAIN>"}' +``` + +**Parameters:** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `domain` | string | The domain name or IP address where KMS is accessible. Used in the attestation data. 
| + +**Response:** +```json +{ + "publicKey": "...", + "attestation": "...", + "measurement": "..." +} +``` + +### /finish + +Complete the bootstrap process. KMS restarts and switches to Normal mode (HTTPS + RA-TLS). + +```bash +curl "http://<KMS_HOST>:12001/finish" +``` + +**Response:** HTTP 200 on success. + +--- + +## Docker Compose Volume for Guest Agent + +To access the Guest Agent from within a Docker container, mount the socket: + +```yaml +services: + my-app: + image: my-app:latest + volumes: + - /var/run/dstack.sock:/var/run/dstack.sock +``` + +## Next Steps + +- **[Configuration Reference](configuration)** — Configuration file formats +- **[Glossary](glossary)** — Term definitions diff --git a/dstack-cloud/attestation-integration.mdx b/dstack-cloud/attestation-integration.mdx new file mode 100644 index 0000000..16f2f16 --- /dev/null +++ b/dstack-cloud/attestation-integration.mdx @@ -0,0 +1,167 @@ +--- +title: Attestation Integration +description: How dstack-cloud integrates remote attestation mechanisms on GCP (TDX Quote) and AWS Nitro (NSM Attestation Document). +--- + +# Attestation Integration + +Remote attestation is how a TEE proves it's running genuine, unmodified code on real hardware. dstack-cloud integrates two different attestation mechanisms — Intel TDX on GCP and AWS NSM on Nitro — to give KMS confidence before dispatching keys. + +> For the fundamentals of dstack Remote Attestation, the RA-TLS protocol design, and ZT-TLS / ACME certificate management, refer to the official Phala documentation: +> +> - [TEE Attestation Guide](https://docs.phala.com/dstack/trust-center-technical) +> - [TEE-Controlled Domain Certificates](https://docs.phala.com/dstack/design-documents/tee-controlled-domain-certificates) + +## What Is Remote Attestation + +Remote Attestation is the process by which a TEE proves its identity and integrity to an external party. The proof contains: + +1. 
**Hardware signature** — Confirms the workload is running in genuine TEE hardware (not emulated) +2. **Workload measurements** — Cryptographic hash of the code and configuration running inside the TEE +3. **User data** — An optional challenge value to prevent replay attacks + +The verifier (e.g., KMS) checks the hardware signature and measurements to confirm the workload has not been tampered with. + +## Attestation on GCP (Intel TDX + vTPM) + +On GCP, dstack uses a **dual-attestation approach** combining Intel TDX and Google-managed vTPM to build a comprehensive trust chain. + +### Architecture Overview + +GCP Confidential VMs provide two independent attestation mechanisms: + +| Source | Type | Coverage | Trust Root | +|--------|------|----------|------------| +| **Intel TDX Module** | Hardware-based | Firmware + RTMR registers | Intel TDX hardware | +| **Google vTPM** | Software-based | Boot loader + kernel integrity | Google's certificate authority | + +dstack binds these two mechanisms together to provide defense-in-depth attestation. + +### Attestation Creation Flow + +When a dstack CVM starts on GCP, the Guest Agent performs the following steps: + +1. **Collect report_data** (64 bytes), optionally bound to an RA-TLS public key +2. **Generate TDX Quote** via `tdx-attest::get_quote(report_data)`: + - Includes all measurement values (MRTD, RTMR0-3) + - Signed by the Intel TDX module +3. **Read TDX event log** via `cc-eventlog::tdx::read_event_log()` +4. **Compute TPM qualifying data** as `sha256(tdx_quote)` — this binds the two attestations together +5. **Generate vTPM Quote** via `tpm-attest::TpmContext::create_quote(qualifying_data, policy)`: + - Signed by Google's vTPM attestation key + - Includes PCR values representing boot chain integrity +6. 
**Package attestation** as `DstackGcpTdxQuote { tdx_quote, tpm_quote }` + +### Trust Chain Binding + +The key innovation is the **qualifying data binding**: + +``` +tpm_quote.qualifying_data = sha256(tdx_quote) +``` + +This creates a cryptographic link between the two independent attestation mechanisms: +- **TDX Quote** proves the workload runs in genuine Intel TDX hardware with specific RTMR values +- **vTPM Quote** proves the boot chain integrity via PCR values +- The SHA-256 binding ensures both attestations come from the same CVM instance + +### Verification Flow + +The verifier (e.g., KMS) performs dual verification: + +**TDX Verification:** +1. Retrieve TDX collateral and verify quote signature using Intel's TCB info +2. Verify TCB status (debug mode disabled, mr_signer_seam valid) +3. Replay runtime events and compare RTMR3 with quote's RTMR3 +4. Verify `report_data` matches expected value + +**vTPM Verification:** +1. Retrieve vTPM collateral and verify quote signature using Google's certificate chain +2. Replay runtime events and compare PCR values +3. Verify `qualifying_data == sha256(tdx_quote)` — confirms binding + +Only when **both** verifications succeed is the attestation considered valid. + +### Measurement Registers (GCP) + +**TDX RTMR Registers:** + +| Register | Contents | +|----------|----------| +| **MRTD** | Measured firmware (virtual firmware measurement) | +| **RTMR0** | OS kernel and initramfs | +| **RTMR1** | OS kernel command-line parameters | +| **RTMR2** | OS runtime and init scripts | +| **RTMR3** | Application `compose-hash` + KMS binding + instance ID | + +**vTPM PCR Registers:** + +The vTPM provides an independent measurement of the boot chain. While dstack primarily relies on TDX RTMRs for application attestation, the vTPM PCRs offer an additional layer of verification for the boot process. 
+ +> **Note:** For detailed PCR assignments and event log structure, refer to the [GCP Confidential VM Attestation Documentation](https://cloud.google.com/confidential-computing/confidential-vm/docs/attestation). + +## Attestation on AWS Nitro (NSM) + +When a Nitro Enclave launches from its EIF (Enclave Image File): + +1. **NSM auto-generation** — The Nitro Secure Module automatically generates an Attestation Document when the Enclave launches +2. **Document contents** — The Attestation Document includes: + - PCR0-2 values (Platform Configuration Registers) — 3 measurements generated during EIF build + - NSM signature + - Enclave ID and launch timestamp +3. **OS_IMAGE_HASH** — The 3 PCR values (PCR0, PCR1, PCR2) are combined into a single `OS_IMAGE_HASH`, which serves as the unique workload identifier +4. **Verification** — KMS verifies the Attestation Document: + - Validates the NSM signature using AWS root certificates + - Checks that the `OS_IMAGE_HASH` matches an authorized value registered on-chain + - Confirms the Enclave is running on genuine Nitro hardware + +### Measurement Registers (Nitro) + +| Register | Contents | +|----------|----------| +| **PCR0** | OS image (kernel + initramfs) | +| **PCR1** | Application code and runtime | +| **PCR2** | Additional configuration and dstack-util | + +These 3 PCR values are combined into a single `OS_IMAGE_HASH`, which is the unique identifier used for on-chain authorization. Any change to the Dockerfile, application code, or `dstack-util` version produces a different `OS_IMAGE_HASH`. + +## GCP vs. Nitro: Attestation Comparison + +Both platforms generate hardware-signed attestations that KMS verifies before dispatching keys. The main differences are in the attestation format (TDX Quote vs. NSM Document), verification method (Intel DCAP QVL vs. AWS certificate chain), and how measurements are stored (RTMR registers vs. PCR values). + +For platform-specific details, see the sections above. 
For a broader architectural comparison, see [AWS Nitro Enclave Integration](nitro-enclave). + +## Measurements and On-chain Authorization + +Measurements are the foundation of on-chain key access control: + +- Before KMS dispatches keys to a workload, the workload's measurements must be **registered on-chain** +- This ensures only audited, approved code versions can obtain keys +- If you update your application code, the measurements change, and you must re-register them through governance + +![Measurement Registration Flow](/images/dstack-cloud/measurement-registration-flow.png) + +For the registration process, see [Register Workload Measurements](/dstack-cloud/register-enclave-measurement). + +## Verifying Attestation + +You can verify that a dstack CVM is running in a genuine TEE by requesting its attestation: + +```bash +# Request attestation from a running CVM +curl https://your-app.example.com/attestation +``` + +The response includes the full attestation data that can be independently verified using: + +- **dstack-verifier** — HTTP service or CLI tool provided by dstack-cloud +- **dcap-qvl** — Open-source Intel DCAP Quote Verification Library +- **dstack SDK** — `replayRtmrs()` function for local verification + +For more details, see the [dstack-cloud Verification Guide](https://github.com/Phala-Network/dstack-cloud/blob/master/docs/verification.md). 
+ +## Next Steps + +- **[Security Model](security-model)** — Trust boundaries and security guarantees +- **[Register Workload Measurements](/dstack-cloud/register-enclave-measurement)** — How to register measurements on-chain +- **[Runbook](/dstack-cloud/runbook)** — Troubleshooting attestation failures diff --git a/dstack-cloud/code-walkthrough.mdx b/dstack-cloud/code-walkthrough.mdx new file mode 100644 index 0000000..e576989 --- /dev/null +++ b/dstack-cloud/code-walkthrough.mdx @@ -0,0 +1,133 @@ +--- +title: Code Walkthrough and KT Materials +description: Code walkthrough and knowledge transfer materials for the dstack-cloud project. +--- + +# Code Walkthrough and KT Materials + +This page is an internal knowledge-transfer resource. It maps the codebase structure, traces the key request path from application to KMS, and highlights the files you'll need to understand when contributing to dstack-cloud. + +> **Note:** This document is an internal deliverable for new contributors and team members. + +## Guide for New Contributors + +If you're new to dstack-cloud, here's the recommended reading order: + +1. **Understand the architecture** — Read [overview](overview) first to get the big picture +2. **Set up the development environment** — Follow the README in the repository root +3. **Start with a simple workload** — Deploy a basic nginx container on GCP using the Quick Start +4. **Explore the KMS** — Deploy KMS and observe the bootstrap flow +5. **Read the attestation code** — Understand how measurements are generated and verified +6. 
**Review the contracts** — Understand the governance model and on-chain authorization + +## Repository Structure + +The codebase is organized into these key areas: + +``` +dstack-cloud/ +├── cli/ # dstack-cloud CLI tool +├── kms/ # dstack-kms service +├── packages/ +│ ├── attestation/ # Attestation modules (TDX, NSM, TPM) +│ ├── guest-agent/ # Guest Agent (runs inside CVM) +│ ├── gateway/ # TLS termination and RA-TLS gateway +│ └── vmm/ # Virtual Machine Monitor +├── contracts/ # Smart contracts (DstackKms, DstackApp) +├── scripts/ # Build and deployment scripts +└── docs/ # Documentation +``` + +## Core Request Paths + +Understanding how a key request flows through the system is the best way to learn the codebase. Here's what happens when an application asks KMS for a key: + +### Key Request Flow + +**GCP (via Guest Agent):** +``` +Application (in CVM) + → dstack SDK (via /var/run/dstack.sock) + → Guest Agent + → RA-TLS connection + → dstack-kms (in separate TEE) + → Attestation verification + → On-chain measurement check + → Key derivation and dispatch + → RA-TLS response + → Guest Agent + → Application receives key +``` + +**AWS Nitro (via dstack-util):** +``` +dstack-util (in Enclave) + → NSM attestation document obtained + → VSOCK → VSOCK Proxy → dstack-kms (in separate TEE) + → Attestation verification (NSM + OS_IMAGE_HASH) + → On-chain measurement check + → Key derivation and dispatch + → Key returned to dstack-util + → Application receives key (user decides usage) +``` + +![Key Request Flow](/images/dstack-cloud/key-request-flow.png) + +### CVM Deployment Flow + +When you run `dstack-cloud deploy`, the CLI parses your configuration and orchestrates the creation of a TEE environment. 
The flow differs by platform: + +**GCP (dstack CVM):** +``` +dstack-cloud deploy + → Parse docker-compose.yaml + → Build CVM image (dstack-os + containers) + → Generate measurements (RTMR values) + → Create Confidential VM with TDX + → Guest Agent starts inside CVM + → Attestation obtained from hardware +``` + +**AWS Nitro (Enclave):** +``` +dstack-cloud deploy + → Build Docker image from Dockerfile + → Run nitro-cli build-enclave → generates EIF + → 3 PCRs (PCR0-2) produced at build time + → Combine 3 PCRs into 1 OS_IMAGE_HASH + → Register OS_IMAGE_HASH on-chain (via governance) + → Launch Enclave on EC2 instance + → dstack-util handles attestation and key retrieval +``` + +![CVM Deployment Flow](/images/dstack-cloud/cvm-deployment-flow.png) + +## Attestation Module + +The attestation module abstracts platform-specific hardware attestation behind a common interface. Each platform has its own module: + +| Platform | Module | Input | Output | +|----------|--------|-------|--------| +| GCP (TDX) | `tdx-attest` | TDX hardware | TDX Quote | +| AWS Nitro | `nsm-attest` | NSM device | Attestation Document | +| GCP (TPM) | `tpm-attest` | TPM device | TPM Quote | + +## Key Files + +These are the files you'll spend the most time in when contributing: + +| File | Purpose | +|------|---------| +| `kms/src/main.rs` | KMS service entry point, RPC handlers, bootstrap logic | +| `packages/guest-agent/src/main.rs` | Guest Agent entry point, local API server | +| `packages/attestation/src/lib.rs` | Platform-agnostic attestation interface | +| `cli/src/main.rs` | CLI entry point, deploy/status/logs commands | +| `contracts/contracts/DstackKms.sol` | KMS policy contract | +| `contracts/contracts/DstackApp.sol` | Application contract | + +## Resources + +- [dstack-cloud GitHub](https://github.com/Phala-Network/dstack-cloud) +- [dstack framework GitHub](https://github.com/Dstack-TEE/dstack) +- [dstack Official Documentation](https://docs.phala.com/dstack/overview) +- [dstack 
Whitepaper](https://docs.phala.com/dstack/design-documents/whitepaper) diff --git a/dstack-cloud/configuration.mdx b/dstack-cloud/configuration.mdx new file mode 100644 index 0000000..797d0a2 --- /dev/null +++ b/dstack-cloud/configuration.mdx @@ -0,0 +1,199 @@ +--- +title: Configuration Reference +description: Configuration reference for dstack-cloud global config, app.json, docker-compose.yaml, and CLI commands. +--- + +# Configuration Reference + +dstack-cloud uses three layers of configuration: a global CLI config, a per-project `app.json`, and a standard `docker-compose.yaml` for your application. This page documents every field and available option. + +## dstack-cloud Global Configuration + +**Location:** `~/.config/dstack-cloud/config.json` + +```json +{ + "image_search_paths": ["/path/to/your/images"], + "gcp": { + "project": "your-gcp-project-id", + "zone": "us-central1-a", + "bucket": "gs://your-bucket-name" + }, + "nitro": { + "region": "us-east-1" + } +} +``` + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| `image_search_paths` | array of string | Local paths where dstack-cloud looks for OS images | +| `gcp.project` | string | GCP project ID | +| `gcp.zone` | string | GCP zone for VM deployment | +| `gcp.bucket` | string | GCS bucket for storing CVM images | +| `nitro.region` | string | AWS region for Nitro Enclave deployment | + +## app.json (Project Configuration) + +**Location:** `/app.json` + +Generated by `dstack-cloud new` and edited manually for advanced configuration. 
+ +```json +{ + "os_image": "dstack-cloud-0.6.0", + "key_provider": "local", + "instance_name": "my-app", + "platform": "nitro" +} +``` + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| `os_image` | string | The dstack OS image version to use | +| `key_provider` | string | Key provider mode: `local` (default), `tpm`, or `kms` | +| `instance_name` | string | Human-readable name for the instance | +| `platform` | string | Target platform: `gcp` or `nitro` | + +### key_provider Values + +| Value | Description | Use Case | +|-------|-------------|----------| +| `local` | Keys generated locally within the CVM | Development, testing, single-node | +| `tpm` | Use the platform TPM as root of trust | KMS instances on GCP | +| `kms` | Use an external dstack-kms for key delivery | Production workloads that need attested key delivery | + +## docker-compose.yaml + +The standard Docker Compose file defines your application. dstack-cloud reads this file and packages all containers into the CVM. + +### dstack-specific Extensions + +dstack-cloud reads the standard `docker-compose.yaml` format. No special extensions are required. + +**Important notes:** + +- All images must be pullable by the build system (use public registries or pre-pull images) +- Use SHA256 digests for pinned images (recommended for reproducible measurements): + ```yaml + services: + web: + image: nginx:latest@sha256:abc123... 
+ ``` +- The `runtime: nvidia` field is supported for GPU workloads on compatible instances +- `volumes` that reference `/var/run/dstack.sock` are automatically mounted for Guest Agent access + +### Example: Web Application + +```yaml +services: + web: + image: nginx:latest + ports: + - "80:80" +``` + +### Example: AI Inference with GPU + +```yaml +services: + vllm: + image: vllm/vllm-openai:latest + runtime: nvidia + command: --model Qwen/Qwen2.5-7B-Instruct + ports: + - "8000:8000" + volumes: + - /var/run/dstack.sock:/var/run/dstack.sock +``` + +### Example: KMS Instance + +```yaml +services: + dstack-kms: + image: phalanetwork/dstack-kms:latest + environment: + - KMS_HTTPS_PORT=12001 + - ETH_RPC_URL=https://sepolia.base.org + - KMS_CONTRACT_ADDR=0x... + - APP_CONTRACT_ADDR=0x... + ports: + - "12001:12001" +``` + +## .env (Environment Variables) + +**Location:** `/.env` + +Standard dotenv format. Used for environment variables that are injected into the CVM. + +```bash +API_KEY=your-api-key-here +DATABASE_URL=postgres://user:pass@host:5432/db +``` + +### KMS Environment Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `KMS_HTTPS_PORT` | Port for KMS HTTPS/RA-TLS service | `12001` | +| `ETH_RPC_URL` | Ethereum RPC endpoint URL | `https://sepolia.base.org` | +| `KMS_CONTRACT_ADDR` | DstackKms contract address | `0x1234...abcd` | +| `APP_CONTRACT_ADDR` | DstackApp contract address | `0x5678...efgh` | +| `USE_LIGHT_CLIENT` | Use helios light client instead of direct RPC | `true` or `false` | + +### Security of Environment Variables + +- Environment variables are encrypted before leaving your machine +- They are decrypted only inside the CVM/TEE +- The cloud provider and host OS cannot read them in plaintext + +## prelaunch.sh (Pre-launch Script) + +**Location:** `/prelaunch.sh` + +An optional shell script that runs before the CVM/Enclave launches. 
Common uses: + +- Start the VSOCK proxy (Nitro) +- Inject environment variables +- Generate dynamic configuration + +```bash +#!/bin/bash +# prelaunch.sh — runs before CVM launch + +# Start VSOCK proxy (Nitro only) +socat VSOCK-LISTEN:8000,reuseaddr,fork TCP:localhost:8000 & + +# Inject environment variables +export KMS_HTTPS_PORT=12001 +export ETH_RPC_URL=https://sepolia.base.org + +echo "Prelaunch complete" +``` + +## dstack-cloud CLI Commands + +| Command | Description | +|---------|-------------| +| `dstack-cloud new <name>` | Create a new project directory | +| `dstack-cloud deploy` | Build and deploy the CVM/Enclave | +| `dstack-cloud status` | Show deployment status and measurements | +| `dstack-cloud logs [--follow]` | View container logs | +| `dstack-cloud stop` | Stop the running CVM/Enclave | +| `dstack-cloud start` | Start a stopped CVM/Enclave | +| `dstack-cloud remove` | Remove the deployment and clean up resources | +| `dstack-cloud fw allow <port>` | Allow inbound traffic on a port | +| `dstack-cloud fw deny <port>` | Deny inbound traffic on a port | +| `dstack-cloud pull --os-image <image>` | Download an OS image | +| `dstack-cloud config-edit` | Edit the global configuration | + +## Next Steps + +- **[API Reference](api-reference)** — Guest Agent and KMS APIs +- **[Glossary](glossary)** — Term definitions diff --git a/dstack-cloud/deploy-onchain-kms.mdx b/dstack-cloud/deploy-onchain-kms.mdx new file mode 100644 index 0000000..a9feab0 --- /dev/null +++ b/dstack-cloud/deploy-onchain-kms.mdx @@ -0,0 +1,323 @@ +--- +title: Deploy On-chain KMS Smart Contracts +description: Deploy DstackKms smart contracts and configure governance with Multisig and Timelock. +--- + +# Deploy On-chain KMS Smart Contracts + +Deploy the `DstackKms` smart contract to enforce that only authorized workloads can receive keys. This page covers the full contract deployment workflow, including the recommended Safe + Timelock governance setup for production. 
+ +## Overview + +| Component | Required | Description | +|-----------|----------|-------------| +| **DstackKms** | Yes | Stores authorized workload measurements, admin roles, and KMS configuration | +| **ERC1967Proxy** | Yes | Proxy contract for upgradeable DstackKms (UUPS pattern) | +| **TimelockController** | Optional | Enforces a delay on governance actions | +| **Safe (Multisig)** | Optional | Multi-signature wallet for governance | + +> **Note:** Safe and Timelock are **optional security enhancements**, not part of dstack. They are recommended for production but not required for development. + +## Governance Models + +### Model A: Direct Admin (Simplest) + +- Admin is a single EOA (externally owned account) +- No multisig, no timelock +- Governance actions execute immediately +- Suitable for development and testing + +### Model B: Timelock Only + +- Admin is a TimelockController contract +- Governance actions require a delay before execution +- Anyone can execute after delay (or restricted to specific executors) +- Suitable for simple production setups + +### Model C: Safe + Timelock (Recommended for Production) + +- Admin is a Safe multisig wallet +- Timelock enforces a delay +- Requires multi-party approval + delay period +- Maximum security and transparency + +--- + +## Prerequisites + +- [Foundry](https://book.getfoundry.sh/getting-started/installation) installed (`forge`, `cast`) +- A wallet with funds for deployment gas + - Testnet: Use a faucet (e.g., [Base Sepolia faucet](https://www.alchemy.com/faucets/base-sepolia)) + - Mainnet: Sufficient ETH for contract deployment +- RPC endpoint for the target network + +--- + +## Step 1: Set Up the Project + +```bash +# Clone the dstack repository (contains KMS contracts) +git clone https://github.com/Dstack-TEE/dstack.git +cd dstack/kms/auth-eth/contracts + +# Install dependencies (OpenZeppelin) +forge install OpenZeppelin/openzeppelin-contracts@v5.6.1 --no-git +forge install 
OpenZeppelin/openzeppelin-contracts-upgradeable@v5.6.1 --no-git + +# Build contracts +forge build +``` + +--- + +## Step 2: Configure Environment + +Create a `.env` file and load it into your shell (`source .env`) before running the commands below: + +```bash +# .env +RPC_URL=https://sepolia.base.org +PRIVATE_KEY=0x... +``` + +> **Security:** Never commit your private key. Add `.env` to `.gitignore`. + +--- + +## Step 3: Deploy DstackKms (Basic, No Timelock) + +For development/testing, you can deploy DstackKms with direct EOA admin: + +```bash +# Deploy DstackKms implementation +KMS_IMPL=$(forge create src/DstackKms.sol:DstackKms \ + --broadcast \ + --rpc-url $RPC_URL \ + --private-key $PRIVATE_KEY | grep "Deployed to:" | tail -1 | cut -d' ' -f3) + +echo "DstackKms implementation: $KMS_IMPL" + +# Encode initializer calldata +DEPLOYER=$(cast wallet address --private-key $PRIVATE_KEY) +INIT_DATA=$(cast calldata "initialize(address,address)" $DEPLOYER 0x0000000000000000000000000000000000000000) + +# Deploy ERC1967Proxy +KMS_PROXY=$(forge create lib/openzeppelin-contracts/contracts/proxy/ERC1967/ERC1967Proxy.sol:ERC1967Proxy \ + --broadcast \ + --rpc-url $RPC_URL \ + --private-key $PRIVATE_KEY \ + --constructor-args $KMS_IMPL $INIT_DATA | grep "Deployed to:" | tail -1 | cut -d' ' -f3) + +echo "DstackKms proxy: $KMS_PROXY" +``` + +> **Why ERC1967Proxy?** DstackKms uses the UUPS upgradeable pattern. You must deploy a proxy to have a working upgradeable instance. The proxy is the actual application address. 
--- + +## Step 4: Deploy with Timelock (Recommended for Production) + +### Timelock Configuration + +The timelock delay depends on your deployment environment: + +| Parameter | Testnet | Mainnet | +|-----------|---------|---------| +| **Timelock delay** | 1-4 hours | 24-72 hours | +| **Safe signers** | 2-3 addresses | 5-7 addresses (from multiple organizations) | +| **Safe threshold** | 2/3 | ≥ 2/3 | +| **Executor role** | Open or EOA | Safe only (strict control) | +| **Admin role** | EOA or 0x0 | 0x0 (self-managed by timelock) | + +### 4.1 Prepare Timelock Configuration + +```bash +# Set environment variables +export MIN_DELAY=86400 # 1 day in seconds (production: 1-3 days, per the table above) +export PROPOSER=0x... # Address that can schedule operations (your Safe or EOA) +export EXECUTOR=0x... # Address that can execute operations (Safe, or 0x0 for open execution) +export ADMIN=0x... # Admin address (can grant/revoke roles) +``` + +### 4.2 Deploy All Contracts + +```bash +# Get deployer address +DEPLOYER=$(cast wallet address --private-key $PRIVATE_KEY) + +# 1. Deploy DstackKms implementation +KMS_IMPL=$(forge create src/DstackKms.sol:DstackKms \ + --broadcast \ + --rpc-url $RPC_URL \ + --private-key $PRIVATE_KEY | grep "Deployed to:" | tail -1 | cut -d' ' -f3) + +# 2. Encode initializer (owner = deployer initially) +INIT_DATA=$(cast calldata "initialize(address,address)" $DEPLOYER 0x0000000000000000000000000000000000000000) + +# 3. Deploy ERC1967Proxy +KMS_PROXY=$(forge create lib/openzeppelin-contracts/contracts/proxy/ERC1967/ERC1967Proxy.sol:ERC1967Proxy \ + --broadcast \ + --rpc-url $RPC_URL \ + --private-key $PRIVATE_KEY \ + --constructor-args $KMS_IMPL $INIT_DATA | grep "Deployed to:" | tail -1 | cut -d' ' -f3) + +# 4. 
Deploy TimelockController +TIMELOCK=$(forge create lib/openzeppelin-contracts/contracts/governance/TimelockController.sol:TimelockController \ + --broadcast \ + --rpc-url $RPC_URL \ + --private-key $PRIVATE_KEY \ + --constructor-args $MIN_DELAY "[$PROPOSER]" "[$EXECUTOR]" $ADMIN | grep "Deployed to:" | tail -1 | cut -d' ' -f3) + +# 5. Transfer ownership to Timelock +cast send $KMS_PROXY "transferOwnership(address)" $TIMELOCK \ + --rpc-url $RPC_URL \ + --private-key $PRIVATE_KEY + +# 6. Verify ownership +OWNER=$(cast call $KMS_PROXY "owner()(address)" --rpc-url $RPC_URL) + +echo "=== Deployment Result ===" +echo "DstackKms implementation: $KMS_IMPL" +echo "DstackKms proxy: $KMS_PROXY" +echo "TimelockController: $TIMELOCK" +echo "DstackKms owner: $OWNER" +``` + +### 4.3 Understanding TimelockController Roles + +| Role | Description | Who Should Have It | +|------|-------------|-------------------| +| **Proposer** | Can schedule operations | Safe multisig or trusted EOA | +| **Executor** | Can execute operations after delay | Safe, or `address(0)` for open execution | +| **Admin** | Can grant/revoke roles | Should be `address(0)` after setup (self-managed by timelock) | + +--- + +## Step 5: Configure Safe (Optional but Recommended) + +For production, use a Safe multisig as the proposer/executor: + +### 5.1 Create a Safe + +1. Go to [Safe web app](https://app.safe.global) +2. Connect your wallet +3. Create a new Safe on your target network +4. Add signers (3-7 addresses recommended) +5. Set threshold (≥ 2/3 of signers) + +### 5.2 Use Safe Address in Deployment + +When deploying the TimelockController, use your Safe address: + +```bash +export PROPOSER=0x... # Your Safe address +export EXECUTOR=0x... # Your Safe address +export ADMIN=0x0000000000000000000000000000000000000000 # Let timelock manage itself +``` + +### 5.3 Governance Flow with Safe + Timelock + +1. **Draft transaction** — Use Safe web interface to create a transaction +2. **Collect signatures** — Required signers approve +3. 
**Schedule in timelock** — Safe calls `timelock.schedule()` +4. **Wait for delay** — Wait the configured delay period +5. **Execute** — Anyone (or only executor) calls `timelock.execute()` + +--- + +## Step 6: Verify Deployment + +Verify on block explorer: + +```bash +# Verify implementation +forge verify-contract $KMS_IMPL src/DstackKms.sol:DstackKms \ + --chain base-sepolia \ + --verifier etherscan + +# Verify proxy +forge verify-contract $KMS_PROXY lib/openzeppelin-contracts/contracts/proxy/ERC1967/ERC1967Proxy.sol:ERC1967Proxy \ + --chain base-sepolia \ + --verifier etherscan \ + --constructor-args $(cast abi-encode "constructor(address,bytes)" $KMS_IMPL $INIT_DATA) +``` + +Check on block explorer: +- `DstackKms` owner is set to the TimelockController address +- TimelockController has correct proposer/executor roles + +--- + +## How to Execute Governance Actions (With Timelock) + +Once deployed, all `onlyOwner` operations must go through the timelock: + +### Schedule an Operation + +```bash +# Example: Add an authorized measurement +cast send $TIMELOCK "schedule(address,uint256,bytes,bytes32,bytes32,uint256)" \ + $KMS_PROXY \ + 0 \ + $(cast calldata "addKmsAggregatedMr(bytes32)" $YOUR_MEASUREMENT) \ + 0x0000000000000000000000000000000000000000000000000000000000000000 \ + $(cast keccak "unique-operation-id") \ + $MIN_DELAY \ + --rpc-url $RPC_URL \ + --private-key $PROPOSER_KEY +``` + +### Execute After Delay + +```bash +# Wait for MIN_DELAY to pass, then: +cast send $TIMELOCK "execute(address,uint256,bytes,bytes32,bytes32)" \ + $KMS_PROXY \ + 0 \ + $(cast calldata "addKmsAggregatedMr(bytes32)" $YOUR_MEASUREMENT) \ + 0x0000000000000000000000000000000000000000000000000000000000000000 \ + $(cast keccak "unique-operation-id") \ + --rpc-url $RPC_URL \ + --private-key $EXECUTOR_KEY +``` + +--- + +## All Governance-Protected Methods + +Once ownership is transferred to timelock, these methods require timelock governance: + +| Method | Purpose | +|--------|---------| +| 
`setKmsInfo` | Update KMS public key and attestation info | +| `setKmsQuote` | Update KMS quote | +| `setKmsEventlog` | Update KMS event log | +| `setGatewayAppId` | Set gateway application ID | +| `setAppImplementation` | Update app implementation address | +| `addKmsAggregatedMr` | Authorize a KMS measurement | +| `removeKmsAggregatedMr` | Revoke a KMS measurement | +| `addKmsDevice` | Authorize a KMS device | +| `removeKmsDevice` | Revoke a KMS device | +| `addOsImageHash` | Authorize an OS image hash | +| `removeOsImageHash` | Revoke an OS image hash | + +--- + +## Common Issues + +| Issue | Solution | +|-------|----------| +| "Insufficient funds" | Get testnet ETH from faucet, or ensure mainnet wallet has enough ETH | +| "Ownable: caller is not the owner" | Ownership already transferred to timelock. Use timelock.schedule/execute | +| "Timelock: operation is not ready" | Wait for the delay period to pass before executing | +| "Timelock: operation already scheduled" | Use a different salt (unique operation ID) | +| Proxy verification fails | Use `cast abi-encode` to construct the constructor arguments | + +--- + +## Next Steps + +- **[Register KMS Measurements](run-kms-on-gcp)** — Register KMS measurements before bootstrap +- **[Register Workload Measurements](register-enclave-measurement)** — Authorize workloads to receive keys +- **[Manage Governance](manage-governance)** — How to create proposals and execute governance actions +- **[Concept: Governance](/dstack-cloud/governance)** — Understand the governance model diff --git a/dstack-cloud/e2e-test-report.mdx b/dstack-cloud/e2e-test-report.mdx new file mode 100644 index 0000000..eac7c58 --- /dev/null +++ b/dstack-cloud/e2e-test-report.mdx @@ -0,0 +1,55 @@ +--- +title: E2E Test Report +description: End-to-end test report covering test coverage and pre-production validation results. 
+--- + +import { Callout } from 'nextra/components' + +# E2E Test Report + +<Callout> +**This document is a work in progress.** Test results will be populated as they become available. +</Callout> + +## Test Coverage + +Tests are being conducted across the following scenarios. Results will be added as each test completes. + +| Category | Status | Notes | +|----------|--------|-------| +| GCP CVM deployment | In progress | | +| GCP KMS deployment + bootstrap | In progress | | +| Nitro Enclave deployment | In progress | | +| RA-TLS connection (GCP KMS → GCP workload) | In progress | | +| RA-TLS connection (GCP KMS → Nitro workload) | In progress | | +| getKey() key retrieval | In progress | | +| On-chain measurement registration | In progress | | +| On-chain measurement revocation | In progress | | +| Governance flow (propose → approve → timelock → execute) | In progress | | +| KMS bootstrap (Onboard → Finish) | In progress | | +| Multi-node key replication | Planned | | + +## Test Environment + +| Component | Version | +|-----------|---------| +| dstack-cloud | 0.6.0-test | +| dstack OS | dstack-cloud-0.6.0 | +| GCP zone | us-central1-a | +| AWS region | us-east-1 | +| Blockchain | Base Sepolia (testnet) | + +## Known Limitations + +| Limitation | Impact | Mitigation | +|-----------|--------|-----------| +| KMS root key is a single point of failure | If the KMS TEE is compromised, all keys are at risk | MPC distribution planned for future version | +| Nitro Enclaves are stateless | Workloads lose state on restart | Use GCP for persistent workloads; KMS runs on GCP only | +| Public RPC reliability | Testnet RPCs may return stale state | Use multiple RPC providers | + +## Accepted Risks + +| Risk | Acceptance Criteria | +|------|-------------------| +| TEE hardware side-channels | Accept residual risk. Monitor Intel/AWS advisories. | +| Smart contract vulnerabilities | Mitigate with audits. Re-audit after each contract change. 
| diff --git a/dstack-cloud/get-started.mdx b/dstack-cloud/get-started.mdx new file mode 100644 index 0000000..8c9ba31 --- /dev/null +++ b/dstack-cloud/get-started.mdx @@ -0,0 +1,242 @@ +--- +title: "Quick Start: Deploy Your First dstack App on GCP" +description: Quick start tutorial to deploy your first dstack CVM on GCP in 15-25 minutes. +--- + +# Quick Start: Deploy Your First dstack App on GCP + +You'll have a Docker application running inside a hardware-encrypted Confidential VM on GCP within 25 minutes. This tutorial covers the full path: install the CLI, configure your project, deploy, and verify that attestation proves your workload runs in genuine TEE hardware. + +**Estimated time:** 15–25 minutes (first run). + +**What you will do:** + +1. Install `dstack-cloud` CLI +2. Configure global GCP/KMS settings +3. Create a project and define workload +4. Deploy to GCP TDX CVM +5. Verify workload access and runtime status + +--- + +## Prerequisites + +Before you begin: + +- GCP project with Intel TDX quota in target zone (for example `us-central1-a`) +- `gcloud` authenticated + ```bash + gcloud auth login + gcloud config set project YOUR_PROJECT_ID + ``` +- **Linux host** (required — `dstack-cloud deploy` uses FAT32 disk images, which don't work on macOS) +- Docker installed +- `gsutil`, `jq`, `mtools` (for `mcopy`), `dosfstools` (for `mkfs.fat`) — these are needed by the deploy process to build a shared disk image + +--- + +## Step 1: Install dstack-cloud CLI + +```bash +curl -fsSL -o ~/.local/bin/dstack-cloud \ + https://raw.githubusercontent.com/Phala-Network/meta-dstack-cloud/main/scripts/bin/dstack-cloud +chmod +x ~/.local/bin/dstack-cloud +dstack-cloud --help +``` + +--- + +## Step 2: Configure global settings + +```bash +dstack-cloud config-edit +``` + +Use JSON config (`~/.config/dstack-cloud/config.json`): + +```json +{ + "services": { + "kms_urls": ["https://kms.tdxlab.dstack.org:12001"], + "gateway_urls": ["https://gateway.tdxlab.dstack.org:12002"], + 
"pccs_url": "" + }, + "image_search_paths": ["/path/to/images"], + "gcp": { + "project": "YOUR_PROJECT_ID", + "zone": "us-central1-a", + "bucket": "gs://YOUR_BUCKET" + } +} +``` + +If bucket does not exist: + +```bash +gcloud storage buckets create gs://YOUR_BUCKET --project YOUR_PROJECT_ID --location us-central1 +``` + +### Optional: configure external KMS + +If you already deployed your own KMS, replace `services.kms_urls`: + +```json +"services": { + "kms_urls": ["https://YOUR_KMS_IP_OR_DOMAIN:12001"] +} +``` + +--- + +## Step 3: Pull OS image + +```bash +dstack-cloud pull https://github.com/Phala-Network/meta-dstack-cloud/releases/download/v0.6.0-test/dstack-cloud-0.6.0.tar.gz +dstack-cloud pull https://github.com/Phala-Network/meta-dstack-cloud/releases/download/v0.6.0-test/dstack-cloud-0.6.0-uki.tar.gz +``` + +Verify: + +```bash +ls -lh /path/to/images/dstack-cloud-0.6.0/disk.raw +``` + +--- + +## Step 4: Create project + +```bash +dstack-cloud new my-first-app --os-image dstack-cloud-0.6.0 --instance-name dstack-first-app +cd my-first-app +``` + +Project files include: + +``` +my-first-app/ +├── app.json # Application metadata +├── docker-compose.yaml # Your container definition +├── .env # Environment variables (encrypted) +└── prelaunch.sh # Optional pre-launch script (e.g., setup, data download) +``` + +--- + +## Step 5: Configure app + +Edit `app.json` and set: + +- `gcp_config.project = "YOUR_PROJECT_ID"` +- `gcp_config.zone = "us-central1-a"` +- `gcp_config.bucket = "gs://YOUR_BUCKET"` + +Default key mode is `kms`. 
For a basic quick test without an external KMS, switch to: + +- `"key_provider": "tpm"` +- `"gateway_enabled": false` +- remove `.env` file and remove `env_file` field from `app.json` + +--- + +## Step 6: Define workload + +Edit `docker-compose.yaml`: + +```yaml +services: + web: + image: nginx:latest + ports: + - "8080:80" +``` + +--- + +## Step 7: Deploy + +```bash +dstack-cloud deploy --delete +``` + +This will create a TDX CVM and start your workload. + +--- + +## Step 8: Open firewall + +```bash +dstack-cloud fw allow 8080 +``` + +--- + +## Step 9: Verify + +Check that the CVM is running and your workload is accessible: + +```bash +dstack-cloud status +# Expected: shows "RUNNING" with measurements (RTMR values) +dstack-cloud logs --follow + +# Get attestation from your app +curl https://app-abc123.your-gateway-domain.com/attestation + +# Verify using dstack-verifier +dstack-verifier verify +``` + +The attestation proves: +- The workload runs in genuine Intel TDX hardware +- The exact code and measurements match expectations +- The boot chain integrity is verified via TDX + vTPM + +For detailed verification, see [Attestation Integration](/dstack-cloud/attestation-integration). + +**Test workload:** + +```bash +curl http://<INSTANCE_EXTERNAL_IP>:8080 +``` + +If gateway is enabled, use the URL shown by `dstack-cloud status`. + +## Understanding What Happened + +When you deployed your application: + +1. **Confidential VM Created** — A GCP VM with Intel TDX was provisioned +2. **dstack OS Booted** — A minimal, attested guest OS started inside the TEE +3. **Automatic Disk Encryption** — All disk I/O is encrypted with keys managed by the Guest Agent +4. **TEE Attestation** — The Guest Agent provides attestation proof via the TDX + vTPM mechanism +5. **TLS Certificate** — Gateway automatically provisions ACME certificates for your domain + +### Key Delivery via KMS + +dstack uses an external **Key Management Service (dstack-kms)** to deliver keys to your confidential workloads. 
The KMS runs in its own TEE and only dispatches keys to workloads that pass attestation verification. + +--- + +## Managing Your Deployment + +Your application now runs in a hardware-protected environment where even the cloud provider cannot access the memory or data. + +--- + +## Troubleshooting + +| Issue | Fix | +|---|---| +| `Boot image ... not found` | verify image path and `disk.raw` existence | +| VM UEFI boot loop | use valid UKI boot image (`-uki.tar.gz`) | +| `.env found but KMS is not enabled` | remove `.env` and remove `env_file` in `app.json` | +| Port not reachable | ensure firewall rule exists and container has started | +| missing `gsutil` / `mcopy` / `mkfs.fat` | install required dependencies | + +--- + +## Next steps + +- Detailed guide: [Run a Workload on GCP with Self-hosted KMS](/dstack-cloud/run-on-gcp) +- External KMS flow: [Run a dstack-kms CVM on GCP](/dstack-cloud/run-kms-on-gcp) +- Concepts: [Attestation Integration](/dstack-cloud/attestation-integration) diff --git a/dstack-cloud/glossary.mdx b/dstack-cloud/glossary.mdx new file mode 100644 index 0000000..d6cf3b0 --- /dev/null +++ b/dstack-cloud/glossary.mdx @@ -0,0 +1,80 @@ +--- +title: Glossary +description: Glossary of core terminology used in dstack-cloud documentation. +--- + +# Glossary + +Core terminology used in this documentation. + +--- + +## Infrastructure + +| Term | Definition | +|------|-----------| +| **TEE (Trusted Execution Environment)** | A hardware-isolated compute environment that protects code and data from the rest of the system. Memory is encrypted, and the hardware proves the environment's integrity via attestation. | +| **CVM (Confidential Virtual Machine)** | A virtual machine running in a TEE. In dstack-cloud, a CVM runs dstack-os with your Docker containers inside. | +| **Intel TDX (Trust Domain Extensions)** | Intel's TEE technology that provides memory encryption and isolation at the VM level. 
Supported on GCP Confidential VMs and certain bare-metal servers. | +| **SGX (Software Guard Extensions)** | Intel's earlier TEE technology. Provides memory encryption at the enclave (process) level. | +| **AWS Nitro Enclaves** | AWS's TEE technology. Provides process-level isolation within an EC2 instance. Memory is encrypted and inaccessible to the host. | +| **NSM (Nitro Secure Module)** | The hardware component in AWS Nitro that generates attestation documents and manages Enclave lifecycle. | +| **VSOCK** | A socket interface for communication between a Nitro Enclave and its host EC2 instance. The Enclave cannot directly access the network — all traffic goes through VSOCK. | +| **EIF (Enclave Image File)** | The image format used by AWS Nitro Enclaves. Built from a Docker image, it contains the OS and application code that runs inside the Enclave. | +| **TPM (Trusted Platform Module)** | A hardware security module that provides secure key storage and measurement. Used on GCP Confidential VMs as the root of trust. | +| **PCCS (Provisioning Certificate Caching Service)** | Intel's service for caching attestation certificate chains, used in DCAP (Data Center Attestation Primitives) workflows. | + +--- + +## Security Mechanisms + +| Term | Definition | +|------|-----------| +| **Remote Attestation** | The process by which a TEE proves its identity and integrity to an external party. The TEE generates a cryptographic proof (signed by hardware) containing measurements of the running code. | +| **RA-TLS (Remote Attestation TLS)** | An extension of TLS where both parties verify each other's TEE attestation during the handshake. Used for secure communication between workloads and KMS. | +| **ZT-TLS (Zero Trust TLS)** | A TLS variant that requires TEE attestation for every connection, regardless of network location. | +| **Measurement** | A cryptographic hash of the code and configuration running inside a TEE. Any change to the code produces a different measurement. 
| +| **PCR (Platform Configuration Register)** | Measurement registers used by AWS Nitro Enclaves (PCR0-3). | +| **RTMR (Runtime Measurement Register)** | Measurement registers used by Intel TDX (RTMR0-3). | +| **OS_IMAGE_HASH** | A composite hash derived from all PCR values, used as the unique identifier for a Nitro Enclave image on-chain. | +| **compose-hash** | The SHA256 hash of the `docker-compose.yaml` file, stored in RTMR3 on GCP TDX deployments. | +| **Quote (TDX Quote)** | A hardware-signed attestation proof generated by Intel TDX. Contains measurement values (RTMR0-3) and an Intel hardware signature. | +| **Attestation Document** | The attestation proof generated by AWS NSM. Contains PCR values and an NSM signature. | + +--- + +## dstack Components + +| Term | Definition | +|------|-----------| +| **dstack** | The open-source confidential computing framework. Provides a CLI, KMS, attestation, and SDK libraries (Python, TypeScript, Rust, Go) for building confidential applications. [GitHub](https://github.com/Dstack-TEE/dstack) | +| **dstack-sdk** | Client libraries (Python, TypeScript, Rust, Go) that applications use to interact with dstack features such as attestation, key retrieval, and storage encryption. Part of the dstack framework. | +| **dstack-cloud** | An extension of dstack that enables deployment on GCP Confidential VMs and AWS Nitro Enclaves. Provides the `dstack-cloud` CLI. [GitHub](https://github.com/Phala-Network/dstack-cloud) | +| **dstack-os** | The minimal operating system that runs inside each dstack CVM. Built from a reproducible Docker image. | +| **Guest Agent** | A process that runs inside each dstack CVM alongside your application containers. Handles attestation, key retrieval from KMS, and storage encryption. Exposes a local API at `/var/run/dstack.sock`. | +| **KMS (Key Management Service)** | A standalone service that runs in its own TEE. Verifies workload attestation and dispatches keys to authorized workloads. 
Also known as DeRoT (Decentralized Root-of-Trust). | +| **Gateway** | An edge component that handles TLS termination, automatic ACME certificate provisioning, and RA-TLS support for incoming connections. | +| **VMM (Virtual Machine Monitor)** | The component that parses Docker Compose files and boots CVMs from reproducible OS images. | + +--- + +## On-chain Governance + +| Term | Definition | +|------|-----------| +| **DstackKms** | The on-chain KMS policy contract. Stores authorized workload measurements and admin roles. | +| **DstackApp** | The on-chain application entry contract. Holds a reference to DstackKms. | +| **Multisig (Safe)** | A multi-signature wallet that controls governance actions. Requires multiple parties to approve any transaction. | +| **Timelock** | A delay mechanism that enforces a mandatory waiting period between approval and execution of governance transactions. | +| **GovernanceSafe** | The specific Safe wallet instance used for dstack governance. | + +--- + +## Cryptography + +| Term | Definition | +|------|-----------| +| **MPC (Multi-Party Computation)** | A cryptographic technique where multiple parties jointly compute a result without any single party learning the inputs. Planned for future KMS root key generation. | +| **KDF (Key Derivation Function)** | A function that derives application-specific keys from a root key. Each workload gets a unique key derived this way. | +| **SealingKey** | A key used to encrypt data at rest (disk storage) inside the CVM. Derived from the KMS-delivered key. | +| **RootKey** | The top-level key in the KMS hierarchy. All application keys are derived from the RootKey. 
| diff --git a/dstack-cloud/governance.mdx b/dstack-cloud/governance.mdx new file mode 100644 index 0000000..2e1e2bb --- /dev/null +++ b/dstack-cloud/governance.mdx @@ -0,0 +1,142 @@ +--- +title: On-chain Governance Model +description: The on-chain governance model using Multisig + Timelock to control KMS authorization and prevent covert upgrades. +--- + +# On-chain Governance Model + +On-chain governance ensures that key policy changes — like adding or revoking workload measurements — cannot happen silently. Every authorization change goes through a Multisig + Timelock process and is recorded on-chain for anyone to audit. + +## Core Smart Contracts + +### DstackKms + +The KMS policy contract. It stores: + +- **Authorized measurements** — The list of workload measurements (RTMR / OS_IMAGE_HASH) that are allowed to receive keys +- **Configuration parameters** — KMS-related settings controlled by governance +- **Admin roles** — Addresses authorized to perform governance operations + +The off-chain dstack-kms service queries DstackKms to determine whether a workload is authorized before dispatching keys. + +### DstackApp + +The application-level entry contract. It: + +- Holds a reference to DstackKms +- Enforces application-specific checks before delegating to the KMS +- Acts as the on-chain identity for a specific deployment + +### GovernanceSafe + +**Optional.** A multisig wallet (typically [Safe](https://safe.global)) that you can introduce to enhance governance security: + +- Owns DstackKms and DstackApp +- Is the only entity allowed to execute governance operations (registering measurements, changing admin, upgrading contracts) +- Requires multiple signers to approve any transaction + +> **Note:** GovernanceSafe is not part of dstack or dstack-cloud. It is an optional governance mechanism you can add to prevent unilateral control over key policies. 
+ +### Timelock + +**Optional.** A delay module attached to the multisig that: + +- Enforces a mandatory waiting period between approval and execution +- Maintains a queue of pending governance actions +- Gives the community time to review and respond to proposed changes + +> **Note:** Like GovernanceSafe, Timelock is an optional enhancement. You can use it with a multisig to add a delay before governance actions take effect. + +## Governance Workflow + +### Basic Workflow (without Multisig/Timelock) + +At minimum, governance operations (registering measurements, updating configuration) are performed by the admin address directly: + +1. Admin submits a transaction to DstackKms or DstackApp +2. Transaction executes immediately +3. Changes take effect right away + +This is suitable for development and testing, but lacks the security guarantees needed for production. + +### Enhanced Workflow (with Multisig + Timelock) + +For production deployments, we recommend introducing a multisig wallet (e.g., Safe) and a timelock. The workflow becomes: + +![Governance Workflow](/images/dstack-cloud/governance-workflow.png) + +1. **Draft** — A governance action is proposed (e.g., register new measurement) +2. **Submit to Multisig** — The action is submitted to the multisig wallet +3. **Approve** — Required number of signers approve the transaction +4. **Queue in Timelock** — The approved action is queued and must wait for the delay period +5. **Wait** — The delay period passes (e.g., 48 hours), allowing time for review +6. **Execute** — After the delay, anyone can execute the action + +This ensures: +- No single party can unilaterally modify key policies +- All changes are visible and reviewable before taking effect +- Stakeholders have time to respond to suspicious proposals + +## Role of Multisig + Timelock + +| Mechanism | Purpose | +|-----------|---------| +| **Multisig** | Prevents unilateral actions. No single person can modify key policies — requires multi-party approval. 
| +| **Timelock** | Enforces a delay, giving all stakeholders time to detect and respond to suspicious changes. | + +### Recommended Configuration + +| Parameter | Production | Non-production | +|-----------|------------|----------------| +| **Number of signers** | 5-7 (from at least 2 organizations) | 2-3 | +| **Signature threshold** | ≥ 2/3 (e.g., 4-of-6) | ≥ 2/3 | +| **Timelock delay** | 24-72 hours | 1-4 hours | +| **Transaction expiration** | 7-14 days | 3-7 days | + +## Typical Governance Scenarios + +### Register New Measurements (New Version Deploy) + +When you update your application code or dstack OS version, the measurements change. You must register the new measurements before KMS will dispatch keys: + +1. Build the updated CVM image +2. Extract new measurements (RTMR3 / OS_IMAGE_HASH) +3. Draft a governance transaction to add the new measurements to DstackKms +4. Submit to multisig for approval +5. Wait for timelock +6. Execute — KMS now authorizes workloads with the new measurements +7. Deploy the updated CVM + +### Revoke a Compromised Measurement (Emergency) + +If a measurement is found to be vulnerable or unauthorized: + +1. Draft a governance transaction to remove the measurement from DstackKms +2. Submit to multisig for expedited approval +3. Wait for timelock (cannot be bypassed — this is by design) +4. Execute — KMS immediately stops dispatching keys to workloads with that measurement + +> **Note:** Even in emergencies, the timelock cannot be skipped. This is intentional — it ensures all changes are visible and reviewable. Plan your security incident response accordingly. + +### Upgrade Smart Contracts + +If the contracts need to be upgraded (e.g., bug fix, new feature): + +1. Deploy the new contract implementation +2. Draft a governance transaction to update the proxy's implementation address +3. Submit to multisig for approval +4. Wait for timelock +5. 
Execute — the proxy now points to the new implementation + +## Security Considerations + +- **Use hardware wallets:** All multisig signers should use hardware wallets (e.g., Ledger, Trezor) to protect their signing keys. +- **Set appropriate thresholds:** A threshold of ≥ 2/3 ensures that compromising a minority of signers is insufficient to push through malicious changes. +- **Regular health checks:** Periodically verify that all signers are still accessible and the governance configuration is as expected. +- **Monitor governance activity:** Set up alerts for governance transactions (proposals, approvals, executions) to detect suspicious activity early. + +## Next Steps + +- **[Deploy On-chain KMS Smart Contracts](/dstack-cloud/deploy-onchain-kms)** — Set up contracts on testnet or mainnet +- **[Manage Governance](/dstack-cloud/manage-governance)** — How to create proposals and execute governance actions +- **[Security Model](security-model)** — Trust boundaries and security guarantees diff --git a/dstack-cloud/kms-and-key-delivery.mdx b/dstack-cloud/kms-and-key-delivery.mdx new file mode 100644 index 0000000..a18bf5d --- /dev/null +++ b/dstack-cloud/kms-and-key-delivery.mdx @@ -0,0 +1,134 @@ +--- +title: KMS and Key Delivery +description: Understand KMS operating modes, key delivery flow, and GCP vs Nitro differences in dstack-cloud deployments. +--- + +# KMS and Key Delivery + +KMS is the component that decides whether a workload is trustworthy enough to receive keys. This page explains the two operating modes (local vs. external KMS), the key delivery flow on each platform, and how on-chain smart contracts add an auditable authorization layer. 
+ +> For the core design of dstack's Key Management System — including the Decentralized Root-of-Trust (DeRoT) protocol, MPC key generation, key derivation, and rotation — refer to the official Phala documentation: +> +> - [Key Management Protocol](https://docs.phala.com/dstack/design-documents/key-management-protocol) +> - [Decentralized Root-of-Trust](https://docs.phala.com/dstack/design-documents/decentralized-root-of-trust) + +## Operating Modes + +dstack supports two KMS operating modes: + +### Local Key Provider + +Keys are generated and managed locally within each dstack CVM. No separate KMS service is needed. + +- **How it works:** The Guest Agent generates keys using hardware-rooted entropy (TPM on GCP, NSM on Nitro). Keys are sealed to the TEE and never leave the CVM. +- **Use when:** Development, testing, or single-node deployments where key sharing between instances is not required. +- **Limitation:** Keys cannot be shared across multiple CVMs. If you need the same key in different workloads, use KMS mode. + +### KMS Mode + +A standalone **dstack-kms** service runs in its own TEE (a dedicated dstack CVM). It verifies workload identity before dispatching keys. + +- **How it works:** When a dstack application CVM starts, it obtains a hardware attestation and sends it to KMS. KMS verifies the attestation and checks whether the workload is authorized. If verification passes, KMS dispatches the requested key. +- **Use when:** Production deployments, multi-node setups, or when multiple CVM instances need access to the same keys. +- **Requirement:** A dstack-kms instance must be deployed and running before an application CVM can request keys. 
+ +### On-chain Smart Contracts (Security Enhancement) + +KMS mode can optionally integrate with on-chain smart contracts (`DstackKms`, `DstackApp`) to provide an additional layer of security: + +- On-chain contracts control which application CVMs are authorized to receive keys (via registered measurements) +- All authorization changes are visible on-chain — anyone can audit which measurements are approved +- Changes follow a Multisig + Timelock process, preventing covert key policy modifications +- This is a **security enhancement** to KMS mode, not a separate operating mode + +For details on governance, see [On-chain Governance Model](governance). + +## Key Delivery Flow (KMS Mode) + +The following describes the high-level key delivery flow: + +### GCP (dstack CVM with Guest Agent) + +1. **CVM Startup** — A dstack CVM boots and its Guest Agent obtains a TDX Quote from hardware. + +2. **Secure Channel** — The CVM establishes a secure connection with KMS using RA-TLS (Remote Attestation TLS). Both sides verify each other's attestation during the TLS handshake. + +3. **Attestation Verification** — KMS verifies the TDX Quote: + - Confirms the CVM is running in genuine TEE hardware + - Extracts the workload measurement (RTMR values) + - Checks whether the measurement matches an authorized value + +4. **On-chain Check** *(if on-chain contracts are configured)* — KMS queries the blockchain to verify that the workload's measurement is registered and authorized. + +5. **Key Dispatch** — If all checks pass, KMS derives the requested key and sends it to the CVM over the encrypted channel. + +6. **Key Usage** — The Guest Agent manages the key lifecycle. Your application retrieves the key via the dstack SDK (Python, TypeScript, Rust, or Go) by connecting to the Guest Agent at `/var/run/dstack.sock`. The Guest Agent can also automatically handle disk encryption. + +### AWS Nitro (Enclave with dstack-util) + +1. **Enclave Startup** — A Nitro Enclave launches from the EIF. 
`dstack-util` inside the Enclave obtains an NSM Attestation Document. + +2. **Secure Channel** — `dstack-util` establishes a connection with KMS through the VSOCK proxy on the host. The connection uses TLS (RA-TLS if available). + +3. **Attestation Verification** — KMS verifies the NSM Attestation Document: + - Confirms the Enclave is running on genuine Nitro hardware + - Extracts the PCR values (measurements) + - Checks whether the `OS_IMAGE_HASH` matches an authorized value + +4. **On-chain Check** *(if on-chain contracts are configured)* — KMS queries the blockchain to verify that the measurement is registered and authorized. + +5. **Key Dispatch** — If all checks pass, KMS derives the requested key and sends it back through the encrypted channel. + +6. **Key Usage** — `dstack-util` makes the key available to your application. **You decide how to use the key** — common patterns include encrypting model weights in memory, decrypting configuration secrets, or encrypting a RAM-disk. Unlike GCP, there is no automatic disk encryption on Nitro. + +> **Key difference:** On GCP, the Guest Agent manages the entire key lifecycle including automatic disk encryption. On Nitro, `dstack-util` only retrieves the key — your application is responsible for using it. + +## GCP vs. Nitro: Key Delivery Differences + +The key delivery flow is similar on both platforms, but the implementation details differ: + +- **Attestation format:** TDX Quote (GCP) vs. NSM Attestation Document (Nitro) +- **Communication with KMS:** Direct network on GCP vs. VSOCK proxy on Nitro +- **Key usage:** Guest Agent manages the full key lifecycle on GCP (including automatic disk encryption), while on Nitro `dstack-util` only retrieves the key — your application decides how to use it +- **Persistent storage:** Available on GCP, not on Nitro (stateless Enclave) + +For a complete comparison, see [AWS Nitro Enclave Integration](nitro-enclave). 
+ +### VSOCK Proxy (Nitro-specific) + +On AWS Nitro, the Enclave cannot directly access the external network. A **VSOCK proxy** runs on the host EC2 instance to forward network requests: + +- `dstack-util` inside the Enclave sends requests to the VSOCK proxy +- The proxy forwards them to the destination (KMS, RPC endpoints, etc.) +- Responses are routed back through the proxy to the Enclave + +This means that on Nitro, you must deploy and configure a VSOCK proxy on the host machine before `dstack-util` can communicate with KMS or any external service. + +## KMS Bootstrap (First-time Setup) + +When a dstack-kms CVM starts for the first time, it enters **Onboard mode** (HTTP, unauthenticated). This is a one-time initialization process: + +1. Call `Onboard.Bootstrap` to generate the KMS key pair and obtain attestation info. +2. Register the KMS attestation on-chain (if using on-chain contracts). +3. Call `/finish` to complete initialization. KMS restarts and switches to HTTPS mode. + +After bootstrap, KMS operates in **Normal mode** and only accepts RA-TLS connections from verified workloads. + +> **Implementation:** The KMS bootstrap logic is implemented in the [dstack repository](https://github.com/Phala-Network/dstack). + +## Key Lifecycle + +The full key lifecycle is documented in the [Key Management Protocol](https://docs.phala.com/dstack/design-documents/key-management-protocol). 
At a high level: + +| Stage | Description | +|-------|-------------| +| **Generation** | Root keys are generated within the KMS TEE using hardware-rooted entropy | +| **Derivation** | Application-specific keys are derived from root keys using KDF (Key Derivation Function) | +| **Delivery** | Keys are delivered to verified CVMs over RA-TLS encrypted channels | +| **Rotation** | Keys can be rotated by updating the derivation inputs; old keys are naturally phased out | +| **Revocation** | Workloads can be de-authorized by revoking their on-chain measurement registration | + +## Next Steps + +- **[Run a dstack-kms CVM on GCP](/dstack-cloud/run-kms-on-gcp)** — Deploy KMS on GCP +- **[Deploy On-chain KMS Smart Contracts](/dstack-cloud/deploy-onchain-kms)** — Set up on-chain authorization diff --git a/dstack-cloud/manage-governance.mdx b/dstack-cloud/manage-governance.mdx new file mode 100644 index 0000000..bddf968 --- /dev/null +++ b/dstack-cloud/manage-governance.mdx @@ -0,0 +1,165 @@ +--- +title: Governance Operations +description: Create governance proposals, manage Multisig signing, and execute Timelock transactions. +--- + +# Governance Operations + +Every governance action — registering measurements, revoking access, upgrading contracts — follows the same pattern: propose, collect signatures, wait for timelock, execute. This page walks through that pattern step by step. + +## Prerequisites + +- Access to a signer wallet in the GovernanceSafe +- The Safe web interface (https://app.safe.global) or Safe CLI +- Understanding of the governance model — see [On-chain Governance Model](/dstack-cloud/governance) + +## Creating a Governance Proposal + +All governance changes start as a transaction submitted to the GovernanceSafe. + +### Via Safe Web Interface + +1. Go to https://app.safe.global and connect your signer wallet +2. Select your governance Safe +3. Click **"New Transaction"** +4. 
Choose the transaction type: + - **Contract interaction** — Call a function on DstackKms, DstackApp, or other contracts + - **Transfer** — Send funds (if needed for gas or contract operations) + - **Raw transaction** — For complex multi-call transactions + +### Common Governance Actions + +| Action | Contract | Function | +|--------|----------|----------| +| Register new measurement | DstackKms | `addOsImageHash(bytes32)` | +| Revoke measurement | DstackKms | `removeOsImageHash(bytes32)` | +| Update admin role | DstackKms | `setAdmin(address)` | +| Update KMS reference | DstackApp | `setKms(address)` | +| Upgrade contract implementation | Proxy | `upgradeTo(address)` | + +### Example: Register a New Measurement + +1. **New Transaction** → **Contract interaction** +2. Select the `DstackKms` contract +3. Select the `addOsImageHash(bytes32)` function +4. Enter the measurement hash: `0x1234abcd...` +5. Click **"Create"** +6. The transaction enters the Safe queue + +## Multisig Signing and Approval + +After a transaction is created, it must be approved by enough signers to meet the threshold. + +### Approving a Transaction + +1. Each signer connects to the Safe web interface +2. Opens the pending transaction +3. Reviews: + - Target contract address + - Function name and parameters + - Simulation result (if available) +4. Clicks **"Confirm"** (signs with their wallet) + +### Tracking Approval Progress + +The Safe interface shows: +- Number of confirmations collected +- Required threshold (e.g., "3 of 5 confirmations") +- Status: Pending / Awaiting execution / Executed + +### Rejecting a Transaction + +If a signer disagrees with a proposal, they should: +1. Not sign the transaction +2. Communicate concerns to other signers +3. If the transaction has already been approved, wait for the timelock — there is no "cancel" after execution + +## Timelock + +After the required number of signatures is collected, the transaction enters the Timelock queue. + +### How It Works + +1. 
The transaction is queued with a mandatory delay +2. No one can execute the transaction until the delay expires +3. During the delay, anyone can review the transaction on-chain +4. After the delay, any authorized address (typically a signer) can execute it + +### Timelock Duration + +| Environment | Typical Duration | +|-------------|-----------------| +| Testnet | 1-4 hours | +| Staging | 4-24 hours | +| Production | 24-72 hours | + +### Monitoring the Timelock + +- **Safe web interface:** Shows the queue position and remaining time +- **Block explorer:** Look for the Timelock contract's `QueuedTransactions` event + +## Execution + +After the timelock expires: + +1. Open the Safe web interface +2. Find the transaction (status: "Ready to execute") +3. Click **"Execute"** +4. Confirm the transaction in your wallet +5. The transaction is submitted on-chain + +After execution, the change takes effect immediately. KMS syncs the latest on-chain state on its next query. + +## Emergency Operations + +### Revoke a Compromised Measurement + +If you discover that a measurement is compromised: + +1. Draft a transaction to call `DstackKms.removeOsImageHash(bytes32)` with the compromised hash +2. Submit to the Safe for priority approval +3. Collect signatures as quickly as possible +4. **Wait for the timelock** — this cannot be bypassed +5. Execute after the delay + +> **Important:** The timelock cannot be skipped, even in emergencies. This is by design — it prevents covert changes. Plan your incident response to account for the delay. + +### Replace a Signer + +If a signer key is compromised or a signer needs to be replaced: + +1. Draft a transaction to call `Safe.swapOwner(address prevOwner, address oldOwner, address newOwner)`, or `Safe.addOwnerWithThreshold(address owner, uint256 threshold)` followed by `Safe.removeOwner(address prevOwner, address owner, uint256 threshold)` +2. Submit for multisig approval +3. Wait for timelock +4. Execute + +The replacement signer should use a hardware wallet. 
+ +### Governance Health Check + +Periodically verify the health of your governance setup: + +| Check | How | +|-------|-----| +| All signers are reachable | Contact each signer; confirm they have access to their wallet | +| Signer keys are secure | Verify signers are using hardware wallets | +| Threshold is appropriate | Review the threshold against the number of active signers | +| Timelock duration is appropriate | Adjust if the threat landscape has changed | +| No stale transactions in queue | Review and cancel any outdated proposals | +| Contract ownership is correct | Verify DstackKms and DstackApp owners point to the Safe | + +## Common Issues + +| Issue | Solution | +|-------|----------| +| Transaction not appearing in queue | Ensure the transaction was submitted correctly. Check the nonce. Verify gas is sufficient. | +| Signer cannot confirm | Ensure the signer is connected to the correct network. Check that the wallet has ETH for gas. | +| Timelock delay seems too long | Adjust the Timelock parameters through a governance action (which itself goes through the current timelock). | +| Transaction execution reverts | Check the contract state. The transaction may depend on state that changed during the timelock period. Simulate the transaction before executing. | +| Lost signer key | Immediately add a new signer and remove the lost one through governance. Use the remaining signers to approve. 
| + +## Next Steps + +- **[Register Workload Measurements](register-enclave-measurement)** — Most common governance action +- **[Deploy On-chain KMS Smart Contracts](deploy-onchain-kms)** — Initial contract setup +- **[Concept: Governance](/dstack-cloud/governance)** — Understand the governance model diff --git a/dstack-cloud/monitoring-alerting.mdx b/dstack-cloud/monitoring-alerting.mdx new file mode 100644 index 0000000..27a5446 --- /dev/null +++ b/dstack-cloud/monitoring-alerting.mdx @@ -0,0 +1,161 @@ +--- +title: Monitoring and Alerting +description: Set up monitoring, alerting, and dashboards for dstack-cloud deployments. +--- + +# Monitoring and Alerting + +You can't secure what you can't see. This page covers the key metrics to watch, log collection patterns, and alert rules that help you catch attestation failures, governance anomalies, or KMS downtime before they become incidents. + +## Key Metrics + +### KMS Metrics + +These metrics tell you whether KMS is healthy and delivering keys promptly: + +| Metric | Description | Alert Threshold | +|--------|-------------|-----------------| +| **Key request success rate** | Percentage of key requests that succeed | < 99% | +| **Key dispatch latency (p50/p99)** | Time from request to key delivery | p99 > 5s | +| **Attestation verification success rate** | Percentage of attestation verifications that pass | < 99% | +| **On-chain sync lag** | Time between on-chain state change and KMS picking it up | > 60s | +| **KMS uptime** | Percentage of time KMS is reachable | < 99.9% | + +### CVM / Enclave Metrics + +Track workload health and resource usage: + +| Metric | Description | Alert Threshold | +|--------|-------------|-----------------| +| **CVM uptime** | Percentage of time the CVM is running | < 99% | +| **CVM boot time** | Time from deploy command to CVM ready | > 5 minutes | +| **RA-TLS handshake success rate** | Percentage of successful RA-TLS connections | < 99% | +| **Container restart count** | Number of 
container restarts within the CVM | > 0 | +| **Memory usage** | CVM memory utilization | > 90% | + +### Governance Metrics + +Governance metrics help you catch stalled proposals or signer inactivity: + +| Metric | Description | Alert Threshold | +|--------|-------------|-----------------| +| **Pending proposals** | Number of governance proposals awaiting execution | > 0 for > 24h | +| **Signer participation rate** | Percentage of signers active in last 7 days | < 80% | +| **Timelock queue depth** | Number of transactions in the timelock queue | > 3 | + +## Log Collection + +### dstack-cloud Logs + +Use the built-in log viewer: + +```bash +# View recent logs +dstack-cloud logs + +# Follow logs in real-time +dstack-cloud logs --follow + +# View logs for a specific container +dstack-cloud logs --container +``` + +### KMS Logs + +KMS logs are available through `dstack-cloud logs` when the KMS is deployed as a dstack CVM. Key log patterns to monitor: + +| Log Pattern | Meaning | +|-------------|---------| +| `attestation verification failed` | A workload's attestation was rejected | +| `measurement not authorized` | The workload's measurement is not registered on-chain | +| `key dispatched` | A key was successfully delivered to a workload | +| `on-chain sync completed` | KMS synced its on-chain state | +| `RA-TLS handshake failed` | TLS connection with attestation failed | + +### Infrastructure Logs + +**GCP:** +- Cloud Logging: `gcloud logging read "resource.type=gce_instance AND labels.instance_id="` +- Serial port output for boot diagnostics + +**AWS Nitro:** +- EC2 instance system logs +- VSOCK proxy logs (if running as a systemd service) + +## Dashboard Configuration + +### Recommended Dashboard Panels + +1. **KMS Health** + - Request rate (requests/min) + - Success rate (%) + - Latency histogram (p50, p95, p99) + - Active connections + +2. **CVM Health** + - Number of running CVMs + - Per-CVM status (running/stopped/error) + - Boot time trend + +3. 
**Governance** + - Pending proposals count + - Recent governance transactions + - Signer activity heatmap + +### Integration with Datadog + +To integrate with Datadog, you'll need a Datadog agent on each host machine that collects custom metrics from dstack-cloud. The key metric namespaces are `dstack.cloud.cvm.*`, `dstack.cloud.kms.*`, and `dstack.cloud.governance.*`. + +1. Set up a Datadog agent on each host machine +2. Configure custom metrics collection for KMS, CVM, and governance events +3. Create dashboards and monitors based on the metrics above + +## Alert Rules + +### Critical Alerts (Page immediately) + +| Alert | Condition | Response | +|-------|-----------|----------| +| **KMS down** | KMS unreachable for > 2 minutes | Check CVM/Enclave status. Restart if needed. | +| **Attestation failure spike** | > 10 attestation failures in 5 minutes | May indicate a compromised or misconfigured workload. Investigate immediately. | +| **Governance: suspicious transaction** | New proposal to revoke critical measurements | Review the proposal. Alert all signers. | + +### Warning Alerts (Page during business hours) + +| Alert | Condition | Response | +|-------|-----------|----------| +| **High key dispatch latency** | p99 > 5s for > 10 minutes | Check KMS load. Scale if needed. | +| **CVM restart loop** | CVM restarted > 3 times in 1 hour | Investigate container health. Check resource limits. | +| **On-chain sync lag** | KMS on-chain state > 60s behind | Check RPC provider health. Switch to backup RPC. | +| **VSOCK proxy failure** (Nitro) | VSOCK proxy process not running | Restart the proxy. Check host health. 
| + +### Info Alerts (Log only) + +| Alert | Condition | +|-------|-----------| +| **New governance proposal** | Any new proposal submitted to the Safe | +| **CVM deployed** | New CVM deployment completed | +| **Measurement registered** | New measurement added on-chain | + +## Escalation Policies + +| Severity | Escalation | Response Time | +|----------|-----------|---------------| +| **Critical** | Page on-call SRE + notify security team | 15 minutes | +| **Warning** | Notify platform team via Slack/Teams | 2 hours | +| **Info** | Log to incident channel | Next business day | + +Use [Incident.io](https://incident.io) or similar tools to manage incident lifecycle. + +## On-chain Monitoring + +Monitor the blockchain for governance activity: + +- **Safe Transaction Service:** Subscribe to the Safe's transaction feed for real-time notifications +- **The Graph / Dune Analytics:** Query governance transaction history for reporting +- **Block explorer alerts:** Set up watch-only notifications for DstackKms and DstackApp contract events + +## Next Steps + +- **[Runbook](runbook)** — Step-by-step troubleshooting for common incidents +- **[Upgrade Procedures](upgrade)** — How to upgrade CVMs, KMS, and contracts diff --git a/dstack-cloud/nitro-enclave.mdx b/dstack-cloud/nitro-enclave.mdx new file mode 100644 index 0000000..4e33b15 --- /dev/null +++ b/dstack-cloud/nitro-enclave.mdx @@ -0,0 +1,166 @@ +--- +title: AWS Nitro Enclave Integration +description: How AWS Nitro Enclaves work with dstack-cloud, including EIF, VSOCK proxy, and key architectural differences from GCP. +--- + +## What Are Nitro Enclaves + +AWS Nitro Enclaves provide process-level isolation on EC2: the host OS cannot read or write enclave memory, even with root access. dstack-cloud packages your application into an Enclave Image File (EIF) and uses `dstack-util` for attestation and key retrieval — replacing the Guest Agent used on GCP. 
+ +Key properties: + +- **Memory-isolated:** The host OS cannot read or write Enclave memory +- **No persistent storage:** Every Enclave launch starts from a clean state +- **No external network access:** Enclaves communicate through VSOCK (a socket interface to the host machine) +- **Cryptographic attestation:** AWS Nitro Secure Module (NSM) generates an Attestation Document proving the Enclave's identity + +## How dstack Uses Nitro Enclaves + +The Nitro integration differs significantly from GCP. On GCP, dstack-cloud deploys a full virtual machine (dstack CVM) with a Guest Agent. On AWS Nitro, the workload runs inside an AWS Enclave OS image — there is no Guest Agent and no dstack OS. + +![Nitro Enclave Architecture](/images/dstack-cloud/nitro-enclave-architecture-v2.png) + +On AWS Nitro, the flow is: + +1. You define your application in a `Dockerfile` +2. AWS Nitro CLI (`nitro-cli build-enclave`) converts the Dockerfile into an **Enclave Image File (EIF)** — the Enclave OS image +3. dstack packages **`dstack-util`** into the EIF. This tool handles attestation and key retrieval from KMS +4. The Enclave runs your application code inside the AWS-provided Enclave OS — not dstack OS +5. Inside the Enclave, `dstack-util` communicates with KMS through the VSOCK proxy on the host to obtain keys +6. Your application decides how to use the key (e.g., encrypting disk in-memory, decrypting model weights, etc.) 
+ +### Key Differences from GCP + +| Aspect | GCP (dstack CVM) | AWS Nitro (Enclave) | +|--------|------------------|---------------------| +| **OS inside TEE** | dstack OS (custom Linux with Guest Agent) | Enclave OS (provided by AWS) | +| **Guest Agent** | Yes — handles attestation, key management, storage encryption | **No** — replaced by `dstack-util` | +| **Disk encryption** | Automatic (Guest Agent handles it) | **User-managed** — `dstack-util` provides the key, your application decides what to encrypt | +| **Image build** | dstack-cloud builds a CVM disk image | AWS Nitro CLI builds EIF from a Dockerfile | +| **Application format** | `docker-compose.yaml` | `Dockerfile` (single container) | + +## Enclave Image (EIF) Build Process + +A Nitro Enclave is launched from an **Enclave Image File (EIF)**. Building the EIF takes two stages, followed by a third stage that registers the resulting measurement: + +### Stage 1: Build Docker Image + +First, build a Docker image from your Dockerfile: + +```bash +docker build -t my-app -f Dockerfile . +``` + +Your Dockerfile should include your application and `dstack-util`. + +### Stage 2: Build EIF from Docker Image + +Then, convert the Docker image into an EIF using the Nitro CLI: + +```bash +nitro-cli build-enclave --docker-uri my-app:latest \ + --output-file my-app.eif +``` + +This command: +- Uses the AWS-provided Enclave OS as the base +- Packages your Docker image content into an EIF +- Generates 3 PCR values (PCR0, PCR1, PCR2) that uniquely identify the image + +### Stage 3: Register OS_IMAGE_HASH + +The 3 PCR values are combined into a single `OS_IMAGE_HASH` for on-chain authorization: + +```bash +# Example: combine PCRs into OS_IMAGE_HASH +OS_IMAGE_HASH=$(sha256sum <<< "${PCR0}${PCR1}${PCR2}" | cut -d' ' -f1) +``` + +The `OS_IMAGE_HASH` must be registered via [governance](governance) before KMS will deliver keys to this Enclave. + +> **Note:** Any change to the Dockerfile, application code, or `dstack-util` version produces different PCR values and a different `OS_IMAGE_HASH`. 
+ +### Automated Deployment + +In practice, these steps are automated by deployment scripts. See [Run a Workload on AWS Nitro](/dstack-cloud/run-on-nitro) for the full workflow using the `deploy_host.sh` and `get_keys.sh` scripts. + +## Key Retrieval with dstack-util + +On Nitro, `dstack-util` replaces the Guest Agent for key management. It is a lightweight tool that: + +- Obtains the Enclave's attestation document from NSM (Nitro Secure Module) +- Sends the attestation to dstack-kms through the VSOCK proxy +- Receives the requested key if the attestation is valid (and the measurement is authorized on-chain, when using on-chain KMS mode) +- Makes the key available to your application (e.g., via stdout, file, or shared memory) + +> **Note:** Unlike GCP where the Guest Agent automatically handles disk encryption, on Nitro you are responsible for deciding how to use the key. Common patterns include encrypting model weights in memory, decrypting configuration secrets, or encrypting a RAM-disk. + +## Networking and Communication + +Nitro Enclaves have no direct network access. All communication goes through VSOCK: + +![VSOCK Communication](/images/dstack-cloud/vsock-communication-v2.png) + +- The Enclave communicates with the host via VSOCK (AF_VSOCK socket family) +- A VSOCK proxy on the host forwards TCP connections to the outside world (including KMS) +- The proxy must be started before the Enclave launches +- If the proxy crashes, the Enclave loses external connectivity + +### VSOCK Proxy Setup + +The VSOCK proxy translates between VSOCK (Enclave↔Host) and TCP (Host↔Network): + +```bash +# Example: forward Enclave VSOCK port 8000 to host TCP port 8000 +socat VSOCK-LISTEN:8000,reuseaddr,fork TCP:localhost:8000 +``` + +dstack-cloud provides helper scripts and configuration for setting up the VSOCK proxy as part of the deployment process. 
+ +## Resource Constraints + +Nitro Enclaves have static resource allocation: + +| Resource | Behavior | +|----------|----------| +| **CPU** | Allocated at launch (in vCPUs), cannot be changed during runtime | +| **Memory** | Allocated at launch (in MB), cannot be changed during runtime | +| **Maximum** | Depends on the host EC2 instance type | + +### Supported EC2 Instance Types + +Nitro Enclaves are supported on instances built on the AWS Nitro System. Common choices: + +| Instance Type | vCPUs | Memory | Notes | +|--------------|-------|--------|-------| +| `m5.xlarge` | 4 | 16 GB | General purpose | +| `m5.2xlarge` | 8 | 32 GB | General purpose | +| `c5.2xlarge` | 8 | 16 GB | Compute optimized | +| `r5.2xlarge` | 8 | 64 GB | Memory optimized | + +Note: The EC2 instance must be launched with Nitro Enclaves enabled, and you must allocate enough CPU/memory to accommodate both the host OS and the Enclave. + +## When to Use Nitro vs. GCP + +Choose **AWS Nitro Enclaves** when: + +- Your existing infrastructure is on AWS +- You need the strongest possible isolation (process-level, no host access) +- Your workload fits within static resource limits +- You don't need persistent disk storage, or you are willing to encrypt and manage storage yourself using the key retrieved from KMS +- You want full control over how keys are used (encryption strategy, key usage pattern) + +Choose **GCP Confidential VMs** when: + +- Your existing infrastructure is on GCP +- You need automatic disk encryption managed by dstack-cloud +- You need direct network access without a VSOCK proxy +- You need persistent disk storage +- You need elastic resource allocation +- You prefer a more managed experience with the Guest Agent handling attestation and key lifecycle + +## Next Steps + +- **[Quick Start: Deploy on AWS Nitro](/dstack-cloud/get-started)** — Hands-on tutorial +- **[Run a Workload on AWS Nitro](/dstack-cloud/run-on-nitro)** — Detailed deployment guide +- **[Attestation Integration](attestation-integration)** 
— How attestation works on Nitro diff --git a/dstack-cloud/overview.mdx b/dstack-cloud/overview.mdx new file mode 100644 index 0000000..cd6fa13 --- /dev/null +++ b/dstack-cloud/overview.mdx @@ -0,0 +1,95 @@ +--- +title: dstack-cloud Overview +description: dstack-cloud is an extension of the dstack framework that enables deploying confidential Docker applications on GCP Confidential VMs and AWS Nitro Enclaves. +--- + +# dstack-cloud Overview + +dstack-cloud lets you run Docker applications inside hardware-encrypted Trusted Execution Environments (TEEs) on **GCP Confidential VMs** and **AWS Nitro Enclaves** — without owning specialized bare-metal servers. + +Built on the open-source [dstack framework](https://github.com/Dstack-TEE/dstack), it brings TEE security to mainstream cloud platforms. Your code and data stay encrypted in memory, even from the cloud provider. + +## What is dstack + +dstack is an open-source confidential computing framework originally designed to run on TDX BareMetal servers. It provides: + +- Hardware-level memory encryption and isolation via TEE (Trusted Execution Environment) +- A key management system (KMS) that delivers secrets exclusively to verified workloads +- Remote attestation to prove that code is running in genuine TEE hardware +- SDK libraries in Python, TypeScript, Rust, and Go for application integration (the **dstack-sdk**) + +## What is dstack-cloud + +dstack-cloud extends dstack to run on major cloud platforms. Instead of requiring bare-metal TDX hardware, it enables deployment on: + +- **GCP Confidential VMs** (Intel TDX) +- **AWS Nitro Enclaves** (NSM) + +This means you can use the full dstack security model — hardware isolation, attested key delivery, encrypted storage — without owning specialized hardware. 
+ +## How It Works + +![How It Works](/images/dstack-cloud/architecture-overview-v2.png) + +You define your application, and `dstack-cloud deploy` handles the rest: building a TEE environment, managing attestation, and delivering keys. + +### On GCP: + +1. `dstack-cloud deploy` creates a **Confidential Virtual Machine** running dstack OS with Intel TDX enabled. +2. A **Guest Agent** inside the CVM handles attestation, key retrieval from KMS, and automatic disk encryption. +3. Your application communicates with the Guest Agent via a Unix socket (`/var/run/dstack.sock`). + +### On AWS Nitro: + +1. AWS Nitro CLI converts your Dockerfile into an **Enclave Image File (EIF)** using Enclave OS (AWS-provided). +2. The EIF runs as a Nitro Enclave — there is no dstack OS and no Guest Agent. +3. **`dstack-util`** inside the Enclave handles attestation and key retrieval from KMS. Your application decides how to use the key. + +> The two platforms have different architectures. See [Nitro Enclave Integration](nitro-enclave) for a detailed comparison. + +## Key Components + +| Component | Role | +|-----------|------| +| **dstack-cloud CLI** | Build and deployment tool for both GCP and Nitro | +| **dstack-kms** | Key Management Service running in its own TEE; verifies workloads and dispatches keys | +| **Gateway** | Edge TLS termination, automatic ACME certificate provisioning, RA-TLS support | +| **Guest Agent** (GCP only) | Process inside each CVM that handles attestation, key management, and automatic disk encryption | +| **dstack-util** (Nitro only) | Lightweight tool packaged into the EIF; handles attestation and key retrieval from KMS | +| **VSOCK Proxy** (Nitro only) | Runs on the EC2 host; forwards network traffic between the Enclave and external services | + +## Typical Use Cases + +### AI Inference with Model Protection + +Deploy AI models with model weights fully encrypted in memory. The cloud provider cannot access or copy your proprietary models. 
+ +### Sensitive Data Processing + +Process healthcare, financial, or personal data in a hardware-isolated environment. Memory encryption ensures that even system administrators cannot read your data. + +### DeFi and Web3 Applications + +Run smart contract backend services or trading bots with private keys that never leave the TEE. The on-chain governance model provides verifiable, auditable key management. + +## Supported Environments + +| Platform | TEE Technology | Attestation | Status | +|----------|---------------|-------------|--------| +| GCP Confidential VMs | Intel TDX + TPM | TDX Quote | Available | +| AWS Nitro Enclaves | Nitro Secure Module | NSM Attestation Document | Available | + +## What You Need + +- An AWS or GCP account with appropriate permissions +- `dstack-cloud` CLI installed +- A `docker-compose.yaml` (GCP) or a `Dockerfile` (AWS Nitro) defining your application +- (Optional) [dstack-sdk](https://github.com/Dstack-TEE/dstack/tree/master/sdk) integration for key retrieval from within your containers + +## Next Steps + +- **[Quick Start Tutorial](/dstack-cloud/get-started)** — Deploy your first dstack CVM in 30 minutes +- **[Nitro Enclave](nitro-enclave)** — How AWS Nitro Enclaves work in dstack-cloud +- **[KMS and Key Delivery](kms-and-key-delivery)** — Understand how keys are managed and delivered +- **[Security Model](security-model)** — Trust boundaries and security guarantees +- **[Attestation Integration](attestation-integration)** — How remote attestation works across platforms diff --git a/dstack-cloud/register-enclave-measurement.mdx b/dstack-cloud/register-enclave-measurement.mdx new file mode 100644 index 0000000..a3a31f5 --- /dev/null +++ b/dstack-cloud/register-enclave-measurement.mdx @@ -0,0 +1,207 @@ +--- +title: Register Workload Measurements On-chain +description: Register workload measurements (OS_IMAGE_HASH / RTMR) on-chain for KMS authorization. 
+--- + +# Register Workload Measurements On-chain + +This guide explains how to register workload measurements (RTMR / OS_IMAGE_HASH) on-chain so that KMS will authorize your workloads to receive keys. + +## Background + +When a TEE workload starts — whether a dstack CVM (GCP) or a Nitro Enclave (AWS) — it generates a hardware attestation that includes measurements (cryptographic hashes of the code and configuration). KMS checks these measurements against an on-chain allowlist before dispatching keys. + +If you update your application code, Docker images, or the base image version, the measurements change. You must register the new measurements before KMS will authorize the updated workload. + +### Two Layers of On-chain Registration + +There are two distinct registration flows in the dstack ecosystem: + +| Registration Type | What Gets Registered | Purpose | Where to Find | +|-------------------|---------------------|---------|---------------| +| **KMS Registration** | KMS's `mrAggregated` (GCP) or public key | Proves KMS runs in a genuine TEE | [Run dstack-kms on GCP](run-kms-on-gcp) Step 8 | +| **Workload Registration** (this guide) | Workload's `compose-hash` or `OS_IMAGE_HASH` | Authorizes workload to receive keys from KMS | This document | + +**This guide covers workload registration** — the process of registering your application's measurement so that KMS will dispatch keys to it. This applies to: + +- **GCP workloads** — dstack CVMs running applications that need keys from KMS +- **Nitro workloads** — Enclaves that need keys from KMS (KMS itself runs on GCP only) + +Both types of workloads must register their measurements on-chain before KMS will authorize key delivery. 
+ +| Platform | Measurement Field | Where It Comes From | +|----------|-------------------|---------------------| +| GCP (TDX) | `compose-hash` (in RTMR3) | `dstack-cloud status` output (after `dstack-cloud deploy`) | +| AWS Nitro | `OS_IMAGE_HASH` | `./scripts/build-eif.sh` output | + +## Prerequisites + +- A deployed dstack-kms instance with on-chain governance configured +- Governance contracts (DstackKms, DstackApp) deployed +- Multisig Safe with signer access +- The new measurement value (from your build/deploy output) + +## Step 1: Build and Deploy Your Workload + +First, build and deploy your application to get the measurements: + +```bash +# On GCP +dstack-cloud deploy +dstack-cloud status +# Note the compose-hash value in RTMR3 + +# On Nitro +./scripts/build-eif.sh +# Note the OS_IMAGE_HASH value from output +``` + +## Step 2: Extract the Measurement + +**GCP (TDX):** +The measurement is the `compose-hash` value in RTMR3, displayed in the `dstack-cloud status` output. This value represents the hash of your docker-compose.yaml configuration. + +**AWS Nitro:** +The measurement is the `OS_IMAGE_HASH`, calculated as `sha256(PCR0 || PCR1 || PCR2)`. Run: + +```bash +./scripts/build-eif.sh +``` + +The script outputs: +``` +PCR0: <hex> +PCR1: <hex> +PCR2: <hex> +OS_IMAGE_HASH: <hex> +``` + +**PCR values are derived from:** +- **PCR0** — Complete EIF content hash (enclave image) +- **PCR1** — Linux kernel and boot ramdisk +- **PCR2** — Application layer (Docker image filesystem) + +> **Note:** The `OS_IMAGE_HASH` is the value you register on-chain. Any change to the Dockerfile, application code, or configuration produces a different hash. + +**Alternative: Preview measurements without full build:** + +```bash +dstack-util get-keys --show-mrs \ + --kms-url "https://your-kms:12001" \ + --app-id "0xYOUR_APP_ID" +``` + +> **Important:** When using `--show-mrs` to preview, you must use the exact same `KMS_URL`, `APP_ID`, and `root_ca.pem` as the production build. Any difference will produce different PCR values. 
+ +## Step 3: Prepare the Governance Transaction + +You need to call `DstackKms.addOsImageHash()` with the new measurement value. + +### 3.1 Draft the Transaction + +Using the Safe web interface (https://app.safe.global): + +1. Connect your signer wallet +2. Go to your Safe +3. Click "New Transaction" → "Contract interaction" +4. Select the `DstackKms` contract +5. Select the `addOsImageHash` function +6. Enter the measurement value (hex string) +7. Review and submit + +### 3.2 Alternative: Using CLI + +```bash +# Using cast (from Foundry) +cast send 0xYOUR_DSTACK_KMS_ADDRESS \ + "addOsImageHash(bytes32)" \ + 0xYOUR_MEASUREMENT_HASH \ + --rpc-url $RPC_URL \ + --private-key $SIGNER_KEY +``` + +> **Note:** In production, do not use a single private key. Submit the transaction through the multisig Safe instead. + +## Step 4: Collect Signatures + +The transaction enters the multisig queue. Other signers must approve it. + +1. Each signer connects to the Safe web interface +2. Opens the pending transaction +3. Reviews the measurement value +4. Confirms (signs) the transaction + +The transaction is approved once the required threshold of signatures is collected. It is not executed yet — it must first pass through the timelock. + +## Step 5: Wait for Timelock + +After multisig approval, the transaction enters the Timelock queue. + +- **Production:** Wait 24-72 hours (depending on your configuration) +- **Testnet:** Wait 1-4 hours + +During this period, any signer or observer can review and, if necessary, raise concerns. + +## Step 6: Execute + +After the Timelock expires, the transaction can be executed: + +1. Open the Safe web interface +2. Find the transaction in the queue +3. Click "Execute" + +The measurement is now registered on-chain. KMS will authorize workloads with this measurement. 
+ +## Step 7: Verify + +Verify that the measurement is registered: + +```bash +# Using cast +cast call $KMS_CONTRACT_ADDR \ + "isAuthorized(bytes32)(bool)" \ + 0xYOUR_MEASUREMENT_HASH \ + --rpc-url $RPC_URL +``` + +Expected output: `true` + +## Workflow Summary + +![Measurement Registration Flow](/images/dstack-cloud/measurement-registration-flow.png) + +## Registering Multiple Measurements + +If you are deploying multiple workloads (e.g., different application versions for canary testing), you can register multiple measurements in a single governance transaction: + +1. Call `addOsImageHash` for each measurement value +2. Bundle them into a batch transaction in the Safe +3. Submit for approval as usual + +## Revoking a Measurement + +If a measurement is found to be compromised or no longer needed: + +```bash +cast send $KMS_CONTRACT_ADDR \ + "removeOsImageHash(bytes32)" \ + 0xCOMPROMISED_MEASUREMENT_HASH \ + --rpc-url $RPC_URL +``` + +Follow the same governance flow (multisig → timelock → execute). + +## Common Issues + +| Issue | Solution | +|-------|----------| +| KMS refuses to dispatch keys | Verify the measurement is registered: `cast call ... isAuthorized(...)`. Check that the measurement matches exactly (case-sensitive hex). | +| Governance transaction stuck | Verify the timelock has expired. Check that the Safe has sufficient gas. | +| Wrong measurement registered | You must revoke the wrong measurement and register the correct one through separate governance transactions. | +| Measurement changes on every deploy | The `compose-hash` / `OS_IMAGE_HASH` is derived from your Docker images and configuration. Use pinned image tags (SHA256 digests) for reproducible builds. 
| + +## Next Steps + +- **[Manage Governance](manage-governance)** — Other governance operations +- **[Concept: Attestation Integration](/dstack-cloud/attestation-integration)** — How measurements are generated +- **[Concept: KMS and Key Delivery](/dstack-cloud/kms-and-key-delivery)** — How KMS uses measurements diff --git a/dstack-cloud/release-notes.mdx b/dstack-cloud/release-notes.mdx new file mode 100644 index 0000000..fbc77e4 --- /dev/null +++ b/dstack-cloud/release-notes.mdx @@ -0,0 +1,31 @@ +--- +title: Release Notes +description: Release notes, version history, compatibility notes, and known issues. +--- + +# Release Notes + +## v0.6.0 (Current) + +### Features + +- Initial release with support for GCP Confidential VMs and AWS Nitro Enclaves +- dstack-cloud CLI for launching CVMs from Docker Compose configurations +- KMS with two modes: Local Key Provider and standalone KMS +- On-chain governance (DstackKms, DstackApp, Multisig + Timelock) +- RA-TLS for secure KMS ↔ workload communication +- VSOCK proxy support for Nitro Enclave networking +- KMS bootstrap (Onboard → Finish) flow +- Attestation verification (TDX Quote on GCP, NSM Document on Nitro) + +### Compatibility Notes + +- Requires Intel TDX-capable GCP zones or Nitro Enclave-capable EC2 instances + +--- + +## Changelog + +All notable changes will be documented here. + +Format follows [Keep a Changelog](https://keepachangelog.com/). diff --git a/dstack-cloud/run-kms-on-gcp.mdx b/dstack-cloud/run-kms-on-gcp.mdx new file mode 100644 index 0000000..db1345b --- /dev/null +++ b/dstack-cloud/run-kms-on-gcp.mdx @@ -0,0 +1,267 @@ +--- +title: Run a dstack-kms CVM on GCP +description: Deploy a dstack-kms (Key Management Service) instance on GCP as a Confidential VM with on-chain governance. +--- + +# Run a dstack-kms CVM on GCP + +Run your own dstack-kms instance on GCP so you have full control over key policy. 
This page covers the production deployment workflow: bootstrap the KMS, register its attestation on-chain, and verify it's serving keys correctly. + +> **Reference:** This guide is based on the [official deployment guide](https://github.com/Phala-Network/dstack-cloud-deployment-guide/blob/main/guide_EN.md). + +> **Platform Requirement:** `dstack-cloud deploy` must be run on **Linux**. macOS is not supported because the FAT32 shared disk image created by macOS `dosfstools` fails GCP image validation. + +--- + +## Prerequisites + +- A GCP project with Confidential VM (TDX) quota +- `gcloud` CLI installed and authenticated (`gcloud auth login`) +- `gsutil` available in PATH (required by `dstack-cloud deploy` for GCS upload) +- `docker` / `docker compose` +- `node` + `npm` (for smart contract deployment) +- `jq` +- `mtools` (provides `mcopy`) +- `dosfstools` (provides `mkfs.fat`) +- **`dstack-cloud` CLI** installed: + ```bash + curl -fsSL -o ~/.local/bin/dstack-cloud \ + https://raw.githubusercontent.com/Phala-Network/meta-dstack-cloud/main/scripts/bin/dstack-cloud + chmod +x ~/.local/bin/dstack-cloud + ``` +- **For on-chain governance mode:** + - A blockchain RPC endpoint (e.g., `https://sepolia.base.org`) + - A wallet with sufficient balance + - Deployed smart contracts (DstackKms, DstackApp) — see [Deploy On-chain KMS Smart Contracts](deploy-onchain-kms) + - You’ll need `KMS_CONTRACT_ADDR` and `APP_CONTRACT_ADDR` before KMS bootstrap/finish + +> **Note:** This guide uses pre-built KMS Docker images. If you need to build KMS from source or customize configuration, see the [official KMS build guide](https://github.com/Phala-Network/dstack-cloud/blob/master/docs/tutorials/kms-build-configuration.md). + +--- + +## Step 1: Configure dstack-cloud + +Follow the same steps as [Run a Workload on GCP](run-on-gcp#step-1-configure-dstack-cloud) to set up your global config and GCS bucket. 
+ +--- + +## Step 2: Pull the OS Images + +Follow the same steps as [Run a Workload on GCP](run-on-gcp#step-2-pull-the-os-image) to download the `dstack-cloud-0.6.0` images. + +> If `disk.raw` is missing, deployment may create a VM that cannot boot (UEFI loop: `Failed to load image`). + +--- + +## Step 3: Create a KMS Project + +```bash +mkdir -p workshop-run +dstack-cloud new workshop-run/kms-prod \ + --os-image dstack-cloud-0.6.0 \ + --key-provider tpm \ + --instance-name dstack-kms + +cd workshop-run/kms-prod +``` + +Generated files include: + +- `app.json` +- `docker-compose.yaml` +- `prelaunch.sh` + +--- + +## Step 4: Build or Select KMS Docker Image + +Use pre-built image or build your own: + +```bash +# Option 1: Pre-built +# KMS_IMAGE=cr.kvin.wang/dstack-kms:latest + +# Option 2: Build from source (from deployment-guide workshop repo) +cd workshop/kms/builder +./build-image.sh dstack-kms:latest +docker push dstack-kms:latest +``` + +--- + +## Step 5: Configure KMS Runtime + +Replace generated compose/prelaunch with KMS config. + +> The compose templates below come from: +> `Phala-Network/dstack-cloud-deployment-guide` +> Clone that repo first if you only cloned this docs repo. 
+ +### Option A: Direct RPC + +```bash +cp /path/to/dstack-cloud-deployment-guide/workshop/kms/docker-compose.direct.yaml docker-compose.yaml +``` + +Create `prelaunch.sh`: + +```bash +cat > prelaunch.sh <<'EOF' +#!/bin/sh +cat > .env <<'ENVEOF' +KMS_HTTPS_PORT=12001 +AUTH_HTTP_PORT=18000 +KMS_IMAGE=cr.kvin.wang/dstack-kms:latest +ETH_RPC_URL=https://sepolia.base.org +KMS_CONTRACT_ADDR= +APP_CONTRACT_ADDR= +DSTACK_REPO=https://github.com/Phala-Network/dstack-cloud.git +DSTACK_REF=14963a2ccb0ec7bef8a496c1ac5ac40f5593145d +ENVEOF +EOF +chmod +x prelaunch.sh +``` + +### Option B: Light Client (Helios) + +```bash +cp /path/to/dstack-cloud-deployment-guide/workshop/kms/docker-compose.light.yaml docker-compose.yaml +``` + +`prelaunch.sh` can stay the same unless your template requires different vars. + +--- + +## Step 6: Deploy KMS CVM + +```bash +dstack-cloud deploy --delete +``` + +If bucket does not exist, create explicitly first: + +```bash +gcloud storage buckets create gs:// \ + --project \ + --location us-central1 +``` + +--- + +## Step 7: Open Firewall + +```bash +dstack-cloud fw allow 12001 +dstack-cloud fw allow 18000 +``` + +For light-client mode: + +```bash +dstack-cloud fw allow 18545 +``` + +Port notes: + +- `12001/tcp`: KMS API (**required**) +- `18000/tcp`: auth-api debug endpoint (**optional**) +- `18545/tcp`: helios RPC (light mode only) + +--- + +## Step 8: Bootstrap (First-time) + +### 8.1 Check KMS endpoint + +```bash +dstack-cloud status +``` + +### 8.2 Get attestation info + +```bash +KMS_URL="http://:12001" +curl -s "$KMS_URL/prpc/Onboard.GetAttestationInfo?json" | jq . 
+``` + +Capture: +- `device_id` +- `mr_aggregated` +- `os_image_hash` + +### 8.3 Register measurements on-chain (required for on-chain mode) + +```bash +npx hardhat kms:add-image 0x<os_image_hash> --network custom +npx hardhat kms:add 0x<mr_aggregated> --network custom +npx hardhat kms:add-device 0x<device_id> --network custom +``` + +> `device_id` must come from `Onboard.GetAttestationInfo` (not dummy serial-console value). + +### 8.4 Run bootstrap + +```bash +curl -s "$KMS_URL/prpc/Onboard.Bootstrap?json" \ + -d '{"domain":"<KMS_DOMAIN>"}' | tee bootstrap-info.json | jq . +``` + +### 8.5 Finish + +```bash +curl "$KMS_URL/finish" +sleep 5 +``` + +--- + +## Step 9: Verify + +```bash +# Onboard mode check (before finish) +curl -s "http://<KMS_HOST>:12001/prpc/Onboard.GetAttestationInfo?json" | jq . + +# Normal mode check (after finish) +curl -sk "https://<KMS_HOST>:12001/prpc/GetMeta?json" -d '{}' | jq . +``` + +--- + +## Common Issues + +| Issue | Solution | +|---|---| +| macOS image/shared-disk errors | Use Linux host for deploy | +| `Boot image ... not found` | Ensure image search path is correct and `disk.raw` exists under `<image_search_path>/dstack-cloud-0.6.0/` | +| VM `RUNNING` but 12001/18000 unreachable + serial shows `UEFI: Failed to load image` | Wrong boot image format/content. Ensure `disk.raw` comes from `dstack-cloud-0.6.0-uki.tar.gz` (do **not** rename `rootfs.img.parted.verity` as `disk.raw`) | +| `gsutil` missing | Install Google Cloud SDK/gsutil and ensure in PATH | +| `mcopy` missing | Install `mtools` | +| `mkfs.fat` missing | Install `dosfstools` | +| `missing field 'status'` from KMS flow | Usually indicates auth-eth / on-chain config mismatch (RPC/contract addresses/registrations). 
Verify `ETH_RPC_URL`, `KMS_CONTRACT_ADDR`, `APP_CONTRACT_ADDR`, and on-chain image/MR/device registration | +| KMS port responds but APIs return 404 | Shared disk config stale/wrong; `dstack-cloud deploy --delete` | + +--- + +## Cleanup + +```bash +dstack-cloud stop +dstack-cloud remove +``` + +Or manually: + +```bash +gcloud compute instances delete dstack-kms --zone=us-central1-a +gcloud compute images delete dstack-kms-boot +gcloud compute images delete dstack-kms-shared +``` + +--- + +## Next Steps + +- [Register Workload Measurements](register-enclave-measurement) +- [KMS and Key Delivery](/dstack-cloud/kms-and-key-delivery) +- [Attestation Integration](/dstack-cloud/attestation-integration) +- [Deploy On-chain KMS Smart Contracts](deploy-onchain-kms) diff --git a/dstack-cloud/run-on-gcp.mdx b/dstack-cloud/run-on-gcp.mdx new file mode 100644 index 0000000..ce9c9ff --- /dev/null +++ b/dstack-cloud/run-on-gcp.mdx @@ -0,0 +1,243 @@ +--- +title: Run a Workload on GCP +description: Step-by-step guide to deploy a Docker application as a dstack CVM on GCP with Intel TDX. +--- + +# Run a Workload on GCP + +Deploy a Docker application as a dstack CVM on GCP with Intel TDX. This page covers the full workflow — from configuration to verification — including how to choose between managed and self-hosted KMS endpoints. 
+ +--- + +## Prerequisites + +- A GCP project with Confidential VM quota enabled + - Intel TDX Confidential VMs are available in select zones (for example `us-central1-a`) +- `gcloud` CLI installed and authenticated + ```bash + gcloud auth login + gcloud config set project YOUR_PROJECT_ID + ``` +- Linux host for deployment (recommended) +- Docker installed +- `gsutil` available in PATH +- `mtools` (`mcopy`) and `dosfstools` (`mkfs.fat`) installed +- `dstack-cloud` CLI installed + ```bash + curl -fsSL -o ~/.local/bin/dstack-cloud \ + https://raw.githubusercontent.com/Phala-Network/meta-dstack-cloud/main/scripts/bin/dstack-cloud + chmod +x ~/.local/bin/dstack-cloud + ``` + +> Why Linux + mtools + dosfstools? `dstack-cloud deploy` builds a shared FAT image and needs these tools in local environment. + +--- + +## Step 1: Configure `dstack-cloud` + +Edit global config: + +```bash +dstack-cloud config-edit +``` + +`dstack-cloud` uses **JSON** config (`~/.config/dstack-cloud/config.json`). Example: + +```json +{ + "services": { + "kms_urls": ["https://kms.tdxlab.dstack.org:12001"], + "gateway_urls": ["https://gateway.tdxlab.dstack.org:12002"], + "pccs_url": "" + }, + "image_search_paths": ["/path/to/images"], + "gcp": { + "project": "YOUR_PROJECT_ID", + "zone": "us-central1-a", + "bucket": "gs://YOUR_BUCKET_NAME" + } +} +``` + +Create bucket if needed: + +```bash +gcloud storage buckets create gs://YOUR_BUCKET_NAME --project YOUR_PROJECT_ID --location us-central1 +``` + +### KMS Options + +| Option | Description | When to Use | +|---|---|---| +| **Phala Official KMS** | Use managed KMS endpoints in `services.kms_urls` | Quick start, testing | +| **Self-hosted KMS** | Use your own KMS endpoint in `services.kms_urls` | Production, compliance, full control | + +For Self-hosted KMS, point `services.kms_urls` to your deployed KMS URL (see [Run a dstack-kms CVM on GCP](run-kms-on-gcp) for how to set one up). 
+ +If you run with key provider `tpm`/`none` (no external KMS), remove `.env` in project and remove `env_file` from `app.json`. + +--- + +## Step 2: Pull the OS Image + +For `dstack-cloud-0.6.0`, download both archives: + +```bash +dstack-cloud pull https://github.com/Phala-Network/meta-dstack-cloud/releases/download/v0.6.0-test/dstack-cloud-0.6.0.tar.gz +dstack-cloud pull https://github.com/Phala-Network/meta-dstack-cloud/releases/download/v0.6.0-test/dstack-cloud-0.6.0-uki.tar.gz +``` + +Verify boot image file exists: + +```bash +ls -lh /path/to/images/dstack-cloud-0.6.0/disk.raw +``` + +> If `disk.raw` is missing, VM may boot-loop with UEFI `Failed to load image`. + +--- + +## Step 3: Create a Project + +```bash +dstack-cloud new my-gcp-app --os-image dstack-cloud-0.6.0 --instance-name dstack-my-app +cd my-gcp-app +``` + +--- + +## Step 4: Configure Project (`app.json`) + +Update key fields in `app.json`: + +- `gcp_config.project` +- `gcp_config.zone` +- `gcp_config.bucket` +- `gcp_config.instance_name` + +Choose key provider mode: + +- **External KMS mode (recommended):** `"key_provider": "kms"` +- **No external KMS mode:** `"key_provider": "tpm"` (or `none`) + +Gateway options: + +- If you use dstack gateway URL routing, keep `gateway_enabled: true` +- If you access service directly via VM public IP + opened port, set `gateway_enabled: false` + +--- + +## Step 5: Define Your Application + +Edit `docker-compose.yaml`: + +```yaml +services: + web: + image: nginx:latest + ports: + - "8080:80" +``` + +--- + +## Step 6: (Optional) Add Environment Variables + +If your app needs secrets/config, create `.env`: + +```env +API_KEY=your-api-key +DATABASE_URL=postgres://user:pass@host:5432/db +``` + +In KMS mode, env values are encrypted client-side and only decrypted inside CVM after attestation. + +--- + +## Step 7: Deploy + +```bash +dstack-cloud deploy --delete +``` + +`dstack-cloud` will: + +1. Prepare shared config files +2. Upload image artifacts to GCS +3. 
Create a GCP TDX Confidential VM +4. Start VM and run compose workload + +First deployment usually takes several minutes. + +--- + +## Step 8: Open Firewall + +Open app port(s): + +```bash +dstack-cloud fw allow 8080 +``` + +List firewall rules: + +```bash +dstack-cloud fw list +``` + +--- + +## Step 9: Verify + +Check status: + +```bash +dstack-cloud status +``` + +View logs: + +```bash +dstack-cloud logs --follow +``` + +Access app: + +- Direct VM mode (`gateway_enabled=false`): + ```bash + curl http://:8080 + ``` +- Gateway mode (`gateway_enabled=true`): use URL printed in `dstack-cloud status`. + +--- + +## Managing Your Deployment + +```bash +dstack-cloud logs +dstack-cloud stop +dstack-cloud start +dstack-cloud remove +``` + +--- + +## Common Issues + +| Issue | Solution | +|---|---| +| `Boot image 'dstack-cloud-0.6.0' not found locally` | Ensure `disk.raw` exists under `/dstack-cloud-0.6.0/` | +| VM RUNNING but serial log shows UEFI load failures | Wrong boot image source; use official `-uki.tar.gz` image containing `disk.raw` | +| `gsutil` not found | Install Google Cloud SDK / ensure PATH | +| `mcopy` not found | Install `mtools` | +| `mkfs.fat` not found | Install `dosfstools` | +| `.env found but KMS is not enabled` | Remove `.env` and remove `env_file` from `app.json`, or set key provider back to `kms` | +| App not reachable immediately | Wait for compose startup to complete; check `dstack-cloud logs` | + +--- + +## Next Steps + +- [Run a dstack-kms CVM on GCP](run-kms-on-gcp) +- [Attestation Integration](/dstack-cloud/attestation-integration) +- [Run a Workload on AWS Nitro](run-on-nitro) diff --git a/dstack-cloud/run-on-nitro.mdx b/dstack-cloud/run-on-nitro.mdx new file mode 100644 index 0000000..d221e83 --- /dev/null +++ b/dstack-cloud/run-on-nitro.mdx @@ -0,0 +1,247 @@ +--- +title: Run a Workload on AWS Nitro +description: Step-by-step guide to deploy a Docker application as a Nitro Enclave on AWS. 
+--- + +# Run a Workload on AWS Nitro + +Run a Docker application inside an AWS Nitro Enclave — one of the strongest isolation guarantees available in the public cloud. The host OS cannot read your enclave's memory, even with root access. + +> **Important:** On AWS Nitro, your workload runs inside an **AWS Enclave OS image** — not a dstack CVM. There is no Guest Agent. `dstack-util` is packaged into the Enclave for attestation and key retrieval. You control how keys are used (for example disk/data decryption in your app). + +## Prerequisites + +- AWS account permissions for EC2 + Nitro Enclaves +- AWS CLI configured (`aws configure`) +- Docker installed +- Git + `gh` CLI (if using GitHub template/release flow) +- The [dstack-nitro-enclave-app-template](https://github.com/Phala-Network/dstack-nitro-enclave-app-template) repository + +## Overview + +Deployment flow: + +1. Create app from template +2. Replace KMS root CA certificate (**required before build**) +3. Build EIF and get measurements +4. Register OS_IMAGE_HASH on-chain +5. Deploy/run on Nitro host and retrieve keys + +### Key Delivery via KMS + +On Nitro, `dstack-util` inside Enclave reaches KMS through host-side VSOCK proxy. KMS verifies Nitro attestation and policy before returning keys. + +**KMS Options** + +| Option | Description | When to Use | +|--------|-------------|-------------| +| **Phala Official KMS** | Managed KMS by Phala | Quick start/testing | +| **Self-hosted KMS** | Your own dstack-kms | Production/compliance/full control | + +Self-hosted KMS can run on: +- GCP TDX CVM — see [Run a dstack-kms CVM on GCP](run-kms-on-gcp) +- Intel TDX bare metal + +--- + +## Step 1: Create Your App from Template + +```bash +gh repo create my-enclave-app \ + --template Phala-Network/dstack-nitro-enclave-app-template \ + --private + +git clone https://github.com/YOUR_USER/my-enclave-app.git +cd my-enclave-app +``` + +--- + +## Step 2: Replace KMS Root CA Certificate + +> ⚠️ Required. 
Template `app/root_ca.pem` is placeholder only. + +`root_ca.pem` is baked into image and affects measured hash. + +```bash +# From your running KMS endpoint +curl -sk https://:12001/prpc/GetTempCaCert?json \ + | jq -r .temp_ca_cert > app/root_ca.pem +``` + +> Use a **domain name** in KMS URL when possible. Avoid raw IP in production. + +If KMS CA rotates, rebuild EIF and re-register new image hash. + +--- + +## Step 3: Configure and Build EIF + +### 3.1 Template Variables + +`app/entrypoint.sh` uses placeholders: + +```bash +KMS_URL="__KMS_URL__" +APP_ID="__APP_ID__" +``` + +Do not hardcode manually; pass via build inputs. + +### 3.2 Local Build + +```bash +DSTACK_UTIL=/path/to/dstack-util \ +KMS_URL=https://your-kms-domain:12001 \ +APP_ID=0xYOUR_APP_ID \ + ./scripts/build-eif.sh +``` + +or source build variant: + +```bash +KMS_URL=https://your-kms-domain:12001 \ +APP_ID=0xYOUR_APP_ID \ +DSTACK_COMMIT=14963a2ccb0ec7bef8a496c1ac5ac40f5593145d \ + ./scripts/build-eif.sh +``` + +### 3.3 GitHub Actions Build + +```bash +git add . +git commit -m "configure enclave app" +git push origin main +git tag v0.1.0 +git push origin v0.1.0 +``` + +### 3.4 Output + +`./output/`: + +- `enclave.eif` +- `measurements.json` +- `measurements.sigstore.json` (CI) + +Output includes: + +- `PCR0`, `PCR1`, `PCR2` +- `OS_IMAGE_HASH = sha256(PCR0 || PCR1 || PCR2)` + +--- + +## Step 4: Register OS_IMAGE_HASH On-chain + +Before key retrieval, register measured image hash. + +### Development + +```bash +cd dstack/kms/auth-eth +npx hardhat kms:add-image --network +``` + +### Production + +Use governance/multisig/timelock flow. See [Register Workload Measurements](register-enclave-measurement). 
+ +### App policy check (important) + +For successful `GetAppKey`, app policy must also allow your runtime attestation: + +- compose hash/image hash allowed (`app:add-hash`) +- device policy satisfied (either `allowAnyDevice=true` or device explicitly added) + +Useful commands: + +```bash +npx hardhat app:add-hash --app-id --network +# Choose one: +npx hardhat app:set-allow-any-device --app-id true --network +# or +npx hardhat app:add-device --app-id --network +``` + +--- + +## Step 5: Deploy on EC2 + +### 5.1 Launch Nitro-capable instance + +| Setting | Value | +|---------|-------| +| Instance type | `c5.xlarge` or larger (c5/m5/r5 families) | +| AMI | Amazon Linux 2023 | +| Nitro Enclaves | enabled | + +### 5.2 Install Nitro tooling + +```bash +sudo yum install -y aws-nitro-enclaves-cli +sudo systemctl enable nitro-enclaves-allocator.service +sudo systemctl start nitro-enclaves-allocator.service +``` + +### 5.3 Allocator resources + +`/etc/nitro_enclaves/allocator.yaml`: + +```yaml +memory_mib: 2048 +cpu_count: 2 +``` + +```bash +sudo systemctl restart nitro-enclaves-allocator.service +``` + +### 5.4 Run EIF + +```bash +gh release download v0.1.0 -p 'enclave.eif' +nitro-cli run-enclave --eif-path enclave.eif --cpu-count 2 --memory 2048 +``` + +### 5.5 Verify + +```bash +nitro-cli describe-enclaves +nitro-cli console +``` + +--- + +## Entrypoint Notes + +The template uses `dstack-util get-keys` and a VSOCK proxy for key retrieval. Keys are returned to `/var/run/dstack/keys.json` inside the enclave, or captured on the host via VSOCK in helper scripts. + +> **Important:** The KMS URL, APP_ID, and `root_ca.pem` are all baked into the enclave image and affect the measured hash. You must use the exact same inputs between `--show-mrs` preview and the real run — any difference produces a different `OS_IMAGE_HASH`. 
+ +--- + +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| `Boot denied: OS image is not allowed` | register the exact runtime OS_IMAGE_HASH; ensure runtime build inputs match show-mrs inputs | +| Raw IP in KMS URL causes TLS/SAN issues | use DNS name (`kms.example.com`), not bare IP | +| `get_keys.sh` completes but key file is empty | treat as failure; inspect enclave console and KMS policy response | +| `DEBUG_ENCLAVE=1` run cannot pass policy | expected for production validation; debug mode alters attestation behavior and should be used only for diagnostics | +| `Insufficient CPUs available in the pool` | terminate leftover enclaves and adjust allocator CPU count | + +--- + +## Differences from GCP + +Nitro and GCP take fundamentally different approaches. On GCP, the Guest Agent handles attestation, key management, and automatic disk encryption. On Nitro, `dstack-util` only retrieves the key — your application decides what to encrypt. + +For a full comparison, see the [overview](overview) and [KMS and Key Delivery](kms-and-key-delivery). + +--- + +## Next Steps + +- [Register Workload Measurements](register-enclave-measurement) +- [Run a dstack-kms on GCP](run-kms-on-gcp) +- [Nitro Enclave concept](/dstack-cloud/nitro-enclave) diff --git a/dstack-cloud/runbook.mdx b/dstack-cloud/runbook.mdx new file mode 100644 index 0000000..7a3a734 --- /dev/null +++ b/dstack-cloud/runbook.mdx @@ -0,0 +1,267 @@ +--- +title: Troubleshooting Runbook +description: Troubleshooting runbook for common dstack-cloud deployment issues. +--- + +# Troubleshooting Runbook + +When something breaks in a dstack-cloud deployment, the root cause usually falls into one of these categories: attestation mismatch, KMS unavailability, governance hold-up, or infrastructure issues. This runbook covers the most common failure modes and how to diagnose them. 
+ +## RA-TLS Connection Failures + +### Symptoms +- Workload logs show "RA-TLS handshake failed" +- KMS logs show "connection from unverified peer" +- Workload cannot obtain keys + +### Diagnosis +```bash +# Check workload logs +dstack-cloud logs + +# Check KMS logs +cd kms-prod +dstack-cloud logs +``` + +### Common Causes and Fixes + +| Cause | Fix | +|-------|-----| +| Workload attestation invalid | Verify the workload's measurements match what is registered on-chain. Run `dstack-cloud status` to get current measurements. | +| KMS attestation invalid | Verify KMS is running in a genuine TEE. Check `dstack-cloud status` for the KMS instance. | +| Clock skew between workload and KMS | RA-TLS requires relatively synchronized clocks. Check NTP configuration on both sides. | +| Certificate expired | Check that the RA-TLS certificates have not expired. Restart the CVM to regenerate. | + +## Attestation Verification Failures + +### Symptoms +- KMS refuses to dispatch keys +- Logs show "measurement not authorized" or "attestation verification failed" + +### Diagnosis +```bash +# Get current measurements +dstack-cloud status +# Note the RTMR3 / OS_IMAGE_HASH + +# Check on-chain authorization +cast call \ + "isAuthorized(bytes32)(bool)" \ + 0xYOUR_MEASUREMENT_HASH \ + --rpc-url $RPC_URL +``` + +### Common Causes and Fixes + +| Cause | Fix | +|-------|-----| +| Measurement not registered on-chain | Register the measurement via governance. See [Register Workload Measurements](/dstack-cloud/register-enclave-measurement). | +| Measurement changed after update | Application code or Docker image changed. Register the new measurement. | +| KMS pointing to wrong contract | Verify `KMS_CONTRACT_ADDR` environment variable. | +| RPC returns stale state | Check RPC provider health. Switch to a backup RPC endpoint. 
| + +## CVM / Enclave Startup Failures + +### Symptoms +- `dstack-cloud deploy` succeeds but CVM exits immediately +- `dstack-cloud status` shows "ERROR" or "STOPPED" + +### Diagnosis +```bash +# Check logs for the reason +dstack-cloud logs + +# Check resource allocation (GCP) +gcloud compute instances describe <INSTANCE_NAME> --zone <ZONE> + +# Check resource allocation (Nitro) +nitro-cli describe-enclaves +``` + +### Common Causes and Fixes + +| Cause | Fix | +|-------|-----| +| Insufficient memory | Allocate more memory. On GCP, use a larger machine type. On Nitro, raise `memory_mib` in `/etc/nitro_enclaves/allocator.yaml` and pass a larger `--memory` to `nitro-cli run-enclave`. | +| Invalid Docker image | Verify the image exists and is accessible. Use SHA256 digests for pinned images. | +| Container crash loop | Check application logs. The container may have a runtime error. | +| OS image incompatible | Ensure the OS image version matches the dstack-cloud CLI version. | + +### GCP-specific + +| Cause | Fix | +|-------|-----| +| Confidential VM quota exceeded | Request quota increase in GCP Console. | +| VM not booting as TDX | Verify the VM has `confidential-compute: enabled` in GCP Console. | + +### Nitro-specific + +| Cause | Fix | +|-------|-----| +| Enclave image (EIF) too large | Reduce Docker image size. Use multi-stage builds. | +| Nitro driver not installed | Install (Amazon Linux): `sudo yum install -y aws-nitro-enclaves-cli` | +| Enclave resource limit exceeded | Raise `cpu_count` / `memory_mib` in `/etc/nitro_enclaves/allocator.yaml`, restart `nitro-enclaves-allocator.service`, and retry. 
| + +## On-chain Authorization Failures + +### Symptoms +- KMS logs show "workload not authorized" +- Keys are not dispatched despite correct attestation + +### Diagnosis +```bash +# Check if measurement is authorized on-chain +cast call $KMS_CONTRACT_ADDR "isAuthorized(bytes32)(bool)" 0xHASH --rpc-url $RPC_URL + +# Check DstackKms contract state +cast call $KMS_CONTRACT_ADDR "owner()(address)" --rpc-url $RPC_URL +``` + +### Common Causes and Fixes + +| Cause | Fix | +|-------|-----| +| Measurement registered on wrong contract | Verify the KMS is configured to use the correct `DstackKms` address. | +| Governance transaction not yet executed | Check the Safe for pending transactions. Wait for timelock. | +| Measurement was revoked | Check the Safe transaction history. If revoked by mistake, re-register via governance. | + +## KMS Unavailable + +### Symptoms +- Workloads cannot connect to KMS +- `dstack-cloud status` shows KMS as stopped or unreachable + +### Diagnosis +```bash +# Check KMS status +cd kms-prod +dstack-cloud status + +# Check KMS logs +dstack-cloud logs + +# Test connectivity +curl -k https://<KMS_HOST>:12001/health +``` + +### Common Causes and Fixes + +| Cause | Fix | +|-------|-----| +| KMS CVM stopped | Restart: `dstack-cloud start` | +| KMS bootstrap not completed | Complete the bootstrap procedure. See [Run a dstack-kms CVM on GCP](/dstack-cloud/run-kms-on-gcp). | +| Network issue | Verify firewall rules. Check VSOCK proxy on Nitro. | +| KMS out of memory | Allocate more resources. Check `dstack-cloud logs` for OOM errors. | + +## Governance Transactions Stuck + +### Symptoms +- Governance proposal not advancing +- Transaction in Safe queue not executing + +### Diagnosis + +1. Check the Safe web interface for transaction status +2. Check if the timelock has expired +3. Verify the Safe has sufficient gas + +### Common Causes and Fixes + +| Cause | Fix | +|-------|-----| +| Not enough signatures | Contact missing signers. 
If a signer is unavailable, consider adding a new signer (requires governance). | +| Timelock not yet expired | Check the exact expiry time. Wait. | +| Safe out of gas | Send ETH to the Safe address. | +| Transaction will revert | Simulate the transaction before executing. The contract state may have changed since the proposal was created. Cancel and re-submit. | +| Stale transaction in queue | Cancel the stale transaction through the Safe interface. Submit a new one. | + +## VSOCK Proxy Failures (Nitro-specific) + +### Symptoms +- Enclave cannot reach KMS or external services +- `dstack-cloud logs` shows network timeout errors + +### Diagnosis +```bash +# Check if socat is running +ps aux | grep socat + +# Check VSOCK proxy logs +journalctl -u vsock-proxy -f # if running as systemd service + +# Test VSOCK connectivity from the host +echo "test" | socat - VSOCK-CONNECT:1:8000 +``` + +### Common Causes and Fixes + +| Cause | Fix | +|-------|-----| +| socat not running | Start the VSOCK proxy. Check `prelaunch.sh` for the proxy startup command. | +| Wrong VSOCK port | Verify the VSOCK port matches between the Enclave and the proxy. | +| socat crashed | Restart socat. Check system logs for crash reason. Consider running as a systemd service with auto-restart. | +| Port conflict | Another process is using the same port. Change the proxy port configuration. | + +## Emergency Operations + +### Revoke a Compromised Measurement + +1. Draft a governance transaction to remove the measurement from `DstackKms` +2. Request expedited approval from all signers +3. Wait for the timelock (cannot be bypassed) +4. Execute after the delay +5. Verify the measurement is no longer authorized + +### KMS Key Compromise + +If the KMS root key may have been compromised: + +1. Stop the KMS immediately: `dstack-cloud stop` +2. Audit all workloads that received keys from the compromised KMS +3. Rotate affected application keys +4. Deploy a new KMS instance with fresh measurements +5. 
Register the new KMS measurements on-chain +6. Revoke the old KMS measurements +7. Restart workloads against the new KMS + +### Full System Recovery + +1. Stop all CVMs and KMS instances +2. Verify blockchain state is consistent +3. Redeploy from known-good configuration +4. Re-register measurements if needed +5. Verify end-to-end key delivery +6. Review governance activity for suspicious transactions + +## Diagnostic Commands Cheat Sheet + +```bash +# Check deployment status +dstack-cloud status + +# View logs +dstack-cloud logs +dstack-cloud logs --follow +dstack-cloud logs --container <CONTAINER_NAME> + +# Check measurements +dstack-cloud status | grep -iE "measurement|hash|rtmr" + +# On-chain queries (using cast) +cast call $KMS_CONTRACT_ADDR "isAuthorized(bytes32)(bool)" 0xHASH --rpc-url $RPC_URL +cast call $KMS_CONTRACT_ADDR "owner()(address)" --rpc-url $RPC_URL + +# GCP diagnostics +gcloud compute instances describe <INSTANCE_NAME> --zone <ZONE> +gcloud logging read "resource.type=gce_instance" + +# Nitro diagnostics +nitro-cli describe-enclaves +# Resize the enclave pool: edit /etc/nitro_enclaves/allocator.yaml (e.g. cpu_count: 2, memory_mib: 4096), then: +sudo systemctl restart nitro-enclaves-allocator.service +``` + +## Next Steps + +- **[Monitoring and Alerting](monitoring-alerting)** — Set up proactive monitoring +- **[Upgrade Procedures](upgrade)** — Upgrade versions to fix known issues diff --git a/dstack-cloud/security-model.mdx b/dstack-cloud/security-model.mdx new file mode 100644 index 0000000..5ad7dfa --- /dev/null +++ b/dstack-cloud/security-model.mdx @@ -0,0 +1,131 @@ +--- +title: Threat Model and Security Guarantees +description: Trust boundaries, threat categories, security guarantees, and residual risks for dstack-cloud deployments. +--- + +# Threat Model and Security Guarantees + +## Trust Boundaries + +Understanding what you must trust — and what's protected by hardware or blockchain — is the foundation of evaluating dstack-cloud's security. This page maps out trust boundaries, threat categories, and the guarantees the system provides. 
+ +![Trust Boundaries](/images/dstack-cloud/trust-boundaries.png) + +### Untrusted + +| Entity | Assumption | +|--------|-----------| +| **Cloud platform** (GCP / AWS) | May attempt to read workload memory, inspect traffic, or modify configurations. TEE hardware prevents memory access. | +| **Host machine** (EC2 instance on Nitro) | Has root access to the host OS. Cannot access Enclave memory or modify Enclave code. | +| **Network attackers** | May intercept, modify, or replay network traffic. Defended by TLS / RA-TLS. | +| **RPC providers** | May return stale or malicious blockchain state. KMS should use multiple RPC sources. | + +### Protected by Hardware (TEE) + +| Entity | Protection | +|--------|-----------| +| **dstack CVM** (GCP workload) | Memory encrypted by TDX. Host and cloud platform cannot read or modify it. Guest Agent handles attestation and key management. | +| **Nitro Enclave** (AWS workload) | Memory encrypted by Nitro. Host and cloud platform cannot read or modify it. `dstack-util` handles attestation and key retrieval. | +| **dstack-kms** (KMS) | Runs in its own TEE. Keys are generated and stored inside; never exposed outside. | + +### Protected by Blockchain Consensus + +| Entity | Protection | +|--------|-----------| +| **On-chain contracts** (DstackKms, DstackApp) | Immutable unless governance process is followed. Changes require multisig + timelock. | + +### Partially Trusted + +| Entity | Risk | +|--------|------| +| **Multisig signers** | Can collude to push through unauthorized changes. Impact is limited by the signature threshold and timelock delay. | + +## Threat Categories + +### T1: Malicious Cloud Platform Operator or Compromised Host OS + +- **Attack:** Cloud provider or host OS administrator attempts to read workload memory or extract keys. +- **Impact:** Data breach, key compromise. +- **Mitigation:** TEE hardware encryption prevents memory access on both GCP (TDX) and AWS (Nitro Enclave). Attestation proves hardware authenticity. 
+- **Residual risk:** Microarchitectural side-channel attacks (speculative execution, etc.). See Residual Risks. + +### T3: Malicious or Compromised Workload + +- **Attack:** An attacker gains control of a workload container inside the CVM or Enclave. +- **Impact:** Data within that container is compromised. The attacker may try to escalate to the Guest Agent (GCP) or dstack-util (Nitro). +- **Mitigation:** Container isolation within the CVM/Enclave. The Guest Agent (GCP) or dstack-util (Nitro) validates attestation before delivering keys. +- **Residual risk:** If the attacker can modify the CVM/Enclave image itself, the measurements change and KMS will refuse to deliver keys. On Nitro, since encryption strategy is user-controlled, a compromised workload may misuse any keys it has already obtained. + +### T4: Man-in-the-Middle / Network Attack + +- **Attack:** Attacker intercepts communication between CVM and KMS, or between CVM and external services. +- **Impact:** Key interception, data theft, configuration tampering. +- **Mitigation:** All communication uses TLS or RA-TLS. RA-TLS additionally verifies both parties' attestation. +- **Residual risk:** TLS implementation vulnerabilities, certificate authority compromise. + +### T5: Compromised RPC Provider + +- **Attack:** Attacker operates a malicious RPC node that returns false blockchain state. +- **Impact:** KMS may accept unauthorized measurements or reject authorized ones. +- **Mitigation:** Use multiple independent RPC providers. KMS should verify blockchain state across sources. +- **Residual risk:** If all RPC providers are colluding or compromised. + +### T6: Compromised or Colluding Multisig Signers + +- **Attack:** Multiple signers collude to push through unauthorized governance changes (e.g., register malicious measurements). +- **Impact:** Unauthorized workloads receive keys from KMS. +- **Mitigation:** Signature threshold (≥ 2/3) raises the number of signers an attacker must compromise.
Timelock provides a window for detection. +- **Residual risk:** If enough signers collude to meet the threshold, the system is compromised. + +### T7: Covert Deployer Attack + +- **Attack:** A workload deployer secretly modifies the application code after deployment. +- **Impact:** The workload behaves differently from what was approved. +- **Mitigation:** On-chain measurement registration. Any code change produces new measurements. KMS refuses to deliver keys to unregistered measurements. +- **Residual risk:** If the attacker can register the new measurements through governance without being detected. + +## Security Guarantees + +| Guarantee | Mechanism | +|-----------|-----------| +| **Keys never leave verified TEE** | KMS runs in its own TEE. Keys are generated, stored, and dispatched entirely within TEE. The cloud provider cannot access them. | +| **Only approved code receives keys** | Workload measurements must be registered on-chain. KMS verifies measurements before dispatching keys. | +| **Governance changes are auditable** | All governance actions go through Multisig + Timelock and are recorded on-chain. Anyone can verify the history. | +| **Memory is encrypted** | TEE hardware encrypts all memory. The host OS and cloud platform cannot read CVM (GCP) or Enclave (Nitro) memory. | +| **Code integrity is verifiable** | Attestation proves the exact code and configuration running in the TEE. External parties can independently verify. | + +## Residual Risks + +These are risks that the current architecture does not fully mitigate: + +| Risk | Description | Mitigation | +|------|-------------|------------| +| **Hardware side-channels** | TEE hardware may be vulnerable to microarchitectural side-channel attacks (e.g., Spectre, Meltdown variants). | Keep TCB (Trusted Computing Base) firmware updated. Monitor Intel / AWS security advisories. 
| +| **Smart contract vulnerabilities** | Bugs in DstackKms, DstackApp, or governance contracts could lead to unauthorized access. | Conduct formal smart contract audits. Use well-tested contract libraries (Safe, Timelock). | +| **KMS root key** | The KMS root key is currently a single point of trust within the KMS TEE. | Future plans include MPC (Multi-Party Computation) to distribute root key generation. | +| **Denial of service** | The cloud provider or host operator can shut down CVMs or Enclaves, denying service. | Use cross-region, cross-provider redundancy for high-availability deployments. | + +## Security Checklist for Deployments + +Before going to production, verify: + +**TEE and Attestation:** +- [ ] dstack OS image is built from audited source code +- [ ] All measurements (RTMR / OS_IMAGE_HASH) are registered on-chain +- [ ] TLS certificates are valid and properly configured + +**Governance:** +- [ ] Multisig signers are using hardware wallets +- [ ] Signature threshold is ≥ 2/3 +- [ ] Timelock delay is appropriate for your risk profile + +**Operations:** +- [ ] Multiple independent RPC providers are configured +- [ ] Monitoring and alerting are set up for attestation failures and governance events +- [ ] Runbook exists for common failure scenarios + +## Next Steps + +- **[Glossary](/dstack-cloud/glossary)** — Definitions of security-related terms +- **[Runbook](/dstack-cloud/runbook)** — Troubleshooting security-related issues +- **[dstack Security Model](https://github.com/Phala-Network/dstack-cloud/blob/master/docs/security/security-model.md)** — Official security model document diff --git a/dstack-cloud/upgrade.mdx b/dstack-cloud/upgrade.mdx new file mode 100644 index 0000000..75aaaf7 --- /dev/null +++ b/dstack-cloud/upgrade.mdx @@ -0,0 +1,214 @@ +--- +title: Upgrade Procedures +description: Upgrade procedures for KMS images, CVM applications, and smart contracts. 
+--- + +# Upgrade Procedures + +Upgrading in a TEE environment is different from normal cloud upgrades: changing your Docker image or OS version changes the measurements, which means you need to go through governance before KMS will deliver keys to the new version. This page covers the upgrade procedures for KMS, CVMs/Enclaves, and smart contracts. + +## Pre-upgrade Checklist + +Before any upgrade: + +- [ ] Review the changelog for the new version +- [ ] Test the upgrade in a non-production environment first +- [ ] Ensure you have access to all required signers (for governance changes) +- [ ] Back up current configuration (`app.json`, `docker-compose.yaml`, `.env`) +- [ ] Verify monitoring and alerting is operational +- [ ] Notify stakeholders of planned downtime (if applicable) +- [ ] Prepare a rollback plan + +## KMS Image Upgrade + +### When to Upgrade + +- New dstack-cloud version available with security fixes +- New KMS features needed +- KMS vulnerability disclosed + +### Procedure (GCP) + +1. **Pull the new OS image:** + ```bash + dstack-cloud pull --os-image dstack-cloud-0.7.0 + ``` + +2. **Stop the current KMS CVM:** + ```bash + cd kms-prod + dstack-cloud stop + ``` + +3. **Update the OS image reference in `app.json`:** + ```json + { + "os_image": "dstack-cloud-0.7.0" + } + ``` + +4. **Redeploy:** + ```bash + dstack-cloud deploy + ``` + +5. **Re-bootstrap if the KMS image changed:** + - If the new KMS image produces different measurements, you must register them on-chain before KMS will work + - Follow the bootstrap procedure in [Run a dstack-kms CVM on GCP](/dstack-cloud/run-kms-on-gcp) + +6. 
**Verify:** + ```bash + dstack-cloud status + dstack-cloud logs --follow + ``` + +### Procedure (Nitro) + +The process is similar, but note: + +- The Enclave is stateless — after stopping and restarting, key material must be re-derived +- Ensure the VSOCK proxy is running before deploying the new Enclave +- New measurements may require on-chain registration + +### Rollback + +1. Stop the new KMS CVM: `dstack-cloud stop` +2. Revert `app.json` to the previous OS image version +3. Redeploy: `dstack-cloud deploy` +4. If measurements changed during the upgrade, confirm the old measurements are still registered on-chain (they remain authorized unless they were explicitly revoked) + +## CVM / Enclave Application Upgrade + +### When Upgrading Requires Governance + +If your application upgrade changes the measurements (different Docker images, different `docker-compose.yaml`, different dstack OS version), you must go through governance: + +1. **Build and deploy the updated CVM/Enclave** (in a test environment first) +2. **Extract the new measurements:** + ```bash + dstack-cloud status + # Note the RTMR3 / OS_IMAGE_HASH + ``` + +3. **Register new measurements on-chain:** + - See [Register Workload Measurements](/dstack-cloud/register-enclave-measurement) + +4. **Wait for governance approval and timelock** + +5. **Deploy to production:** + ```bash + dstack-cloud deploy + ``` + +### When Upgrading Does NOT Require Governance + +Governance is not needed only when the upgrade leaves every measured input unchanged — the Docker images, `docker-compose.yaml`, and dstack OS version. Such upgrades are rare: application code cannot be swapped in through an unmeasured path (for example, a mounted volume), because the code is measured at build time. In practice, most upgrades in a TEE environment require governance. + +### Procedure (GCP) + +1. Update `docker-compose.yaml` with new image versions: + ```yaml + services: + web: + image: my-app:v2.0.0@sha256:abcd1234... + ``` + +2. Redeploy: + ```bash + dstack-cloud deploy + ``` + +3. Verify: + ```bash + dstack-cloud logs --follow + ``` + +### Procedure (Nitro) + +1.
Update `docker-compose.yaml` +2. Redeploy: + ```bash + dstack-cloud deploy + ``` +3. The Enclave is rebuilt from the new image +4. Verify via VSOCK proxy + +## Smart Contract Upgrade + +### When to Upgrade + +- Security vulnerability in contract code +- New governance feature needed +- Bug fix in contract logic + +### Procedure (Using Proxy Pattern) + +If contracts use an upgradeable proxy pattern: + +1. **Deploy the new implementation contract:** + ```bash + npx hardhat run scripts/deploy-kms-v2.ts --network $NETWORK + ``` + +2. **Draft a governance transaction** to call `proxy.upgradeTo(newImplementation)`: + - Use the Safe web interface + - The transaction must go through multisig + timelock + +3. **Test the upgrade on testnet first:** + - Deploy new implementation on testnet + - Run through governance on testnet + - Verify the upgraded contract works as expected + +4. **Execute governance on mainnet:** + - Collect signatures + - Wait for timelock + - Execute + +5. **Verify:** + ```bash + # Check the implementation address + cast call $PROXY_ADDRESS "implementation()(address)" --rpc-url $RPC_URL + ``` + +### Rollback + +To roll back, follow the same procedure with the previous implementation address. + +### Important Notes + +- The proxy admin must be the governance Safe — never an EOA +- Upgrades cannot be rushed — the timelock delay applies +- Plan upgrades during low-traffic windows +- Have a tested rollback procedure ready before upgrading + +## Coordinated Upgrade: Application + Measurements + Contracts + +A full-stack upgrade requires careful coordination because each step may depend on the previous one completing. The general order is: + +1. Test new application + new KMS + new contracts in staging +2. Deploy new contracts to mainnet (through governance) +3. Register new measurements on-chain (through governance) +4. Wait for all governance actions to complete +5. Deploy new KMS image to production +6. Deploy new application CVMs to production +7.
Verify end-to-end key delivery +8. Optionally revoke old measurements after verification + +## Downtime Considerations + +| Component | Downtime During Upgrade | +|-----------|------------------------| +| KMS (GCP) | 1-2 minutes (stop + redeploy) | +| CVM (GCP) | 2-3 minutes | +| CVM (Nitro) | 30 seconds to 1 minute | +| Smart contracts | No downtime (upgrade is a transaction) | + +To minimize downtime: +- Deploy new CVMs first, then switch traffic +- Keep old CVMs running until new ones are verified +- For KMS, consider running multiple instances for high availability + +## Next Steps + +- **[Monitoring and Alerting](/dstack-cloud/monitoring-alerting)** — Monitor upgrade impact +- **[Runbook](/dstack-cloud/runbook)** — Troubleshoot upgrade issues diff --git a/images/dstack-cloud/architecture-overview-v2.png b/images/dstack-cloud/architecture-overview-v2.png new file mode 100644 index 0000000..92b79e7 Binary files /dev/null and b/images/dstack-cloud/architecture-overview-v2.png differ diff --git a/images/dstack-cloud/cvm-deployment-flow.png b/images/dstack-cloud/cvm-deployment-flow.png new file mode 100644 index 0000000..e5d86db Binary files /dev/null and b/images/dstack-cloud/cvm-deployment-flow.png differ diff --git a/images/dstack-cloud/governance-workflow.png b/images/dstack-cloud/governance-workflow.png new file mode 100644 index 0000000..c743bb4 Binary files /dev/null and b/images/dstack-cloud/governance-workflow.png differ diff --git a/images/dstack-cloud/key-request-flow.png b/images/dstack-cloud/key-request-flow.png new file mode 100644 index 0000000..81a16f6 Binary files /dev/null and b/images/dstack-cloud/key-request-flow.png differ diff --git a/images/dstack-cloud/kms-key-delivery-gcp-v5.png b/images/dstack-cloud/kms-key-delivery-gcp-v5.png new file mode 100644 index 0000000..1910e21 Binary files /dev/null and b/images/dstack-cloud/kms-key-delivery-gcp-v5.png differ diff --git a/images/dstack-cloud/kms-key-delivery-nitro-v5.png b/images/dstack-cloud/kms-key-delivery-nitro-v5.png
new file mode 100644 index 0000000..7b2cfd5 Binary files /dev/null and b/images/dstack-cloud/kms-key-delivery-nitro-v5.png differ diff --git a/images/dstack-cloud/measurement-registration-flow.png b/images/dstack-cloud/measurement-registration-flow.png new file mode 100644 index 0000000..64a5500 Binary files /dev/null and b/images/dstack-cloud/measurement-registration-flow.png differ diff --git a/images/dstack-cloud/nitro-enclave-architecture-v2.png b/images/dstack-cloud/nitro-enclave-architecture-v2.png new file mode 100644 index 0000000..2a1c7fa Binary files /dev/null and b/images/dstack-cloud/nitro-enclave-architecture-v2.png differ diff --git a/images/dstack-cloud/trust-boundaries.png b/images/dstack-cloud/trust-boundaries.png new file mode 100644 index 0000000..31f3701 Binary files /dev/null and b/images/dstack-cloud/trust-boundaries.png differ diff --git a/images/dstack-cloud/vsock-communication-v2.png b/images/dstack-cloud/vsock-communication-v2.png new file mode 100644 index 0000000..848ef79 Binary files /dev/null and b/images/dstack-cloud/vsock-communication-v2.png differ