diff --git a/cli/README.md b/cli/README.md index b7222851..e1093a4f 100644 --- a/cli/README.md +++ b/cli/README.md @@ -32,6 +32,10 @@ phala logs # SSH into the CVM phala ssh + +# List all replicas for one app, then target one by UUID +phala cvms list --app app_abc123 +phala cvms restart 550e8400-e29b-41d4-a716-446655440000 ``` > **Tip:** Run `phala link` after your first deploy. It creates a `phala.toml` that binds the directory to the CVM, so subsequent commands (`deploy`, `logs`, `ssh`, `cp`, `ps`) work without specifying a CVM ID. `phala.toml` is safe to commit to version control. diff --git a/cli/docs/cvms.md b/cli/docs/cvms.md index 57415f4f..8525f9e9 100644 --- a/cli/docs/cvms.md +++ b/cli/docs/cvms.md @@ -21,6 +21,7 @@ List CVMs in your workspace with filtering and pagination. | Flag | Default | Description | |------|---------|-------------| +| `--app ` | | List all CVMs belonging to a specific app | | `--page` | 1 | Page number for pagination | | `--page-size` | 30 | Number of items per page | | `--search ` | | Search CVMs by name or ID | @@ -36,6 +37,7 @@ List CVMs in your workspace with filtering and pagination. #### Examples $ phala cvms list + $ phala cvms list --app app_abc123 $ phala cvms ls --status running --status starting $ phala cvms list --search my-app --json $ phala cvms ls --region us-west --page 2 @@ -67,6 +69,7 @@ Get detailed information about a specific CVM. $ phala cvms get $ phala cvms get app_abc123 + $ phala cvms get 550e8400-e29b-41d4-a716-446655440000 $ phala cvms get --json $ phala cvms get --interactive diff --git a/cli/src/commands/cvms/get/index.ts b/cli/src/commands/cvms/get/index.ts index 2b58d4fd..4a7ca961 100644 --- a/cli/src/commands/cvms/get/index.ts +++ b/cli/src/commands/cvms/get/index.ts @@ -59,6 +59,8 @@ async function runCvmsGetCommand( logger.keyValueTable({ Name: cvm.name, "App ID": `app_${cvm.app_id}`, + "VM UUID": cvm.vm_uuid ?? "N/A", + "Instance ID": cvm.instance_id ?? 
"N/A", Status: statusColour, vCPU: cvm.resource.vcpu, Memory: diff --git a/cli/src/commands/cvms/list/command.ts b/cli/src/commands/cvms/list/command.ts index b78ae153..cda1726b 100644 --- a/cli/src/commands/cvms/list/command.ts +++ b/cli/src/commands/cvms/list/command.ts @@ -8,6 +8,13 @@ export const cvmsListCommandMeta: CommandMeta = { description: "List CVMs", stability: "unstable", options: [ + { + name: "app", + description: "List all CVMs belonging to a specific app_id", + type: "string", + target: "appId", + group: "basic", + }, { name: "page", description: "Page number (1-based)", @@ -86,6 +93,10 @@ export const cvmsListCommandMeta: CommandMeta = { name: "List CVMs", value: "phala cvms ls", }, + { + name: "List all replicas for an app", + value: "phala cvms ls --app app_123", + }, { name: "Second page", value: "phala cvms ls --page 2", @@ -106,6 +117,7 @@ export const cvmsListCommandMeta: CommandMeta = { }; export const cvmsListCommandSchema = z.object({ + appId: z.string().optional(), page: z.coerce.number().int().min(1).default(1), pageSize: z.coerce.number().int().min(1).max(100).default(50), search: z.string().optional(), diff --git a/cli/src/commands/cvms/list/index.ts b/cli/src/commands/cvms/list/index.ts index 7dfacc3a..cc41eecb 100644 --- a/cli/src/commands/cvms/list/index.ts +++ b/cli/src/commands/cvms/list/index.ts @@ -1,4 +1,5 @@ import chalk from "chalk"; +import { safeGetAppCvms, safeGetCvmStatusBatch } from "@phala/cloud"; import { defineCommand } from "@/src/core/define-command"; import type { CommandContext } from "@/src/core/types"; import { getClient } from "@/src/lib/client"; @@ -12,6 +13,21 @@ import { type CvmsListCommandInput, } from "./command"; +interface AppScopedListResult { + page: number; + pageSize: number; + total: number; + totalPages: number; + items: Array<{ + appId: string; + vmUuid?: string | null; + instanceId?: string | null; + cvmName: string; + status: string; + uptime?: string | null; + }>; +} + function 
formatStatus(status: string): string { if (status.toLowerCase().endsWith("ing")) return chalk.yellow(status); if (status === "running") return chalk.green(status); @@ -19,40 +35,239 @@ function formatStatus(status: string): string { return chalk.yellow(status); } +function getAppScopedVmUuid(cvm: Record): string | null { + if (typeof cvm.vm_uuid === "string" && cvm.vm_uuid.length > 0) { + return cvm.vm_uuid; + } + + const hosted = cvm.hosted; + if ( + hosted && + typeof hosted === "object" && + "id" in hosted && + typeof hosted.id === "string" && + hosted.id.length > 0 + ) { + return hosted.id; + } + + return null; +} + +function getAppScopedInstanceId(cvm: Record): string | null { + if (typeof cvm.instance_id === "string" && cvm.instance_id.length > 0) { + return cvm.instance_id; + } + + const hosted = cvm.hosted; + if ( + hosted && + typeof hosted === "object" && + "instance_id" in hosted && + typeof hosted.instance_id === "string" && + hosted.instance_id.length > 0 + ) { + return hosted.instance_id; + } + + return null; +} + +function getAppScopedAppId(cvm: Record): string | null { + if (typeof cvm.app_id === "string" && cvm.app_id.length > 0) { + return cvm.app_id; + } + + const hosted = cvm.hosted; + if ( + hosted && + typeof hosted === "object" && + "app_id" in hosted && + typeof hosted.app_id === "string" && + hosted.app_id.length > 0 + ) { + return hosted.app_id; + } + + return null; +} + +function matchesAppScopedFilters( + cvm: Record, + input: CvmsListCommandInput, +): boolean { + if (input.search) { + const needle = input.search.toLowerCase(); + const haystacks = [ + typeof cvm.name === "string" ? cvm.name : "", + getAppScopedAppId(cvm) ?? "", + getAppScopedVmUuid(cvm) ?? "", + getAppScopedInstanceId(cvm) ?? 
"", + ]; + if (!haystacks.some((value) => value.toLowerCase().includes(needle))) { + return false; + } + } + + if ( + input.status && + input.status.length > 0 && + (typeof cvm.status !== "string" || !input.status.includes(cvm.status)) + ) { + return false; + } + + if (input.listed !== undefined && Boolean(cvm.listed) !== input.listed) { + return false; + } + + if ( + input.baseImage && + (!cvm.os || + typeof cvm.os !== "object" || + !("name" in cvm.os) || + cvm.os.name !== input.baseImage) + ) { + return false; + } + + if ( + input.instanceType && + (!cvm.resource || + typeof cvm.resource !== "object" || + !("instance_type" in cvm.resource) || + cvm.resource.instance_type !== input.instanceType) + ) { + return false; + } + + if (input.kmsType && cvm.kms_type !== input.kmsType) { + return false; + } + + if ( + input.node && + (!cvm.node_info || + typeof cvm.node_info !== "object" || + !("name" in cvm.node_info) || + cvm.node_info.name !== input.node) + ) { + return false; + } + + if ( + input.region && + !((cvm.node_info && + typeof cvm.node_info === "object" && + "region" in cvm.node_info && + cvm.node_info.region === input.region) || + (cvm.node && + typeof cvm.node === "object" && + "region_identifier" in cvm.node && + cvm.node.region_identifier === input.region)) + ) { + return false; + } + + return true; +} + +async function listCvmsForApp( + input: CvmsListCommandInput, +): Promise { + const client = await getClient(); + const appId = input.appId?.replace(/^app_/, ""); + if (!appId) { + throw new Error("App ID is required"); + } + + const cvmsResult = await safeGetAppCvms(client as never, { appId }); + if (!cvmsResult.success) { + throw new Error(cvmsResult.error.message); + } + + const filtered = cvmsResult.data.filter((cvm) => + matchesAppScopedFilters(cvm as Record, input), + ); + + const vmUuids = filtered + .map((cvm) => getAppScopedVmUuid(cvm as Record)) + .filter( + (uuid): uuid is string => typeof uuid === "string" && uuid.length > 0, + ); + + const 
statusBatch = + vmUuids.length > 0 + ? await safeGetCvmStatusBatch(client as never, { vmUuids }) + : { success: true as const, data: {} }; + + if (!statusBatch.success) { + throw new Error(statusBatch.error.message); + } + + const start = (input.page - 1) * input.pageSize; + const paged = filtered.slice(start, start + input.pageSize); + + return { + page: input.page, + pageSize: input.pageSize, + total: filtered.length, + totalPages: filtered.length === 0 ? 1 : Math.ceil(filtered.length / input.pageSize), + items: paged.map((cvm) => { + const normalized = cvm as Record; + const vmUuid = getAppScopedVmUuid(normalized); + const batch = vmUuid ? statusBatch.data[vmUuid] : undefined; + return { + appId: getAppScopedAppId(normalized) ?? `app_${appId}`, + vmUuid, + instanceId: getAppScopedInstanceId(normalized), + cvmName: cvm.name, + status: batch?.status ?? cvm.status, + uptime: batch?.uptime, + }; + }), + }; +} + async function runCvmsListCommand( input: CvmsListCommandInput, context: CommandContext, ): Promise { try { - const client = await getClient(); - const result = await listAppsWithCvmStatus(client as never, { - page: input.page, - pageSize: input.pageSize, - search: input.search, - status: input.status, - listed: input.listed, - baseImage: input.baseImage, - instanceType: input.instanceType, - kmsType: input.kmsType, - node: input.node, - region: input.region, - }); - - if (result.success === false) { - context.fail(result.error.message); - return 1; - } + const data = input.appId + ? 
await listCvmsForApp(input) + : await (async () => { + const client = await getClient(); + const result = await listAppsWithCvmStatus(client as never, { + page: input.page, + pageSize: input.pageSize, + search: input.search, + status: input.status, + listed: input.listed, + baseImage: input.baseImage, + instanceType: input.instanceType, + kmsType: input.kmsType, + node: input.node, + region: input.region, + }); + + if (result.success === false) { + throw new Error(result.error.message); + } - const data = result.data; + return result.data; + })(); if (input.json) { context.success(data); return 0; } - const columns = ["APP_ID", "CVM", "STATUS", "UPTIME"] as const; + const columns = ["APP_ID", "VM_UUID", "INSTANCE_ID", "CVM", "STATUS", "UPTIME"] as const; const rows = data.items.map((item) => ({ APP_ID: item.appId, + VM_UUID: item.vmUuid ?? "-", + INSTANCE_ID: item.instanceId ?? "-", CVM: item.cvmName, STATUS: formatStatus(item.status), UPTIME: item.uptime ?? "-", diff --git a/cli/src/lib/apps/list-apps-with-cvm-status.ts b/cli/src/lib/apps/list-apps-with-cvm-status.ts index 9a881427..d528328e 100644 --- a/cli/src/lib/apps/list-apps-with-cvm-status.ts +++ b/cli/src/lib/apps/list-apps-with-cvm-status.ts @@ -17,6 +17,8 @@ export interface AppsListWithStatusOptions { export interface AppCvmRow { readonly appId: string; + readonly vmUuid?: string | null; + readonly instanceId?: string | null; readonly cvmName: string; readonly status: string; readonly uptime?: string | null; @@ -112,6 +114,8 @@ export async function listAppsWithCvmStatus( rows.push({ appId: app.app_id, + vmUuid: currentCvm.vm_uuid ?? null, + instanceId: currentCvm.instance_id ?? 
null, cvmName: currentCvm.name, status, uptime: batch?.uptime, diff --git a/docs/issue-243-stateful-primitives-plan.md b/docs/issue-243-stateful-primitives-plan.md new file mode 100644 index 00000000..6c5b583d --- /dev/null +++ b/docs/issue-243-stateful-primitives-plan.md @@ -0,0 +1,60 @@ +# Issue #243: Per-instance stateful replica primitives + +## Summary + +This repository contains the public CLI, SDK, and docs for Phala Cloud, but not the cloud backend service that would implement new app lifecycle semantics. The first useful public slice should therefore expose the per-instance shape that already exists in the API responses, while deferring new control-plane behavior that requires backend and likely scheduler/reconciler work. + +## What is already exposed today + +- `GET /apps/{appId}` already returns `cvms[]` and `cvm_count` in the SDK schemas used by this repo. +- `GET /apps/{appId}/cvms` already returns per-CVM rows. +- CVM payloads in the SDK schemas already include `instance_id`, `vm_uuid`, `app_id`, resource info, and status. +- Existing per-CVM lifecycle operations already exist for a resolved CVM identity: + - restart + - stop + - start + - delete + +That means the API already has enough read surface for operators to discover stable replica identity, and enough write surface for a first round of manual per-instance operations once a replica is addressed by UUID / instance ID instead of only by app-level views. 
+ +## Proposed phased implementation + +### Phase 1: expose per-instance identity in public tools + +Scope for this repo: + +- fix CLI replica visibility so `phala cvms list --app <app_id>` shows every replica of an app rather than only `current_cvm` +- surface `instance_id` and `vm_uuid` in `phala cvms get` and in the `phala cvms list` table +- document the current per-replica operational path: + - discover replicas from `GET /apps/{appId}/cvms` + - target one replica via UUID / instance ID + - use existing restart / stop / start / delete operations on that specific CVM + +This is a safe first PR because it is grounded in response fields already present in this repo's SDK schemas and does not invent new public API contracts. + +### Phase 2: new app-level stateful rollout primitives + +Likely requires backend work outside this repository: + +- `update_policy` persisted on app spec +- ordered rollout enforcement with at least `max_unavailable = 0` +- workload lifecycle hook plumbing +- health-check-driven auto-heal / reconcile +- explicit preserved-state metadata on instance responses if not already sourced from backend models + +These are not just CLI/SDK additions. They imply control-plane behavior, state transitions, rollout orchestration, and probably DB/API changes in the internal cloud service. + +## Open API questions to settle before provider work + +1. Is the persisted dstack instance identity considered stable and supported as a first-class public field for operators? +2. Should per-instance operations be modeled as: + - app-scoped resources (`/apps/{appId}/instances/...`), or + - CVM-scoped operations over existing instance rows? +3. Where should preserved-state metadata live in the public response shape: + - on each instance row directly, or + - on a future app-instance resource? +4. Should rollout semantics live only on the app spec, or also expose an explicit rollout/revision resource? + +## Why stop Phase 1 here + +Without backend code in this repo, adding SDK/CLI methods for hypothetical endpoints would be guesswork. 
The public contract needs maintainer confirmation before shipping anything beyond the already-exposed per-instance read/write surface.