From 54335e8f419a6a7ca599ffacb50ad36d892819eb Mon Sep 17 00:00:00 2001 From: caydyan Date: Sun, 14 Jun 2026 11:08:33 +0800 Subject: [PATCH 1/6] Implement RunPod cloud adapter --- packages/cloud/runpod/README.md | 17 +- packages/cloud/runpod/src/index.test.ts | 242 +++++++++++++- packages/cloud/runpod/src/index.ts | 420 +++++++++++++++++++++--- 3 files changed, 630 insertions(+), 49 deletions(-) diff --git a/packages/cloud/runpod/README.md b/packages/cloud/runpod/README.md index cc8be9f2..1c4e9b73 100644 --- a/packages/cloud/runpod/README.md +++ b/packages/cloud/runpod/README.md @@ -4,10 +4,19 @@ Provides the RunPod (GPU) cloud provider adapter for sh1pt scale and deploy work ## What it does -- Connects cloud provider credentials and project settings. -- Supports infrastructure planning, deployment, or status workflows where implemented. -- Includes a connection flow for account or credential setup. -- Includes setup guidance for required credentials or provider configuration. +- Connects to RunPod through the GraphQL API with `RUNPOD_API_KEY`. +- Quotes GPU pods from either explicit `hourlyPrice` config or RunPod `gpuTypes` pricing. +- Provisions on-demand GPU pods with `podFindAndDeployOnDemand`. +- Lists account pods, checks a pod by ID, and terminates pods with `podTerminate`. +- Requires `imageName` for real provisioning and supports `maxHourlyPrice` guardrails before any pod is created. + +Common config fields: + +- `gpuTypeId`: RunPod GPU type ID, such as `NVIDIA RTX A6000`. +- `imageName`: container image for real pod creation, such as `runpod/pytorch`. +- `cloudType`: `ALL`, `COMMUNITY`, or `SECURE`. +- `hourlyPrice`: optional explicit offline quote value per GPU. +- `ports`, `dockerArgs`, `volumeInGb`, `containerDiskInGb`, `minVcpuCount`, `minMemoryInGb`, `volumeMountPath`, `networkVolumeId`, and `env`. ## Package diff --git a/packages/cloud/runpod/src/index.test.ts b/packages/cloud/runpod/src/index.test.ts index 74d1cb34..346f3e2c 100644 --- a/packages/cloud/runpod/src/index.test.ts +++ b/packages/cloud/runpod/src/index.test.ts @@ -1,8 +1,242 @@ import { contractTestCloud } from '@profullstack/sh1pt-core/testing'; -import cloud from './index.js'; +import { afterEach, describe, expect, it, vi } from 'vitest'; +import adapter from './index.js'; -contractTestCloud(cloud, { - sampleConfig: { cloudType: 'COMMUNITY' }, - sampleSpec: { kind: 'gpu', gpu: { model: 'A100-40GB', count: 1 } }, +const API = 'https://api.runpod.io/graphql'; + +afterEach(() => { + vi.unstubAllGlobals(); +}); + +describe('RunPod cloud adapter', () => { + it('connects by querying the authenticated RunPod account', async () => { + const fetchMock = vi.fn(async (url: string, init: RequestInit) => { + expect(url).toBe(API); + expect(init.method).toBe('POST'); + expect(init.headers).toEqual(expect.objectContaining({ + Authorization: 'Bearer test-token', + 'Content-Type': 'application/json', + })); + expect(JSON.parse(String(init.body)).query).toContain('myself'); + return graphql({ myself: { id: 'user-1', email: 'ops@example.com' } }); + }); + vi.stubGlobal('fetch', fetchMock); + + await expect(adapter.connect(connectCtx(), {})).resolves.toEqual({ accountId: 'user-1' }); + }); + + it('quotes from configured hourly pricing without calling RunPod', async () => { + const fetchMock = vi.fn(); + vi.stubGlobal('fetch', fetchMock); + + const quote = await adapter.quote( + connectCtx(), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 2 } }, + { hourlyPrice: 0.49, gpuTypeId: 'NVIDIA RTX A6000', cloudType: 'COMMUNITY' }, + ); + + expect(quote).toMatchObject({ + hourly: 0.98, + monthly: 715.4, + provider: 'runpod', + currency: 'USD', + sku: 'NVIDIA RTX A6000 x2', + availabilityZone: 'COMMUNITY', + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it('quotes from RunPod GPU type pricing when hourlyPrice is omitted', async () => { + vi.stubGlobal('fetch', vi.fn(async () => graphql({ + gpuTypes: [{ + id: 'NVIDIA RTX A6000', + displayName: 'RTX A6000', + communityPrice: 0.44, + securePrice: 0.79, + communitySpotPrice: 0.22, + }], + }))); + + const quote = await adapter.quote( + connectCtx(), + { kind: 'gpu', gpu: { model: 'RTX A6000', count: 2 }, spotOk: true }, + { cloudType: 'COMMUNITY' }, + ); + + expect(quote.hourly).toBe(0.44); + expect(quote.spot).toBe(true); + }); + + it('creates a RunPod pod through GraphQL', async () => { + const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { + const body = JSON.parse(String(init.body)); + expect(body.query).toContain('podFindAndDeployOnDemand'); + expect(body.variables.input).toEqual({ + cloudType: 'SECURE', + gpuCount: 1, + gpuTypeId: 'NVIDIA RTX A6000', + name: 'trainer', + imageName: 'runpod/pytorch', + ports: '8888/http', + volumeInGb: 40, + containerDiskInGb: 20, + minVcpuCount: 4, + minMemoryInGb: 16, + env: [{ key: 'JUPYTER_PASSWORD', value: 'secret' }], + }); + return graphql({ + podFindAndDeployOnDemand: pod({ + id: 'pod-1', + name: 'trainer', + desiredStatus: 'RUNNING', + imageName: 'runpod/pytorch', + costPerHr: 0.79, + }), + }); + }); + vi.stubGlobal('fetch', fetchMock); + + const instance = await adapter.provision( + provisionCtx(), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 }, cpu: 4, memory: 16, maxHourlyPrice: 1 }, + { + cloudType: 'SECURE', + gpuTypeId: 'NVIDIA RTX A6000', + imageName: 'runpod/pytorch', + name: 'trainer', + hourlyPrice: 0.79, + ports: '8888/http', + volumeInGb: 40, + containerDiskInGb: 20, + env: { JUPYTER_PASSWORD: 'secret' }, + }, + ); + + expect(instance).toMatchObject({ + id: 'pod-1', + kind: 'gpu', + status: 'running', + hourlyRate: 0.79, + sku: 'runpod/pytorch', + }); + }); + + it('requires imageName before creating a real pod', async () => { + const fetchMock = vi.fn(); + vi.stubGlobal('fetch', fetchMock); + + await expect(adapter.provision( + provisionCtx(), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 } }, + { hourlyPrice: 0.5 }, + )).rejects.toThrow('config.imageName is required'); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it('honors maxHourlyPrice before provisioning', async () => { + const fetchMock = vi.fn(); + vi.stubGlobal('fetch', fetchMock); + + await expect(adapter.provision( + provisionCtx(), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 }, maxHourlyPrice: 0.01 }, + { hourlyPrice: 0.5, imageName: 'runpod/pytorch' }, + )).rejects.toThrow('exceeds maxHourlyPrice'); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it('lists pods from the authenticated account', async () => { + vi.stubGlobal('fetch', vi.fn(async () => graphql({ + myself: { + pods: [ + pod({ id: 'pod-1', name: 'trainer', desiredStatus: 'RUNNING', costPerHr: 0.44 }), + pod({ id: 'pod-2', name: 'stopped', desiredStatus: 'EXITED', costPerHr: 0 }), + ], + }, + }))); + + const instances = await adapter.list(connectCtx(), {}); + + expect(instances.map((instance) => [instance.id, instance.status])).toEqual([ + ['pod-1', 'running'], + ['pod-2', 'stopped'], + ]); + }); + + it('checks status for a single pod', async () => { + const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { + const body = JSON.parse(String(init.body)); + expect(body.query).toContain('query Pod'); + expect(body.variables).toEqual({ input: { podId: 'pod-1' } }); + return graphql({ pod: pod({ id: 'pod-1', desiredStatus: 'RUNNING', costPerHr: 0.44 }) }); + }); + vi.stubGlobal('fetch', fetchMock); + + await expect(adapter.status(connectCtx(), 'pod-1', {})).resolves.toMatchObject({ + id: 'pod-1', + status: 'running', + publicIp: '203.0.113.10', + }); + }); + + it('terminates a pod through GraphQL', async () => { + const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { + const body = JSON.parse(String(init.body)); + expect(body.query).toContain('podTerminate'); + expect(body.variables).toEqual({ input: { podId: 'pod-1' } }); + return graphql({ podTerminate: null }); + }); + vi.stubGlobal('fetch', fetchMock); + + await adapter.destroy(provisionCtx(), 'pod-1', {}); + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it('reports GraphQL errors', async () => { + vi.stubGlobal('fetch', vi.fn(async () => new Response(JSON.stringify({ + errors: [{ message: 'not authorized' }], + }), { status: 200 }))); + + await expect(adapter.connect(connectCtx(), {})).rejects.toThrow('RunPod GraphQL failed: not authorized'); + }); +}); + +contractTestCloud(adapter, { + sampleConfig: { cloudType: 'COMMUNITY', hourlyPrice: 0.001 }, + sampleSpec: { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 } }, requiredSecrets: ['RUNPOD_API_KEY'], }); + +function connectCtx() { + return { + secret: (key: string) => key === 'RUNPOD_API_KEY' ? 'test-token' : undefined, + log: vi.fn(), + }; +} + +function provisionCtx(dryRun = false) { + return { + ...connectCtx(), + dryRun, + }; +} + +function graphql(data: unknown) { + return new Response(JSON.stringify({ data })); +} + +function pod(overrides: Record = {}) { + return { + id: 'pod-1', + name: 'trainer', + desiredStatus: 'RUNNING', + createdAt: '2026-06-14T00:00:00Z', + costPerHr: 0.44, + imageName: 'runpod/pytorch', + runtime: { + ports: [{ ip: '203.0.113.10', isIpPublic: true, publicPort: 8888, privatePort: 8888, type: 'http' }], + }, + ...overrides, + }; +} diff --git a/packages/cloud/runpod/src/index.ts b/packages/cloud/runpod/src/index.ts index 33ac9714..54e366d1 100644 --- a/packages/cloud/runpod/src/index.ts +++ b/packages/cloud/runpod/src/index.ts @@ -1,58 +1,230 @@ -import { defineCloud, tokenSetup, type Instance, type Quote, type InstanceSpec } from '@profullstack/sh1pt-core'; +import { + defineCloud, + tokenSetup, + type CloudConnectContext, + type Instance, + type InstanceSpec, + type ProvisionContext, + type Quote, +} from '@profullstack/sh1pt-core'; + +type Numberish = number | string; +type CloudType = 'ALL' | 'COMMUNITY' | 'SECURE'; -// RunPod — GPU-first, pay-by-the-second. GraphQL API. Two pod types: -// - Community Cloud: cheapest, non-SLA, host may reclaim -// - Secure Cloud: SLA, redundant, 1.5–2× the cost interface Config { - apiKey?: string; // stored as RUNPOD_API_KEY secret - cloudType?: 'COMMUNITY' | 'SECURE'; + apiBaseUrl?: string; + cloudType?: CloudType; + gpuTypeId?: string; + imageName?: string; + name?: string; + hourlyPrice?: Numberish; + volumeInGb?: Numberish; + containerDiskInGb?: Numberish; + minVcpuCount?: Numberish; + minMemoryInGb?: Numberish; + dockerArgs?: string; + ports?: string; + volumeMountPath?: string; networkVolumeId?: string; + env?: Record; +} + +interface RunpodGraphqlResponse { + data?: T; + errors?: Array<{ message?: string }>; +} + +interface RunpodGpuType { + id?: string; + displayName?: string; + memoryInGb?: number; + communityPrice?: number; + securePrice?: number; + communitySpotPrice?: number; + secureSpotPrice?: number; +} + +interface RunpodPod { + id?: string; + name?: string; + desiredStatus?: string; + createdAt?: string; + lastStartedAt?: string; + costPerHr?: number; + adjustedCostPerHr?: number; + imageName?: string; + machineId?: string; + gpuCount?: number; + runtime?: { + ports?: Array<{ + ip?: string; + isIpPublic?: boolean; + publicPort?: number; + privatePort?: number; + type?: string; + }>; + }; } +const API = 'https://api.runpod.io/graphql'; + +const POD_FIELDS = ` + id + name + desiredStatus + createdAt + lastStartedAt + costPerHr + adjustedCostPerHr + imageName + machineId + gpuCount + runtime { + ports { + ip + isIpPublic + publicPort + privatePort + type + } + } +`; + export default defineCloud({ id: 'cloud-runpod', label: 'RunPod (GPU)', supports: ['gpu'], - async connect(ctx) { - const key = ctx.secret('RUNPOD_API_KEY'); - if (!key) throw new Error('RUNPOD_API_KEY not set — `sh1pt secret set RUNPOD_API_KEY ...`'); - ctx.log('runpod connected'); - return { accountId: 'runpod-account' }; + async connect(ctx, config) { + const data = await runpodGraphql<{ myself: { id?: string; email?: string } }>( + ctx, + config, + `query Myself { + myself { + id + email + } + }`, + ); + return { accountId: data.myself.id ?? data.myself.email ?? 'runpod-account' }; }, - async quote(ctx, spec) { - ctx.log(`runpod quote · gpu=${spec.gpu?.model} x${spec.gpu?.count ?? 1} · spot=${spec.spotOk ?? false}`); - // TODO: GraphQL query gpuTypes { id, displayName, communityPrice, securePrice } - // Pick the cheapest SKU matching spec.gpu.model + spec.gpu.count. - return { hourly: 0, monthly: 0, currency: 'USD', provider: 'runpod', sku: 'stub', spot: !!spec.spotOk }; + async quote(ctx, spec, config) { + const gpu = requireGpuSpec(spec); + const gpuTypeId = config.gpuTypeId ?? gpu.model; + const hourly = config.hourlyPrice !== undefined + ? nonNegativeNumber(config.hourlyPrice, 'hourlyPrice') * gpu.count + : await quoteFromApi(ctx, spec, config, gpuTypeId); + + return { + hourly, + monthly: hourly * 730, + currency: 'USD', + provider: 'runpod', + sku: `${gpuTypeId} x${gpu.count}`, + spot: !!spec.spotOk, + availabilityZone: config.cloudType ?? 'ALL', + } satisfies Quote; }, - async provision(ctx, spec, config): Promise { - if (!spec.gpu) throw new Error('cloud-runpod: spec.gpu is required'); - if (spec.maxHourlyPrice !== undefined) { - ctx.log(`maxHourlyPrice=${spec.maxHourlyPrice} — quote will be validated before launch`); + async provision(ctx, spec, config) { + const gpu = requireGpuSpec(spec); + const quote = await this.quote(ctx, spec, config); + if (spec.maxHourlyPrice !== undefined && quote.hourly > spec.maxHourlyPrice) { + throw new Error(`RunPod quote ${quote.hourly} USD/hr exceeds maxHourlyPrice ${spec.maxHourlyPrice}`); } - ctx.log(`runpod provision · ${spec.gpu.count}×${spec.gpu.model} · ${config.cloudType ?? 'COMMUNITY'}`); + + const name = safeName(config.name ?? `sh1pt-runpod-${Date.now().toString(36)}`); if (ctx.dryRun) { - return stubInstance('dry-run', 'provisioning', spec); + return podInstance({ + id: `dry-run-${name}`, + name, + desiredStatus: 'CREATED', + createdAt: new Date().toISOString(), + costPerHr: quote.hourly, + gpuCount: gpu.count, + }, quote); } - // TODO: GraphQL mutation podFindAndDeployOnDemand({ gpuCount, gpuTypeId, imageName, ... }) - return stubInstance(`pod_${Date.now()}`, 'provisioning', spec); + + if (!config.imageName) { + throw new Error('config.imageName is required for RunPod provisioning'); + } + + const input = stripUndefined({ + cloudType: config.cloudType ?? 'ALL', + gpuCount: gpu.count, + gpuTypeId: config.gpuTypeId ?? gpu.model, + name, + imageName: config.imageName, + dockerArgs: config.dockerArgs, + ports: config.ports, + volumeInGb: nonNegativeNumber(config.volumeInGb ?? spec.storage ?? 40, 'volumeInGb'), + containerDiskInGb: nonNegativeNumber(config.containerDiskInGb ?? 40, 'containerDiskInGb'), + minVcpuCount: optionalPositiveNumber(config.minVcpuCount ?? spec.cpu, 'minVcpuCount'), + minMemoryInGb: optionalPositiveNumber(config.minMemoryInGb ?? spec.memory, 'minMemoryInGb'), + volumeMountPath: config.volumeMountPath, + networkVolumeId: config.networkVolumeId, + env: envInput(config.env), + }); + + const data = await runpodGraphql<{ podFindAndDeployOnDemand: RunpodPod }>( + ctx, + config, + `mutation DeployPod($input: PodFindAndDeployOnDemandInput) { + podFindAndDeployOnDemand(input: $input) { + ${POD_FIELDS} + } + }`, + { input }, + ); + + return podInstance(data.podFindAndDeployOnDemand, quote); }, - async list() { - return []; + async list(ctx, config) { + const data = await runpodGraphql<{ myself: { pods?: RunpodPod[] } }>( + ctx, + config, + `query Pods { + myself { + pods { + ${POD_FIELDS} + } + } + }`, + ); + return (data.myself.pods ?? []).map((pod) => podInstance(pod)); }, - async destroy(ctx, instanceId) { - ctx.log(`runpod destroy pod=${instanceId}`); - // TODO: GraphQL mutation podTerminate({ podId }) + async destroy(ctx, instanceId, config) { + if (ctx.dryRun) { + ctx.log(`runpod dry-run terminate pod=${instanceId}`); + return; + } + + await runpodGraphql<{ podTerminate: null }>( + ctx, + config, + `mutation TerminatePod($input: PodTerminateInput!) { + podTerminate(input: $input) + }`, + { input: { podId: instanceId } }, + ); }, - async status(ctx, instanceId) { - ctx.log(`runpod status pod=${instanceId}`); - return stubInstance(instanceId, 'running', { kind: 'gpu' }); + async status(ctx, instanceId, config) { + const data = await runpodGraphql<{ pod?: RunpodPod }>( + ctx, + config, + `query Pod($input: PodFilter) { + pod(input: $input) { + ${POD_FIELDS} + } + }`, + { input: { podId: instanceId } }, + ); + if (!data.pod) throw new Error(`RunPod pod not found: ${instanceId}`); + return podInstance(data.pod); }, setup: tokenSetup({ @@ -60,21 +232,187 @@ export default defineCloud({ label: 'RunPod', vendorDocUrl: 'https://www.runpod.io/console/user/settings', steps: [ - 'Open runpod.io → Settings → API Keys', - 'Create API key (read + write scope)', + 'Open runpod.io -> Settings -> API Keys', + 'Create an API key with pod read/write scope', 'Copy the key', - '⚠ GPU instances bill by the second — always use --max-hourly-price', + 'Run: sh1pt secret set RUNPOD_API_KEY ', + 'Set maxHourlyPrice when provisioning GPU pods', ], }), }); -function stubInstance(id: string, status: Instance['status'], spec: Partial): Instance { +async function quoteFromApi( + ctx: CloudConnectContext, + spec: InstanceSpec, + config: Config, + gpuTypeId: string, +): Promise { + const data = await runpodGraphql<{ gpuTypes: RunpodGpuType[] }>( + ctx, + config, + `query GpuTypes($input: GpuTypeFilter) { + gpuTypes(input: $input) { + id + displayName + memoryInGb + communityPrice + securePrice + communitySpotPrice + secureSpotPrice + } + }`, + { input: { id: gpuTypeId } }, + ); + const selected = selectGpuType(data.gpuTypes, gpuTypeId); + const price = priceForGpu(selected, config.cloudType ?? 'ALL', !!spec.spotOk); + return price * (spec.gpu?.count ?? 1); +} + +async function runpodGraphql( + ctx: CloudConnectContext | ProvisionContext, + config: Config, + query: string, + variables?: Record, +): Promise { + const response = await fetch(config.apiBaseUrl ?? API, { + method: 'POST', + headers: { + Authorization: `Bearer ${requireToken(ctx)}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ query, variables }), + }); + + const text = await response.text(); + let payload: RunpodGraphqlResponse; + try { + payload = text ? JSON.parse(text) : {}; + } catch (error) { + if (response.ok) throw error; + throw new Error(`RunPod GraphQL failed: ${response.status} ${text || response.statusText}`); + } + + if (!response.ok) { + throw new Error(`RunPod GraphQL failed: ${response.status} ${graphqlError(payload) || response.statusText}`); + } + if (payload.errors?.length) { + throw new Error(`RunPod GraphQL failed: ${graphqlError(payload)}`); + } + if (!payload.data) { + throw new Error('RunPod GraphQL response did not include data'); + } + return payload.data; +} + +function requireToken(ctx: CloudConnectContext | ProvisionContext): string { + const token = ctx.secret('RUNPOD_API_KEY'); + if (!token) throw new Error('RUNPOD_API_KEY not set - run: sh1pt secret set RUNPOD_API_KEY '); + return token; +} + +function requireGpuSpec(spec: InstanceSpec): NonNullable { + if (spec.kind !== 'gpu') throw new Error(`cloud-runpod supports gpu specs only, got ${spec.kind}`); + if (!spec.gpu?.model) throw new Error('cloud-runpod: spec.gpu.model is required'); + if (!spec.gpu.count || spec.gpu.count < 1) throw new Error('cloud-runpod: spec.gpu.count must be >= 1'); + return spec.gpu; +} + +function selectGpuType(gpus: RunpodGpuType[], requested: string): RunpodGpuType { + const normalized = normalize(requested); + const selected = gpus.find((gpu) => normalize(gpu.id) === normalized || normalize(gpu.displayName) === normalized) ?? + gpus.find((gpu) => normalize(gpu.id).includes(normalized) || normalize(gpu.displayName).includes(normalized)) ?? + gpus[0]; + + if (!selected) throw new Error(`RunPod GPU type not found: ${requested}`); + return selected; +} + +function priceForGpu(gpu: RunpodGpuType, cloudType: CloudType, spot: boolean): number { + const community = spot ? gpu.communitySpotPrice : gpu.communityPrice; + const secure = spot ? gpu.secureSpotPrice : gpu.securePrice; + const prices = cloudType === 'COMMUNITY' + ? [community] + : cloudType === 'SECURE' + ? [secure] + : [community, secure]; + const price = prices.find((value): value is number => typeof value === 'number' && value >= 0); + if (price === undefined) { + throw new Error(`RunPod GPU price not available for ${gpu.id ?? gpu.displayName ?? 'selected GPU'}`); + } + return price; +} + +function podInstance(pod: RunpodPod, quote?: Quote): Instance { + const id = pod.id; + if (!id) throw new Error('RunPod pod response did not include an id'); + const publicPort = pod.runtime?.ports?.find((port) => port.isIpPublic && port.ip); + const hourlyRate = pod.costPerHr ?? pod.adjustedCostPerHr ?? quote?.hourly ?? 0; + return { id, - kind: spec.kind ?? 'gpu', - status, - createdAt: new Date().toISOString(), - hourlyRate: 0, - currency: 'USD', + kind: 'gpu', + status: podStatus(pod.desiredStatus), + publicIp: publicPort?.ip, + createdAt: pod.createdAt ?? pod.lastStartedAt ?? new Date().toISOString(), + hourlyRate, + currency: quote?.currency ?? 'USD', + sku: pod.imageName ?? quote?.sku, + tags: pod.name ? [pod.name] : undefined, }; } + +function podStatus(status: string | undefined): Instance['status'] { + switch (status) { + case 'RUNNING': + return 'running'; + case 'EXITED': + case 'STOPPED': + return 'stopped'; + case 'TERMINATED': + return 'destroyed'; + case 'ERROR': + case 'FAILED': + return 'failed'; + default: + return 'provisioning'; + } +} + +function envInput(env: Record | undefined): Array<{ key: string; value: string }> | undefined { + if (!env) return undefined; + return Object.entries(env).map(([key, value]) => ({ key, value })); +} + +function safeName(value: string): string { + return value + .toLowerCase() + .replace(/[^a-z0-9-]/g, '-') + .replace(/-+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 60) || 'sh1pt-runpod'; +} + +function optionalPositiveNumber(value: Numberish | undefined, label: string): number | undefined { + if (value === undefined) return undefined; + const number = Number(value); + if (!Number.isFinite(number) || number <= 0) throw new Error(`RunPod ${label} must be a positive number`); + return number; +} + +function nonNegativeNumber(value: Numberish, label: string): number { + const number = Number(value); + if (!Number.isFinite(number) || number < 0) throw new Error(`RunPod ${label} must be a non-negative number`); + return number; +} + +function graphqlError(payload: RunpodGraphqlResponse): string { + return payload.errors?.map((error) => error.message).filter(Boolean).join('; ') ?? ''; +} + +function stripUndefined(value: Record): Record { + return Object.fromEntries(Object.entries(value).filter(([, item]) => item !== undefined)); +} + +function normalize(value: string | undefined): string { + return (value ?? '').toLowerCase().replace(/[^a-z0-9]+/g, ''); +} From 061285b006cba153402ff8cf6f59f5ae691d23f3 Mon Sep 17 00:00:00 2001 From: caydyan Date: Sun, 14 Jun 2026 11:16:51 +0800 Subject: [PATCH 2/6] Address RunPod adapter review feedback --- packages/cloud/runpod/README.md | 2 + packages/cloud/runpod/src/index.test.ts | 49 ++++++++++++++++++++++++ packages/cloud/runpod/src/index.ts | 50 ++++++++++++++++++++----- 3 files changed, 92 insertions(+), 9 deletions(-) diff --git a/packages/cloud/runpod/README.md b/packages/cloud/runpod/README.md index 1c4e9b73..84e798c5 100644 --- a/packages/cloud/runpod/README.md +++ b/packages/cloud/runpod/README.md @@ -6,9 +6,11 @@ Provides the RunPod (GPU) cloud provider adapter for sh1pt scale and deploy work - Connects to RunPod through the GraphQL API with `RUNPOD_API_KEY`. - Quotes GPU pods from either explicit `hourlyPrice` config or RunPod `gpuTypes` pricing. +- Uses the highest available price when `cloudType` is `ALL` so `maxHourlyPrice` is not checked against a lower community-only estimate. - Provisions on-demand GPU pods with `podFindAndDeployOnDemand`. - Lists account pods, checks a pod by ID, and terminates pods with `podTerminate`. - Requires `imageName` for real provisioning and supports `maxHourlyPrice` guardrails before any pod is created. +- Does not call RunPod during dry-run provisioning; set `hourlyPrice` to simulate a non-zero cost in dry-run output. Common config fields: diff --git a/packages/cloud/runpod/src/index.test.ts b/packages/cloud/runpod/src/index.test.ts index 346f3e2c..1d5a626d 100644 --- a/packages/cloud/runpod/src/index.test.ts +++ b/packages/cloud/runpod/src/index.test.ts @@ -25,6 +25,12 @@ describe('RunPod cloud adapter', () => { await expect(adapter.connect(connectCtx(), {})).resolves.toEqual({ accountId: 'user-1' }); }); + it('reports a scoped account error when RunPod returns no account', async () => { + vi.stubGlobal('fetch', vi.fn(async () => graphql({ myself: null }))); + + await expect(adapter.connect(connectCtx(), {})).rejects.toThrow('RunPod account not available'); + }); + it('quotes from configured hourly pricing without calling RunPod', async () => { const fetchMock = vi.fn(); vi.stubGlobal('fetch', fetchMock); @@ -67,6 +73,25 @@ describe('RunPod cloud adapter', () => { expect(quote.spot).toBe(true); }); + it('uses the highest available price for ALL cloud type quotes', async () => { + vi.stubGlobal('fetch', vi.fn(async () => graphql({ + gpuTypes: [{ + id: 'NVIDIA RTX A6000', + displayName: 'RTX A6000', + communityPrice: 0.44, + securePrice: 0.79, + }], + }))); + + const quote = await adapter.quote( + connectCtx(), + { kind: 'gpu', gpu: { model: 'RTX A6000', count: 1 } }, + {}, + ); + + expect(quote.hourly).toBe(0.79); + }); + it('creates a RunPod pod through GraphQL', async () => { const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { const body = JSON.parse(String(init.body)); @@ -133,6 +158,24 @@ describe('RunPod cloud adapter', () => { expect(fetchMock).not.toHaveBeenCalled(); }); + it('does not call RunPod for dry-run provisioning without hourlyPrice', async () => { + const fetchMock = vi.fn(); + vi.stubGlobal('fetch', fetchMock); + + const instance = await adapter.provision( + provisionCtx(true), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 } }, + { name: 'preview' }, + ); + + expect(instance).toMatchObject({ + id: 'dry-run-preview', + status: 'provisioning', + hourlyRate: 0, + }); + expect(fetchMock).not.toHaveBeenCalled(); + }); + it('honors maxHourlyPrice before provisioning', async () => { const fetchMock = vi.fn(); vi.stubGlobal('fetch', fetchMock); @@ -163,6 +206,12 @@ describe('RunPod cloud adapter', () => { ]); }); + it('reports a scoped account error when listing has no account', async () => { + vi.stubGlobal('fetch', vi.fn(async () => graphql({ myself: null }))); + + await expect(adapter.list(connectCtx(), {})).rejects.toThrow('RunPod account not available'); + }); + it('checks status for a single pod', async () => { const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { const body = JSON.parse(String(init.body)); diff --git a/packages/cloud/runpod/src/index.ts b/packages/cloud/runpod/src/index.ts index 54e366d1..7be0493e 100644 --- a/packages/cloud/runpod/src/index.ts +++ b/packages/cloud/runpod/src/index.ts @@ -96,7 +96,7 @@ export default defineCloud({ supports: ['gpu'], async connect(ctx, config) { - const data = await runpodGraphql<{ myself: { id?: string; email?: string } }>( + const data = await runpodGraphql<{ myself?: { id?: string; email?: string } | null }>( ctx, config, `query Myself { @@ -106,7 +106,8 @@ export default defineCloud({ } }`, ); - return { accountId: data.myself.id ?? data.myself.email ?? 'runpod-account' }; + const account = requireAccount(data.myself); + return { accountId: account.id ?? account.email ?? 'runpod-account' }; }, async quote(ctx, spec, config) { @@ -129,7 +130,9 @@ export default defineCloud({ async provision(ctx, spec, config) { const gpu = requireGpuSpec(spec); - const quote = await this.quote(ctx, spec, config); + const quote = ctx.dryRun && config.hourlyPrice === undefined + ? quoteFromHourly(spec, config, gpu, 0) + : await this.quote(ctx, spec, config); if (spec.maxHourlyPrice !== undefined && quote.hourly > spec.maxHourlyPrice) { throw new Error(`RunPod quote ${quote.hourly} USD/hr exceeds maxHourlyPrice ${spec.maxHourlyPrice}`); } @@ -182,7 +185,7 @@ export default defineCloud({ }, async list(ctx, config) { - const data = await runpodGraphql<{ myself: { pods?: RunpodPod[] } }>( + const data = await runpodGraphql<{ myself?: { pods?: RunpodPod[] } | null }>( ctx, config, `query Pods { @@ -193,7 +196,7 @@ export default defineCloud({ } }`, ); - return (data.myself.pods ?? []).map((pod) => podInstance(pod)); + return (requireAccount(data.myself).pods ?? []).map((pod) => podInstance(pod)); }, async destroy(ctx, instanceId, config) { @@ -268,6 +271,24 @@ async function quoteFromApi( return price * (spec.gpu?.count ?? 1); } +function quoteFromHourly( + spec: InstanceSpec, + config: Config, + gpu: NonNullable, + hourly: number, +): Quote { + const gpuTypeId = config.gpuTypeId ?? gpu.model; + return { + hourly, + monthly: hourly * 730, + currency: 'USD', + provider: 'runpod', + sku: `${gpuTypeId} x${gpu.count}`, + spot: !!spec.spotOk, + availabilityZone: config.cloudType ?? 'ALL', + }; +} + async function runpodGraphql( ctx: CloudConnectContext | ProvisionContext, config: Config, @@ -331,17 +352,28 @@ function priceForGpu(gpu: RunpodGpuType, cloudType: CloudType, spot: boolean): n const community = spot ? gpu.communitySpotPrice : gpu.communityPrice; const secure = spot ? gpu.secureSpotPrice : gpu.securePrice; const prices = cloudType === 'COMMUNITY' - ? [community] + ? validPrices([community]) : cloudType === 'SECURE' - ? [secure] - : [community, secure]; - const price = prices.find((value): value is number => typeof value === 'number' && value >= 0); + ? validPrices([secure]) + : validPrices([community, secure]); + const price = cloudType === 'ALL' && prices.length > 0 ? Math.max(...prices) : prices[0]; if (price === undefined) { throw new Error(`RunPod GPU price not available for ${gpu.id ?? gpu.displayName ?? 'selected GPU'}`); } return price; } +function validPrices(values: Array): number[] { + return values.filter((value): value is number => typeof value === 'number' && value >= 0); +} + +function requireAccount(account: T | null | undefined): T { + if (!account) { + throw new Error('RunPod account not available; check RUNPOD_API_KEY permissions'); + } + return account; +} + function podInstance(pod: RunpodPod, quote?: Quote): Instance { const id = pod.id; if (!id) throw new Error('RunPod pod response did not include an id'); From 01d613814a4d9f9ae4ede40ff8b08aca2a27e390 Mon Sep 17 00:00:00 2001 From: caydyan Date: Sun, 14 Jun 2026 11:21:50 +0800 Subject: [PATCH 3/6] Avoid default RunPod network volume --- packages/cloud/runpod/README.md | 3 ++- packages/cloud/runpod/src/index.test.ts | 33 +++++++++++++++++++++++++ packages/cloud/runpod/src/index.ts | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/packages/cloud/runpod/README.md b/packages/cloud/runpod/README.md index 84e798c5..25cdcc81 100644 --- a/packages/cloud/runpod/README.md +++ b/packages/cloud/runpod/README.md @@ -18,7 +18,8 @@ Common config fields: - `imageName`: container image for real pod creation, such as `runpod/pytorch`. - `cloudType`: `ALL`, `COMMUNITY`, or `SECURE`. - `hourlyPrice`: optional explicit offline quote value per GPU. -- `ports`, `dockerArgs`, `volumeInGb`, `containerDiskInGb`, `minVcpuCount`, `minMemoryInGb`, `volumeMountPath`, `networkVolumeId`, and `env`. +- `volumeInGb`: optional network volume size; omitted unless explicitly requested through config or the spec storage field. +- `ports`, `dockerArgs`, `containerDiskInGb`, `minVcpuCount`, `minMemoryInGb`, `volumeMountPath`, `networkVolumeId`, and `env`. ## Package diff --git a/packages/cloud/runpod/src/index.test.ts b/packages/cloud/runpod/src/index.test.ts index 1d5a626d..253adf9e 100644 --- a/packages/cloud/runpod/src/index.test.ts +++ b/packages/cloud/runpod/src/index.test.ts @@ -146,6 +146,39 @@ describe('RunPod cloud adapter', () => { }); }); + it('omits network volume storage unless explicitly requested', async () => { + const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { + const body = JSON.parse(String(init.body)); + expect(body.query).toContain('podFindAndDeployOnDemand'); + expect(body.variables.input).toEqual(expect.objectContaining({ + gpuTypeId: 'NVIDIA RTX A6000', + imageName: 'runpod/pytorch', + containerDiskInGb: 40, + })); + expect(body.variables.input).not.toHaveProperty('volumeInGb'); + return graphql({ + podFindAndDeployOnDemand: pod({ + id: 'pod-1', + imageName: 'runpod/pytorch', + costPerHr: 0.5, + }), + }); + }); + vi.stubGlobal('fetch', fetchMock); + + await adapter.provision( + provisionCtx(), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 } }, + { + gpuTypeId: 'NVIDIA RTX A6000', + imageName: 'runpod/pytorch', + hourlyPrice: 0.5, + }, + ); + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + it('requires imageName before creating a real pod', async () => { const fetchMock = vi.fn(); vi.stubGlobal('fetch', fetchMock); diff --git a/packages/cloud/runpod/src/index.ts b/packages/cloud/runpod/src/index.ts index 7be0493e..3d21394e 100644 --- a/packages/cloud/runpod/src/index.ts +++ b/packages/cloud/runpod/src/index.ts @@ -161,7 +161,7 @@ export default defineCloud({ imageName: config.imageName, dockerArgs: config.dockerArgs, ports: config.ports, - volumeInGb: nonNegativeNumber(config.volumeInGb ?? spec.storage ?? 40, 'volumeInGb'), + volumeInGb: optionalPositiveNumber(config.volumeInGb ?? spec.storage, 'volumeInGb'), containerDiskInGb: nonNegativeNumber(config.containerDiskInGb ?? 40, 'containerDiskInGb'), minVcpuCount: optionalPositiveNumber(config.minVcpuCount ?? spec.cpu, 'minVcpuCount'), minMemoryInGb: optionalPositiveNumber(config.minMemoryInGb ?? spec.memory, 'minMemoryInGb'), From 2013f9a7b520ebbd4f1c09088deac27024c5876a Mon Sep 17 00:00:00 2001 From: caydyan Date: Sun, 14 Jun 2026 11:24:19 +0800 Subject: [PATCH 4/6] Handle missing RunPod GPU type results --- packages/cloud/runpod/README.md | 1 + packages/cloud/runpod/src/index.test.ts | 22 ++++++++++++++++++++++ packages/cloud/runpod/src/index.ts | 7 +++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/packages/cloud/runpod/README.md b/packages/cloud/runpod/README.md index 25cdcc81..cc419d13 100644 --- a/packages/cloud/runpod/README.md +++ b/packages/cloud/runpod/README.md @@ -7,6 +7,7 @@ Provides the RunPod (GPU) cloud provider adapter for sh1pt scale and deploy work - Connects to RunPod through the GraphQL API with `RUNPOD_API_KEY`. - Quotes GPU pods from either explicit `hourlyPrice` config or RunPod `gpuTypes` pricing. - Uses the highest available price when `cloudType` is `ALL` so `maxHourlyPrice` is not checked against a lower community-only estimate. +- Fails clearly when the requested GPU type is not returned instead of silently selecting another GPU. - Provisions on-demand GPU pods with `podFindAndDeployOnDemand`. - Lists account pods, checks a pod by ID, and terminates pods with `podTerminate`. - Requires `imageName` for real provisioning and supports `maxHourlyPrice` guardrails before any pod is created. diff --git a/packages/cloud/runpod/src/index.test.ts b/packages/cloud/runpod/src/index.test.ts index 253adf9e..4a96d100 100644 --- a/packages/cloud/runpod/src/index.test.ts +++ b/packages/cloud/runpod/src/index.test.ts @@ -92,6 +92,28 @@ describe('RunPod cloud adapter', () => { expect(quote.hourly).toBe(0.79); }); + it('reports a clear error when RunPod returns no GPU types', async () => { + vi.stubGlobal('fetch', vi.fn(async () => graphql({ gpuTypes: null }))); + + await expect(adapter.quote( + connectCtx(), + { kind: 'gpu', gpu: { model: 'RTX A6000', count: 1 } }, + {}, + )).rejects.toThrow('RunPod GPU type not found: RTX A6000'); + }); + + it('does not silently fall back to a different GPU type', async () => { + vi.stubGlobal('fetch', vi.fn(async () => graphql({ + gpuTypes: [{ id: 'NVIDIA A100', displayName: 'A100', communityPrice: 1.25 }], + }))); + + await expect(adapter.quote( + connectCtx(), + { kind: 'gpu', gpu: { model: 'RTX A6000', count: 1 } }, + {}, + )).rejects.toThrow('RunPod GPU type not found: RTX A6000'); + }); + it('creates a RunPod pod through GraphQL', async () => { const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { const body = JSON.parse(String(init.body)); diff --git a/packages/cloud/runpod/src/index.ts b/packages/cloud/runpod/src/index.ts index 3d21394e..7f98f56d 100644 --- a/packages/cloud/runpod/src/index.ts +++ b/packages/cloud/runpod/src/index.ts @@ -250,7 +250,7 @@ async function quoteFromApi( config: Config, gpuTypeId: string, ): Promise { - const data = await runpodGraphql<{ gpuTypes: RunpodGpuType[] }>( + const data = await runpodGraphql<{ gpuTypes?: RunpodGpuType[] | null }>( ctx, config, `query GpuTypes($input: GpuTypeFilter) { @@ -266,7 +266,7 @@ async function quoteFromApi( }`, { input: { id: gpuTypeId } }, ); - const selected = selectGpuType(data.gpuTypes, gpuTypeId); + const selected = selectGpuType(data.gpuTypes ?? [], gpuTypeId); const price = priceForGpu(selected, config.cloudType ?? 'ALL', !!spec.spotOk); return price * (spec.gpu?.count ?? 1); } @@ -341,8 +341,7 @@ function requireGpuSpec(spec: InstanceSpec): NonNullable { function selectGpuType(gpus: RunpodGpuType[], requested: string): RunpodGpuType { const normalized = normalize(requested); const selected = gpus.find((gpu) => normalize(gpu.id) === normalized || normalize(gpu.displayName) === normalized) ?? - gpus.find((gpu) => normalize(gpu.id).includes(normalized) || normalize(gpu.displayName).includes(normalized)) ?? - gpus[0]; + gpus.find((gpu) => normalize(gpu.id).includes(normalized) || normalize(gpu.displayName).includes(normalized)); if (!selected) throw new Error(`RunPod GPU type not found: ${requested}`); return selected; From 0477848e574b641626c6c627869b45e5835e2838 Mon Sep 17 00:00:00 2001 From: caydyan Date: Sun, 14 Jun 2026 11:31:31 +0800 Subject: [PATCH 5/6] Use on-demand RunPod pricing for guardrails --- packages/cloud/runpod/README.md | 1 + packages/cloud/runpod/src/index.test.ts | 29 ++++++++++++++++++++++--- packages/cloud/runpod/src/index.ts | 12 +++++----- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/packages/cloud/runpod/README.md b/packages/cloud/runpod/README.md index cc419d13..605e7056 100644 --- a/packages/cloud/runpod/README.md +++ b/packages/cloud/runpod/README.md @@ -7,6 +7,7 @@ Provides the RunPod (GPU) cloud provider adapter for sh1pt scale and deploy work - Connects to RunPod through the GraphQL API with `RUNPOD_API_KEY`. - Quotes GPU pods from either explicit `hourlyPrice` config or RunPod `gpuTypes` pricing. - Uses the highest available price when `cloudType` is `ALL` so `maxHourlyPrice` is not checked against a lower community-only estimate. +- Uses on-demand GPU prices for guardrails; spot bids are not sent by this adapter. - Fails clearly when the requested GPU type is not returned instead of silently selecting another GPU. - Provisions on-demand GPU pods with `podFindAndDeployOnDemand`. - Lists account pods, checks a pod by ID, and terminates pods with `podTerminate`. diff --git a/packages/cloud/runpod/src/index.test.ts b/packages/cloud/runpod/src/index.test.ts index 4a96d100..b43084d9 100644 --- a/packages/cloud/runpod/src/index.test.ts +++ b/packages/cloud/runpod/src/index.test.ts @@ -52,7 +52,7 @@ describe('RunPod cloud adapter', () => { expect(fetchMock).not.toHaveBeenCalled(); }); - it('quotes from RunPod GPU type pricing when hourlyPrice is omitted', async () => { + it('quotes from RunPod on-demand GPU type pricing when hourlyPrice is omitted', async () => { vi.stubGlobal('fetch', vi.fn(async () => graphql({ gpuTypes: [{ id: 'NVIDIA RTX A6000', @@ -69,8 +69,8 @@ describe('RunPod cloud adapter', () => { { cloudType: 'COMMUNITY' }, ); - expect(quote.hourly).toBe(0.44); - expect(quote.spot).toBe(true); + expect(quote.hourly).toBe(0.88); + expect(quote.spot).toBe(false); }); it('uses the highest available price for ALL cloud type quotes', async () => { @@ -243,6 +243,29 @@ describe('RunPod cloud adapter', () => { expect(fetchMock).not.toHaveBeenCalled(); }); + it('uses on-demand pricing for maxHourlyPrice even when spot is allowed', async () => { + const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { + const body = JSON.parse(String(init.body)); + expect(body.query).toContain('GpuTypes'); + return graphql({ + gpuTypes: [{ + id: 'NVIDIA RTX A6000', + displayName: 'RTX A6000', + communityPrice: 0.44, + communitySpotPrice: 0.22, + }], + }); + }); + vi.stubGlobal('fetch', fetchMock); + + await expect(adapter.provision( + provisionCtx(), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 }, spotOk: true, maxHourlyPrice: 0.3 }, + { imageName: 'runpod/pytorch', cloudType: 'COMMUNITY' }, + )).rejects.toThrow('exceeds maxHourlyPrice'); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + it('lists pods from the authenticated account', async () => { vi.stubGlobal('fetch', vi.fn(async () => graphql({ myself: { diff --git a/packages/cloud/runpod/src/index.ts b/packages/cloud/runpod/src/index.ts index 7f98f56d..b3e9d838 100644 --- a/packages/cloud/runpod/src/index.ts +++ b/packages/cloud/runpod/src/index.ts @@ -123,7 +123,7 @@ export default defineCloud({ currency: 'USD', provider: 'runpod', sku: `${gpuTypeId} x${gpu.count}`, - spot: !!spec.spotOk, + spot: false, availabilityZone: config.cloudType ?? 'ALL', } satisfies Quote; }, @@ -267,7 +267,7 @@ async function quoteFromApi( { input: { id: gpuTypeId } }, ); const selected = selectGpuType(data.gpuTypes ?? [], gpuTypeId); - const price = priceForGpu(selected, config.cloudType ?? 'ALL', !!spec.spotOk); + const price = priceForGpu(selected, config.cloudType ?? 'ALL'); return price * (spec.gpu?.count ?? 1); } @@ -284,7 +284,7 @@ function quoteFromHourly( currency: 'USD', provider: 'runpod', sku: `${gpuTypeId} x${gpu.count}`, - spot: !!spec.spotOk, + spot: false, availabilityZone: config.cloudType ?? 'ALL', }; } @@ -347,9 +347,9 @@ function selectGpuType(gpus: RunpodGpuType[], requested: string): RunpodGpuType return selected; } -function priceForGpu(gpu: RunpodGpuType, cloudType: CloudType, spot: boolean): number { - const community = spot ? gpu.communitySpotPrice : gpu.communityPrice; - const secure = spot ? gpu.secureSpotPrice : gpu.securePrice; +function priceForGpu(gpu: RunpodGpuType, cloudType: CloudType): number { + const community = gpu.communityPrice; + const secure = gpu.securePrice; const prices = cloudType === 'COMMUNITY' ? validPrices([community]) : cloudType === 'SECURE' From 5465626db76592a1913012adcad806d6712cca29 Mon Sep 17 00:00:00 2001 From: caydyan Date: Sun, 14 Jun 2026 11:37:56 +0800 Subject: [PATCH 6/6] Guard empty RunPod provision response --- packages/cloud/runpod/src/index.test.ts | 16 ++++++++++++++++ packages/cloud/runpod/src/index.ts | 5 ++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/cloud/runpod/src/index.test.ts b/packages/cloud/runpod/src/index.test.ts index b43084d9..90bd06e8 100644 --- a/packages/cloud/runpod/src/index.test.ts +++ b/packages/cloud/runpod/src/index.test.ts @@ -168,6 +168,22 @@ describe('RunPod cloud adapter', () => { }); }); + it('reports unavailable capacity when RunPod returns no provisioned pod', async () => { + const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { + const body = JSON.parse(String(init.body)); + expect(body.query).toContain('podFindAndDeployOnDemand'); + return graphql({ podFindAndDeployOnDemand: null }); + }); + vi.stubGlobal('fetch', fetchMock); + + await expect(adapter.provision( + provisionCtx(), + { kind: 'gpu', gpu: { model: 'NVIDIA RTX A6000', count: 1 } }, + { gpuTypeId: 'NVIDIA RTX A6000', imageName: 'runpod/pytorch', hourlyPrice: 0.5 }, + )).rejects.toThrow('RunPod pod was not provisioned'); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + it('omits network volume storage unless explicitly requested', async () => { const fetchMock = vi.fn(async (_url: string, init: RequestInit) => { const body = JSON.parse(String(init.body)); diff --git a/packages/cloud/runpod/src/index.ts b/packages/cloud/runpod/src/index.ts index b3e9d838..6fb11a4f 100644 --- a/packages/cloud/runpod/src/index.ts +++ b/packages/cloud/runpod/src/index.ts @@ -170,7 +170,7 @@ export default defineCloud({ env: envInput(config.env), }); - const data = await runpodGraphql<{ podFindAndDeployOnDemand: RunpodPod }>( + const data = await runpodGraphql<{ podFindAndDeployOnDemand?: RunpodPod | null }>( ctx, config, `mutation DeployPod($input: PodFindAndDeployOnDemandInput) { @@ -181,6 +181,9 @@ export default defineCloud({ { input }, ); + if (!data.podFindAndDeployOnDemand) { + throw new Error('RunPod pod was not provisioned; requested GPU capacity may be unavailable'); + } return podInstance(data.podFindAndDeployOnDemand, quote); },