diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 40b59b930a..b8e3768d9a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -81,7 +81,7 @@ jobs: - name: Install agent-sandbox run: | kubectl apply -f "https://github.com/kubernetes-sigs/agent-sandbox/releases/download/${AGENT_SANDBOX_VERSION}/manifest.yaml" - kubectl wait --for=condition=Established crd/sandboxes.agents.x-k8s.io --timeout=90s + timeout 90s bash -c 'until [ "$(kubectl get crd sandboxes.agents.x-k8s.io -o jsonpath="{.status.conditions[?(@.type==\"Established\")].status}" 2>/dev/null)" = "True" ]; do sleep 1; done' kubectl rollout status deployment/agent-sandbox-controller -n agent-sandbox-system --timeout=120s kubectl wait --for=condition=Ready pod -l app=agent-sandbox-controller -n agent-sandbox-system --timeout=120s diff --git a/docs/substrate-agentharness-lifecycle.md b/docs/substrate-agentharness-lifecycle.md new file mode 100644 index 0000000000..f01a18fc31 --- /dev/null +++ b/docs/substrate-agentharness-lifecycle.md @@ -0,0 +1,60 @@ +# Substrate AgentHarness Lifecycle + +This branch should use a single ownership model for `runtime: substrate` harnesses. + +## Ownership + +- Platform/Helm owns `WorkerPool` capacity. +- kagent owns the generated per-harness `ActorTemplate`. +- kagent owns the per-harness actor lifecycle through `ate-api`. +- Substrate owns the `WorkerPool` deployment and the `ActorTemplate` golden snapshot process. + +kagent should not create or delete `WorkerPool` resources from the `AgentHarness` reconciler. A chart may optionally install a default `WorkerPool`, and the controller may use that default when `spec.substrate.workerPoolRef` is unset. + +## Spec Shape + +`AgentHarness.spec.substrate` should contain only harness-level inputs: + +- `workerPoolRef`, optional; falls back to the configured controller default. +- `snapshotsConfig`, optional; defaults to `gs://ate-snapshots//`. +- `workloadImage`, optional. 
+- exactly one of `gatewayToken` or `gatewayTokenSecretRef`. + +There is no `actorTemplateRef`. kagent always generates the `ActorTemplate`, so adopting an external template is not part of the workflow. + +## Status + +Use top-level Kubernetes conditions for progress: + +- `Accepted` +- `ActorTemplateReady` +- `ActorReady` +- `Ready` + +`Ready` is the aggregate condition. Specific blockers should be reflected in `reason` and `message`. + +Do not store ownership booleans or cleanup markers in annotations or status. Ownership is deterministic: + +- `WorkerPool` is external. +- generated `ActorTemplate` is owned by the `AgentHarness` through an owner reference. + +## Reconcile + +The substrate reconcile path should: + +1. Resolve `workerPoolRef` from spec or controller default. +2. Verify the `WorkerPool` exists. +3. Create or update the generated `ActorTemplate` with an owner reference to the `AgentHarness`. +4. Wait for `ActorTemplate.status.phase == Ready`. +5. Create or resume the actor through `ate-api`. +6. Mark `ActorReady` and aggregate `Ready`. + +## Delete + +The finalizer should: + +1. Delete the harness actor recorded in `status.backendRef.id`. +2. Read the generated `ActorTemplate` and delete `status.goldenActorID`, if present. +3. Remove the finalizer. + +Kubernetes garbage collection deletes the generated `ActorTemplate` through the owner reference. kagent does not delete `WorkerPool`. diff --git a/examples/substrate-openclaw/README.md b/examples/substrate-openclaw/README.md new file mode 100644 index 0000000000..3ddfc5cd99 --- /dev/null +++ b/examples/substrate-openclaw/README.md @@ -0,0 +1,144 @@ +# OpenClaw on Agent Substrate + +## 1. Install Substrate on your Kind cluster + +You can clone the kagent fork of substrate [here](https://github.com/kagent-dev/substrate). + +These instructions use a Kind cluster called `kind` (`KIND_CLUSTER_NAME=kind`). 
+ +```bash +cd substrate + +./hack/create-kind-cluster.sh +./hack/install-ate-kind.sh --deploy-ate-system +``` + +`--deploy-ate-system` installs the **control plane only** (ate-api, ate-controller, atelet, atenet, …). Your registry catalog will show `ateapi-*`, `atelet-*`, etc., but **not** ateom until you build it. + +Build and push **ateom-gvisor** (required for the WorkerPool `ateomImage`): + +```bash +# build the ateom-gvisor image from the substrate repo root +export KO_DOCKER_REPO=localhost:5001 +export KO_DEFAULTPLATFORMS=linux/$(go env GOARCH) +./hack/run-tool.sh ko build -B ./cmd/ateom-gvisor +``` + +## kagent AgentHarness with substrate runtime + +kagent generates a per-harness `ActorTemplate` and uses an existing `WorkerPool`. + +Install kagent (Substrate must already be running in the cluster): + +```bash +export KIND_CLUSTER_NAME=kind +make helm-install KAGENT_HELM_EXTRA_ARGS="\ + --set controller.substrate.enabled=true \ + --set controller.substrate.ateApiEndpoint=dns:///api.ate-system.svc:443 \ + --set controller.substrate.ateApiInsecure=true \ + --set substrateWorkerPool.create=true \ + --set substrateWorkerPool.ateomImage=localhost:5001/ateom-gvisor:latest" +``` + +The generated `ActorTemplate` uses `controller.substrate.pauseImage`, `controller.substrate.runscAMD64URL`, `controller.substrate.runscAMD64SHA256`, `controller.substrate.runscARM64URL`, and `controller.substrate.runscARM64SHA256` from the Helm values. Override them with `--set` or a values file when you need to pin a different gVisor build. + +Create a harness. If `snapshotsConfig` is omitted, kagent defaults it to `gs://ate-snapshots/<namespace>/<name>`. 
+ +- **Worker pool** — reference an existing pool (`workerPoolRef`) or configure a controller default WorkerPool +- **Gateway token** — required per harness with either `gatewayToken` or `gatewayTokenSecretRef` + +```yaml +apiVersion: kagent.dev/v1alpha2 +kind: AgentHarness +metadata: + name: peterj-claw + namespace: kagent +spec: + runtime: substrate + backend: openclaw + description: OpenClaw on Agent Substrate + modelConfigRef: default-model-config + substrate: + # Optional: defaults to gs://ate-snapshots/kagent/peterj-claw + # snapshotsConfig: + # location: gs://ate-snapshots/kagent/peterj-claw + + # Required unless the controller has a default WorkerPool configured. + workerPoolRef: + name: kagent-default + + # Required: configure the OpenClaw gateway token for this harness. + # Use either gatewayToken or gatewayTokenSecretRef. The Secret must contain key "token". + gatewayToken: test-token + + # gatewayTokenSecretRef: + # name: openclaw-gateway-token + + # Optional: override the sandbox image used in the ActorTemplate (must be digest-pinned). 
+ # workloadImage: ghcr.io/kagent-dev/nemoclaw/sandbox-base@sha256:d52bee415dc4c0dba7164f9eabe727574c056d4f211781f20af249707883a3b4 +``` + +kagent creates an `ActorTemplate` that looks roughly like this: + +```yaml +apiVersion: ate.dev/v1alpha1 +kind: ActorTemplate +metadata: + name: peterj-claw + namespace: kagent + labels: + app.kubernetes.io/managed-by: kagent + kagent.dev/agent-harness: peterj-claw +spec: + pauseImage: gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da + runsc: + amd64: + url: gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc + sha256Hash: a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63 + arm64: + url: gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc + sha256Hash: 1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9 + workerPoolRef: + name: kagent-default + namespace: kagent + snapshotsConfig: + location: gs://ate-snapshots/kagent/peterj-claw + containers: + - name: openclaw + image: ghcr.io/kagent-dev/nemoclaw/sandbox-base@sha256:d52bee415dc4c0dba7164f9eabe727574c056d4f211781f20af249707883a3b4 + ports: + - containerPort: 80 + command: + - /bin/sh + - -c + - | + # Generated by kagent: + # 1. writes ~/.openclaw/openclaw.json from modelConfigRef/channels/gateway token + # 2. configures gateway.controlUi.basePath for the kagent proxy path + # 3. starts `openclaw gateway run --port 80 --allow-unconfigured` + # 4. waits for the gateway and tails the log + env: + - name: HOME + value: /root +``` + +The generated `command` contains a base64-encoded `openclaw.json`, so the live object will be more verbose than the abbreviated example above. `pauseImage`, runsc URLs and hashes, and the default workload image come from controller/Helm configuration unless overridden on the `AgentHarness`; the gateway token comes from `spec.substrate.gatewayToken` or `gatewayTokenSecretRef`. 
kagent also sets `gateway.controlUi.basePath` to `/api/agentharnesses/<namespace>/<name>/gateway` so OpenClaw serves the Control UI under the same path kagent proxies. + +When `modelConfigRef` or `spec.channels` are set, credentials are **not** copied into the ActorTemplate or `openclaw.json` as plaintext. kagent writes `valueFrom.secretKeyRef` (or inline `value` for harness inline tokens) on the ActorTemplate container env; Substrate `ate-api` resolves those refs at actor resume. In `openclaw.json`, kagent uses OpenClaw [env SecretRefs](https://docs.openclaw.ai/gateway/secrets) (`{source:"env",provider:"default",id:"<ENV_VAR_NAME>"}`) for `models.providers.*.apiKey`, `channels.telegram.accounts.*.botToken`, and `channels.slack.accounts.*.botToken` / `appToken`. Rotate a Secret and recreate the ActorTemplate golden snapshot when keys change. + +With `controller.substrate.enabled=true`, the kagent Helm chart installs a namespace-scoped Role and RoleBinding so `ate-api-server` (in `ate-system` by default) can `get` Secrets and ConfigMaps referenced by generated ActorTemplates. Harnesses in other namespaces need that namespace listed in `rbac.namespaces` (or a matching RoleBinding applied manually). + +Port-forward the UI: + +```bash +kubectl port-forward -n kagent svc/kagent-ui 8001:8080 +``` + +Navigate to the deployed agent harness. If the OpenClaw Control UI asks for a gateway connection, use: + +- Gateway URL: `http://localhost:8001/api/agentharnesses/kagent/peterj-claw/gateway/` +- Gateway token: `test-token` + +The gateway URL must include the trailing slash. The token is the value configured in `spec.substrate.gatewayToken`, or the Secret value referenced by `spec.substrate.gatewayTokenSecretRef`; enter it in the token/credentials field rather than relying on a `token` query parameter. + +kagent proxies UI traffic to the actor OpenClaw gateway through Substrate's **atenet-router** (Envoy) using the actor `Host` header (`<actor-id>.actors.resources.substrate.ate.dev`). 
The default router URL is `http://atenet-router.ate-system.svc:80`; override with `controller.substrate.atenetRouterURL` when needed. diff --git a/go/api/config/crd/bases/kagent.dev_agentharnesses.yaml b/go/api/config/crd/bases/kagent.dev_agentharnesses.yaml index 308d7ba0f2..9c4c1ee2a6 100644 --- a/go/api/config/crd/bases/kagent.dev_agentharnesses.yaml +++ b/go/api/config/crd/bases/kagent.dev_agentharnesses.yaml @@ -19,6 +19,9 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: + - jsonPath: .spec.runtime + name: Runtime + type: string - jsonPath: .spec.backend name: Backend type: string @@ -511,6 +514,75 @@ spec: type: string type: array type: object + runtime: + default: openshell + description: Runtime selects the harness provisioning stack. Defaults + to openshell when unset. + enum: + - openshell + - substrate + type: string + substrate: + description: Substrate is required when runtime is substrate. + properties: + gatewayToken: + description: |- + GatewayToken is the OpenClaw gateway Bearer token for this harness. + Prefer gatewayTokenSecretRef for production secrets. + minLength: 1 + type: string + gatewayTokenSecretRef: + description: |- + GatewayTokenSecretRef references a Secret key holding the OpenClaw gateway Bearer token. + The Secret must contain a "token" key. + properties: + apiGroup: + type: string + kind: + type: string + name: + type: string + required: + - name + type: object + snapshotsConfig: + description: |- + SnapshotsConfig configures actor memory snapshots. Defaults to + gs://ate-snapshots// when unset. + properties: + location: + description: |- + Location is the GCS URI prefix for golden and incremental snapshots. + Example: gs://ate-snapshots/kagent/my-namespace/my-harness/ + pattern: ^gs:// + type: string + required: + - location + type: object + workerPoolRef: + description: |- + WorkerPoolRef references an existing ate.dev WorkerPool in the harness namespace. 
+ When unset, the controller uses its configured default WorkerPool. + properties: + apiGroup: + type: string + kind: + type: string + name: + type: string + required: + - name + type: object + workloadImage: + description: WorkloadImage overrides the default nemoclaw/openclaw + sandbox image in the ActorTemplate. + type: string + type: object + x-kubernetes-validations: + - message: Exactly one of gatewayToken or gatewayTokenSecretRef must + be specified + rule: (has(self.gatewayToken) && !has(self.gatewayTokenSecretRef)) + || (!has(self.gatewayToken) && has(self.gatewayTokenSecretRef)) required: - backend type: object @@ -520,6 +592,10 @@ spec: || (has(c.slack) && ((self.backend == ''hermes'' && has(c.slack.hermes) && !has(c.slack.openclaw)) || ((self.backend == ''openclaw'' || self.backend == ''nemoclaw'') && has(c.slack.openclaw) && !has(c.slack.hermes)))))' + - message: spec.substrate may only be set when runtime is substrate + rule: '!has(self.substrate) || self.runtime == ''substrate''' + - message: spec.substrate is required when runtime is substrate + rule: self.runtime != 'substrate' || has(self.substrate) status: description: AgentHarnessStatus is the observed state of an AgentHarness. properties: diff --git a/go/api/httpapi/substrate.go b/go/api/httpapi/substrate.go new file mode 100644 index 0000000000..bbaf83f63c --- /dev/null +++ b/go/api/httpapi/substrate.go @@ -0,0 +1,60 @@ +package httpapi + +// SubstrateStatusResponse aggregates Agent Substrate control-plane and Kubernetes state. +type SubstrateStatusResponse struct { + // Enabled is true when the controller is configured with an ate-api endpoint. + Enabled bool `json:"enabled"` + // AteAPIError is set when ate-api list calls fail (actors/workers may be partial or empty). 
+ AteAPIError string `json:"ateApiError,omitempty"` + + WorkerPools []SubstrateWorkerPoolEntry `json:"workerPools"` + ActorTemplates []SubstrateActorTemplateEntry `json:"actorTemplates"` + Actors []SubstrateActorEntry `json:"actors"` + Workers []SubstrateWorkerEntry `json:"workers"` +} + +// SubstrateWorkerPoolEntry is a ate.dev WorkerPool CR. +type SubstrateWorkerPoolEntry struct { + Namespace string `json:"namespace"` + Name string `json:"name"` + Replicas int32 `json:"replicas"` + AteomImage string `json:"ateomImage"` +} + +// SubstrateActorTemplateEntry is a ate.dev ActorTemplate CR. +type SubstrateActorTemplateEntry struct { + Namespace string `json:"namespace"` + Name string `json:"name"` + Phase string `json:"phase,omitempty"` + GoldenActorID string `json:"goldenActorId,omitempty"` + GoldenSnapshot string `json:"goldenSnapshot,omitempty"` + WorkerPoolRef string `json:"workerPoolRef,omitempty"` + HarnessName string `json:"harnessName,omitempty"` + ManagedByKagent bool `json:"managedByKagent"` +} + +// SubstrateActorEntry is runtime state from ate-api (redis). +type SubstrateActorEntry struct { + ActorID string `json:"actorId"` + Status string `json:"status"` + ActorTemplateNamespace string `json:"actorTemplateNamespace,omitempty"` + ActorTemplateName string `json:"actorTemplateName,omitempty"` + AteomPodNamespace string `json:"ateomPodNamespace,omitempty"` + AteomPodName string `json:"ateomPodName,omitempty"` + AteomPodIP string `json:"ateomPodIp,omitempty"` + LastSnapshot string `json:"lastSnapshot,omitempty"` + InProgressSnapshot string `json:"inProgressSnapshot,omitempty"` + Version int64 `json:"version,omitempty"` +} + +// SubstrateWorkerEntry is a worker assignment from ate-api (redis). 
+type SubstrateWorkerEntry struct { + WorkerNamespace string `json:"workerNamespace"` + WorkerPool string `json:"workerPool"` + WorkerPod string `json:"workerPod"` + ActorNamespace string `json:"actorNamespace,omitempty"` + ActorTemplate string `json:"actorTemplate,omitempty"` + ActorID string `json:"actorId,omitempty"` + IP string `json:"ip,omitempty"` + Version int64 `json:"version,omitempty"` +} diff --git a/go/api/httpapi/types.go b/go/api/httpapi/types.go index ec80d49ea1..d704eb549a 100644 --- a/go/api/httpapi/types.go +++ b/go/api/httpapi/types.go @@ -144,6 +144,17 @@ type OpenshellAgentHarnessListEntry struct { Endpoint string `json:"endpoint,omitempty"` } +// SubstrateAgentHarnessListEntry is set when runtime is substrate. +type SubstrateAgentHarnessListEntry struct { + Backend v1alpha2.AgentHarnessBackendType `json:"backend"` + Runtime v1alpha2.AgentHarnessRuntime `json:"runtime"` + ActorID string `json:"actorId,omitempty"` + GatewayUIPath string `json:"gatewayUIPath,omitempty"` + ModelConfigRef string `json:"modelConfigRef,omitempty"` + BackendRefID string `json:"backendRefId,omitempty"` + Endpoint string `json:"endpoint,omitempty"` +} + type AgentResponse struct { ID string `json:"id"` Agent *AgentResource `json:"agent"` @@ -157,6 +168,7 @@ type AgentResponse struct { Accepted bool `json:"accepted"` WorkloadMode v1alpha2.WorkloadMode `json:"workloadMode,omitempty"` OpenshellAgentHarness *OpenshellAgentHarnessListEntry `json:"openshellAgentHarness,omitempty"` + SubstrateAgentHarness *SubstrateAgentHarnessListEntry `json:"substrateAgentHarness,omitempty"` } // Session types diff --git a/go/api/v1alpha2/agentharness_types.go b/go/api/v1alpha2/agentharness_types.go index c6a43f6c02..ee74118827 100644 --- a/go/api/v1alpha2/agentharness_types.go +++ b/go/api/v1alpha2/agentharness_types.go @@ -37,6 +37,58 @@ func IsKnownAgentHarnessBackend(b AgentHarnessBackendType) bool { } } +// AgentHarnessRuntime selects which control plane provisions the harness VM. 
+// +kubebuilder:validation:Enum=openshell;substrate +type AgentHarnessRuntime string + +const ( + AgentHarnessRuntimeOpenshell AgentHarnessRuntime = "openshell" + AgentHarnessRuntimeSubstrate AgentHarnessRuntime = "substrate" +) + +// AgentHarnessSubstrateSnapshotsConfig points at a GCS prefix for actor memory snapshots. +// Substrate currently expects a gs:// location (see Agent Substrate SnapshotsConfig). +type AgentHarnessSubstrateSnapshotsConfig struct { + // Location is the GCS URI prefix for golden and incremental snapshots. + // Example: gs://ate-snapshots/kagent/my-namespace/my-harness/ + // +required + // +kubebuilder:validation:Pattern=`^gs://` + Location string `json:"location"` +} + +// AgentHarnessSubstrateSpec configures Agent Substrate (WorkerPool + ActorTemplate + Actor). +// +// kagent generates a per-harness ActorTemplate and creates an Actor from it. WorkerPool +// capacity is referenced from workerPoolRef or the controller default; it is not +// created or deleted by the AgentHarness controller. +// +kubebuilder:validation:XValidation:rule="(has(self.gatewayToken) && !has(self.gatewayTokenSecretRef)) || (!has(self.gatewayToken) && has(self.gatewayTokenSecretRef))",message="Exactly one of gatewayToken or gatewayTokenSecretRef must be specified" +type AgentHarnessSubstrateSpec struct { + // WorkerPoolRef references an existing ate.dev WorkerPool in the harness namespace. + // When unset, the controller uses its configured default WorkerPool. + // +optional + WorkerPoolRef *TypedLocalReference `json:"workerPoolRef,omitempty"` + + // SnapshotsConfig configures actor memory snapshots. Defaults to + // gs://ate-snapshots// when unset. + // +optional + SnapshotsConfig *AgentHarnessSubstrateSnapshotsConfig `json:"snapshotsConfig,omitempty"` + + // WorkloadImage overrides the default nemoclaw/openclaw sandbox image in the ActorTemplate. 
+ // +optional + WorkloadImage string `json:"workloadImage,omitempty"` + + // GatewayToken is the OpenClaw gateway Bearer token for this harness. + // Prefer gatewayTokenSecretRef for production secrets. + // +optional + // +kubebuilder:validation:MinLength=1 + GatewayToken string `json:"gatewayToken,omitempty"` + + // GatewayTokenSecretRef references a Secret key holding the OpenClaw gateway Bearer token. + // The Secret must contain a "token" key. + // +optional + GatewayTokenSecretRef *TypedLocalReference `json:"gatewayTokenSecretRef,omitempty"` +} + // AgentHarnessChannelType selects a messenger integration for OpenClaw harness VMs. // +kubebuilder:validation:Enum=telegram;slack type AgentHarnessChannelType string @@ -153,11 +205,22 @@ type AgentHarnessChannel struct { // in. The backend is responsible for provisioning an environment that stays // ready to accept incoming commands. // +kubebuilder:validation:XValidation:rule="!has(self.channels) || self.channels.all(c, c.type != 'slack' || (has(c.slack) && ((self.backend == 'hermes' && has(c.slack.hermes) && !has(c.slack.openclaw)) || ((self.backend == 'openclaw' || self.backend == 'nemoclaw') && has(c.slack.openclaw) && !has(c.slack.hermes)))))",message="slack backend-specific settings must match spec.backend" +// +kubebuilder:validation:XValidation:rule="!has(self.substrate) || self.runtime == 'substrate'",message="spec.substrate may only be set when runtime is substrate" +// +kubebuilder:validation:XValidation:rule="self.runtime != 'substrate' || has(self.substrate)",message="spec.substrate is required when runtime is substrate" type AgentHarnessSpec struct { // Backend selects the control plane to use. Required. // +required Backend AgentHarnessBackendType `json:"backend"` + // Runtime selects the harness provisioning stack. Defaults to openshell when unset. 
+ // +optional + // +kubebuilder:default=openshell + Runtime AgentHarnessRuntime `json:"runtime,omitempty"` + + // Substrate is required when runtime is substrate. + // +optional + Substrate *AgentHarnessSubstrateSpec `json:"substrate,omitempty"` + // Description is a short human-readable summary shown in the UI (e.g. agents list). // +optional Description string `json:"description,omitempty"` @@ -234,13 +297,17 @@ type AgentHarnessStatus struct { // AgentHarnessConditionType enumerates the condition types an AgentHarness may report. const ( - AgentHarnessConditionTypeReady = "Ready" - AgentHarnessConditionTypeAccepted = "Accepted" + AgentHarnessConditionTypeReady = "Ready" + AgentHarnessConditionTypeAccepted = "Accepted" + AgentHarnessConditionTypeActorTemplateReady = "ActorTemplateReady" + AgentHarnessConditionTypeActorReady = "ActorReady" + AgentHarnessConditionTypeBootstrapReady = "BootstrapReady" ) // +kubebuilder:object:root=true // +kubebuilder:resource:path=agentharnesses,singular=agentharness,shortName=ahr,categories=kagent // +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Runtime",type="string",JSONPath=".spec.runtime" // +kubebuilder:printcolumn:name="Backend",type="string",JSONPath=".spec.backend" // +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" // +kubebuilder:printcolumn:name="ID",type="string",JSONPath=".status.backendRef.id" diff --git a/go/api/v1alpha2/zz_generated.deepcopy.go b/go/api/v1alpha2/zz_generated.deepcopy.go index 52d10ed714..1a7dfd4e90 100644 --- a/go/api/v1alpha2/zz_generated.deepcopy.go +++ b/go/api/v1alpha2/zz_generated.deepcopy.go @@ -295,6 +295,11 @@ func (in *AgentHarnessSlackChannelSpec) DeepCopy() *AgentHarnessSlackChannelSpec // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *AgentHarnessSpec) DeepCopyInto(out *AgentHarnessSpec) { *out = *in + if in.Substrate != nil { + in, out := &in.Substrate, &out.Substrate + *out = new(AgentHarnessSubstrateSpec) + (*in).DeepCopyInto(*out) + } if in.Env != nil { in, out := &in.Env, &out.Env *out = make([]v1.EnvVar, len(*in)) @@ -373,6 +378,51 @@ func (in *AgentHarnessStatusRef) DeepCopy() *AgentHarnessStatusRef { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AgentHarnessSubstrateSnapshotsConfig) DeepCopyInto(out *AgentHarnessSubstrateSnapshotsConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AgentHarnessSubstrateSnapshotsConfig. +func (in *AgentHarnessSubstrateSnapshotsConfig) DeepCopy() *AgentHarnessSubstrateSnapshotsConfig { + if in == nil { + return nil + } + out := new(AgentHarnessSubstrateSnapshotsConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AgentHarnessSubstrateSpec) DeepCopyInto(out *AgentHarnessSubstrateSpec) { + *out = *in + if in.WorkerPoolRef != nil { + in, out := &in.WorkerPoolRef, &out.WorkerPoolRef + *out = new(TypedLocalReference) + **out = **in + } + if in.SnapshotsConfig != nil { + in, out := &in.SnapshotsConfig, &out.SnapshotsConfig + *out = new(AgentHarnessSubstrateSnapshotsConfig) + **out = **in + } + if in.GatewayTokenSecretRef != nil { + in, out := &in.GatewayTokenSecretRef, &out.GatewayTokenSecretRef + *out = new(TypedLocalReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AgentHarnessSubstrateSpec. 
+func (in *AgentHarnessSubstrateSpec) DeepCopy() *AgentHarnessSubstrateSpec { + if in == nil { + return nil + } + out := new(AgentHarnessSubstrateSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *AgentHarnessTelegramChannelSpec) DeepCopyInto(out *AgentHarnessTelegramChannelSpec) { *out = *in diff --git a/go/core/internal/controller/agentharness_controller.go b/go/core/internal/controller/agentharness_controller.go deleted file mode 100644 index a73d7a7835..0000000000 --- a/go/core/internal/controller/agentharness_controller.go +++ /dev/null @@ -1,256 +0,0 @@ -/* -Copyright 2026. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 -*/ - -package controller - -import ( - "context" - "fmt" - "strconv" - "time" - - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/events" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/controller-runtime/pkg/predicate" - - "github.com/kagent-dev/kagent/go/api/v1alpha2" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" -) - -const ( - // agentHarnessFinalizer guarantees the backend sandbox is deleted before the - // Kubernetes object is removed. - agentHarnessFinalizer = "kagent.dev/agent-harness-backend-cleanup" - - // agentHarnessNotReadyRequeue is how long we wait before re-polling backend - // status while the sandbox is still provisioning. 
- agentHarnessNotReadyRequeue = 10 * time.Second - - // annotationAgentHarnessBootstrapGeneration records the AgentHarness metadata.generation for which - // post-ready bootstrap (backend OnAgentHarnessReady, e.g. exec hooks) already completed. - annotationAgentHarnessBootstrapGeneration = "kagent.dev/agent-harness-bootstrap-generation" -) - -// AgentHarnessController reconciles a kagent.dev/v1alpha2 AgentHarness against an -// AsyncBackend. It is intentionally independent of the SandboxAgent path — -// harness VMs are a generic exec/SSH-able environment with no in-cluster -// workload owned by kagent. -type AgentHarnessController struct { - Client client.Client - Recorder events.EventRecorder - Backends map[v1alpha2.AgentHarnessBackendType]sandboxbackend.AsyncBackend -} - -// +kubebuilder:rbac:groups=kagent.dev,resources=agentharnesses,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=kagent.dev,resources=agentharnesses/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=kagent.dev,resources=agentharnesses/finalizers,verbs=update - -func (r *AgentHarnessController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := ctrl.LoggerFrom(ctx).WithValues("agentHarness", req.NamespacedName) - - var ah v1alpha2.AgentHarness - if err := r.Client.Get(ctx, req.NamespacedName, &ah); err != nil { - if apierrors.IsNotFound(err) { - return ctrl.Result{}, nil - } - return ctrl.Result{}, fmt.Errorf("get AgentHarness: %w", err) - } - - if !ah.DeletionTimestamp.IsZero() { - return r.reconcileDelete(ctx, &ah) - } - - if controllerutil.AddFinalizer(&ah, agentHarnessFinalizer) { - if err := r.Client.Update(ctx, &ah); err != nil { - return ctrl.Result{}, fmt.Errorf("add finalizer: %w", err) - } - return ctrl.Result{Requeue: true}, nil - } - - backend := r.Backends[ah.Spec.Backend] - if backend == nil { - setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionFalse, - "BackendUnavailable", - 
fmt.Sprintf("no backend configured for %q", ah.Spec.Backend)) - setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, - "BackendUnavailable", "") - if err := r.patchAgentHarnessStatus(ctx, &ah); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil - } - - res, err := backend.EnsureAgentHarness(ctx, &ah) - if err != nil { - log.Error(err, "EnsureAgentHarness failed") - setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionFalse, - "EnsureFailed", err.Error()) - setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, - "EnsureFailed", err.Error()) - if perr := r.patchAgentHarnessStatus(ctx, &ah); perr != nil { - return ctrl.Result{}, perr - } - return ctrl.Result{}, err - } - - ah.Status.BackendRef = &v1alpha2.AgentHarnessStatusRef{ - Backend: ah.Spec.Backend, - ID: res.Handle.ID, - } - if res.Endpoint != "" { - ah.Status.Connection = &v1alpha2.AgentHarnessConnection{Endpoint: res.Endpoint} - } - setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionTrue, - "AgentHarnessAccepted", "backend accepted sandbox request") - - st, reason, msg := backend.GetStatus(ctx, res.Handle) - pending := r.postReadyBootstrapPending(&ah) - if st == metav1.ConditionTrue && pending { - setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, - "BootstrapPending", - "gateway sandbox is ready; waiting for post-ready bootstrap (OnAgentHarnessReady) to finish") - } else { - setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeReady, st, reason, msg) - } - ah.Status.ObservedGeneration = ah.Generation - - if err := r.patchAgentHarnessStatus(ctx, &ah); err != nil { - return ctrl.Result{}, err - } - - if st != metav1.ConditionTrue { - return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil - } - if pending { - if err := r.maybePostReadyBootstrap(ctx, 
client.ObjectKeyFromObject(&ah), &ah, res.Handle, backend); err != nil { - log.Error(err, "post-ready sandbox bootstrap failed") - return ctrl.Result{}, err - } - var latest v1alpha2.AgentHarness - if err := r.Client.Get(ctx, req.NamespacedName, &latest); err != nil { - return ctrl.Result{}, fmt.Errorf("get AgentHarness after bootstrap: %w", err) - } - st2, reason2, msg2 := backend.GetStatus(ctx, res.Handle) - setAgentHarnessCondition(&latest, v1alpha2.AgentHarnessConditionTypeReady, st2, reason2, msg2) - latest.Status.ObservedGeneration = latest.Generation - if err := r.Client.Status().Update(ctx, &latest); err != nil { - return ctrl.Result{}, fmt.Errorf("update AgentHarness status after bootstrap: %w", err) - } - } - return ctrl.Result{}, nil -} - -func (r *AgentHarnessController) postReadyBootstrapPending(ah *v1alpha2.AgentHarness) bool { - wantGen := strconv.FormatInt(ah.Generation, 10) - if ah.Annotations != nil && ah.Annotations[annotationAgentHarnessBootstrapGeneration] == wantGen { - return false - } - return true -} - -func (r *AgentHarnessController) maybePostReadyBootstrap(ctx context.Context, key client.ObjectKey, ah *v1alpha2.AgentHarness, h sandboxbackend.Handle, async sandboxbackend.AsyncBackend) error { - if !r.postReadyBootstrapPending(ah) { - return nil - } - wantGen := strconv.FormatInt(ah.Generation, 10) - if err := async.OnAgentHarnessReady(ctx, ah, h); err != nil { - return err - } - var fresh v1alpha2.AgentHarness - if err := r.Client.Get(ctx, key, &fresh); err != nil { - return fmt.Errorf("get AgentHarness after bootstrap: %w", err) - } - base := fresh.DeepCopy() - if fresh.Annotations == nil { - fresh.Annotations = map[string]string{} - } - fresh.Annotations[annotationAgentHarnessBootstrapGeneration] = wantGen - if err := r.Client.Patch(ctx, &fresh, client.MergeFrom(base)); err != nil { - return fmt.Errorf("patch AgentHarness bootstrap-generation annotation: %w", err) - } - ctrl.LoggerFrom(ctx).WithValues("agentHarness", key.String()).Info( 
- "recorded post-ready bootstrap for AgentHarness generation", "generation", ah.Generation) - return nil -} - -func (r *AgentHarnessController) reconcileDelete(ctx context.Context, ah *v1alpha2.AgentHarness) (ctrl.Result, error) { - if !controllerutil.ContainsFinalizer(ah, agentHarnessFinalizer) { - return ctrl.Result{}, nil - } - - if ah.Status.BackendRef != nil && ah.Status.BackendRef.ID != "" { - del := r.Backends[ah.Status.BackendRef.Backend] - if del != nil { - if err := del.DeleteAgentHarness(ctx, sandboxbackend.Handle{ID: ah.Status.BackendRef.ID}); err != nil { - if r.Recorder != nil { - r.Recorder.Eventf(ah, nil, "Warning", "AgentHarnessDeleteFailed", "DeleteAgentHarness", "%s", err.Error()) - } - return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, err - } - } - } - - controllerutil.RemoveFinalizer(ah, agentHarnessFinalizer) - if err := r.Client.Update(ctx, ah); err != nil { - return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err) - } - return ctrl.Result{}, nil -} - -func (r *AgentHarnessController) patchAgentHarnessStatus(ctx context.Context, ah *v1alpha2.AgentHarness) error { - if err := r.Client.Status().Update(ctx, ah); err != nil { - return fmt.Errorf("update AgentHarness status: %w", err) - } - return nil -} - -func setAgentHarnessCondition(ah *v1alpha2.AgentHarness, t string, s metav1.ConditionStatus, reason, msg string) { - now := metav1.Now() - for i := range ah.Status.Conditions { - c := &ah.Status.Conditions[i] - if c.Type != t { - continue - } - if c.Status != s { - c.LastTransitionTime = now - } - c.Status = s - c.Reason = reason - c.Message = msg - c.ObservedGeneration = ah.Generation - return - } - ah.Status.Conditions = append(ah.Status.Conditions, metav1.Condition{ - Type: t, - Status: s, - Reason: reason, - Message: msg, - LastTransitionTime: now, - ObservedGeneration: ah.Generation, - }) -} - -// SetupWithManager registers the controller with the manager. 
-func (r *AgentHarnessController) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - WithOptions(controller.Options{NeedLeaderElection: new(true)}). - For(&v1alpha2.AgentHarness{}, builder.WithPredicates(predicate.Or( - predicate.GenerationChangedPredicate{}, - predicate.LabelChangedPredicate{}, - ))). - Named("agentharness"). - Complete(r) -} diff --git a/go/core/internal/controller/agentharness_controller_test.go b/go/core/internal/controller/agentharness_controller_test.go new file mode 100644 index 0000000000..7d09b37dee --- /dev/null +++ b/go/core/internal/controller/agentharness_controller_test.go @@ -0,0 +1,289 @@ +package controller + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/require" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" +) + +type fakeSubstrateLifecycle struct { + state substrate.LifecycleState + ensureErr error + cleanupDone bool + cleanupErr error + ensureCalls int + cleanupCalls int +} + +func (f *fakeSubstrateLifecycle) EnsureGeneratedTemplate(_ context.Context, _ *v1alpha2.AgentHarness) (substrate.LifecycleState, error) { + f.ensureCalls++ + return f.state, f.ensureErr +} + +func (f *fakeSubstrateLifecycle) CleanupGeneratedTemplate(_ context.Context, _ *v1alpha2.AgentHarness) (bool, error) { + f.cleanupCalls++ + return f.cleanupDone, f.cleanupErr +} + +type fakeAgentHarnessBackend struct { + ensureCalls int + deleteCalls int + readyCalls int + + ensureHandle string + endpoint string + 
status metav1.ConditionStatus + reason string + message string + + deleteDone bool + deleteErr error + readyErr error +} + +func (f *fakeAgentHarnessBackend) Name() v1alpha2.AgentHarnessBackendType { + return v1alpha2.AgentHarnessBackendOpenClaw +} + +func (f *fakeAgentHarnessBackend) EnsureAgentHarness(context.Context, *v1alpha2.AgentHarness) (sandboxbackend.EnsureResult, error) { + f.ensureCalls++ + id := f.ensureHandle + if id == "" { + id = "actor-1" + } + return sandboxbackend.EnsureResult{ + Handle: sandboxbackend.Handle{ID: id}, + Endpoint: f.endpoint, + }, nil +} + +func (f *fakeAgentHarnessBackend) GetStatus(context.Context, sandboxbackend.Handle) (metav1.ConditionStatus, string, string) { + st := f.status + if st == "" { + st = metav1.ConditionTrue + } + reason := f.reason + if reason == "" { + reason = "Running" + } + return st, reason, f.message +} + +func (f *fakeAgentHarnessBackend) DeleteAgentHarness(context.Context, sandboxbackend.Handle) (bool, error) { + f.deleteCalls++ + return f.deleteDone, f.deleteErr +} + +func (f *fakeAgentHarnessBackend) OnAgentHarnessReady(context.Context, *v1alpha2.AgentHarness, sandboxbackend.Handle) error { + f.readyCalls++ + return f.readyErr +} + +func TestAgentHarnessController_SubstrateWaitsForGeneratedTemplate(t *testing.T) { + ctx := context.Background() + ah := newSubstrateHarness("kagent", "claw") + controller := newAgentHarnessTestController(t, ah) + lifecycle := &fakeSubstrateLifecycle{state: substrate.LifecycleState{ActorTemplateReady: false}} + backend := &fakeAgentHarnessBackend{} + controller.SubstrateLifecycle = lifecycle + controller.OpenClawBackend = backend + + result, err := controller.Reconcile(ctx, ctrl.Request{NamespacedName: client.ObjectKeyFromObject(ah)}) + require.NoError(t, err) + require.Equal(t, agentHarnessNotReadyRequeue, result.RequeueAfter) + require.Equal(t, 1, lifecycle.ensureCalls) + require.Zero(t, backend.ensureCalls, "actor backend must not run before ActorTemplate is ready") + + 
latest := getAgentHarness(t, controller.Client, ah) + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionTrue, "SubstrateLifecyclePending") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeActorTemplateReady, metav1.ConditionFalse, "NotReady") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeActorReady, metav1.ConditionFalse, "ActorNotCreated") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, "ActorTemplateNotReady") +} + +func TestAgentHarnessController_SubstrateLifecycleErrorSetsStatus(t *testing.T) { + ctx := context.Background() + ah := newSubstrateHarness("kagent", "claw") + controller := newAgentHarnessTestController(t, ah) + lifecycle := &fakeSubstrateLifecycle{ensureErr: errors.New("workerpool missing")} + backend := &fakeAgentHarnessBackend{} + controller.SubstrateLifecycle = lifecycle + controller.OpenClawBackend = backend + + _, err := controller.Reconcile(ctx, ctrl.Request{NamespacedName: client.ObjectKeyFromObject(ah)}) + require.ErrorContains(t, err, "workerpool missing") + require.Equal(t, 1, lifecycle.ensureCalls) + require.Zero(t, backend.ensureCalls) + + latest := getAgentHarness(t, controller.Client, ah) + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionFalse, "SubstrateLifecycleFailed") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, "SubstrateLifecycleFailed") +} + +func TestAgentHarnessController_SubstrateReadyCreatesActorAndRunsBootstrap(t *testing.T) { + ctx := context.Background() + ah := newSubstrateHarness("kagent", "claw") + controller := newAgentHarnessTestController(t, ah) + lifecycle := &fakeSubstrateLifecycle{state: substrate.LifecycleState{ActorTemplateReady: true}} + backend := &fakeAgentHarnessBackend{ensureHandle: "actor-1", endpoint: "kagent gateway: /api/agentharnesses/kagent/claw/gateway/"} + controller.SubstrateLifecycle = 
lifecycle + controller.OpenClawBackend = backend + + result, err := controller.Reconcile(ctx, ctrl.Request{NamespacedName: client.ObjectKeyFromObject(ah)}) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, result) + require.Equal(t, 1, lifecycle.ensureCalls) + require.Equal(t, 1, backend.ensureCalls) + require.Equal(t, 1, backend.readyCalls) + + latest := getAgentHarness(t, controller.Client, ah) + require.NotNil(t, latest.Status.BackendRef) + require.Equal(t, "actor-1", latest.Status.BackendRef.ID) + require.NotNil(t, latest.Status.Connection) + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionTrue, "AgentHarnessAccepted") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeActorTemplateReady, metav1.ConditionTrue, "Ready") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeActorReady, metav1.ConditionTrue, "Running") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeBootstrapReady, metav1.ConditionTrue, "BootstrapComplete") + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionTrue, "Running") + + result, err = controller.Reconcile(ctx, ctrl.Request{NamespacedName: client.ObjectKeyFromObject(ah)}) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, result) + require.Equal(t, 1, backend.readyCalls, "bootstrap should not rerun for an already bootstrapped generation") +} + +func TestAgentHarnessController_SubstrateDeleteWaitsForActorBeforeTemplateCleanup(t *testing.T) { + ctx := context.Background() + ah := newDeletingSubstrateHarness("kagent", "claw") + ah.Status.BackendRef = &v1alpha2.AgentHarnessStatusRef{Backend: v1alpha2.AgentHarnessBackendOpenClaw, ID: "actor-1"} + controller := newAgentHarnessTestController(t, ah) + lifecycle := &fakeSubstrateLifecycle{cleanupDone: true} + backend := &fakeAgentHarnessBackend{deleteDone: false} + controller.SubstrateLifecycle = lifecycle + controller.OpenClawBackend = backend + + result, err := 
controller.Reconcile(ctx, ctrl.Request{NamespacedName: client.ObjectKeyFromObject(ah)}) + require.NoError(t, err) + require.Equal(t, agentHarnessNotReadyRequeue, result.RequeueAfter) + require.Equal(t, 1, backend.deleteCalls) + require.Zero(t, lifecycle.cleanupCalls, "template cleanup must wait for harness actor deletion") + + latest := getAgentHarness(t, controller.Client, ah) + require.NotNil(t, latest.Status.BackendRef) + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeActorReady, metav1.ConditionFalse, "ActorDeleting") + require.Contains(t, latest.Finalizers, agentHarnessFinalizer) +} + +func TestAgentHarnessController_SubstrateDeleteWaitsForGeneratedTemplateCleanup(t *testing.T) { + ctx := context.Background() + ah := newDeletingSubstrateHarness("kagent", "claw") + ah.Status.BackendRef = &v1alpha2.AgentHarnessStatusRef{Backend: v1alpha2.AgentHarnessBackendOpenClaw, ID: "actor-1"} + controller := newAgentHarnessTestController(t, ah) + lifecycle := &fakeSubstrateLifecycle{cleanupDone: false} + backend := &fakeAgentHarnessBackend{deleteDone: true} + controller.SubstrateLifecycle = lifecycle + controller.OpenClawBackend = backend + + result, err := controller.Reconcile(ctx, ctrl.Request{NamespacedName: client.ObjectKeyFromObject(ah)}) + require.NoError(t, err) + require.Equal(t, agentHarnessNotReadyRequeue, result.RequeueAfter) + require.Equal(t, 1, backend.deleteCalls) + require.Equal(t, 1, lifecycle.cleanupCalls) + + latest := getAgentHarness(t, controller.Client, ah) + require.Nil(t, latest.Status.BackendRef) + requireCondition(t, latest, v1alpha2.AgentHarnessConditionTypeActorTemplateReady, metav1.ConditionFalse, "GoldenActorDeleting") + require.Contains(t, latest.Finalizers, agentHarnessFinalizer) +} + +func TestAgentHarnessController_SubstrateDeleteRemovesFinalizerAfterCleanup(t *testing.T) { + ctx := context.Background() + ah := newDeletingSubstrateHarness("kagent", "claw") + ah.Status.BackendRef = &v1alpha2.AgentHarnessStatusRef{Backend: 
v1alpha2.AgentHarnessBackendOpenClaw, ID: "actor-1"} + controller := newAgentHarnessTestController(t, ah) + lifecycle := &fakeSubstrateLifecycle{cleanupDone: true} + backend := &fakeAgentHarnessBackend{deleteDone: true} + controller.SubstrateLifecycle = lifecycle + controller.OpenClawBackend = backend + + result, err := controller.Reconcile(ctx, ctrl.Request{NamespacedName: client.ObjectKeyFromObject(ah)}) + require.NoError(t, err) + require.Equal(t, ctrl.Result{}, result) + require.Equal(t, 1, backend.deleteCalls) + require.Equal(t, 1, lifecycle.cleanupCalls) + + var latest v1alpha2.AgentHarness + err = controller.Client.Get(ctx, client.ObjectKeyFromObject(ah), &latest) + require.True(t, apierrors.IsNotFound(err), "fake client should complete deletion after finalizer removal") +} + +func newAgentHarnessTestController(t *testing.T, objects ...client.Object) *SubstrateAgentHarnessController { + t.Helper() + scheme := runtime.NewScheme() + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + kube := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + WithStatusSubresource(&v1alpha2.AgentHarness{}). 
+ Build() + return &SubstrateAgentHarnessController{Client: kube} +} + +func newSubstrateHarness(namespace, name string) *v1alpha2.AgentHarness { + return &v1alpha2.AgentHarness{ + TypeMeta: metav1.TypeMeta{APIVersion: v1alpha2.GroupVersion.String(), Kind: "AgentHarness"}, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Generation: 1, + Finalizers: []string{agentHarnessFinalizer}, + }, + Spec: v1alpha2.AgentHarnessSpec{ + Runtime: v1alpha2.AgentHarnessRuntimeSubstrate, + Backend: v1alpha2.AgentHarnessBackendOpenClaw, + Substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayToken: "token", + }, + }, + } +} + +func newDeletingSubstrateHarness(namespace, name string) *v1alpha2.AgentHarness { + ah := newSubstrateHarness(namespace, name) + now := metav1.Now() + ah.DeletionTimestamp = &now + return ah +} + +func getAgentHarness(t *testing.T, kube client.Client, ah *v1alpha2.AgentHarness) *v1alpha2.AgentHarness { + t.Helper() + var latest v1alpha2.AgentHarness + err := kube.Get(context.Background(), client.ObjectKeyFromObject(ah), &latest) + if apierrors.IsNotFound(err) { + t.Fatalf("AgentHarness %s unexpectedly not found", client.ObjectKeyFromObject(ah)) + } + require.NoError(t, err) + return &latest +} + +func requireCondition(t *testing.T, ah *v1alpha2.AgentHarness, conditionType string, status metav1.ConditionStatus, reason string) { + t.Helper() + condition := meta.FindStatusCondition(ah.Status.Conditions, conditionType) + require.NotNil(t, condition, "missing condition %s", conditionType) + require.Equal(t, status, condition.Status, "condition %s status", conditionType) + require.Equal(t, reason, condition.Reason, "condition %s reason", conditionType) +} diff --git a/go/core/internal/controller/agentharness_openshell_controller.go b/go/core/internal/controller/agentharness_openshell_controller.go new file mode 100644 index 0000000000..be25f2f7d0 --- /dev/null +++ b/go/core/internal/controller/agentharness_openshell_controller.go @@ -0,0 
+1,205 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package controller + +import ( + "context" + "fmt" + + "github.com/go-logr/logr" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/events" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" +) + +// OpenShellAgentHarnessController reconciles AgentHarness resources that use the +// OpenShell runtime. +type OpenShellAgentHarnessController struct { + Client client.Client + Recorder events.EventRecorder + OpenClawBackend sandboxbackend.AsyncBackend + HermesBackend sandboxbackend.AsyncBackend +} + +func (r *OpenShellAgentHarnessController) backendFor(ah *v1alpha2.AgentHarness) sandboxbackend.AsyncBackend { + switch ah.Spec.Backend { + case v1alpha2.AgentHarnessBackendOpenClaw, v1alpha2.AgentHarnessBackendNemoClaw: + return r.OpenClawBackend + case v1alpha2.AgentHarnessBackendHermes: + return r.HermesBackend + default: + return nil + } +} + +func (r *OpenShellAgentHarnessController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx).WithValues("agentHarness", req.NamespacedName) + + var ah v1alpha2.AgentHarness + if err := r.Client.Get(ctx, req.NamespacedName, &ah); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("get AgentHarness: %w", err) + } + if effectiveAgentHarnessRuntime(&ah) != v1alpha2.AgentHarnessRuntimeOpenshell { + 
return ctrl.Result{}, nil + } + + if !ah.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, &ah) + } + + if controllerutil.AddFinalizer(&ah, agentHarnessFinalizer) { + if err := r.Client.Update(ctx, &ah); err != nil { + return ctrl.Result{}, fmt.Errorf("add finalizer: %w", err) + } + return ctrl.Result{Requeue: true}, nil + } + + backend := r.backendFor(&ah) + if backend == nil { + return reconcileBackendUnavailable(ctx, r.Client, &ah, v1alpha2.AgentHarnessRuntimeOpenshell) + } + + return r.reconcileBackend(ctx, req, &ah, backend, log) +} + +func (r *OpenShellAgentHarnessController) reconcileBackend(ctx context.Context, req ctrl.Request, ah *v1alpha2.AgentHarness, backend sandboxbackend.AsyncBackend, log logr.Logger) (ctrl.Result, error) { + res, err := backend.EnsureAgentHarness(ctx, ah) + if err != nil { + log.Error(err, "EnsureAgentHarness failed") + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionFalse, + "EnsureFailed", err.Error()) + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, + "EnsureFailed", err.Error()) + if perr := patchAgentHarnessStatus(ctx, r.Client, ah); perr != nil { + return ctrl.Result{}, perr + } + return ctrl.Result{}, err + } + + ah.Status.BackendRef = &v1alpha2.AgentHarnessStatusRef{ + Backend: ah.Spec.Backend, + ID: res.Handle.ID, + } + if res.Endpoint != "" { + ah.Status.Connection = &v1alpha2.AgentHarnessConnection{Endpoint: res.Endpoint} + } + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionTrue, + "AgentHarnessAccepted", "backend accepted sandbox request") + + st, reason, msg := backend.GetStatus(ctx, res.Handle) + pending := postReadyBootstrapPending(ah) + if st == metav1.ConditionTrue && pending { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorReady, st, reason, msg) + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeBootstrapReady, metav1.ConditionFalse, + 
"BootstrapPending", + "waiting for post-ready bootstrap (OnAgentHarnessReady) to finish") + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, + "BootstrapPending", + "gateway sandbox is ready; waiting for post-ready bootstrap (OnAgentHarnessReady) to finish") + } else { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorReady, st, reason, msg) + if pending { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeBootstrapReady, metav1.ConditionFalse, + "ActorNotReady", "waiting for actor before post-ready bootstrap") + } + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, st, reason, msg) + } + ah.Status.ObservedGeneration = ah.Generation + + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return ctrl.Result{}, err + } + + if st != metav1.ConditionTrue { + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil + } + if pending { + if err := maybePostReadyBootstrap(ctx, client.ObjectKeyFromObject(ah), ah, res.Handle, backend); err != nil { + log.Error(err, "post-ready sandbox bootstrap failed") + return ctrl.Result{}, err + } + var latest v1alpha2.AgentHarness + if err := r.Client.Get(ctx, req.NamespacedName, &latest); err != nil { + return ctrl.Result{}, fmt.Errorf("get AgentHarness after bootstrap: %w", err) + } + st2, reason2, msg2 := backend.GetStatus(ctx, res.Handle) + setAgentHarnessCondition(&latest, v1alpha2.AgentHarnessConditionTypeActorReady, st2, reason2, msg2) + setAgentHarnessCondition(&latest, v1alpha2.AgentHarnessConditionTypeBootstrapReady, metav1.ConditionTrue, + "BootstrapComplete", "post-ready bootstrap completed") + setAgentHarnessCondition(&latest, v1alpha2.AgentHarnessConditionTypeReady, st2, reason2, msg2) + latest.Status.ObservedGeneration = latest.Generation + if err := r.Client.Status().Update(ctx, &latest); err != nil { + return ctrl.Result{}, fmt.Errorf("update AgentHarness status after bootstrap: %w", err) + } + } 
+ return ctrl.Result{}, nil +} + +func (r *OpenShellAgentHarnessController) reconcileDelete(ctx context.Context, ah *v1alpha2.AgentHarness) (ctrl.Result, error) { + if !controllerutil.ContainsFinalizer(ah, agentHarnessFinalizer) { + return ctrl.Result{}, nil + } + + if ah.Status.BackendRef != nil { + actorID := ah.Status.BackendRef.ID + if actorID != "" { + backend := r.backendFor(ah) + actorDone := true + var err error + if backend != nil { + actorDone, err = backend.DeleteAgentHarness(ctx, sandboxbackend.Handle{ID: actorID}) + } + if err != nil { + if r.Recorder != nil { + r.Recorder.Eventf(ah, nil, "Warning", "AgentHarnessDeleteFailed", "DeleteAgentHarness", "%s", err.Error()) + } + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, err + } + if !actorDone { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorReady, + metav1.ConditionFalse, "ActorDeleting", fmt.Sprintf("waiting for backend actor %q deletion", actorID)) + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil + } + } + ah.Status.BackendRef = nil + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return ctrl.Result{}, err + } + } + + controllerutil.RemoveFinalizer(ah, agentHarnessFinalizer) + if err := r.Client.Update(ctx, ah); err != nil { + return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err) + } + return ctrl.Result{}, nil +} + +// SetupWithManager registers the OpenShell AgentHarness controller with the manager. +func (r *OpenShellAgentHarnessController) SetupWithManager(mgr ctrl.Manager) error { + b := ctrl.NewControllerManagedBy(mgr). + WithOptions(controller.Options{NeedLeaderElection: new(true)}). 
+ For(&v1alpha2.AgentHarness{}, builder.WithPredicates(agentHarnessRuntimePredicate(v1alpha2.AgentHarnessRuntimeOpenshell))) + return b.Named("agentharness-openshell").Complete(r) +} diff --git a/go/core/internal/controller/agentharness_shared.go b/go/core/internal/controller/agentharness_shared.go new file mode 100644 index 0000000000..d0fd40cb22 --- /dev/null +++ b/go/core/internal/controller/agentharness_shared.go @@ -0,0 +1,148 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package controller + +import ( + "context" + "fmt" + "reflect" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" +) + +const ( + // agentHarnessFinalizer guarantees the backend sandbox is deleted before the + // Kubernetes object is removed. + agentHarnessFinalizer = "kagent.dev/agent-harness-backend-cleanup" + + // agentHarnessNotReadyRequeue is how long we wait before re-polling backend + // status while the sandbox is still provisioning. 
+ agentHarnessNotReadyRequeue = 10 * time.Second +) + +// +kubebuilder:rbac:groups=kagent.dev,resources=agentharnesses,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=kagent.dev,resources=agentharnesses/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=kagent.dev,resources=agentharnesses/finalizers,verbs=update + +func reconcileBackendUnavailable(ctx context.Context, kube client.Client, ah *v1alpha2.AgentHarness, runtime v1alpha2.AgentHarnessRuntime) (ctrl.Result, error) { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionFalse, + "BackendUnavailable", + fmt.Sprintf("no %s backend configured for %q", runtime, ah.Spec.Backend)) + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, + "BackendUnavailable", "") + if err := patchAgentHarnessStatus(ctx, kube, ah); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +func postReadyBootstrapPending(ah *v1alpha2.AgentHarness) bool { + cond := meta.FindStatusCondition(ah.Status.Conditions, v1alpha2.AgentHarnessConditionTypeBootstrapReady) + return cond == nil || cond.ObservedGeneration != ah.Generation || cond.Status != metav1.ConditionTrue +} + +func maybePostReadyBootstrap(ctx context.Context, key client.ObjectKey, ah *v1alpha2.AgentHarness, h sandboxbackend.Handle, async sandboxbackend.AsyncBackend) error { + if !postReadyBootstrapPending(ah) { + return nil + } + if err := async.OnAgentHarnessReady(ctx, ah, h); err != nil { + return err + } + ctrl.LoggerFrom(ctx).WithValues("agentHarness", key.String()).Info( + "recorded post-ready bootstrap for AgentHarness generation", "generation", ah.Generation) + return nil +} + +func patchAgentHarnessStatus(ctx context.Context, kube client.Client, ah *v1alpha2.AgentHarness) error { + var current v1alpha2.AgentHarness + if err := kube.Get(ctx, client.ObjectKeyFromObject(ah), ¤t); err != nil { + return fmt.Errorf("get AgentHarness before 
status update: %w", err) + } + if reflect.DeepEqual(current.Status, ah.Status) { + *ah = current + return nil + } + current.Status = ah.Status + if err := kube.Status().Update(ctx, ¤t); err != nil { + return fmt.Errorf("update AgentHarness status: %w", err) + } + *ah = current + return nil +} + +func effectiveAgentHarnessRuntime(ah *v1alpha2.AgentHarness) v1alpha2.AgentHarnessRuntime { + if ah.Spec.Runtime == "" { + return v1alpha2.AgentHarnessRuntimeOpenshell + } + return ah.Spec.Runtime +} + +func setAgentHarnessCondition(ah *v1alpha2.AgentHarness, t string, s metav1.ConditionStatus, reason, msg string) { + meta.SetStatusCondition(&ah.Status.Conditions, metav1.Condition{ + Type: t, + Status: s, + Reason: reason, + Message: msg, + ObservedGeneration: ah.Generation, + }) +} + +func agentHarnessPrimaryPredicate() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return true }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + if e.ObjectOld == nil || e.ObjectNew == nil { + return true + } + if e.ObjectNew.GetGeneration() != e.ObjectOld.GetGeneration() { + return true + } + if !reflect.DeepEqual(e.ObjectNew.GetLabels(), e.ObjectOld.GetLabels()) { + return true + } + return e.ObjectOld.GetDeletionTimestamp().IsZero() && !e.ObjectNew.GetDeletionTimestamp().IsZero() + }, + } +} + +func agentHarnessRuntimePredicate(runtime v1alpha2.AgentHarnessRuntime) predicate.Predicate { + primary := agentHarnessPrimaryPredicate() + return predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { + return primary.Create(e) && agentHarnessObjectMatchesRuntime(e.Object, runtime) + }, + DeleteFunc: func(e event.DeleteEvent) bool { + return primary.Delete(e) && agentHarnessObjectMatchesRuntime(e.Object, runtime) + }, + UpdateFunc: func(e event.UpdateEvent) bool { + return primary.Update(e) && + (agentHarnessObjectMatchesRuntime(e.ObjectOld, runtime) || 
agentHarnessObjectMatchesRuntime(e.ObjectNew, runtime)) + }, + } +} + +func agentHarnessObjectMatchesRuntime(obj client.Object, runtime v1alpha2.AgentHarnessRuntime) bool { + ah, ok := obj.(*v1alpha2.AgentHarness) + if !ok || ah == nil { + return false + } + return effectiveAgentHarnessRuntime(ah) == runtime +} diff --git a/go/core/internal/controller/agentharness_substrate_controller.go b/go/core/internal/controller/agentharness_substrate_controller.go new file mode 100644 index 0000000000..3c56e2073c --- /dev/null +++ b/go/core/internal/controller/agentharness_substrate_controller.go @@ -0,0 +1,290 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package controller + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/events" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" +) + +const ( + // substrateDeleteTimeout is the maximum time to wait for substrate cleanup during delete. 
+ substrateDeleteTimeout = 5 * time.Minute +) + +// +kubebuilder:rbac:groups=ate.dev,resources=workerpools,verbs=get;list;watch +// +kubebuilder:rbac:groups=ate.dev,resources=actortemplates,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=ate.dev,resources=actortemplates/status,verbs=get + +// SubstrateAgentHarnessController reconciles AgentHarness resources that use the +// Substrate runtime. +type SubstrateAgentHarnessController struct { + Client client.Client + Recorder events.EventRecorder + OpenClawBackend sandboxbackend.AsyncBackend + NemoClawBackend sandboxbackend.AsyncBackend + SubstrateLifecycle substrate.AgentHarnessLifecycle +} + +func (r *SubstrateAgentHarnessController) backendFor(ah *v1alpha2.AgentHarness) sandboxbackend.AsyncBackend { + switch ah.Spec.Backend { + case v1alpha2.AgentHarnessBackendOpenClaw: + return r.OpenClawBackend + case v1alpha2.AgentHarnessBackendNemoClaw: + return r.NemoClawBackend + default: + return nil + } +} + +func (r *SubstrateAgentHarnessController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx).WithValues("agentHarness", req.NamespacedName) + + var ah v1alpha2.AgentHarness + if err := r.Client.Get(ctx, req.NamespacedName, &ah); err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("get AgentHarness: %w", err) + } + if effectiveAgentHarnessRuntime(&ah) != v1alpha2.AgentHarnessRuntimeSubstrate { + return ctrl.Result{}, nil + } + + if !ah.DeletionTimestamp.IsZero() { + return r.reconcileDelete(ctx, &ah) + } + + if controllerutil.AddFinalizer(&ah, agentHarnessFinalizer) { + if err := r.Client.Update(ctx, &ah); err != nil { + return ctrl.Result{}, fmt.Errorf("add finalizer: %w", err) + } + return ctrl.Result{Requeue: true}, nil + } + + backend := r.backendFor(&ah) + if backend == nil { + return reconcileBackendUnavailable(ctx, r.Client, &ah, v1alpha2.AgentHarnessRuntimeSubstrate) + } + + 
lifecycleState, err := r.SubstrateLifecycle.EnsureGeneratedTemplate(ctx, &ah) + if err != nil { + log.Error(err, "substrate lifecycle reconciliation failed") + setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionFalse, + "SubstrateLifecycleFailed", err.Error()) + setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, + "SubstrateLifecycleFailed", "") + if perr := patchAgentHarnessStatus(ctx, r.Client, &ah); perr != nil { + return ctrl.Result{}, perr + } + return ctrl.Result{}, err + } + if lifecycleState.ActorTemplateReady { + setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeActorTemplateReady, + metav1.ConditionTrue, "Ready", "ActorTemplate golden snapshot is ready") + } else { + setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeActorTemplateReady, + metav1.ConditionFalse, "NotReady", "waiting for ActorTemplate golden snapshot") + } + if err := patchAgentHarnessStatus(ctx, r.Client, &ah); err != nil { + return ctrl.Result{}, err + } + if !lifecycleState.ActorTemplateReady { + setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionTrue, + "SubstrateLifecyclePending", "waiting for ActorTemplate golden snapshot") + setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeActorReady, metav1.ConditionFalse, + "ActorNotCreated", "waiting for ActorTemplate before creating actor") + setAgentHarnessCondition(&ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, + "ActorTemplateNotReady", "ActorTemplate is not Ready yet") + if err := patchAgentHarnessStatus(ctx, r.Client, &ah); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil + } + if err := r.Client.Get(ctx, req.NamespacedName, &ah); err != nil { + return ctrl.Result{}, fmt.Errorf("reload AgentHarness after substrate lifecycle reconciliation: %w", err) + } + + return r.reconcileBackend(ctx, req, 
&ah, backend, log) +} + +func (r *SubstrateAgentHarnessController) reconcileBackend(ctx context.Context, req ctrl.Request, ah *v1alpha2.AgentHarness, backend sandboxbackend.AsyncBackend, log logr.Logger) (ctrl.Result, error) { + res, err := backend.EnsureAgentHarness(ctx, ah) + if err != nil { + log.Error(err, "EnsureAgentHarness failed") + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionFalse, + "EnsureFailed", err.Error()) + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, + "EnsureFailed", err.Error()) + if perr := patchAgentHarnessStatus(ctx, r.Client, ah); perr != nil { + return ctrl.Result{}, perr + } + return ctrl.Result{}, err + } + + ah.Status.BackendRef = &v1alpha2.AgentHarnessStatusRef{ + Backend: ah.Spec.Backend, + ID: res.Handle.ID, + } + if res.Endpoint != "" { + ah.Status.Connection = &v1alpha2.AgentHarnessConnection{Endpoint: res.Endpoint} + } + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeAccepted, metav1.ConditionTrue, + "AgentHarnessAccepted", "backend accepted sandbox request") + + st, reason, msg := backend.GetStatus(ctx, res.Handle) + pending := postReadyBootstrapPending(ah) + if st == metav1.ConditionTrue && pending { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorReady, st, reason, msg) + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeBootstrapReady, metav1.ConditionFalse, + "BootstrapPending", + "waiting for post-ready bootstrap (OnAgentHarnessReady) to finish") + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, metav1.ConditionFalse, + "BootstrapPending", + "gateway sandbox is ready; waiting for post-ready bootstrap (OnAgentHarnessReady) to finish") + } else { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorReady, st, reason, msg) + if pending { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeBootstrapReady, metav1.ConditionFalse, + 
"ActorNotReady", "waiting for actor before post-ready bootstrap") + } + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, st, reason, msg) + } + ah.Status.ObservedGeneration = ah.Generation + + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return ctrl.Result{}, err + } + + if st != metav1.ConditionTrue { + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil + } + if pending { + if err := maybePostReadyBootstrap(ctx, client.ObjectKeyFromObject(ah), ah, res.Handle, backend); err != nil { + log.Error(err, "post-ready sandbox bootstrap failed") + return ctrl.Result{}, err + } + var latest v1alpha2.AgentHarness + if err := r.Client.Get(ctx, req.NamespacedName, &latest); err != nil { + return ctrl.Result{}, fmt.Errorf("get AgentHarness after bootstrap: %w", err) + } + st2, reason2, msg2 := backend.GetStatus(ctx, res.Handle) + setAgentHarnessCondition(&latest, v1alpha2.AgentHarnessConditionTypeActorReady, st2, reason2, msg2) + setAgentHarnessCondition(&latest, v1alpha2.AgentHarnessConditionTypeBootstrapReady, metav1.ConditionTrue, + "BootstrapComplete", "post-ready bootstrap completed") + setAgentHarnessCondition(&latest, v1alpha2.AgentHarnessConditionTypeReady, st2, reason2, msg2) + latest.Status.ObservedGeneration = latest.Generation + if err := r.Client.Status().Update(ctx, &latest); err != nil { + return ctrl.Result{}, fmt.Errorf("update AgentHarness status after bootstrap: %w", err) + } + } + return ctrl.Result{}, nil +} + +func (r *SubstrateAgentHarnessController) reconcileDelete(ctx context.Context, ah *v1alpha2.AgentHarness) (ctrl.Result, error) { + if !controllerutil.ContainsFinalizer(ah, agentHarnessFinalizer) { + return ctrl.Result{}, nil + } + + if substrateDeleteTimedOut(ah) { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeReady, + metav1.ConditionFalse, "DeleteTimeout", "substrate cleanup exceeded timeout") + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return 
ctrl.Result{}, err + } + return ctrl.Result{}, fmt.Errorf("substrate cleanup timed out for AgentHarness %s", ah.Name) + } + + if ah.Status.BackendRef != nil { + actorID := ah.Status.BackendRef.ID + if actorID != "" { + backend := r.backendFor(ah) + actorDone := true + var err error + if backend != nil { + actorDone, err = backend.DeleteAgentHarness(ctx, sandboxbackend.Handle{ID: actorID}) + } + if err != nil { + if r.Recorder != nil { + r.Recorder.Eventf(ah, nil, "Warning", "AgentHarnessDeleteFailed", "DeleteAgentHarness", "%s", err.Error()) + } + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, err + } + if !actorDone { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorReady, + metav1.ConditionFalse, "ActorDeleting", fmt.Sprintf("waiting for substrate actor %q deletion", actorID)) + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil + } + } + ah.Status.BackendRef = nil + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return ctrl.Result{}, err + } + } + + complete, err := r.SubstrateLifecycle.CleanupGeneratedTemplate(ctx, ah) + if err != nil { + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, fmt.Errorf("cleanup substrate lifecycle: %w", err) + } + if !complete { + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorTemplateReady, + metav1.ConditionFalse, "GoldenActorDeleting", "waiting for generated ActorTemplate golden actor deletion") + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil + } + setAgentHarnessCondition(ah, v1alpha2.AgentHarnessConditionTypeActorTemplateReady, + metav1.ConditionFalse, "Deleting", "generated ActorTemplate will be garbage collected") + if err := patchAgentHarnessStatus(ctx, r.Client, ah); err != nil { + return 
ctrl.Result{}, err + } + + controllerutil.RemoveFinalizer(ah, agentHarnessFinalizer) + if err := r.Client.Update(ctx, ah); err != nil { + return ctrl.Result{}, fmt.Errorf("remove finalizer: %w", err) + } + return ctrl.Result{}, nil +} + +func substrateDeleteTimedOut(ah *v1alpha2.AgentHarness) bool { + if ah == nil || ah.DeletionTimestamp.IsZero() { + return false + } + return time.Since(ah.DeletionTimestamp.Time) > substrateDeleteTimeout +} + +// SetupWithManager registers the Substrate AgentHarness controller with the manager. +func (r *SubstrateAgentHarnessController) SetupWithManager(mgr ctrl.Manager) error { + b := ctrl.NewControllerManagedBy(mgr). + WithOptions(controller.Options{NeedLeaderElection: new(true)}). + For(&v1alpha2.AgentHarness{}, builder.WithPredicates(agentHarnessRuntimePredicate(v1alpha2.AgentHarnessRuntimeSubstrate))) + b = r.substrateWatches(b) + return b.Named("agentharness-substrate").Complete(r) +} diff --git a/go/core/internal/controller/agentharness_substrate_watches.go b/go/core/internal/controller/agentharness_substrate_watches.go new file mode 100644 index 0000000000..9b3153f92b --- /dev/null +++ b/go/core/internal/controller/agentharness_substrate_watches.go @@ -0,0 +1,38 @@ +package controller + +import ( + "context" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" +) + +func (r *SubstrateAgentHarnessController) enqueueAgentHarnessForSubstrateResource(ctx context.Context, obj client.Object) []reconcile.Request { + harnessName := substrate.HarnessNameFromLabels(obj.GetLabels()) + if harnessName == "" { + return nil + } + return []reconcile.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: obj.GetNamespace(), + 
Name: harnessName, + }, + }} +} + +func (r *SubstrateAgentHarnessController) substrateWatches(b *builder.Builder) *builder.Builder { + if r == nil { + return b + } + return b. + Watches( + &atev1alpha1.ActorTemplate{}, + handler.EnqueueRequestsFromMapFunc(r.enqueueAgentHarnessForSubstrateResource), + ) +} diff --git a/go/core/internal/httpserver/handlers/agentharness_gateway.go b/go/core/internal/httpserver/handlers/agentharness_gateway.go new file mode 100644 index 0000000000..2762605c37 --- /dev/null +++ b/go/core/internal/httpserver/handlers/agentharness_gateway.go @@ -0,0 +1,239 @@ +package handlers + +import ( + "context" + "fmt" + "net/http" + "net/http/httputil" + "net/url" + "strings" + "time" + + "github.com/gorilla/mux" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" +) + +const ( + // OpenClaw 2026.3.28+ returns 403 without operator scopes on HTTP/WS when only Bearer token is sent. + openclawDefaultOperatorScopes = "operator.admin" + // Origin OpenClaw accepts by default for bind=lan port=80 (localhost/127.0.0.1 on gateway port). + openclawLoopbackOrigin = "http://127.0.0.1:80" +) + +// AgentHarnessGatewayConfig configures Substrate harness HTTP/WebSocket proxy. +// Traffic is proxied through atenet-router (Envoy) using actor Host-based routing. +type AgentHarnessGatewayConfig struct { + AtenetRouterURL string +} + +// HandleAgentHarnessGateway proxies browser traffic to the actor OpenClaw gateway via atenet-router. 
+func (h *Handlers) HandleAgentHarnessGateway(w ErrorResponseWriter, r *http.Request) { + log := ctrllog.FromContext(r.Context()).WithName("agentharness-gateway") + if h.AgentHarnessGateway == nil { + http.Error(w, "substrate gateway proxy is not configured", http.StatusServiceUnavailable) + return + } + + vars := mux.Vars(r) + namespace := strings.TrimSpace(vars["namespace"]) + name := strings.TrimSpace(vars["name"]) + if namespace == "" || name == "" { + http.Error(w, "namespace and name are required", http.StatusBadRequest) + return + } + + var ah v1alpha2.AgentHarness + if err := h.KubeClient.Get(r.Context(), types.NamespacedName{Namespace: namespace, Name: name}, &ah); err != nil { + if apierrors.IsNotFound(err) { + http.Error(w, "AgentHarness not found", http.StatusNotFound) + return + } + log.Error(err, "get AgentHarness") + http.Error(w, "failed to load AgentHarness", http.StatusInternalServerError) + return + } + + runtime := ah.Spec.Runtime + if runtime == "" { + runtime = v1alpha2.AgentHarnessRuntimeOpenshell + } + if runtime != v1alpha2.AgentHarnessRuntimeSubstrate { + http.Error(w, "gateway proxy is only available for runtime=substrate", http.StatusBadRequest) + return + } + if ah.Status.BackendRef == nil || ah.Status.BackendRef.ID == "" { + http.Error(w, "harness has no substrate actor yet", http.StatusServiceUnavailable) + return + } + + token, err := h.resolveHarnessGatewayToken(r.Context(), &ah) + if err != nil { + log.Error(err, "resolve gateway token") + http.Error(w, "gateway token not configured", http.StatusInternalServerError) + return + } + + target, upstreamHost, err := h.resolveSubstrateGatewayTarget(r.Context(), &ah) + if err != nil { + log.Info("resolve substrate gateway target failed", "error", err) + http.Error(w, err.Error(), http.StatusServiceUnavailable) + return + } + + publicPrefix := agentHarnessGatewayPublicPrefix(namespace, name) + + _, redirectTo, ok := resolveGatewayUpstreamPath(r.URL.Path, namespace, name, 
isWebSocketUpgrade(r)) + if !ok { + http.NotFound(w, r) + return + } + // Browsers do not complete WebSocket handshakes through 30x redirects. + if redirectTo != "" && !isWebSocketUpgrade(r) { + dest := redirectTo + if r.URL.RawQuery != "" { + dest += "?" + r.URL.RawQuery + } + http.Redirect(w, r, dest, http.StatusPermanentRedirect) + return + } + + proxy := newAgentHarnessGatewayProxy(target, upstreamHost, token, publicPrefix, namespace, name, log) + proxy.ServeHTTP(w, r) +} + +func (h *Handlers) resolveSubstrateGatewayTarget(ctx context.Context, ah *v1alpha2.AgentHarness) (*url.URL, string, error) { + cfg := h.AgentHarnessGateway + if cfg == nil { + return nil, "", fmt.Errorf("substrate gateway is not configured") + } + + actorID := strings.TrimSpace(ah.Status.BackendRef.ID) + target, host, err := substrate.GatewayRouterTarget(cfg.AtenetRouterURL, actorID) + if err != nil { + return nil, "", fmt.Errorf("substrate actor %q: %w", actorID, err) + } + ctrllog.FromContext(ctx).WithName("agentharness-gateway").Info( + "proxying via atenet-router", + "actor", actorID, + "router", target.String(), + "host", host, + ) + return target, host, nil +} + +func agentHarnessHarnessBase(namespace, name string) string { + return "/api/agentharnesses/" + namespace + "/" + name +} + +func agentHarnessGatewayPublicPrefix(namespace, name string) string { + return agentHarnessHarnessBase(namespace, name) + "/gateway/" +} + +// resolveGatewayUpstreamPath maps the public URL to the upstream path on the actor. +// redirectTo is set when the browser should use a trailing slash under /gateway/. +// OpenClaw is configured with the same controlUi.basePath, so the proxy preserves +// the public gateway base path when forwarding to the actor. 
+func resolveGatewayUpstreamPath(requestPath, namespace, name string, wsUpgrade bool) (upstreamPath, redirectTo string, ok bool) { + base := agentHarnessHarnessBase(namespace, name) + if !strings.HasPrefix(requestPath, base) { + return "", "", false + } + rel := strings.TrimPrefix(requestPath, base) + if rel == "" { + return "", agentHarnessGatewayPublicPrefix(namespace, name), true + } + + switch { + case rel == "/gateway": + upstream := agentHarnessGatewayPublicPrefix(namespace, name) + if wsUpgrade { + return upstream, "", true + } + return upstream, upstream, true + case strings.HasPrefix(rel, "/gateway/"): + return requestPath, "", true + default: + return "", "", false + } +} + +// normalizeOpenClawBrowserOrigin rewrites Origin/Referer so OpenClaw accepts WS/API from kagent-ui +// (e.g. http://localhost:8001) while the gateway listens on the actor pod :80. +func normalizeOpenClawBrowserOrigin(req *http.Request) { + if req == nil { + return + } + if req.Header.Get("Origin") != "" { + req.Header.Set("Origin", openclawLoopbackOrigin) + } + if req.Header.Get("Referer") != "" { + req.Header.Set("Referer", openclawLoopbackOrigin+"/") + } +} + +func isWebSocketUpgrade(r *http.Request) bool { + if r == nil { + return false + } + return strings.EqualFold(r.Header.Get("Upgrade"), "websocket") && + strings.Contains(strings.ToLower(r.Header.Get("Connection")), "upgrade") +} + +func newAgentHarnessGatewayProxy(target *url.URL, upstreamHost, token, publicPrefix, namespace, name string, log interface { + Error(error, string, ...any) +}) *httputil.ReverseProxy { + proxy := &httputil.ReverseProxy{ + FlushInterval: -1, + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + ResponseHeaderTimeout: 0, + IdleConnTimeout: 90 * time.Second, + }, + Rewrite: func(pr *httputil.ProxyRequest) { + pr.SetURL(target) + pr.Out.Host = upstreamHost + if token != "" { + pr.Out.Header.Set("Authorization", "Bearer "+token) + } + pr.Out.Header.Set("x-openclaw-scopes", 
openclawDefaultOperatorScopes) + normalizeOpenClawBrowserOrigin(pr.Out) + subPath, _, pathOK := resolveGatewayUpstreamPath(pr.In.URL.Path, namespace, name, isWebSocketUpgrade(pr.In)) + if !pathOK { + subPath = "/" + } + if subPath == "" { + subPath = "/" + } else if !strings.HasPrefix(subPath, "/") { + subPath = "/" + subPath + } + pr.Out.URL.Path = subPath + pr.Out.URL.RawPath = subPath + }, + } + proxy.ModifyResponse = func(resp *http.Response) error { + if resp.StatusCode == http.StatusSwitchingProtocols { + return nil + } + + if loc := resp.Header.Get("Location"); loc != "" { + publicBase := strings.TrimSuffix(publicPrefix, "/") + if strings.HasPrefix(loc, "/") && !strings.HasPrefix(loc, publicBase) { + resp.Header.Set("Location", publicBase+loc) + } + } + return nil + } + proxy.ErrorHandler = func(rw http.ResponseWriter, req *http.Request, proxyErr error) { + log.Error(proxyErr, "gateway proxy error", "host", upstreamHost) + http.Error(rw, "gateway proxy error", http.StatusBadGateway) + } + return proxy +} + +func (h *Handlers) resolveHarnessGatewayToken(ctx context.Context, ah *v1alpha2.AgentHarness) (string, error) { + return substrate.ResolveGatewayToken(ctx, h.KubeClient, ah) +} diff --git a/go/core/internal/httpserver/handlers/agentharness_gateway_path_test.go b/go/core/internal/httpserver/handlers/agentharness_gateway_path_test.go new file mode 100644 index 0000000000..433bcd5205 --- /dev/null +++ b/go/core/internal/httpserver/handlers/agentharness_gateway_path_test.go @@ -0,0 +1,89 @@ +package handlers + +import ( + "net/http" + "testing" +) + +func TestResolveGatewayUpstreamPath(t *testing.T) { + t.Parallel() + ns, name := "kagent", "my-claw" + public := agentHarnessGatewayPublicPrefix(ns, name) + + tests := []struct { + name string + path string + wsUpgrade bool + wantUp string + wantRedir string + wantOK bool + }{ + { + name: "harness root redirects", + path: "/api/agentharnesses/kagent/my-claw", + wantRedir: public, + wantOK: true, + }, + { + name: 
"gateway without slash redirects", + path: "/api/agentharnesses/kagent/my-claw/gateway", + wantUp: public, + wantRedir: public, + wantOK: true, + }, + { + name: "gateway without slash websocket", + path: "/api/agentharnesses/kagent/my-claw/gateway", + wsUpgrade: true, + wantUp: public, + wantOK: true, + }, + { + name: "gateway index", + path: "/api/agentharnesses/kagent/my-claw/gateway/", + wantUp: public, + wantOK: true, + }, + { + name: "gateway asset", + path: "/api/agentharnesses/kagent/my-claw/gateway/assets/foo.js", + wantUp: "/api/agentharnesses/kagent/my-claw/gateway/assets/foo.js", + wantOK: true, + }, + { + name: "unknown path", + path: "/api/agentharnesses/kagent/my-claw/api/v1/foo", + wantOK: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + up, redir, ok := resolveGatewayUpstreamPath(tt.path, ns, name, tt.wsUpgrade) + if ok != tt.wantOK { + t.Fatalf("ok = %v, want %v", ok, tt.wantOK) + } + if up != tt.wantUp { + t.Fatalf("upstream = %q, want %q", up, tt.wantUp) + } + if redir != tt.wantRedir { + t.Fatalf("redirect = %q, want %q", redir, tt.wantRedir) + } + }) + } +} + +func TestIsWebSocketUpgrade(t *testing.T) { + t.Parallel() + req, _ := http.NewRequest(http.MethodGet, "http://example.com/api/x/gateway", nil) + req.Header.Set("Connection", "Upgrade") + req.Header.Set("Upgrade", "websocket") + if !isWebSocketUpgrade(req) { + t.Fatal("expected websocket upgrade") + } + req2, _ := http.NewRequest(http.MethodGet, "http://example.com/", nil) + if isWebSocketUpgrade(req2) { + t.Fatal("expected not websocket upgrade") + } +} diff --git a/go/core/internal/httpserver/handlers/agentharness_gateway_test.go b/go/core/internal/httpserver/handlers/agentharness_gateway_test.go new file mode 100644 index 0000000000..943aff65ce --- /dev/null +++ b/go/core/internal/httpserver/handlers/agentharness_gateway_test.go @@ -0,0 +1,117 @@ +package handlers + +import ( + "io" + "net/http" + "net/http/httptest" + 
"net/http/httputil" + "net/url" + "strings" + "testing" + + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" +) + +func TestGatewayProxyForwardsToAtenetRouterWithActorHost(t *testing.T) { + t.Parallel() + const actorHost = "ahr-kagent-my-claw.actors.resources.substrate.ate.dev" + const token = "some-token" + ns, name := "kagent", "my-claw" + publicPrefix := agentHarnessGatewayPublicPrefix(ns, name) + + var gotHost, gotAuth, gotScopes, gotPath string + upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotHost = r.Host + gotAuth = r.Header.Get("Authorization") + gotScopes = r.Header.Get("x-openclaw-scopes") + gotPath = r.URL.Path + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte("ok")) + })) + defer upstream.Close() + + target, err := url.Parse(upstream.URL) + if err != nil { + t.Fatal(err) + } + + proxy := newAgentHarnessGatewayProxy(target, actorHost, token, publicPrefix, ns, name, testLog{t}) + req := httptest.NewRequest(http.MethodGet, publicPrefix, nil) + rec := httptest.NewRecorder() + proxy.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, body = %s", rec.Code, rec.Body.String()) + } + if gotHost != actorHost { + t.Fatalf("upstream Host = %q, want %q", gotHost, actorHost) + } + if gotAuth != "Bearer "+token { + t.Fatalf("Authorization = %q", gotAuth) + } + if gotScopes != openclawDefaultOperatorScopes { + t.Fatalf("x-openclaw-scopes = %q", gotScopes) + } + if gotPath != publicPrefix { + t.Fatalf("upstream path = %q, want %q", gotPath, publicPrefix) + } + body, _ := io.ReadAll(rec.Body) + if !strings.Contains(string(body), "ok") { + t.Fatalf("response body missing upstream content: %s", body) + } +} + +func TestGatewayProxyRewriteTargetsAtenetRouterHostOnWebSocketPath(t *testing.T) { + t.Parallel() + const actorHost = "ahr-kagent-my-claw.actors.resources.substrate.ate.dev" + ns, name := "kagent", "my-claw" + publicPrefix := 
agentHarnessGatewayPublicPrefix(ns, name) + + target, host, err := substrate.GatewayRouterTarget(substrate.DefaultAtenetRouterURL, "ahr-kagent-my-claw") + if err != nil { + t.Fatal(err) + } + if host != actorHost { + t.Fatalf("host = %q, want %q", host, actorHost) + } + proxy := newAgentHarnessGatewayProxy(target, host, "tok", publicPrefix, ns, name, testLog{t}) + req := httptest.NewRequest(http.MethodGet, strings.TrimSuffix(publicPrefix, "/"), nil) + req.Header.Set("Connection", "Upgrade") + req.Header.Set("Upgrade", "websocket") + req.Header.Set("Origin", "http://localhost:8001") + req.Header.Set("Referer", "http://localhost:8001/api/agentharnesses/kagent/my-claw/gateway/") + outReq := req.Clone(req.Context()) + + proxy.Rewrite(&httputil.ProxyRequest{In: req, Out: outReq}) + + if outReq.Host != actorHost { + t.Fatalf("Host = %q, want actor host", outReq.Host) + } + if outReq.URL.Host != target.Host { + t.Fatalf("URL.Host = %q, want router %q", outReq.URL.Host, target.Host) + } + if outReq.URL.Path != publicPrefix { + t.Fatalf("URL.Path = %q, want %q", outReq.URL.Path, publicPrefix) + } + if outReq.Header.Get("Authorization") != "Bearer tok" { + t.Fatalf("missing Authorization") + } + if outReq.Header.Get("x-openclaw-scopes") != openclawDefaultOperatorScopes { + t.Fatalf("missing scopes header") + } + if outReq.Header.Get("Origin") != openclawLoopbackOrigin { + t.Fatalf("Origin = %q, want %q", outReq.Header.Get("Origin"), openclawLoopbackOrigin) + } + if outReq.Header.Get("Referer") != openclawLoopbackOrigin+"/" { + t.Fatalf("Referer = %q", outReq.Header.Get("Referer")) + } +} + +type testLog struct { + t *testing.T +} + +func (l testLog) Error(err error, msg string, _ ...any) { + l.t.Helper() + l.t.Logf("%s: %v", msg, err) +} diff --git a/go/core/internal/httpserver/handlers/agents.go b/go/core/internal/httpserver/handlers/agents.go index 59c68ce27f..96249cf9d3 100644 --- a/go/core/internal/httpserver/handlers/agents.go +++ 
b/go/core/internal/httpserver/handlers/agents.go @@ -160,19 +160,13 @@ func (h *AgentsHandler) openshellAgentHarnessAgentResponse(ctx context.Context, } } + runtime := sb.Spec.Runtime + if runtime == "" { + runtime = v1alpha2.AgentHarnessRuntimeOpenshell + } + gatewayName := fmt.Sprintf("%s-%s", sb.Namespace, sb.Name) desc := strings.TrimSpace(sb.Spec.Description) - entry := &api.OpenshellAgentHarnessListEntry{ - Backend: sb.Spec.Backend, - GatewaySandboxName: gatewayName, - ModelConfigRef: sb.Spec.ModelConfigRef, - } - if sb.Status.BackendRef != nil { - entry.BackendRefID = sb.Status.BackendRef.ID - } - if sb.Status.Connection != nil { - entry.Endpoint = sb.Status.Connection.Endpoint - } resp := api.AgentResponse{ ID: id, @@ -184,9 +178,39 @@ func (h *AgentsHandler) openshellAgentHarnessAgentResponse(ctx context.Context, Description: desc, }, }, - DeploymentReady: ready, - Accepted: accepted, - OpenshellAgentHarness: entry, + DeploymentReady: ready, + Accepted: accepted, + } + + switch runtime { + case v1alpha2.AgentHarnessRuntimeSubstrate: + subEntry := &api.SubstrateAgentHarnessListEntry{ + Backend: sb.Spec.Backend, + Runtime: runtime, + ModelConfigRef: sb.Spec.ModelConfigRef, + GatewayUIPath: fmt.Sprintf("/api/agentharnesses/%s/%s/gateway/", sb.Namespace, sb.Name), + } + if sb.Status.BackendRef != nil { + subEntry.BackendRefID = sb.Status.BackendRef.ID + subEntry.ActorID = sb.Status.BackendRef.ID + } + if sb.Status.Connection != nil { + subEntry.Endpoint = sb.Status.Connection.Endpoint + } + resp.SubstrateAgentHarness = subEntry + default: + entry := &api.OpenshellAgentHarnessListEntry{ + Backend: sb.Spec.Backend, + GatewaySandboxName: gatewayName, + ModelConfigRef: sb.Spec.ModelConfigRef, + } + if sb.Status.BackendRef != nil { + entry.BackendRefID = sb.Status.BackendRef.ID + } + if sb.Status.Connection != nil { + entry.Endpoint = sb.Status.Connection.Endpoint + } + resp.OpenshellAgentHarness = entry } mcRef := strings.TrimSpace(sb.Spec.ModelConfigRef) diff 
--git a/go/core/internal/httpserver/handlers/handlers.go b/go/core/internal/httpserver/handlers/handlers.go index 13a66adeb9..a0fab3a3a7 100644 --- a/go/core/internal/httpserver/handlers/handlers.go +++ b/go/core/internal/httpserver/handlers/handlers.go @@ -8,10 +8,14 @@ import ( "github.com/kagent-dev/kagent/go/core/internal/controller/reconciler" "github.com/kagent-dev/kagent/go/core/pkg/auth" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" ) // Handlers holds all the HTTP handler components type Handlers struct { + KubeClient client.Client + AgentHarnessGateway *AgentHarnessGatewayConfig + Health *HealthHandler ModelConfig *ModelConfigHandler Model *ModelHandler @@ -29,6 +33,7 @@ type Handlers struct { Checkpoints *CheckpointsHandler CrewAI *CrewAIHandler CurrentUser *CurrentUserHandler + Substrate *SubstrateHandler } // Base holds common dependencies for all handlers @@ -43,7 +48,18 @@ type Base struct { } // NewHandlers creates a new Handlers instance with all handler components. 
-func NewHandlers(kubeClient client.Client, defaultModelConfig types.NamespacedName, dbService database.Client, watchedNamespaces []string, authorizer auth.Authorizer, proxyURL string, rcnclr reconciler.KagentReconciler, sandboxBackend sandboxbackend.Backend) *Handlers { +func NewHandlers( + kubeClient client.Client, + defaultModelConfig types.NamespacedName, + dbService database.Client, + watchedNamespaces []string, + authorizer auth.Authorizer, + proxyURL string, + rcnclr reconciler.KagentReconciler, + sandboxBackend sandboxbackend.Backend, + agentHarnessGateway *AgentHarnessGatewayConfig, + substrateAteClient *substrate.Client, +) *Handlers { base := &Base{ KubeClient: kubeClient, DefaultModelConfig: defaultModelConfig, @@ -55,6 +71,8 @@ func NewHandlers(kubeClient client.Client, defaultModelConfig types.NamespacedNa } return &Handlers{ + KubeClient: kubeClient, + AgentHarnessGateway: agentHarnessGateway, Health: NewHealthHandler(), ModelConfig: NewModelConfigHandler(base), Model: NewModelHandler(base), @@ -72,5 +90,6 @@ func NewHandlers(kubeClient client.Client, defaultModelConfig types.NamespacedNa Checkpoints: NewCheckpointsHandler(base), CrewAI: NewCrewAIHandler(base), CurrentUser: NewCurrentUserHandler(), + Substrate: NewSubstrateHandler(base, substrateAteClient), } } diff --git a/go/core/internal/httpserver/handlers/substrate.go b/go/core/internal/httpserver/handlers/substrate.go new file mode 100644 index 0000000000..47c87d4351 --- /dev/null +++ b/go/core/internal/httpserver/handlers/substrate.go @@ -0,0 +1,249 @@ +package handlers + +import ( + "context" + "fmt" + "net/http" + "slices" + "strings" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/agent-substrate/substrate/proto/ateapipb" + api "github.com/kagent-dev/kagent/go/api/httpapi" + "github.com/kagent-dev/kagent/go/core/internal/httpserver/errors" + "github.com/kagent-dev/kagent/go/core/pkg/auth" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" 
+ utilvalidation "k8s.io/apimachinery/pkg/util/validation" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" +) + +// SubstrateHandler exposes Agent Substrate inventory for the UI. +type SubstrateHandler struct { + *Base + AteClient *substrate.Client +} + +// NewSubstrateHandler creates a SubstrateHandler. +func NewSubstrateHandler(base *Base, ateClient *substrate.Client) *SubstrateHandler { + return &SubstrateHandler{Base: base, AteClient: ateClient} +} + +// HandleGetSubstrateStatus handles GET /api/substrate/status?namespace=… +func (h *SubstrateHandler) HandleGetSubstrateStatus(w ErrorResponseWriter, r *http.Request) { + log := ctrllog.FromContext(r.Context()).WithName("substrate-handler").WithValues("operation", "status") + if err := Check(h.Authorizer, r, auth.Resource{Type: "Agent"}); err != nil { + w.RespondWithError(err) + return + } + + namespace := strings.TrimSpace(r.URL.Query().Get("namespace")) + if namespace != "" { + if errs := utilvalidation.IsDNS1123Label(namespace); len(errs) > 0 { + w.RespondWithError(errors.NewBadRequestError( + fmt.Sprintf("invalid namespace %q: %s", namespace, strings.Join(errs, ", ")), + nil, + )) + return + } + } + + namespaces, err := h.substrateNamespaces(namespace) + if err != nil { + w.RespondWithError(err) + return + } + + resp := api.SubstrateStatusResponse{ + Enabled: h.AteClient != nil, + WorkerPools: []api.SubstrateWorkerPoolEntry{}, + ActorTemplates: []api.SubstrateActorTemplateEntry{}, + Actors: []api.SubstrateActorEntry{}, + Workers: []api.SubstrateWorkerEntry{}, + } + + for _, ns := range namespaces { + wpEntries, tmplEntries, err := h.listSubstrateCRs(r.Context(), ns) + if err != nil { + log.Error(err, "list substrate CRs", "namespace", ns) + w.RespondWithError(errors.NewInternalServerError("Failed to list substrate resources from Kubernetes", err)) + return + } + resp.WorkerPools = append(resp.WorkerPools, wpEntries...) 
+ resp.ActorTemplates = append(resp.ActorTemplates, tmplEntries...) + } + + if h.AteClient != nil { + actors, workers, ateErr := h.listAteAPIState(r.Context(), namespaces) + resp.Actors = actors + resp.Workers = workers + if ateErr != nil { + resp.AteAPIError = ateErr.Error() + log.Error(ateErr, "list ate-api state") + } + } + + slices.SortStableFunc(resp.WorkerPools, compareWorkerPool) + slices.SortStableFunc(resp.ActorTemplates, compareActorTemplate) + slices.SortStableFunc(resp.Actors, compareActor) + slices.SortStableFunc(resp.Workers, compareWorker) + + data := api.NewResponse(resp, "Successfully listed substrate status", false) + RespondWithJSON(w, http.StatusOK, data) +} + +func (h *SubstrateHandler) substrateNamespaces(requested string) ([]string, error) { + if requested != "" { + return []string{requested}, nil + } + if len(h.WatchedNamespaces) > 0 { + return slices.Clone(h.WatchedNamespaces), nil + } + return []string{""}, nil +} + +func (h *SubstrateHandler) listSubstrateCRs(ctx context.Context, namespace string) ([]api.SubstrateWorkerPoolEntry, []api.SubstrateActorTemplateEntry, error) { + var listOpts []client.ListOption + if namespace != "" { + listOpts = append(listOpts, client.InNamespace(namespace)) + } + + wpList := &atev1alpha1.WorkerPoolList{} + if err := h.KubeClient.List(ctx, wpList, listOpts...); err != nil { + return nil, nil, err + } + tmplList := &atev1alpha1.ActorTemplateList{} + if err := h.KubeClient.List(ctx, tmplList, listOpts...); err != nil { + return nil, nil, err + } + + workerPools := make([]api.SubstrateWorkerPoolEntry, 0, len(wpList.Items)) + for i := range wpList.Items { + wp := &wpList.Items[i] + workerPools = append(workerPools, api.SubstrateWorkerPoolEntry{ + Namespace: wp.Namespace, + Name: wp.Name, + Replicas: wp.Spec.Replicas, + AteomImage: wp.Spec.AteomImage, + }) + } + + templates := make([]api.SubstrateActorTemplateEntry, 0, len(tmplList.Items)) + for i := range tmplList.Items { + tmpl := &tmplList.Items[i] + entry := 
api.SubstrateActorTemplateEntry{ + Namespace: tmpl.Namespace, + Name: tmpl.Name, + Phase: string(tmpl.Status.Phase), + GoldenActorID: tmpl.Status.GoldenActorID, + GoldenSnapshot: tmpl.Status.GoldenSnapshot, + ManagedByKagent: tmpl.Labels["app.kubernetes.io/managed-by"] == "kagent", + } + if harness := strings.TrimSpace(tmpl.Labels[substrate.HarnessLabelKey]); harness != "" { + entry.HarnessName = harness + } + if ref := tmpl.Spec.WorkerPoolRef; ref.Name != "" { + wpNS := ref.Namespace + if wpNS == "" { + wpNS = tmpl.Namespace + } + entry.WorkerPoolRef = wpNS + "/" + ref.Name + } + templates = append(templates, entry) + } + + return workerPools, templates, nil +} + +func (h *SubstrateHandler) listAteAPIState(ctx context.Context, namespaces []string) ([]api.SubstrateActorEntry, []api.SubstrateWorkerEntry, error) { + allowAll := len(namespaces) == 1 && namespaces[0] == "" + allowed := make(map[string]struct{}, len(namespaces)) + for _, ns := range namespaces { + if ns != "" { + allowed[ns] = struct{}{} + } + } + + actorPB, err := h.AteClient.ListActors(ctx) + if err != nil { + return nil, nil, err + } + workerPB, err := h.AteClient.ListWorkers(ctx) + if err != nil { + return nil, nil, err + } + + actors := make([]api.SubstrateActorEntry, 0, len(actorPB)) + for _, a := range actorPB { + if a == nil { + continue + } + ns := strings.TrimSpace(a.GetActorTemplateNamespace()) + if !allowAll && ns != "" { + if _, ok := allowed[ns]; !ok { + continue + } + } + actors = append(actors, actorEntryFromPB(a)) + } + + workers := make([]api.SubstrateWorkerEntry, 0, len(workerPB)) + for _, w := range workerPB { + if w == nil { + continue + } + ns := strings.TrimSpace(w.GetWorkerNamespace()) + if !allowAll && ns != "" { + if _, ok := allowed[ns]; !ok { + continue + } + } + workers = append(workers, workerEntryFromPB(w)) + } + + return actors, workers, nil +} + +func actorEntryFromPB(a *ateapipb.Actor) api.SubstrateActorEntry { + return api.SubstrateActorEntry{ + ActorID: 
a.GetActorId(), + Status: substrate.ActorStatusLabel(a.GetStatus()), + ActorTemplateNamespace: a.GetActorTemplateNamespace(), + ActorTemplateName: a.GetActorTemplateName(), + AteomPodNamespace: a.GetAteomPodNamespace(), + AteomPodName: a.GetAteomPodName(), + AteomPodIP: a.GetAteomPodIp(), + LastSnapshot: a.GetLastSnapshot(), + InProgressSnapshot: a.GetInProgressSnapshot(), + Version: a.GetVersion(), + } +} + +func workerEntryFromPB(w *ateapipb.Worker) api.SubstrateWorkerEntry { + return api.SubstrateWorkerEntry{ + WorkerNamespace: w.GetWorkerNamespace(), + WorkerPool: w.GetWorkerPool(), + WorkerPod: w.GetWorkerPod(), + ActorNamespace: w.GetActorNamespace(), + ActorTemplate: w.GetActorTemplate(), + ActorID: w.GetActorId(), + IP: w.GetIp(), + Version: w.GetVersion(), + } +} + +func compareWorkerPool(a, b api.SubstrateWorkerPoolEntry) int { + return strings.Compare(a.Namespace+"/"+a.Name, b.Namespace+"/"+b.Name) +} + +func compareActorTemplate(a, b api.SubstrateActorTemplateEntry) int { + return strings.Compare(a.Namespace+"/"+a.Name, b.Namespace+"/"+b.Name) +} + +func compareActor(a, b api.SubstrateActorEntry) int { + return strings.Compare(a.ActorID, b.ActorID) +} + +func compareWorker(a, b api.SubstrateWorkerEntry) int { + return strings.Compare(a.WorkerNamespace+"/"+a.WorkerPool+"/"+a.WorkerPod, b.WorkerNamespace+"/"+b.WorkerPool+"/"+b.WorkerPod) +} diff --git a/go/core/internal/httpserver/handlers/substrate_test.go b/go/core/internal/httpserver/handlers/substrate_test.go new file mode 100644 index 0000000000..6f34f3fe39 --- /dev/null +++ b/go/core/internal/httpserver/handlers/substrate_test.go @@ -0,0 +1,104 @@ +package handlers_test + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/agent-substrate/substrate/proto/ateapipb" + api "github.com/kagent-dev/kagent/go/api/httpapi" + "github.com/kagent-dev/kagent/go/core/internal/httpserver/auth" + 
"github.com/kagent-dev/kagent/go/core/internal/httpserver/handlers" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +type stubAteControl struct { + ateapipb.ControlClient + actors []*ateapipb.Actor + workers []*ateapipb.Worker +} + +func (s *stubAteControl) ListActors(context.Context, *ateapipb.ListActorsRequest, ...grpc.CallOption) (*ateapipb.ListActorsResponse, error) { + return &ateapipb.ListActorsResponse{Actors: s.actors}, nil +} + +func (s *stubAteControl) ListWorkers(context.Context, *ateapipb.ListWorkersRequest, ...grpc.CallOption) (*ateapipb.ListWorkersResponse, error) { + return &ateapipb.ListWorkersResponse{Workers: s.workers}, nil +} + +func TestHandleGetSubstrateStatus(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(atev1alpha1.AddToScheme(scheme)) + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects( + &atev1alpha1.WorkerPool{ + ObjectMeta: metav1.ObjectMeta{Name: "default-wp", Namespace: "kagent"}, + Spec: atev1alpha1.WorkerPoolSpec{Replicas: 2, AteomImage: "localhost:5001/ateom:latest"}, + }, + &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-claw", + Namespace: "kagent", + Labels: map[string]string{ + "app.kubernetes.io/managed-by": "kagent", + substrate.HarnessLabelKey: "my-claw", + }, + }, + Spec: atev1alpha1.ActorTemplateSpec{ + WorkerPoolRef: corev1.ObjectReference{Name: "default-wp", Namespace: "kagent"}, + }, + Status: atev1alpha1.ActorTemplateStatus{Phase: atev1alpha1.PhaseReady, GoldenActorID: "golden-1"}, + }, + ).Build() + + ate := 
&substrate.Client{ControlClient: &stubAteControl{ + actors: []*ateapipb.Actor{{ + ActorId: "ahr-kagent-my-claw", + Status: ateapipb.Actor_STATUS_RUNNING, + ActorTemplateNamespace: "kagent", + ActorTemplateName: "my-claw", + }}, + workers: []*ateapipb.Worker{{ + WorkerNamespace: "kagent", + WorkerPool: "default-wp", + WorkerPod: "ateom-0", + ActorId: "ahr-kagent-my-claw", + }}, + }} + + base := &handlers.Base{KubeClient: kube, Authorizer: &auth.NoopAuthorizer{}} + h := handlers.NewSubstrateHandler(base, ate) + + req := httptest.NewRequest(http.MethodGet, "/api/substrate/status?namespace=kagent", nil) + req = setUser(req, "test-user") + rec := httptest.NewRecorder() + h.HandleGetSubstrateStatus(&testErrorResponseWriter{ResponseWriter: rec}, req) + require.Equal(t, http.StatusOK, rec.Code) + + var wrapped api.StandardResponse[api.SubstrateStatusResponse] + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &wrapped)) + require.True(t, wrapped.Data.Enabled) + require.Len(t, wrapped.Data.WorkerPools, 1) + require.Equal(t, "default-wp", wrapped.Data.WorkerPools[0].Name) + require.Len(t, wrapped.Data.ActorTemplates, 1) + require.Equal(t, "Ready", wrapped.Data.ActorTemplates[0].Phase) + require.True(t, wrapped.Data.ActorTemplates[0].ManagedByKagent) + require.Equal(t, "my-claw", wrapped.Data.ActorTemplates[0].HarnessName) + require.Len(t, wrapped.Data.Actors, 1) + require.Equal(t, "Running", wrapped.Data.Actors[0].Status) + require.Len(t, wrapped.Data.Workers, 1) +} diff --git a/go/core/internal/httpserver/middleware.go b/go/core/internal/httpserver/middleware.go index 2f3a329378..31280097e4 100644 --- a/go/core/internal/httpserver/middleware.go +++ b/go/core/internal/httpserver/middleware.go @@ -5,6 +5,7 @@ import ( "fmt" "net" "net/http" + "strings" "time" "github.com/kagent-dev/kagent/go/core/internal/httpserver/handlers" @@ -77,9 +78,18 @@ func (w *statusResponseWriter) RespondWithError(err error) { } } +func isAgentHarnessGatewayPath(path string) bool { + if 
!strings.HasPrefix(path, "/api/agentharnesses/") { + return false + } + return strings.Contains(path, "/gateway") +} + func contentTypeMiddleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if len(r.URL.Path) >= 4 && r.URL.Path[:4] == "/api" && r.URL.Path != APIPathSandboxSSH { + if len(r.URL.Path) >= 4 && r.URL.Path[:4] == "/api" && + r.URL.Path != APIPathSandboxSSH && + !isAgentHarnessGatewayPath(r.URL.Path) { w.Header().Set("Content-Type", "application/json") } next.ServeHTTP(w, r) diff --git a/go/core/internal/httpserver/server.go b/go/core/internal/httpserver/server.go index aac7e831ab..eb88dfe0a5 100644 --- a/go/core/internal/httpserver/server.go +++ b/go/core/internal/httpserver/server.go @@ -17,6 +17,7 @@ import ( "github.com/kagent-dev/kagent/go/core/internal/version" "github.com/kagent-dev/kagent/go/core/pkg/auth" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "k8s.io/apimachinery/pkg/types" ctrl_client "sigs.k8s.io/controller-runtime/pkg/client" @@ -50,6 +51,8 @@ const ( APIPathLangGraph = "/api/langgraph" APIPathCrewAI = "/api/crewai" APIPathSandboxSSH = "/api/sandbox/ssh" + APIPathAgentHarnessHarness = "/api/agentharnesses/{namespace}/{name}/" + APIPathSubstrateStatus = "/api/substrate/status" ) var defaultModelConfig = types.NamespacedName{ @@ -59,18 +62,20 @@ var defaultModelConfig = types.NamespacedName{ // ServerConfig holds the configuration for the HTTP server type ServerConfig struct { - Router *mux.Router - BindAddr string - KubeClient ctrl_client.Client - A2AHandler a2a.A2AHandlerMux - MCPHandler *mcp.MCPHandler - WatchedNamespaces []string - DbClient dbpkg.Client - Authenticator auth.AuthProvider - Authorizer auth.Authorizer - ProxyURL string - Reconciler reconciler.KagentReconciler - SandboxBackend sandboxbackend.Backend + Router 
*mux.Router + BindAddr string + KubeClient ctrl_client.Client + A2AHandler a2a.A2AHandlerMux + MCPHandler *mcp.MCPHandler + WatchedNamespaces []string + DbClient dbpkg.Client + Authenticator auth.AuthProvider + Authorizer auth.Authorizer + ProxyURL string + Reconciler reconciler.KagentReconciler + SandboxBackend sandboxbackend.Backend + AgentHarnessGateway *handlers.AgentHarnessGatewayConfig + SubstrateAteClient *substrate.Client } // HTTPServer is the structure that manages the HTTP server @@ -87,9 +92,20 @@ func NewHTTPServer(config ServerConfig) (*HTTPServer, error) { // Initialize database return &HTTPServer{ - config: config, - router: config.Router, - handlers: handlers.NewHandlers(config.KubeClient, defaultModelConfig, config.DbClient, config.WatchedNamespaces, config.Authorizer, config.ProxyURL, config.Reconciler, config.SandboxBackend), + config: config, + router: config.Router, + handlers: handlers.NewHandlers( + config.KubeClient, + defaultModelConfig, + config.DbClient, + config.WatchedNamespaces, + config.Authorizer, + config.ProxyURL, + config.Reconciler, + config.SandboxBackend, + config.AgentHarnessGateway, + config.SubstrateAteClient, + ), authenticator: config.Authenticator, }, nil } @@ -276,6 +292,9 @@ func (s *HTTPServer) setupRoutes() { // Namespaces s.router.HandleFunc(APIPathNamespaces, adaptHandler(s.handlers.Namespaces.HandleListNamespaces)).Methods(http.MethodGet) + // Agent Substrate inventory (WorkerPools, ActorTemplates, ate-api actors/workers) + s.router.HandleFunc(APIPathSubstrateStatus, adaptHandler(s.handlers.Substrate.HandleGetSubstrateStatus)).Methods(http.MethodGet) + // Prompt template libraries (ConfigMaps) s.router.HandleFunc(APIPathPromptTemplates, adaptHandler(s.handlers.PromptTemplates.HandleListPromptTemplates)).Methods(http.MethodGet) s.router.HandleFunc(APIPathPromptTemplates, adaptHandler(s.handlers.PromptTemplates.HandleCreatePromptTemplate)).Methods(http.MethodPost) @@ -303,6 +322,11 @@ func (s *HTTPServer) 
setupRoutes() { // OpenShell sandbox PTY (browser WebSocket → gateway CONNECT → SSH). Authenticated like other /api routes. s.router.HandleFunc(APIPathSandboxSSH, adaptHandler(s.handlers.HandleSandboxSSHWebSocket)).Methods(http.MethodGet) + // Substrate OpenClaw gateway proxy (HTTP + WebSocket) via atenet-router. + s.router.PathPrefix(APIPathAgentHarnessHarness).Handler( + adaptHandler(s.handlers.HandleAgentHarnessGateway), + ) + // A2A s.router.PathPrefix(APIPathA2A + "/{namespace}/{name}").Handler(s.config.A2AHandler) s.router.PathPrefix(APIPathA2ASandboxes + "/{namespace}/{name}").Handler(s.config.A2AHandler) @@ -313,21 +337,30 @@ func (s *HTTPServer) setupRoutes() { } // Use middleware for common functionality (first registered runs outermost on incoming requests). - s.router.Use(wsSandboxSSHAuthQueryMiddleware) + s.router.Use(wsAuthQueryMiddleware) s.router.Use(auth.AuthnMiddleware(s.authenticator)) s.router.Use(contentTypeMiddleware) s.router.Use(loggingMiddleware) s.router.Use(errorHandlerMiddleware) } -// wsSandboxSSHAuthQueryMiddleware maps access_token query → Authorization for browser WebSocket upgrades +// wsAuthQueryMiddleware maps token query params → Authorization for browser WebSocket upgrades // (fetch can send headers; WebSocket cannot). 
-func wsSandboxSSHAuthQueryMiddleware(next http.Handler) http.Handler { +func wsAuthQueryMiddleware(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.URL.Path == APIPathSandboxSSH && r.Header.Get("Authorization") == "" { - if t := r.URL.Query().Get("access_token"); t != "" { - r.Header.Set("Authorization", "Bearer "+strings.TrimSpace(t)) - } + if r.Header.Get("Authorization") != "" { + next.ServeHTTP(w, r) + return + } + var token string + switch { + case r.URL.Path == APIPathSandboxSSH || strings.HasSuffix(r.URL.Path, "/ssh"): + token = r.URL.Query().Get("access_token") + case isAgentHarnessGatewayPath(r.URL.Path): + token = r.URL.Query().Get("token") + } + if token != "" { + r.Header.Set("Authorization", "Bearer "+strings.TrimSpace(token)) } next.ServeHTTP(w, r) }) diff --git a/go/core/pkg/app/app.go b/go/core/pkg/app/app.go index ddad07d546..d9adf663d1 100644 --- a/go/core/pkg/app/app.go +++ b/go/core/pkg/app/app.go @@ -53,11 +53,15 @@ import ( // to ensure that exec-entrypoint and run can make use of them. 
_ "k8s.io/client-go/plugin/pkg/client/auth" + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" dbpkg "github.com/kagent-dev/kagent/go/api/database" + "github.com/kagent-dev/kagent/go/core/internal/httpserver/handlers" "github.com/kagent-dev/kagent/go/core/pkg/auth" "github.com/kagent-dev/kagent/go/core/pkg/migrations" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/substrate" "github.com/kagent-dev/kagent/go/core/pkg/translator" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -99,6 +103,7 @@ func init() { utilruntime.Must(v1alpha1.AddToScheme(scheme)) utilruntime.Must(v1alpha2.AddToScheme(scheme)) utilruntime.Must(agentsandboxv1.AddToScheme(scheme)) + utilruntime.Must(atev1alpha1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -148,6 +153,20 @@ type Config struct { DialTimeout time.Duration CallTimeout time.Duration } + Substrate struct { + AteAPIEndpoint string + AtenetRouterURL string + Insecure bool + DialTimeout time.Duration + CallTimeout time.Duration + DefaultWorkerPoolNamespace string + DefaultWorkerPoolName string + PauseImage string + RunscAMD64URL string + RunscAMD64SHA256 string + RunscARM64URL string + RunscARM64SHA256 string + } } func (cfg *Config) SetFlags(commandLine *flag.FlagSet) { @@ -207,6 +226,19 @@ func (cfg *Config) SetFlags(commandLine *flag.FlagSet) { commandLine.DurationVar(&cfg.Openshell.DialTimeout, "openshell-dial-timeout", 10*time.Second, "Timeout for the initial dial to the OpenShell gateway.") commandLine.DurationVar(&cfg.Openshell.CallTimeout, "openshell-call-timeout", 30*time.Second, "Per-RPC timeout for OpenShell gateway calls.") + commandLine.StringVar(&cfg.Substrate.AteAPIEndpoint, "substrate-ate-api-endpoint", "", "gRPC target for Agent Substrate ate-api (e.g. 
dns:///api.ate-system.svc:443). Enables substrate AgentHarness runtime when set.") + commandLine.StringVar(&cfg.Substrate.AtenetRouterURL, "substrate-atenet-router-url", "", "HTTP URL for Substrate atenet-router (Envoy). Defaults to http://atenet-router.ate-system.svc:80 when unset.") + commandLine.BoolVar(&cfg.Substrate.Insecure, "substrate-ate-api-insecure", false, "Dial ate-api without TLS (local dev only).") + commandLine.DurationVar(&cfg.Substrate.DialTimeout, "substrate-dial-timeout", 10*time.Second, "Timeout for the initial dial to ate-api.") + commandLine.DurationVar(&cfg.Substrate.CallTimeout, "substrate-call-timeout", 30*time.Second, "Per-RPC timeout for ate-api calls.") + commandLine.StringVar(&cfg.Substrate.DefaultWorkerPoolNamespace, "substrate-default-workerpool-namespace", kagentNamespace, "Default Agent Substrate WorkerPool namespace when spec.substrate.workerPoolRef is unset.") + commandLine.StringVar(&cfg.Substrate.DefaultWorkerPoolName, "substrate-default-workerpool-name", "", "Default Agent Substrate WorkerPool name when spec.substrate.workerPoolRef is unset.") + commandLine.StringVar(&cfg.Substrate.PauseImage, "substrate-pause-image", "gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da", "Pause image for generated ActorTemplates.") + commandLine.StringVar(&cfg.Substrate.RunscAMD64URL, "substrate-runsc-amd64-url", "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc", "gVisor runsc URL for amd64.") + commandLine.StringVar(&cfg.Substrate.RunscAMD64SHA256, "substrate-runsc-amd64-sha256", "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63", "gVisor runsc sha256 for amd64.") + commandLine.StringVar(&cfg.Substrate.RunscARM64URL, "substrate-runsc-arm64-url", "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc", "gVisor runsc URL for arm64.") + commandLine.StringVar(&cfg.Substrate.RunscARM64SHA256, "substrate-runsc-arm64-sha256", 
"1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9", "gVisor runsc sha256 for arm64.") + commandLine.StringVar(&agent_translator.DefaultServiceAccountName, "default-service-account-name", "", "Global default ServiceAccount name for agent pods. When set, agents without an explicit serviceAccountName will use this instead of creating a per-agent ServiceAccount.") commandLine.Var(&MapValue{Target: &agent_translator.DefaultAgentPodLabels}, "default-agent-pod-labels", "Comma-separated key=value pairs of labels to apply to all agent pod templates (e.g. 'team=platform,env=prod'). Per-agent labels take precedence.") @@ -430,7 +462,7 @@ func Start(getExtensionConfig GetExtensionConfig, migrationRunner MigrationRunne clientOpts := client.Options{} if len(watchNamespacesList) > 0 { // In namespaced RBAC mode a Role cannot grant access to cluster-scoped - // resources, so prevent the cached client from starting a cluster-scoped + // lifecycle, so prevent the cached client from starting a cluster-scoped // Namespace informer whose list/watch would keep crashing. 
clientOpts.Cache = &client.CacheOptions{ DisableFor: []client.Object{&corev1.Namespace{}}, @@ -563,23 +595,53 @@ func Start(getExtensionConfig GetExtensionConfig, migrationRunner MigrationRunne os.Exit(1) } + kubeClient := mgr.GetClient() + var openshellOpenClawBackend sandboxbackend.AsyncBackend + var openshellHermesBackend sandboxbackend.AsyncBackend if cfg.Openshell.GatewayURL != "" { - kubeClient := mgr.GetClient() - openshellBackends, err := buildOpenshellSandboxBackends(ctx, &cfg, kubeClient) + var err error + openshellOpenClawBackend, openshellHermesBackend, err = buildOpenshellSandboxBackends(ctx, &cfg, kubeClient) if err != nil { setupLog.Error(err, "unable to build openshell sandbox backends") os.Exit(1) } - if err := (&controller.AgentHarnessController{ - Client: kubeClient, - Recorder: mgr.GetEventRecorder("agentharness-controller"), - Backends: openshellBackends, + } + var substrateAteClient *substrate.Client + var substrateOpenClawBackend sandboxbackend.AsyncBackend + var substrateNemoClawBackend sandboxbackend.AsyncBackend + if cfg.Substrate.AteAPIEndpoint != "" { + var err error + substrateOpenClawBackend, substrateNemoClawBackend, substrateAteClient, err = buildSubstrateSandboxBackends(ctx, &cfg) + if err != nil { + setupLog.Error(err, "unable to build substrate sandbox backends") + os.Exit(1) + } + } + if openshellOpenClawBackend != nil || openshellHermesBackend != nil { + if err := (&controller.OpenShellAgentHarnessController{ + Client: kubeClient, + Recorder: mgr.GetEventRecorder("agentharness-openshell-controller"), + OpenClawBackend: openshellOpenClawBackend, + HermesBackend: openshellHermesBackend, }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "AgentHarness") + setupLog.Error(err, "unable to create controller", "controller", "OpenShellAgentHarness") os.Exit(1) } - } else { - setupLog.Info("AgentHarness controller disabled: --openshell-gateway-url not set") + } + if 
substrateOpenClawBackend != nil || substrateNemoClawBackend != nil { + if err := (&controller.SubstrateAgentHarnessController{ + Client: kubeClient, + Recorder: mgr.GetEventRecorder("agentharness-substrate-controller"), + OpenClawBackend: substrateOpenClawBackend, + NemoClawBackend: substrateNemoClawBackend, + SubstrateLifecycle: substrateLifecycleFromConfig(kubeClient, &cfg, substrateAteClient), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "SubstrateAgentHarness") + os.Exit(1) + } + } + if openshellOpenClawBackend == nil && openshellHermesBackend == nil && substrateOpenClawBackend == nil && substrateNemoClawBackend == nil { + setupLog.Info("AgentHarness controller disabled: set --openshell-gateway-url and/or --substrate-ate-api-endpoint") } if err = (&controller.ModelConfigController{ @@ -607,7 +669,7 @@ func Start(getExtensionConfig GetExtensionConfig, migrationRunner MigrationRunne } if err := reconcilerutils.SetupOwnerIndexes(mgr, rcnclr.GetOwnedResourceTypes()); err != nil { - setupLog.Error(err, "failed to setup indexes for owned resources") + setupLog.Error(err, "failed to setup indexes for owned lifecycle") os.Exit(1) } @@ -677,19 +739,28 @@ func Start(getExtensionConfig GetExtensionConfig, migrationRunner MigrationRunne os.Exit(1) } + var agentHarnessGateway *handlers.AgentHarnessGatewayConfig + if cfg.Substrate.AteAPIEndpoint != "" { + agentHarnessGateway = &handlers.AgentHarnessGatewayConfig{ + AtenetRouterURL: cfg.Substrate.AtenetRouterURL, + } + } + httpServer, err := httpserver.NewHTTPServer(httpserver.ServerConfig{ - Router: router, - BindAddr: cfg.HttpServerAddr, - KubeClient: mgr.GetClient(), - A2AHandler: a2aHandler, - MCPHandler: mcpHandler, - WatchedNamespaces: watchNamespacesList, - DbClient: dbClient, - Authorizer: extensionCfg.Authorizer, - Authenticator: extensionCfg.Authenticator, - ProxyURL: cfg.Proxy.URL, - Reconciler: rcnclr, - SandboxBackend: extensionCfg.SandboxBackend, + 
Router: router, + BindAddr: cfg.HttpServerAddr, + KubeClient: mgr.GetClient(), + A2AHandler: a2aHandler, + MCPHandler: mcpHandler, + WatchedNamespaces: watchNamespacesList, + DbClient: dbClient, + Authorizer: extensionCfg.Authorizer, + Authenticator: extensionCfg.Authenticator, + ProxyURL: cfg.Proxy.URL, + Reconciler: rcnclr, + SandboxBackend: extensionCfg.SandboxBackend, + AgentHarnessGateway: agentHarnessGateway, + SubstrateAteClient: substrateAteClient, }) if err != nil { setupLog.Error(err, "unable to create HTTP server") @@ -717,7 +788,7 @@ func Start(getExtensionConfig GetExtensionConfig, migrationRunner MigrationRunne // nemoclaw from flag config. It dials the gateway once; OpenShell and Inference RPCs // share that connection (see openshell.OpenShellClients). The connection is not explicitly // closed today — same lifetime as the process. -func buildOpenshellSandboxBackends(ctx context.Context, cfg *Config, kubeClient client.Client) (map[v1alpha2.AgentHarnessBackendType]sandboxbackend.AsyncBackend, error) { +func buildOpenshellSandboxBackends(ctx context.Context, cfg *Config, kubeClient client.Client) (sandboxbackend.AsyncBackend, sandboxbackend.AsyncBackend, error) { oc := openshell.Config{ GatewayURL: cfg.Openshell.GatewayURL, Token: cfg.Openshell.Token, @@ -728,29 +799,62 @@ func buildOpenshellSandboxBackends(ctx context.Context, cfg *Config, kubeClient if cfg.Openshell.TokenFile != "" { data, err := os.ReadFile(cfg.Openshell.TokenFile) if err != nil { - return nil, fmt.Errorf("read openshell token file: %w", err) + return nil, nil, fmt.Errorf("read openshell token file: %w", err) } oc.Token = strings.TrimSpace(string(data)) } if cfg.Openshell.CAFile != "" { data, err := os.ReadFile(cfg.Openshell.CAFile) if err != nil { - return nil, fmt.Errorf("read openshell CA file: %w", err) + return nil, nil, fmt.Errorf("read openshell CA file: %w", err) } oc.TLSCAPEM = data } clients, err := openshell.Dial(ctx, oc) if err != nil { - return nil, err + return nil, 
nil, err } ocl := openshell.NewOpenClawBackend(kubeClient, clients, oc, nil) hermesBackend := openshell.NewHermesBackend(kubeClient, clients, oc, nil) - return map[v1alpha2.AgentHarnessBackendType]sandboxbackend.AsyncBackend{ - v1alpha2.AgentHarnessBackendOpenClaw: ocl, - v1alpha2.AgentHarnessBackendNemoClaw: ocl, - v1alpha2.AgentHarnessBackendHermes: hermesBackend, - }, nil + return ocl, hermesBackend, nil +} + +func buildSubstrateSandboxBackends(ctx context.Context, cfg *Config) (sandboxbackend.AsyncBackend, sandboxbackend.AsyncBackend, *substrate.Client, error) { + sc := substrateAppConfig(cfg) + client, err := substrate.Dial(ctx, sc) + if err != nil { + return nil, nil, nil, err + } + + ocl := substrate.NewOpenClawBackend(client, v1alpha2.AgentHarnessBackendOpenClaw, nil) + ncl := substrate.NewOpenClawBackend(client, v1alpha2.AgentHarnessBackendNemoClaw, nil) + return ocl, ncl, client, nil +} + +func substrateAppConfig(cfg *Config) substrate.Config { + sc := substrate.Config{ + AteAPIEndpoint: cfg.Substrate.AteAPIEndpoint, + Insecure: cfg.Substrate.Insecure, + DialTimeout: cfg.Substrate.DialTimeout, + CallTimeout: cfg.Substrate.CallTimeout, + } + return sc +} + +func substrateLifecycleFromConfig(kubeClient client.Client, cfg *Config, ate *substrate.Client) *substrate.Lifecycle { + return substrate.NewLifecycle(kubeClient, substrate.LifecycleDefaults{ + PauseImage: cfg.Substrate.PauseImage, + RunscAMD64URL: cfg.Substrate.RunscAMD64URL, + RunscAMD64SHA256: cfg.Substrate.RunscAMD64SHA256, + RunscARM64URL: cfg.Substrate.RunscARM64URL, + RunscARM64SHA256: cfg.Substrate.RunscARM64SHA256, + DefaultWorkloadImage: openclaw.NemoclawSandboxBaseImage, + DefaultWorkerPool: types.NamespacedName{ + Namespace: cfg.Substrate.DefaultWorkerPoolNamespace, + Name: cfg.Substrate.DefaultWorkerPoolName, + }, + }, ate) } // configureNamespaceWatching sets up the controller manager to watch specific namespaces diff --git a/go/core/pkg/sandboxbackend/async.go 
b/go/core/pkg/sandboxbackend/async.go index e680259949..52c85d5849 100644 --- a/go/core/pkg/sandboxbackend/async.go +++ b/go/core/pkg/sandboxbackend/async.go @@ -14,7 +14,8 @@ type Handle struct { } // EnsureResult is returned by EnsureAgentHarness. Endpoint (if set) is surfaced -// to users via AgentHarness.Status.Connection. +// to users via AgentHarness.Status.Connection (OpenShell: gateway URL#sandbox id; +// Substrate: kagent gateway proxy path). type EnsureResult struct { Handle Handle Endpoint string @@ -39,9 +40,11 @@ type AsyncBackend interface { // each reconcile. GetStatus(ctx context.Context, h Handle) (metav1.ConditionStatus, string, string) - // DeleteAgentHarness releases the sandbox. NotFound must be treated as - // success so the finalizer can be removed idempotently. - DeleteAgentHarness(ctx context.Context, h Handle) error + // DeleteAgentHarness releases the sandbox. It performs at most one + // reconcile-safe delete step and returns done=true once the sandbox is gone. + // NotFound must be treated as success so the finalizer can be removed + // idempotently. + DeleteAgentHarness(ctx context.Context, h Handle) (done bool, err error) // OnAgentHarnessReady runs one-time work after the AgentHarness reports // Ready (for example ExecSandbox bootstrap inside the VM). 
Backends that diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/bootstrap.go b/go/core/pkg/sandboxbackend/openclaw/bootstrap_openshell.go similarity index 52% rename from go/core/pkg/sandboxbackend/openshell/openclaw/bootstrap.go rename to go/core/pkg/sandboxbackend/openclaw/bootstrap_openshell.go index db2fdb373e..ede55d15b8 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/bootstrap.go +++ b/go/core/pkg/sandboxbackend/openclaw/bootstrap_openshell.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "slices" - "strings" "github.com/kagent-dev/kagent/go/api/v1alpha2" "sigs.k8s.io/controller-runtime/pkg/client" @@ -13,7 +12,10 @@ import ( // BuildBootstrapJSON builds ~/.openclaw/openclaw.json contents plus environment variables that must be present when // OpenClaw resolves openshell:resolve:env: (API key + channel tokens). -func BuildBootstrapJSON(ctx context.Context, kube client.Client, namespace string, sbx *v1alpha2.AgentHarness, mc *v1alpha2.ModelConfig, gwPort int) ([]byte, map[string]string, error) { +// +// defaultBaseURLWhenUnset is used when ModelConfig has no explicit provider base URL. +// OpenShell callers should pass DefaultInferenceBaseURL. 
+func BuildBootstrapJSON(ctx context.Context, kube client.Client, namespace string, sbx *v1alpha2.AgentHarness, mc *v1alpha2.ModelConfig, gw GatewayBootstrapConfig, defaultBaseURLWhenUnset string) ([]byte, map[string]string, error) { if mc == nil { return nil, nil, fmt.Errorf("ModelConfig is required") } @@ -31,21 +33,21 @@ func BuildBootstrapJSON(ctx context.Context, kube client.Client, namespace strin apiKeyEnv: apiKey, } - modelID := strings.TrimSpace(mc.Spec.Model) - if modelID == "" { - return nil, nil, fmt.Errorf("ModelConfig.spec.model is required for OpenClaw bootstrap JSON") + modelID, err := requiredModelID(mc) + if err != nil { + return nil, nil, err } providerRecord := GatewayProviderRecordName(mc.Spec.Provider) - doc := buildCoreBootstrapDocument(mc, gwPort, apiKeyEnv, providerRecord, modelID, apiAdapter) + doc := buildCoreBootstrapDocument(mc, gw, credentialValue{literal: openshellResolveEnv(apiKeyEnv)}, providerRecord, modelID, apiAdapter, defaultBaseURLWhenUnset) - chState, err := accumulateHarnessChannels(ctx, kube, namespace, sbx.Spec.Backend, sbx.Spec.Channels, env) + chState, err := accumulateHarnessChannels(ctx, kube, namespace, sbx.Spec.Channels, env) if err != nil { return nil, nil, err } doc.Channels = chState.channelsJSON() - applySecretsAllowlist(&doc, env) + applyOpenshellSecretsAllowlist(&doc, env) raw, err := json.Marshal(doc) if err != nil { @@ -54,48 +56,25 @@ func BuildBootstrapJSON(ctx context.Context, kube client.Client, namespace strin return raw, env, nil } -func buildCoreBootstrapDocument(mc *v1alpha2.ModelConfig, gwPort int, apiKeyEnv, providerRecord, modelID, apiAdapter string) bootstrapDocument { - baseURL := bootstrapProviderBaseURL(mc) - return bootstrapDocument{ - Gateway: gatewaySection{ - Mode: "local", - Bind: "loopback", - Auth: gatewayAuth{Mode: "none"}, - Port: gwPort, - }, - Models: modelsSection{ - Mode: "merge", - Providers: map[string]providerSettings{ - providerRecord: { - BaseURL: baseURL, - APIKey: 
openshellResolveEnv(apiKeyEnv), - Auth: providerAuth(mc), - API: apiAdapter, - Models: []modelSlot{ - {ID: modelID, Name: modelID}, - }, - }, - }, - }, - Agents: agentsSection{ - Defaults: agentDefaults{ - Model: defaultModelPick{ - Primary: fmt.Sprintf("%s/%s", providerRecord, modelID), - }, - }, - }, - } -} - -func applySecretsAllowlist(doc *bootstrapDocument, env map[string]string) { - secretAllow := make([]string, 0, len(env)) +func applyOpenshellSecretsAllowlist(doc *bootstrapDocument, env map[string]string, extraEnvNames ...string) { + seen := make(map[string]struct{}, len(env)+len(extraEnvNames)) + secretAllow := make([]string, 0, len(env)+len(extraEnvNames)) for k := range env { - secretAllow = append(secretAllow, k) + if _, ok := seen[k]; !ok { + seen[k] = struct{}{} + secretAllow = append(secretAllow, k) + } + } + for _, k := range extraEnvNames { + if _, ok := seen[k]; !ok { + seen[k] = struct{}{} + secretAllow = append(secretAllow, k) + } } slices.Sort(secretAllow) doc.Secrets = secretsSection{ Providers: map[string]secretProvider{ - bootstrapSecretProviderID: { + openshellSecretProviderID: { Source: "env", Allowlist: secretAllow, }, diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/bootstrap_test.go b/go/core/pkg/sandboxbackend/openclaw/bootstrap_openshell_test.go similarity index 73% rename from go/core/pkg/sandboxbackend/openshell/openclaw/bootstrap_test.go rename to go/core/pkg/sandboxbackend/openclaw/bootstrap_openshell_test.go index 4dfa1a2633..18f7b1ce42 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/bootstrap_test.go +++ b/go/core/pkg/sandboxbackend/openclaw/bootstrap_openshell_test.go @@ -6,7 +6,7 @@ import ( "testing" "github.com/kagent-dev/kagent/go/api/v1alpha2" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ 
-39,7 +39,7 @@ func TestBuildBootstrapJSON_OpenAIDefaultBaseURLInferenceLocal(t *testing.T) { sbx := &v1alpha2.AgentHarness{ObjectMeta: metav1.ObjectMeta{Name: "s1", Namespace: ns}} kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret, mc).Build() - raw, _, err := openclaw.BuildBootstrapJSON(context.Background(), kube, ns, sbx, mc, 18800) + raw, _, err := openclaw.BuildBootstrapJSON(context.Background(), kube, ns, sbx, mc, openclaw.OpenshellGatewayBootstrap(18800), openclaw.DefaultInferenceBaseURL) require.NoError(t, err) var root map[string]any @@ -56,6 +56,42 @@ func TestBuildBootstrapJSON_OpenAIDefaultBaseURLInferenceLocal(t *testing.T) { require.Contains(t, kagent["allowlist"], "OPENAI_API_KEY") } +func TestBuildBootstrapJSON_SubstrateOmitsModelsWhenNoExplicitBaseURL(t *testing.T) { + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + ns := "default" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "openai-key", Namespace: ns}, + Data: map[string][]byte{"OPENAI_API_KEY": []byte("sk-test")}, + } + mc := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "mc1", Namespace: ns}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + APIKeySecret: "openai-key", + APIKeySecretKey: "OPENAI_API_KEY", + OpenAI: &v1alpha2.OpenAIConfig{}, + }, + } + sbx := &v1alpha2.AgentHarness{ObjectMeta: metav1.ObjectMeta{Name: "s1", Namespace: ns}} + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret, mc).Build() + raw, _, err := openclaw.BuildBootstrapJSON(context.Background(), kube, ns, sbx, mc, openclaw.SubstrateGatewayBootstrap("tok", 80, "/api/agentharnesses/default/s1/gateway"), openclaw.SubstrateBootstrapDefaultBaseURL) + require.NoError(t, err) + + var root map[string]any + require.NoError(t, json.Unmarshal(raw, &root)) + _, hasModels := root["models"] + require.False(t, hasModels) + 
agents := root["agents"].(map[string]any) + defaults := agents["defaults"].(map[string]any) + model := defaults["model"].(map[string]any) + require.Equal(t, "openai/gpt-4o", model["primary"]) +} + func TestBuildBootstrapJSON_OpenAIAndTelegram(t *testing.T) { scheme := runtime.NewScheme() utilruntime.Must(clientgoscheme.AddToScheme(scheme)) @@ -92,7 +128,7 @@ func TestBuildBootstrapJSON_OpenAIAndTelegram(t *testing.T) { } kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret, mc).Build() - raw, env, err := openclaw.BuildBootstrapJSON(context.Background(), kube, ns, sbx, mc, 18800) + raw, env, err := openclaw.BuildBootstrapJSON(context.Background(), kube, ns, sbx, mc, openclaw.OpenshellGatewayBootstrap(18800), openclaw.DefaultInferenceBaseURL) require.NoError(t, err) require.Equal(t, "sk-test", env["OPENAI_API_KEY"]) require.Equal(t, "telegram-bot-token", env["TELEGRAM_BOT_TOKEN_TG1"]) diff --git a/go/core/pkg/sandboxbackend/openclaw/bootstrap_shared.go b/go/core/pkg/sandboxbackend/openclaw/bootstrap_shared.go new file mode 100644 index 0000000000..b547c83f37 --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/bootstrap_shared.go @@ -0,0 +1,158 @@ +package openclaw + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" +) + +// GatewayBootstrapConfig describes the gateway section of openclaw.json for a harness runtime. +type GatewayBootstrapConfig struct { + Port int + Bind string // loopback | lan + AuthMode string // none | token + Token string // required when AuthMode is token + ControlUI *ControlUIBootstrapConfig +} + +// ControlUIBootstrapConfig maps to gateway.controlUi in openclaw.json. +type ControlUIBootstrapConfig struct { + BasePath string + AllowedOrigins []string + DangerouslyDisableDeviceAuth bool +} + +// OpenshellGatewayBootstrap is the default gateway profile for OpenShell sandboxes. 
+func OpenshellGatewayBootstrap(port int) GatewayBootstrapConfig { + return GatewayBootstrapConfig{Port: port, Bind: "loopback", AuthMode: "none"} +} + +// SubstrateGatewayBootstrap is the gateway profile for Agent Substrate actors (port 80, token auth, proxied Control UI). +func SubstrateGatewayBootstrap(token string, port int, controlUIBasePath string) GatewayBootstrapConfig { + return GatewayBootstrapConfig{ + Port: port, + Bind: "lan", + AuthMode: "token", + Token: strings.TrimSpace(token), + ControlUI: &ControlUIBootstrapConfig{ + BasePath: normalizeControlUIBasePath(controlUIBasePath), + AllowedOrigins: []string{"*"}, + DangerouslyDisableDeviceAuth: true, + }, + } +} + +func normalizeControlUIBasePath(path string) string { + path = strings.TrimSpace(path) + if path == "" || path == "/" { + return "" + } + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + return strings.TrimRight(path, "/") +} + +// BuildGatewayOnlyBootstrapJSON returns a minimal openclaw.json with gateway settings only (no models/channels). +func BuildGatewayOnlyBootstrapJSON(gw GatewayBootstrapConfig) ([]byte, error) { + doc := bootstrapDocument{Gateway: buildGatewaySection(gw)} + raw, err := json.Marshal(doc) + if err != nil { + return nil, fmt.Errorf("marshal openclaw json: %w", err) + } + return raw, nil +} + +func buildCoreBootstrapDocument(mc *v1alpha2.ModelConfig, gw GatewayBootstrapConfig, apiKey credentialValue, providerRecord, modelID, apiAdapter, defaultBaseURLWhenUnset string) bootstrapDocument { + doc := bootstrapDocument{ + Gateway: buildGatewaySection(gw), + Agents: agentsSection{ + Defaults: agentDefaults{ + Model: defaultModelPick{ + Primary: fmt.Sprintf("%s/%s", providerRecord, modelID), + }, + }, + }, + } + + // Substrate: do not emit models.providers without baseUrl (OpenClaw rejects undefined baseUrl). + // Rely on agents.defaults + API key env unless the user set an explicit URL on ModelConfig. 
+ if defaultBaseURLWhenUnset == SubstrateBootstrapDefaultBaseURL { + if explicit := modelConfigExplicitBaseURL(mc); explicit != "" { + doc.Models = &modelsSection{ + Mode: "merge", + Providers: map[string]providerSettings{ + providerRecord: { + BaseURL: explicit, + APIKey: apiKey, + Auth: providerAuth(mc), + API: apiAdapter, + Models: []modelSlot{ + {ID: modelID, Name: modelID}, + }, + }, + }, + } + } + return doc + } + + baseURL := bootstrapProviderBaseURL(mc, defaultBaseURLWhenUnset) + doc.Models = &modelsSection{ + Mode: "merge", + Providers: map[string]providerSettings{ + providerRecord: { + BaseURL: baseURL, + APIKey: apiKey, + Auth: providerAuth(mc), + API: apiAdapter, + Models: []modelSlot{ + {ID: modelID, Name: modelID}, + }, + }, + }, + } + return doc +} + +func buildGatewaySection(gw GatewayBootstrapConfig) gatewaySection { + port := gw.Port + if port <= 0 { + port = 18800 + } + bind := strings.TrimSpace(gw.Bind) + if bind == "" { + bind = "loopback" + } + authMode := strings.TrimSpace(gw.AuthMode) + if authMode == "" { + authMode = "none" + } + section := gatewaySection{ + Mode: "local", + Bind: bind, + Auth: gatewayAuth{Mode: authMode}, + Port: port, + } + if authMode == "token" { + section.Auth.Token = gw.Token + } + if gw.ControlUI != nil { + section.ControlUi = &controlUiSection{ + BasePath: normalizeControlUIBasePath(gw.ControlUI.BasePath), + AllowedOrigins: gw.ControlUI.AllowedOrigins, + DangerouslyDisableDeviceAuth: gw.ControlUI.DangerouslyDisableDeviceAuth, + } + } + return section +} + +func requiredModelID(mc *v1alpha2.ModelConfig) (string, error) { + modelID := strings.TrimSpace(mc.Spec.Model) + if modelID == "" { + return "", fmt.Errorf("ModelConfig.spec.model is required for OpenClaw bootstrap JSON") + } + return modelID, nil +} diff --git a/go/core/pkg/sandboxbackend/openclaw/bootstrap_substrate.go b/go/core/pkg/sandboxbackend/openclaw/bootstrap_substrate.go new file mode 100644 index 0000000000..f20e1294b6 --- /dev/null +++ 
b/go/core/pkg/sandboxbackend/openclaw/bootstrap_substrate.go @@ -0,0 +1,87 @@ +package openclaw + +import ( + "context" + "encoding/json" + "fmt" + "slices" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// BuildSubstrateBootstrapJSON builds openclaw.json and ActorTemplate container env for Agent Substrate. +// Model and channel credentials use OpenClaw env SecretRefs in openclaw.json ({source:"env",provider:"default",id:"..."}) +// and ActorTemplate container env (literal value or valueFrom secretKeyRef/configMapKeyRef, resolved by ate-api at resume). +func BuildSubstrateBootstrapJSON(ctx context.Context, kube client.Client, namespace string, sbx *v1alpha2.AgentHarness, mc *v1alpha2.ModelConfig, gw GatewayBootstrapConfig) ([]byte, []corev1.EnvVar, error) { + if mc == nil { + return nil, nil, fmt.Errorf("ModelConfig is required") + } + apiKeyEnvVar, err := ModelConfigAPIKeyEnvVar(mc) + if err != nil { + return nil, nil, err + } + apiAdapter, err := providerAPI(mc) + if err != nil { + return nil, nil, err + } + + modelID, err := requiredModelID(mc) + if err != nil { + return nil, nil, err + } + + apiKeyEnv := apiKeyEnvVar.Name + providerRecord := GatewayProviderRecordName(mc.Spec.Provider) + apiKeyRef := openclawEnvSecretRef(apiKeyEnv) + doc := buildCoreBootstrapDocument(mc, gw, credentialValue{envSecret: &apiKeyRef}, providerRecord, modelID, apiAdapter, SubstrateBootstrapDefaultBaseURL) + + chState, channelEnv, err := accumulateSubstrateHarnessChannels(ctx, kube, namespace, sbx.Spec.Channels) + if err != nil { + return nil, nil, err + } + doc.Channels = chState.channelsJSON() + + applySubstrateSecretsAllowlist(&doc, apiKeyEnv, channelEnv) + + raw, err := json.Marshal(doc) + if err != nil { + return nil, nil, fmt.Errorf("marshal openclaw json: %w", err) + } + return raw, substrateContainerEnv(apiKeyEnvVar, channelEnv), nil +} + +func substrateContainerEnv(apiKey corev1.EnvVar, 
extra []corev1.EnvVar) []corev1.EnvVar { + out := make([]corev1.EnvVar, 0, len(extra)+2) + out = append(out, apiKey) + out = append(out, extra...) + out = append(out, corev1.EnvVar{Name: "HOME", Value: "/root"}) + return out +} + +func applySubstrateSecretsAllowlist(doc *bootstrapDocument, apiKeyEnv string, channelEnv []corev1.EnvVar) { + seen := make(map[string]struct{}, len(channelEnv)+1) + secretAllow := make([]string, 0, len(channelEnv)+1) + add := func(name string) { + if _, ok := seen[name]; ok { + return + } + seen[name] = struct{}{} + secretAllow = append(secretAllow, name) + } + add(apiKeyEnv) + for _, env := range channelEnv { + add(env.Name) + } + slices.Sort(secretAllow) + doc.Secrets = secretsSection{ + Providers: map[string]secretProvider{ + substrateSecretProviderID: { + Source: "env", + Allowlist: secretAllow, + }, + }, + Defaults: &secretsDefaults{Env: substrateSecretProviderID}, + } +} diff --git a/go/core/pkg/sandboxbackend/openclaw/bootstrap_substrate_test.go b/go/core/pkg/sandboxbackend/openclaw/bootstrap_substrate_test.go new file mode 100644 index 0000000000..a5136bc81f --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/bootstrap_substrate_test.go @@ -0,0 +1,145 @@ +package openclaw_test + +import ( + "context" + "encoding/json" + "testing" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestSubstrateGatewayBootstrap(t *testing.T) { + t.Parallel() + raw, err := openclaw.BuildGatewayOnlyBootstrapJSON(openclaw.SubstrateGatewayBootstrap("tok", 80, "/api/agentharnesses/kagent/claw/gateway/")) + require.NoError(t, err) + var root map[string]any + 
require.NoError(t, json.Unmarshal(raw, &root)) + gw := root["gateway"].(map[string]any) + require.Equal(t, "lan", gw["bind"]) + cui := gw["controlUi"].(map[string]any) + require.Equal(t, "/api/agentharnesses/kagent/claw/gateway", cui["basePath"]) + require.Equal(t, true, cui["dangerouslyDisableDeviceAuth"]) +} + +func TestBuildSubstrateBootstrapJSON_ModelConfigAPIKeyUsesSecretRef(t *testing.T) { + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + ns := "default" + mc := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "mc1", Namespace: ns}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + APIKeySecret: "openai-key", + APIKeySecretKey: "OPENAI_API_KEY", + OpenAI: &v1alpha2.OpenAIConfig{}, + }, + } + sbx := &v1alpha2.AgentHarness{ObjectMeta: metav1.ObjectMeta{Name: "s1", Namespace: ns}} + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(mc).Build() + raw, env, err := openclaw.BuildSubstrateBootstrapJSON(context.Background(), kube, ns, sbx, mc, openclaw.SubstrateGatewayBootstrap("tok", 80, "/gw/")) + require.NoError(t, err) + + var root map[string]any + require.NoError(t, json.Unmarshal(raw, &root)) + secRoot := root["secrets"].(map[string]any) + secProvs := secRoot["providers"].(map[string]any) + defaultProv := secProvs["default"].(map[string]any) + require.Contains(t, defaultProv["allowlist"], "OPENAI_API_KEY") + defaults := secRoot["defaults"].(map[string]any) + require.Equal(t, "default", defaults["env"]) + + var apiKeyEnv *corev1.EnvVar + for i := range env { + if env[i].Name == "OPENAI_API_KEY" { + apiKeyEnv = &env[i] + break + } + } + require.NotNil(t, apiKeyEnv) + require.NotNil(t, apiKeyEnv.ValueFrom) + require.NotNil(t, apiKeyEnv.ValueFrom.SecretKeyRef) + require.Equal(t, "openai-key", apiKeyEnv.ValueFrom.SecretKeyRef.Name) + require.Equal(t, "OPENAI_API_KEY", apiKeyEnv.ValueFrom.SecretKeyRef.Key) 
+ require.Empty(t, apiKeyEnv.Value) +} + +func TestBuildSubstrateBootstrapJSON_TelegramUsesEnvSecretRef(t *testing.T) { + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + ns := "default" + mc := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "mc1", Namespace: ns}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + APIKeySecret: "openai-key", + APIKeySecretKey: "OPENAI_API_KEY", + OpenAI: &v1alpha2.OpenAIConfig{}, + }, + } + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "tg-token", Namespace: ns}, + Data: map[string][]byte{"token": []byte("telegram-bot-token")}, + } + sbx := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{Name: "s1", Namespace: ns}, + Spec: v1alpha2.AgentHarnessSpec{ + Channels: []v1alpha2.AgentHarnessChannel{ + { + Name: "tg1", + Type: v1alpha2.AgentHarnessChannelTypeTelegram, + Telegram: &v1alpha2.AgentHarnessTelegramChannelSpec{ + BotToken: v1alpha2.AgentHarnessChannelCredential{ + ValueFrom: &v1alpha2.ValueSource{ + Type: v1alpha2.SecretValueSource, + Name: "tg-token", + Key: "token", + }, + }, + }, + }, + }, + }, + } + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(mc, secret).Build() + raw, env, err := openclaw.BuildSubstrateBootstrapJSON(context.Background(), kube, ns, sbx, mc, openclaw.SubstrateGatewayBootstrap("tok", 80, "/gw/")) + require.NoError(t, err) + + var root map[string]any + require.NoError(t, json.Unmarshal(raw, &root)) + tg := root["channels"].(map[string]any)["telegram"].(map[string]any) + tg1 := tg["accounts"].(map[string]any)["tg1"].(map[string]any) + botToken := tg1["botToken"].(map[string]any) + require.Equal(t, "env", botToken["source"]) + require.Equal(t, "default", botToken["provider"]) + require.Equal(t, "KAGENT_SB_CH_TG1_TELEGRAM_BOT", botToken["id"]) + require.NotEqual(t, "telegram-bot-token", tg1["botToken"]) + + var tgEnv 
*corev1.EnvVar + for i := range env { + if env[i].Name == "KAGENT_SB_CH_TG1_TELEGRAM_BOT" { + tgEnv = &env[i] + break + } + } + require.NotNil(t, tgEnv) + require.NotNil(t, tgEnv.ValueFrom) + require.NotNil(t, tgEnv.ValueFrom.SecretKeyRef) + require.Equal(t, "tg-token", tgEnv.ValueFrom.SecretKeyRef.Name) + require.Equal(t, "token", tgEnv.ValueFrom.SecretKeyRef.Key) +} diff --git a/go/core/pkg/sandboxbackend/openclaw/channels_openshell.go b/go/core/pkg/sandboxbackend/openclaw/channels_openshell.go new file mode 100644 index 0000000000..a4000aa0b7 --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/channels_openshell.go @@ -0,0 +1,123 @@ +package openclaw + +import ( + "context" + "fmt" + "strings" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/channels" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func accumulateHarnessChannels(ctx context.Context, kube client.Client, namespace string, specChannels []v1alpha2.AgentHarnessChannel, env map[string]string) (*harnessChannels, error) { + a := newHarnessChannels() + for _, ch := range specChannels { + switch ch.Type { + case v1alpha2.AgentHarnessChannelTypeTelegram: + if err := a.addTelegram(ctx, kube, namespace, ch, env); err != nil { + return nil, err + } + case v1alpha2.AgentHarnessChannelTypeSlack: + if err := a.addSlack(ctx, kube, namespace, ch, env); err != nil { + return nil, err + } + default: + return nil, unsupportedChannelType(ch.Name, ch.Type) + } + } + return a, nil +} + +func (a *harnessChannels) addTelegram(ctx context.Context, kube client.Client, namespace string, ch v1alpha2.AgentHarnessChannel, env map[string]string) error { + spec := ch.Telegram + if spec == nil { + return fmt.Errorf("channel %q: telegram spec is required", ch.Name) + } + botEnv := channels.TelegramBotTokenEnvKey(ch.Name) + if err := putChannelCredential(ctx, kube, namespace, spec.BotToken, botEnv, env); err != nil { + return fmt.Errorf("channel %q 
telegram bot token: %w", ch.Name, err) + } + allowFrom, err := telegramAllowFrom(ctx, kube, namespace, spec) + if err != nil { + return fmt.Errorf("channel %q telegram allowlist: %w", ch.Name, err) + } + acc := telegramAccount{ + Name: ch.Name, + Enabled: true, + BotToken: credentialValue{literal: openshellResolveEnv(botEnv)}, + } + if len(allowFrom) > 0 { + acc.DMPolicy = "allowlist" + acc.AllowFrom = allowFrom + } else { + acc.DMPolicy = "pairing" + } + a.telegram[ch.Name] = acc + if a.tgDef == "" { + a.tgDef = ch.Name + } + return nil +} + +func (a *harnessChannels) addSlack(ctx context.Context, kube client.Client, namespace string, ch v1alpha2.AgentHarnessChannel, env map[string]string) error { + spec := ch.Slack + if spec == nil { + return fmt.Errorf("channel %q: slack spec is required", ch.Name) + } + botEnv := channels.SlackBotTokenEnvKey(ch.Name) + appEnv := channels.SlackAppTokenEnvKey(ch.Name) + if err := putChannelCredential(ctx, kube, namespace, spec.BotToken, botEnv, env); err != nil { + return fmt.Errorf("channel %q slack bot token: %w", ch.Name, err) + } + if err := putChannelCredential(ctx, kube, namespace, spec.AppToken, appEnv, env); err != nil { + return fmt.Errorf("channel %q slack app token: %w", ch.Name, err) + } + opts := openClawSlackOptions(spec) + access := openClawSlackChannelAccess(opts) + acc := slackAccount{ + Name: ch.Name, + Enabled: true, + Mode: "socket", + BotToken: credentialValue{literal: channels.SlackBotTokenPlaceholder(botEnv)}, + AppToken: credentialValue{literal: channels.SlackAppTokenPlaceholder(appEnv)}, + UserTokenReadOnly: true, + GroupPolicy: string(access), + Capabilities: slackCaps{ + InteractiveReplies: slackInteractiveReplies(opts), + }, + } + if chans := trimNonEmptyStrings(opts.AllowlistChannels); len(chans) > 0 { + acc.DM = &groupDM{GroupEnabled: true, GroupChannels: chans} + } + a.slack[ch.Name] = acc + if a.slDef == "" { + a.slDef = ch.Name + } + if !a.slackSeen { + a.slackRootPolicy = access + a.slackSeen = 
true + } + return nil +} + +func telegramAllowFrom(ctx context.Context, kube client.Client, namespace string, spec *v1alpha2.AgentHarnessTelegramChannelSpec) ([]string, error) { + if len(spec.AllowedUserIDs) > 0 { + out := make([]string, 0, len(spec.AllowedUserIDs)) + for _, id := range spec.AllowedUserIDs { + s := strings.TrimSpace(id) + if s != "" { + out = append(out, s) + } + } + return out, nil + } + if spec.AllowedUserIDsFrom != nil { + raw, err := spec.AllowedUserIDsFrom.Resolve(ctx, kube, namespace) + if err != nil { + return nil, fmt.Errorf("resolve allowedUserIDsFrom: %w", err) + } + return splitAllowedList(raw), nil + } + return nil, nil +} diff --git a/go/core/pkg/sandboxbackend/openclaw/channels_shared.go b/go/core/pkg/sandboxbackend/openclaw/channels_shared.go new file mode 100644 index 0000000000..75aa66872c --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/channels_shared.go @@ -0,0 +1,105 @@ +package openclaw + +import ( + "fmt" + "strings" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" +) + +type harnessChannels struct { + telegram map[string]telegramAccount + tgDef string + + slack map[string]slackAccount + slDef string + + slackRootPolicy v1alpha2.AgentHarnessChannelAccess + slackSeen bool +} + +func newHarnessChannels() *harnessChannels { + return &harnessChannels{ + telegram: make(map[string]telegramAccount), + slack: make(map[string]slackAccount), + } +} + +func (a *harnessChannels) channelsJSON() *channelsConfig { + if len(a.telegram) == 0 && len(a.slack) == 0 { + return nil + } + out := &channelsConfig{} + if len(a.telegram) > 0 { + out.Telegram = &telegramBundle{ + Enabled: true, + Accounts: a.telegram, + DefaultAccount: a.tgDef, + } + } + if len(a.slack) > 0 { + out.Slack = &slackBundle{ + Enabled: true, + Mode: "socket", + WebhookPath: "/slack/events", + UserTokenReadOnly: true, + GroupPolicy: string(a.slackRootPolicy), + Accounts: a.slack, + DefaultAccount: a.slDef, + } + } + return out +} + +func openClawSlackOptions(spec 
*v1alpha2.AgentHarnessSlackChannelSpec) *v1alpha2.AgentHarnessOpenClawSlackOptions { + if spec == nil || spec.OpenClaw == nil { + return &v1alpha2.AgentHarnessOpenClawSlackOptions{} + } + return spec.OpenClaw +} + +func slackInteractiveReplies(opts *v1alpha2.AgentHarnessOpenClawSlackOptions) bool { + if opts == nil || opts.InteractiveReplies == nil { + return true + } + return *opts.InteractiveReplies +} + +func openClawSlackChannelAccess(opts *v1alpha2.AgentHarnessOpenClawSlackOptions) v1alpha2.AgentHarnessChannelAccess { + if opts == nil || opts.ChannelAccess == "" { + return v1alpha2.AgentHarnessChannelAccessOpen + } + return opts.ChannelAccess +} + +func splitAllowedList(raw string) []string { + raw = strings.TrimSpace(raw) + if raw == "" { + return nil + } + var out []string + for _, part := range strings.FieldsFunc(raw, func(r rune) bool { + return r == ',' || r == '\n' || r == ';' + }) { + s := strings.TrimSpace(part) + if s != "" { + out = append(out, s) + } + } + return out +} + +func trimNonEmptyStrings(ss []string) []string { + out := make([]string, 0, len(ss)) + for _, s := range ss { + s = strings.TrimSpace(s) + if s != "" { + out = append(out, s) + } + } + return out +} + +func unsupportedChannelType(name string, typ v1alpha2.AgentHarnessChannelType) error { + return fmt.Errorf("channel %q: unsupported type %q", name, typ) +} diff --git a/go/core/pkg/sandboxbackend/openclaw/channels_substrate.go b/go/core/pkg/sandboxbackend/openclaw/channels_substrate.go new file mode 100644 index 0000000000..594661fb59 --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/channels_substrate.go @@ -0,0 +1,114 @@ +package openclaw + +import ( + "context" + "fmt" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// accumulateSubstrateHarnessChannels configures channels with OpenClaw env SecretRefs in openclaw.json +// and returns container env vars (inline value or Kubernetes valueFrom 
refs) for the ActorTemplate. +func accumulateSubstrateHarnessChannels(ctx context.Context, kube client.Client, namespace string, channels []v1alpha2.AgentHarnessChannel) (*harnessChannels, []corev1.EnvVar, error) { + a := newHarnessChannels() + var containerEnv []corev1.EnvVar + for _, ch := range channels { + switch ch.Type { + case v1alpha2.AgentHarnessChannelTypeTelegram: + env, err := a.addSubstrateTelegram(ctx, kube, namespace, ch) + if err != nil { + return nil, nil, err + } + containerEnv = append(containerEnv, env...) + case v1alpha2.AgentHarnessChannelTypeSlack: + env, err := a.addSubstrateSlack(ctx, kube, namespace, ch) + if err != nil { + return nil, nil, err + } + containerEnv = append(containerEnv, env...) + default: + return nil, nil, unsupportedChannelType(ch.Name, ch.Type) + } + } + return a, containerEnv, nil +} + +func (a *harnessChannels) addSubstrateTelegram(ctx context.Context, kube client.Client, namespace string, ch v1alpha2.AgentHarnessChannel) ([]corev1.EnvVar, error) { + spec := ch.Telegram + if spec == nil { + return nil, fmt.Errorf("channel %q: telegram spec is required", ch.Name) + } + botEnv := channelSecretEnvVar(ch.Name, "TELEGRAM_BOT") + botEnvVar, err := channelCredentialContainerEnv(spec.BotToken, botEnv) + if err != nil { + return nil, fmt.Errorf("channel %q telegram bot token: %w", ch.Name, err) + } + allowFrom, err := telegramAllowFrom(ctx, kube, namespace, spec) + if err != nil { + return nil, fmt.Errorf("channel %q telegram allowlist: %w", ch.Name, err) + } + ref := openclawEnvSecretRef(botEnv) + acc := telegramAccount{ + Name: ch.Name, + Enabled: true, + BotToken: credentialValue{envSecret: &ref}, + } + if len(allowFrom) > 0 { + acc.DMPolicy = "allowlist" + acc.AllowFrom = allowFrom + } else { + acc.DMPolicy = "pairing" + } + a.telegram[ch.Name] = acc + if a.tgDef == "" { + a.tgDef = ch.Name + } + return []corev1.EnvVar{botEnvVar}, nil +} + +func (a *harnessChannels) addSubstrateSlack(ctx context.Context, kube client.Client, 
namespace string, ch v1alpha2.AgentHarnessChannel) ([]corev1.EnvVar, error) { + spec := ch.Slack + if spec == nil { + return nil, fmt.Errorf("channel %q: slack spec is required", ch.Name) + } + botEnv := channelSecretEnvVar(ch.Name, "SLACK_BOT") + appEnv := channelSecretEnvVar(ch.Name, "SLACK_APP") + botEnvVar, err := channelCredentialContainerEnv(spec.BotToken, botEnv) + if err != nil { + return nil, fmt.Errorf("channel %q slack bot token: %w", ch.Name, err) + } + appEnvVar, err := channelCredentialContainerEnv(spec.AppToken, appEnv) + if err != nil { + return nil, fmt.Errorf("channel %q slack app token: %w", ch.Name, err) + } + botRef := openclawEnvSecretRef(botEnv) + appRef := openclawEnvSecretRef(appEnv) + opts := openClawSlackOptions(spec) + access := openClawSlackChannelAccess(opts) + acc := slackAccount{ + Name: ch.Name, + Enabled: true, + Mode: "socket", + BotToken: credentialValue{envSecret: &botRef}, + AppToken: credentialValue{envSecret: &appRef}, + UserTokenReadOnly: true, + GroupPolicy: string(access), + Capabilities: slackCaps{ + InteractiveReplies: slackInteractiveReplies(opts), + }, + } + if chans := trimNonEmptyStrings(opts.AllowlistChannels); len(chans) > 0 { + acc.DM = &groupDM{GroupEnabled: true, GroupChannels: chans} + } + a.slack[ch.Name] = acc + if a.slDef == "" { + a.slDef = ch.Name + } + if !a.slackSeen { + a.slackRootPolicy = access + a.slackSeen = true + } + return []corev1.EnvVar{botEnvVar, appEnvVar}, nil +} diff --git a/go/core/pkg/sandboxbackend/openclaw/constants.go b/go/core/pkg/sandboxbackend/openclaw/constants.go new file mode 100644 index 0000000000..1f6f88b687 --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/constants.go @@ -0,0 +1,22 @@ +package openclaw + +const ( + // NemoclawSandboxBaseImage is the default OpenShell VM image for OpenClaw/NemoClaw harnesses. + // Substrate requires workload images to use @sha256:... refs (see pinImageRef). 
(OpenShell doesn't care) + // Tag: 2026.5.4 + NemoclawSandboxBaseImage = "ghcr.io/kagent-dev/nemoclaw/sandbox-base@sha256:d52bee415dc4c0dba7164f9eabe727574c056d4f211781f20af249707883a3b4" + + // openshellSecretProviderID is the secrets.providers key written into openclaw.json for OpenShell sandboxes. + openshellSecretProviderID = "kagent" + + // substrateSecretProviderID is the env SecretRef provider id for native OpenClaw on Substrate. + substrateSecretProviderID = "default" + + // DefaultInferenceBaseURL is the Model provider baseUrl when ModelConfig does not set an explicit upstream (OpenShell). + DefaultInferenceBaseURL = "https://inference.local/v1" + + // SubstrateBootstrapDefaultBaseURL is passed when building openclaw.json for Substrate harnesses. + // When ModelConfig has no explicit provider URL, the models section is omitted entirely so + // OpenClaw is not given a partial providers.* block (baseUrl is required when present). + SubstrateBootstrapDefaultBaseURL = "" +) diff --git a/go/core/pkg/sandboxbackend/openclaw/credentials.go b/go/core/pkg/sandboxbackend/openclaw/credentials.go new file mode 100644 index 0000000000..f0d56c4bc9 --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/credentials.go @@ -0,0 +1,83 @@ +package openclaw + +import ( + "context" + "fmt" + "strings" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func sandboxChannelEnvSuffix(name string) string { + var b strings.Builder + for _, r := range strings.ToUpper(strings.TrimSpace(name)) { + switch { + case r >= 'A' && r <= 'Z', r >= '0' && r <= '9': + b.WriteRune(r) + default: + b.WriteByte('_') + } + } + s := strings.Trim(b.String(), "_") + if s == "" { + return "CH" + } + return s +} + +func channelSecretEnvVar(channelName, tokenRole string) string { + return fmt.Sprintf("KAGENT_SB_CH_%s_%s", sandboxChannelEnvSuffix(channelName), tokenRole) +} + +func putChannelCredential(ctx context.Context, 
kube client.Client, namespace string, cred v1alpha2.AgentHarnessChannelCredential, envKey string, env map[string]string) error { + if strings.TrimSpace(cred.Value) != "" { + env[envKey] = strings.TrimSpace(cred.Value) + return nil + } + if cred.ValueFrom == nil { + return fmt.Errorf("channel credential requires value or valueFrom") + } + v, err := cred.ValueFrom.Resolve(ctx, kube, namespace) + if err != nil { + return fmt.Errorf("resolve credential %s: %w", envKey, err) + } + env[envKey] = v + return nil +} + +// channelCredentialContainerEnv maps a harness channel credential to an ActorTemplate env var. +// Inline values use env.Value; Secret/ConfigMap sources use valueFrom refs resolved by Substrate ate-api at resume. +func channelCredentialContainerEnv(cred v1alpha2.AgentHarnessChannelCredential, envKey string) (corev1.EnvVar, error) { + if v := strings.TrimSpace(cred.Value); v != "" { + return corev1.EnvVar{Name: envKey, Value: v}, nil + } + if cred.ValueFrom == nil { + return corev1.EnvVar{}, fmt.Errorf("channel credential requires value or valueFrom") + } + switch cred.ValueFrom.Type { + case v1alpha2.SecretValueSource: + return corev1.EnvVar{ + Name: envKey, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: cred.ValueFrom.Name}, + Key: cred.ValueFrom.Key, + }, + }, + }, nil + case v1alpha2.ConfigMapValueSource: + return corev1.EnvVar{ + Name: envKey, + ValueFrom: &corev1.EnvVarSource{ + ConfigMapKeyRef: &corev1.ConfigMapKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: cred.ValueFrom.Name}, + Key: cred.ValueFrom.Key, + }, + }, + }, nil + default: + return corev1.EnvVar{}, fmt.Errorf("unknown value source type %q", cred.ValueFrom.Type) + } +} diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/defaults.go b/go/core/pkg/sandboxbackend/openclaw/defaults.go similarity index 100% rename from go/core/pkg/sandboxbackend/openshell/openclaw/defaults.go 
rename to go/core/pkg/sandboxbackend/openclaw/defaults.go diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go b/go/core/pkg/sandboxbackend/openclaw/modelconfig.go similarity index 58% rename from go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go rename to go/core/pkg/sandboxbackend/openclaw/modelconfig.go index 3bb29e88fd..a83e5871b6 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go +++ b/go/core/pkg/sandboxbackend/openclaw/modelconfig.go @@ -16,6 +16,31 @@ func GatewayProviderRecordName(provider v1alpha2.ModelProvider) string { return strings.ToLower(string(provider)) } +// ModelConfigAPIKeyEnvVar returns a container env var that references the ModelConfig API key Secret. +// Substrate ate-api resolves secretKeyRef when resuming an actor (see workload_spec in substrate ate-api). +func ModelConfigAPIKeyEnvVar(mc *v1alpha2.ModelConfig) (corev1.EnvVar, error) { + if mc == nil { + return corev1.EnvVar{}, fmt.Errorf("ModelConfig is required") + } + if mc.Spec.APIKeyPassthrough { + return corev1.EnvVar{}, fmt.Errorf("APIKeyPassthrough is not supported for Substrate OpenClaw provisioning from ModelConfig") + } + if mc.Spec.APIKeySecret == "" || mc.Spec.APIKeySecretKey == "" { + return corev1.EnvVar{}, fmt.Errorf("modelConfig %s/%s requires apiKeySecret and apiKeySecretKey", mc.Namespace, mc.Name) + } + return corev1.EnvVar{ + Name: DefaultAPIKeyEnvVar(mc.Spec.Provider), + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: mc.Spec.APIKeySecret, + }, + Key: mc.Spec.APIKeySecretKey, + }, + }, + }, nil +} + // ResolveModelConfigAPIKey reads the API key from the Secret referenced by ModelConfig. 
func ResolveModelConfigAPIKey(ctx context.Context, kube client.Client, mc *v1alpha2.ModelConfig) (string, error) { if mc.Spec.APIKeyPassthrough { diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/policy.go b/go/core/pkg/sandboxbackend/openclaw/policy.go similarity index 100% rename from go/core/pkg/sandboxbackend/openshell/openclaw/policy.go rename to go/core/pkg/sandboxbackend/openclaw/policy.go diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go b/go/core/pkg/sandboxbackend/openclaw/provider.go similarity index 89% rename from go/core/pkg/sandboxbackend/openshell/openclaw/provider.go rename to go/core/pkg/sandboxbackend/openclaw/provider.go index 70a075a272..8c4183e0e9 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go +++ b/go/core/pkg/sandboxbackend/openclaw/provider.go @@ -7,7 +7,7 @@ import ( "github.com/kagent-dev/kagent/go/api/v1alpha2" ) -func bootstrapProviderBaseURL(mc *v1alpha2.ModelConfig) string { +func modelConfigExplicitBaseURL(mc *v1alpha2.ModelConfig) string { switch mc.Spec.Provider { case v1alpha2.ModelProviderOpenAI: if mc.Spec.OpenAI != nil && strings.TrimSpace(mc.Spec.OpenAI.BaseURL) != "" { @@ -30,7 +30,14 @@ func bootstrapProviderBaseURL(mc *v1alpha2.ModelConfig) string { return strings.TrimSpace(mc.Spec.SAPAICore.BaseURL) } } - return DefaultInferenceBaseURL + return "" +} + +func bootstrapProviderBaseURL(mc *v1alpha2.ModelConfig, defaultWhenUnset string) string { + if u := modelConfigExplicitBaseURL(mc); u != "" { + return u + } + return defaultWhenUnset } func providerAuth(mc *v1alpha2.ModelConfig) string { diff --git a/go/core/pkg/sandboxbackend/openclaw/secrets.go b/go/core/pkg/sandboxbackend/openclaw/secrets.go new file mode 100644 index 0000000000..82bf3b8e97 --- /dev/null +++ b/go/core/pkg/sandboxbackend/openclaw/secrets.go @@ -0,0 +1,33 @@ +package openclaw + +import ( + "encoding/json" +) + +// envSecretRef is OpenClaw's structured env SecretRef (see 
https://docs.openclaw.ai/gateway/secrets). +type envSecretRef struct { + Source string `json:"source"` + Provider string `json:"provider"` + ID string `json:"id"` +} + +func openclawEnvSecretRef(envVar string) envSecretRef { + return envSecretRef{ + Source: "env", + Provider: substrateSecretProviderID, + ID: envVar, + } +} + +// credentialValue marshals as either a plaintext string (OpenShell) or an OpenClaw env SecretRef (Substrate). +type credentialValue struct { + literal string + envSecret *envSecretRef +} + +func (c credentialValue) MarshalJSON() ([]byte, error) { + if c.envSecret != nil { + return json.Marshal(c.envSecret) + } + return json.Marshal(c.literal) +} diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/ssh_test.go b/go/core/pkg/sandboxbackend/openclaw/ssh_test.go similarity index 72% rename from go/core/pkg/sandboxbackend/openshell/openclaw/ssh_test.go rename to go/core/pkg/sandboxbackend/openclaw/ssh_test.go index a2f5d32aef..0d2773a544 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/ssh_test.go +++ b/go/core/pkg/sandboxbackend/openclaw/ssh_test.go @@ -3,7 +3,7 @@ package openclaw_test import ( "testing" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/stretchr/testify/require" ) diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/types.go b/go/core/pkg/sandboxbackend/openclaw/types.go similarity index 55% rename from go/core/pkg/sandboxbackend/openshell/openclaw/types.go rename to go/core/pkg/sandboxbackend/openclaw/types.go index da73347668..5d993dd824 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/types.go +++ b/go/core/pkg/sandboxbackend/openclaw/types.go @@ -5,21 +5,29 @@ package openclaw type bootstrapDocument struct { Gateway gatewaySection `json:"gateway"` - Models modelsSection `json:"models"` + Models *modelsSection `json:"models,omitempty"` Agents agentsSection `json:"agents"` Channels 
*channelsConfig `json:"channels,omitempty"` Secrets secretsSection `json:"secrets"` } type gatewaySection struct { - Mode string `json:"mode"` - Bind string `json:"bind"` - Auth gatewayAuth `json:"auth"` - Port int `json:"port"` + Mode string `json:"mode"` + Bind string `json:"bind"` + Auth gatewayAuth `json:"auth"` + Port int `json:"port"` + ControlUi *controlUiSection `json:"controlUi,omitempty"` } type gatewayAuth struct { - Mode string `json:"mode"` + Mode string `json:"mode"` + Token string `json:"token,omitempty"` +} + +type controlUiSection struct { + BasePath string `json:"basePath,omitempty"` + AllowedOrigins []string `json:"allowedOrigins,omitempty"` + DangerouslyDisableDeviceAuth bool `json:"dangerouslyDisableDeviceAuth,omitempty"` } type modelsSection struct { @@ -28,11 +36,11 @@ type modelsSection struct { } type providerSettings struct { - BaseURL string `json:"baseUrl"` - APIKey string `json:"apiKey"` - Auth string `json:"auth"` - API string `json:"api"` - Models []modelSlot `json:"models"` + BaseURL string `json:"baseUrl,omitempty"` + APIKey credentialValue `json:"apiKey"` + Auth string `json:"auth"` + API string `json:"api"` + Models []modelSlot `json:"models"` } type modelSlot struct { @@ -64,11 +72,11 @@ type telegramBundle struct { } type telegramAccount struct { - Name string `json:"name"` - Enabled bool `json:"enabled"` - BotToken string `json:"botToken"` - DMPolicy string `json:"dmPolicy"` - AllowFrom []string `json:"allowFrom,omitempty"` + Name string `json:"name"` + Enabled bool `json:"enabled"` + BotToken credentialValue `json:"botToken"` + DMPolicy string `json:"dmPolicy"` + AllowFrom []string `json:"allowFrom,omitempty"` } type slackBundle struct { @@ -82,15 +90,15 @@ type slackBundle struct { } type slackAccount struct { - Name string `json:"name"` - Enabled bool `json:"enabled"` - Mode string `json:"mode"` - BotToken string `json:"botToken"` - AppToken string `json:"appToken"` - UserTokenReadOnly bool `json:"userTokenReadOnly"` - 
GroupPolicy string `json:"groupPolicy"` - Capabilities slackCaps `json:"capabilities"` - DM *groupDM `json:"dm,omitempty"` + Name string `json:"name"` + Enabled bool `json:"enabled"` + Mode string `json:"mode"` + BotToken credentialValue `json:"botToken"` + AppToken credentialValue `json:"appToken"` + UserTokenReadOnly bool `json:"userTokenReadOnly"` + GroupPolicy string `json:"groupPolicy"` + Capabilities slackCaps `json:"capabilities"` + DM *groupDM `json:"dm,omitempty"` } type slackCaps struct { @@ -104,9 +112,14 @@ type groupDM struct { type secretsSection struct { Providers map[string]secretProvider `json:"providers"` + Defaults *secretsDefaults `json:"defaults,omitempty"` } type secretProvider struct { Source string `json:"source"` - Allowlist []string `json:"allowlist"` + Allowlist []string `json:"allowlist,omitempty"` +} + +type secretsDefaults struct { + Env string `json:"env"` } diff --git a/go/core/pkg/sandboxbackend/openshell/agentharness_openshell_client.go b/go/core/pkg/sandboxbackend/openshell/agentharness_openshell_client.go index 7394beae62..f35fec7acf 100644 --- a/go/core/pkg/sandboxbackend/openshell/agentharness_openshell_client.go +++ b/go/core/pkg/sandboxbackend/openshell/agentharness_openshell_client.go @@ -129,9 +129,9 @@ func (c *AgentHarnessOpenShellClient) GetSandboxStatus(ctx context.Context, h sa } // DeleteAgentHarnessSandbox deletes the OpenShell sandbox; NotFound is success. 
-func (c *AgentHarnessOpenShellClient) DeleteAgentHarnessSandbox(ctx context.Context, h sandboxbackend.Handle) error { +func (c *AgentHarnessOpenShellClient) DeleteAgentHarnessSandbox(ctx context.Context, h sandboxbackend.Handle) (bool, error) { if h.ID == "" { - return nil + return true, nil } ctx, cancel := c.CallCtx(ctx) defer cancel() @@ -139,17 +139,17 @@ func (c *AgentHarnessOpenShellClient) DeleteAgentHarnessSandbox(ctx context.Cont osCli := c.openShell() if osCli == nil { - return fmt.Errorf("openshell: OpenShell client is required") + return false, fmt.Errorf("openshell: OpenShell client is required") } _, err := osCli.DeleteSandbox(ctx, &openshellv1.DeleteSandboxRequest{Name: h.ID}) if err == nil { - return nil + return true, nil } if status.Code(err) == codes.NotFound { - return nil + return true, nil } - return fmt.Errorf("openshell DeleteSandbox %s: %w", h.ID, err) + return false, fmt.Errorf("openshell DeleteSandbox %s: %w", h.ID, err) } // ExecSandboxID resolves metadata.id for ExecSandbox RPCs. 
diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw.go b/go/core/pkg/sandboxbackend/openshell/openclaw.go index 9f95a407a5..44a3508e10 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw.go +++ b/go/core/pkg/sandboxbackend/openshell/openclaw.go @@ -10,7 +10,7 @@ import ( "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/internal/utils" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" ctrllog "sigs.k8s.io/controller-runtime/pkg/log" @@ -87,7 +87,7 @@ func (b *ClawBackend) OnAgentHarnessReady(ctx context.Context, ah *v1alpha2.Agen gwPort := defaultOpenclawGatewayPort token := b.cfg.Token - jsonBytes, env, err := openclaw.BuildBootstrapJSON(ctx, b.kubeClient, ah.Namespace, ah, mc, gwPort) + jsonBytes, env, err := openclaw.BuildBootstrapJSON(ctx, b.kubeClient, ah.Namespace, ah, mc, openclaw.OpenshellGatewayBootstrap(gwPort), openclaw.DefaultInferenceBaseURL) if err != nil { return fmt.Errorf("build openclaw config: %w", err) } diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/channels.go b/go/core/pkg/sandboxbackend/openshell/openclaw/channels.go deleted file mode 100644 index 1223a30f2f..0000000000 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/channels.go +++ /dev/null @@ -1,97 +0,0 @@ -package openclaw - -import ( - "context" - "maps" - - "github.com/kagent-dev/kagent/go/api/v1alpha2" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/channels" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type harnessChannels struct { - resolved *channels.Resolved -} - -func accumulateHarnessChannels(ctx context.Context, kube client.Client, namespace string, backend v1alpha2.AgentHarnessBackendType, specChannels []v1alpha2.AgentHarnessChannel, env map[string]string) 
(*harnessChannels, error) { - resolved, err := channels.Resolve(ctx, kube, namespace, backend, specChannels) - if err != nil { - return nil, err - } - maps.Copy(env, resolved.Secrets) - return &harnessChannels{resolved: resolved}, nil -} - -func (a *harnessChannels) channelsJSON() *channelsConfig { - if a == nil || a.resolved == nil { - return nil - } - r := a.resolved - if len(r.Telegram) == 0 && len(r.Slack) == 0 { - return nil - } - out := &channelsConfig{} - if len(r.Telegram) > 0 { - accounts := make(map[string]telegramAccount, len(r.Telegram)) - var def string - for _, tg := range r.Telegram { - acc := telegramAccount{ - Name: tg.Name, - Enabled: true, - BotToken: openshellResolveEnv(channels.TelegramBotTokenEnvKey(tg.Name)), - } - if len(tg.AllowFrom) > 0 { - acc.DMPolicy = "allowlist" - acc.AllowFrom = tg.AllowFrom - } else { - acc.DMPolicy = "pairing" - } - accounts[tg.Name] = acc - if def == "" { - def = tg.Name - } - } - out.Telegram = &telegramBundle{ - Enabled: true, - Accounts: accounts, - DefaultAccount: def, - } - } - if len(r.Slack) > 0 { - accounts := make(map[string]slackAccount, len(r.Slack)) - var def string - for _, sl := range r.Slack { - botKey := channels.SlackBotTokenEnvKey(sl.Name) - appKey := channels.SlackAppTokenEnvKey(sl.Name) - acc := slackAccount{ - Name: sl.Name, - Enabled: true, - Mode: "socket", - BotToken: channels.SlackBotTokenPlaceholder(botKey), - AppToken: channels.SlackAppTokenPlaceholder(appKey), - UserTokenReadOnly: true, - GroupPolicy: string(sl.ChannelAccess), - Capabilities: slackCaps{ - InteractiveReplies: sl.InteractiveReplies, - }, - } - if chans := sl.AllowlistChannels; len(chans) > 0 { - acc.DM = &groupDM{GroupEnabled: true, GroupChannels: chans} - } - accounts[sl.Name] = acc - if def == "" { - def = sl.Name - } - } - out.Slack = &slackBundle{ - Enabled: true, - Mode: "socket", - WebhookPath: "/slack/events", - UserTokenReadOnly: true, - GroupPolicy: string(r.SlackRootGroupPolicy()), - Accounts: accounts, - 
DefaultAccount: def, - } - } - return out -} diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/constants.go b/go/core/pkg/sandboxbackend/openshell/openclaw/constants.go deleted file mode 100644 index dd0f98cdc8..0000000000 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/constants.go +++ /dev/null @@ -1,12 +0,0 @@ -package openclaw - -const ( - // NemoclawSandboxBaseImage is the default OpenShell VM image for OpenClaw/NemoClaw harnesses. - NemoclawSandboxBaseImage = "ghcr.io/kagent-dev/nemoclaw/sandbox-base:2026.5.4" - - // bootstrapSecretProviderID is the secrets.providers key written into openclaw.json. - bootstrapSecretProviderID = "kagent" - - // DefaultInferenceBaseURL is the Model provider baseUrl when ModelConfig does not set an explicit upstream. - DefaultInferenceBaseURL = "https://inference.local/v1" -) diff --git a/go/core/pkg/sandboxbackend/openshell/openshell.go b/go/core/pkg/sandboxbackend/openshell/openshell.go index 9802b1b9e7..299617245f 100644 --- a/go/core/pkg/sandboxbackend/openshell/openshell.go +++ b/go/core/pkg/sandboxbackend/openshell/openshell.go @@ -82,6 +82,6 @@ func (b *agentHarnessOpenShellBackend) GetStatus(ctx context.Context, h sandboxb } // DeleteAgentHarness implements AsyncBackend. 
-func (b *agentHarnessOpenShellBackend) DeleteAgentHarness(ctx context.Context, h sandboxbackend.Handle) error { +func (b *agentHarnessOpenShellBackend) DeleteAgentHarness(ctx context.Context, h sandboxbackend.Handle) (bool, error) { return b.DeleteAgentHarnessSandbox(ctx, h) } diff --git a/go/core/pkg/sandboxbackend/openshell/openshell_test.go b/go/core/pkg/sandboxbackend/openshell/openshell_test.go index 1f21c0d161..bf154f7bcd 100644 --- a/go/core/pkg/sandboxbackend/openshell/openshell_test.go +++ b/go/core/pkg/sandboxbackend/openshell/openshell_test.go @@ -13,7 +13,7 @@ import ( openshellv1 "github.com/kagent-dev/kagent/go/api/openshell/gen/openshellv1" "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/stretchr/testify/require" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -304,14 +304,20 @@ func TestDeleteSandbox(t *testing.T) { r, err := b.EnsureAgentHarness(context.Background(), sampleClawSandbox()) require.NoError(t, err) - require.NoError(t, b.DeleteAgentHarness(context.Background(), r.Handle)) + done, err := b.DeleteAgentHarness(context.Background(), r.Handle) + require.NoError(t, err) + require.True(t, done) require.Equal(t, 1, fg.deleteCalls) - require.NoError(t, b.DeleteAgentHarness(context.Background(), r.Handle)) + done, err = b.DeleteAgentHarness(context.Background(), r.Handle) + require.NoError(t, err) + require.True(t, done) require.Equal(t, 2, fg.deleteCalls) before := fg.deleteCalls - require.NoError(t, b.DeleteAgentHarness(context.Background(), sandboxbackend.Handle{})) + done, err = b.DeleteAgentHarness(context.Background(), sandboxbackend.Handle{}) + require.NoError(t, err) + require.True(t, done) require.Equal(t, before, fg.deleteCalls) } diff --git a/go/core/pkg/sandboxbackend/openshell/policy.go 
b/go/core/pkg/sandboxbackend/openshell/policy.go index b01b3a20e5..5f82cac698 100644 --- a/go/core/pkg/sandboxbackend/openshell/policy.go +++ b/go/core/pkg/sandboxbackend/openshell/policy.go @@ -6,8 +6,8 @@ import ( sandboxv1 "github.com/kagent-dev/kagent/go/api/openshell/gen/sandboxv1" "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/hermes" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" "google.golang.org/protobuf/proto" ) diff --git a/go/core/pkg/sandboxbackend/openshell/ssh_terminal.go b/go/core/pkg/sandboxbackend/openshell/ssh_terminal.go index 2299a76dc8..4437ba35b4 100644 --- a/go/core/pkg/sandboxbackend/openshell/ssh_terminal.go +++ b/go/core/pkg/sandboxbackend/openshell/ssh_terminal.go @@ -4,8 +4,8 @@ import ( "strings" "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/hermes" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" ) // ResolveSSHRemoteCommand decides whether to run an interactive shell or a harness CLI. 
diff --git a/go/core/pkg/sandboxbackend/openshell/ssh_terminal_test.go b/go/core/pkg/sandboxbackend/openshell/ssh_terminal_test.go index d9d1451dea..252b111a7b 100644 --- a/go/core/pkg/sandboxbackend/openshell/ssh_terminal_test.go +++ b/go/core/pkg/sandboxbackend/openshell/ssh_terminal_test.go @@ -3,9 +3,9 @@ package openshell_test import ( "testing" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/hermes" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" ) func TestResolveSSHRemoteCommand(t *testing.T) { diff --git a/go/core/pkg/sandboxbackend/openshell/translate.go b/go/core/pkg/sandboxbackend/openshell/translate.go index c6ae5a0d0d..d29c1fd8b1 100644 --- a/go/core/pkg/sandboxbackend/openshell/translate.go +++ b/go/core/pkg/sandboxbackend/openshell/translate.go @@ -9,7 +9,7 @@ import ( openshellv1 "github.com/kagent-dev/kagent/go/api/openshell/gen/openshellv1" "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/internal/utils" - "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ctrllog "sigs.k8s.io/controller-runtime/pkg/log" diff --git a/go/core/pkg/sandboxbackend/openshell/translate_test.go b/go/core/pkg/sandboxbackend/openshell/translate_test.go index 463ad5aa14..88f7124f42 100644 --- a/go/core/pkg/sandboxbackend/openshell/translate_test.go +++ b/go/core/pkg/sandboxbackend/openshell/translate_test.go @@ -4,8 +4,8 @@ import ( "testing" "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/hermes" - 
"github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) diff --git a/go/core/pkg/sandboxbackend/substrate/client.go b/go/core/pkg/sandboxbackend/substrate/client.go new file mode 100644 index 0000000000..bb23ebbf32 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/client.go @@ -0,0 +1,143 @@ +package substrate + +import ( + "context" + "crypto/tls" + "fmt" + "time" + + "github.com/agent-substrate/substrate/proto/ateapipb" + "google.golang.org/grpc" + "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/credentials" +) + +// Client wraps ate-api Control gRPC. +type Client struct { + ateapipb.ControlClient + conn *grpc.ClientConn + cfg Config +} + +// Dial connects to the ate-api server. +func Dial(ctx context.Context, cfg Config) (*Client, error) { + if cfg.AteAPIEndpoint == "" { + return nil, fmt.Errorf("substrate: ate-api endpoint is required") + } + dialTimeout := cfg.DialTimeout + if dialTimeout <= 0 { + dialTimeout = 10 * time.Second + } + dialCtx, cancel := context.WithTimeout(ctx, dialTimeout) + defer cancel() + + opts := []grpc.DialOption{ + grpc.WithTransportCredentials(credentials.NewTLS(ateAPITLSConfig(cfg.Insecure))), + } + + conn, err := grpc.NewClient(cfg.AteAPIEndpoint, opts...) + if err != nil { + return nil, fmt.Errorf("substrate: dial ate-api %q: %w", cfg.AteAPIEndpoint, err) + } + // NewClient stays idle until Connect() or an RPC; waitConnReady enforces DialTimeout. 
+ conn.Connect() + if err := waitConnReady(dialCtx, conn); err != nil { + _ = conn.Close() + return nil, fmt.Errorf("substrate: dial ate-api %q: %w", cfg.AteAPIEndpoint, err) + } + + return &Client{ + ControlClient: ateapipb.NewControlClient(conn), + conn: conn, + cfg: cfg, + }, nil +} + +func ateAPITLSConfig(insecure bool) *tls.Config { + tlsCfg := &tls.Config{MinVersion: tls.VersionTLS12} + if insecure { + // Kind/local ate-api uses pod-issued certs; skip verification (same as grpcurl -insecure). + tlsCfg.InsecureSkipVerify = true + } + return tlsCfg +} + +func waitConnReady(ctx context.Context, conn *grpc.ClientConn) error { + for { + switch s := conn.GetState(); s { + case connectivity.Ready: + return nil + case connectivity.Shutdown: + return fmt.Errorf("connection shut down") + default: + if !conn.WaitForStateChange(ctx, s) { + if err := ctx.Err(); err != nil { + return err + } + return fmt.Errorf("connection closed before ready") + } + } + } +} + +func (c *Client) Close() error { + if c.conn != nil { + return c.conn.Close() + } + return nil +} + +func (c *Client) callCtx(ctx context.Context) (context.Context, context.CancelFunc) { + if c.cfg.CallTimeout <= 0 { + return ctx, func() {} + } + return context.WithTimeout(ctx, c.cfg.CallTimeout) +} + +func (c *Client) GetActor(ctx context.Context, actorID string) (*ateapipb.Actor, error) { + ctx, cancel := c.callCtx(ctx) + defer cancel() + resp, err := c.ControlClient.GetActor(ctx, &ateapipb.GetActorRequest{ActorId: actorID}) + if err != nil { + return nil, err + } + return resp.GetActor(), nil +} + +func (c *Client) CreateActor(ctx context.Context, actorID, tmplNS, tmplName string) (*ateapipb.Actor, error) { + ctx, cancel := c.callCtx(ctx) + defer cancel() + resp, err := c.ControlClient.CreateActor(ctx, &ateapipb.CreateActorRequest{ + ActorId: actorID, + ActorTemplateNamespace: tmplNS, + ActorTemplateName: tmplName, + }) + if err != nil { + return nil, err + } + return resp.GetActor(), nil +} + +func (c *Client) 
ResumeActor(ctx context.Context, actorID string) (*ateapipb.Actor, error) { + ctx, cancel := c.callCtx(ctx) + defer cancel() + resp, err := c.ControlClient.ResumeActor(ctx, &ateapipb.ResumeActorRequest{ActorId: actorID}) + if err != nil { + return nil, err + } + return resp.GetActor(), nil +} + +func (c *Client) SuspendActor(ctx context.Context, actorID string) error { + ctx, cancel := c.callCtx(ctx) + defer cancel() + _, err := c.ControlClient.SuspendActor(ctx, &ateapipb.SuspendActorRequest{ActorId: actorID}) + return err +} + +func (c *Client) DeleteActor(ctx context.Context, actorID string) error { + ctx, cancel := c.callCtx(ctx) + defer cancel() + _, err := c.ControlClient.DeleteActor(ctx, &ateapipb.DeleteActorRequest{ActorId: actorID}) + return err +} diff --git a/go/core/pkg/sandboxbackend/substrate/client_test.go b/go/core/pkg/sandboxbackend/substrate/client_test.go new file mode 100644 index 0000000000..cc381cd350 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/client_test.go @@ -0,0 +1,66 @@ +package substrate + +import ( + "context" + "crypto/rand" + "crypto/rsa" + "crypto/tls" + "crypto/x509" + "math/big" + "net" + "testing" + "time" + + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" +) + +func TestAteAPITLSConfig(t *testing.T) { + cfg := ateAPITLSConfig(false) + require.False(t, cfg.InsecureSkipVerify) + + cfg = ateAPITLSConfig(true) + require.True(t, cfg.InsecureSkipVerify) + require.Equal(t, uint16(tls.VersionTLS12), cfg.MinVersion) +} + +func TestDial_tlsSkipVerifyReachesReady(t *testing.T) { + cert := newTestTLSCert(t) + + lis, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + srv := grpc.NewServer(grpc.Creds(credentials.NewTLS(&tls.Config{ + Certificates: []tls.Certificate{cert}, + MinVersion: tls.VersionTLS12, + }))) + go func() { _ = srv.Serve(lis) }() + t.Cleanup(func() { + srv.Stop() + _ = lis.Close() + }) + + c, err := Dial(context.Background(), Config{ + 
AteAPIEndpoint: lis.Addr().String(), + Insecure: true, + DialTimeout: 2 * time.Second, + }) + require.NoError(t, err) + require.NoError(t, c.Close()) +} + +func newTestTLSCert(t *testing.T) tls.Certificate { + t.Helper() + key, err := rsa.GenerateKey(rand.Reader, 2048) + require.NoError(t, err) + template := x509.Certificate{ + SerialNumber: big.NewInt(1), + NotBefore: time.Now(), + NotAfter: time.Now().Add(time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + } + der, err := x509.CreateCertificate(rand.Reader, &template, &template, &key.PublicKey, key) + require.NoError(t, err) + return tls.Certificate{Certificate: [][]byte{der}, PrivateKey: key} +} diff --git a/go/core/pkg/sandboxbackend/substrate/config.go b/go/core/pkg/sandboxbackend/substrate/config.go new file mode 100644 index 0000000000..071478d0c1 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/config.go @@ -0,0 +1,12 @@ +package substrate + +import "time" + +// Config holds connection settings for Agent Substrate ate-api. +type Config struct { + // AteAPIEndpoint is a gRPC target (e.g. dns:///api.ate-system.svc:443). + AteAPIEndpoint string + Insecure bool + DialTimeout time.Duration + CallTimeout time.Duration +} diff --git a/go/core/pkg/sandboxbackend/substrate/delete_actor.go b/go/core/pkg/sandboxbackend/substrate/delete_actor.go new file mode 100644 index 0000000000..34c9e5cc45 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/delete_actor.go @@ -0,0 +1,54 @@ +package substrate + +import ( + "context" + "fmt" + + "github.com/agent-substrate/substrate/proto/ateapipb" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// deleteActor performs at most one mutating ate-api step per call. +// Returns true when the actor no longer exists. Callers should requeue until true. 
+func deleteActor(ctx context.Context, c *Client, actorID string) (bool, error) { + if actorID == "" { + return true, nil + } + if c == nil { + return false, fmt.Errorf("substrate ate-api client is required") + } + + actor, err := c.GetActor(ctx, actorID) + if err != nil { + if status.Code(err) == codes.NotFound { + return true, nil + } + return false, fmt.Errorf("get actor %q: %w", actorID, err) + } + + switch actor.GetStatus() { + case ateapipb.Actor_STATUS_SUSPENDED, ateapipb.Actor_STATUS_UNSPECIFIED: + if err := c.DeleteActor(ctx, actorID); err != nil { + if status.Code(err) == codes.NotFound { + return true, nil + } + if status.Code(err) == codes.FailedPrecondition { + return false, fmt.Errorf("delete actor %q: not suspended (status %s)", actorID, actor.GetStatus()) + } + return false, fmt.Errorf("delete actor %q: %w", actorID, err) + } + return false, nil + case ateapipb.Actor_STATUS_SUSPENDING: + _ = c.SuspendActor(ctx, actorID) + return false, nil + case ateapipb.Actor_STATUS_RUNNING, ateapipb.Actor_STATUS_RESUMING: + if err := c.SuspendActor(ctx, actorID); err != nil && status.Code(err) != codes.NotFound { + return false, fmt.Errorf("suspend actor %q: %w", actorID, err) + } + return false, nil + default: + _ = c.SuspendActor(ctx, actorID) + return false, nil + } +} diff --git a/go/core/pkg/sandboxbackend/substrate/delete_actor_test.go b/go/core/pkg/sandboxbackend/substrate/delete_actor_test.go new file mode 100644 index 0000000000..c61ab4edf4 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/delete_actor_test.go @@ -0,0 +1,14 @@ +package substrate + +import "testing" + +func TestDeleteActorEmptyID(t *testing.T) { + t.Parallel() + done, err := deleteActor(t.Context(), nil, "") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !done { + t.Fatal("expected done for empty actor id") + } +} diff --git a/go/core/pkg/sandboxbackend/substrate/gateway.go b/go/core/pkg/sandboxbackend/substrate/gateway.go new file mode 100644 index 
0000000000..b1631d94e5 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/gateway.go @@ -0,0 +1,31 @@ +package substrate + +import ( + "fmt" + "net/url" + "strings" +) + +// DefaultAtenetRouterURL is the in-cluster HTTP endpoint for Substrate's Envoy router. +const DefaultAtenetRouterURL = "http://atenet-router.ate-system.svc:80" + +// GatewayRouterTarget returns the atenet-router reverse-proxy URL and Host header for an actor. +func GatewayRouterTarget(routerURL, actorID string) (*url.URL, string, error) { + routerURL = strings.TrimSpace(routerURL) + if routerURL == "" { + routerURL = DefaultAtenetRouterURL + } + actorID = strings.TrimSpace(actorID) + if actorID == "" { + return nil, "", fmt.Errorf("actor id is required") + } + target, err := url.Parse(routerURL) + if err != nil { + return nil, "", fmt.Errorf("parse atenet-router URL %q: %w", routerURL, err) + } + if target.Scheme == "" { + return nil, "", fmt.Errorf("atenet-router URL %q must include a scheme (http or https)", routerURL) + } + host := ActorHost(actorID, "") + return target, host, nil +} diff --git a/go/core/pkg/sandboxbackend/substrate/gateway_test.go b/go/core/pkg/sandboxbackend/substrate/gateway_test.go new file mode 100644 index 0000000000..8d89cf7e5b --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/gateway_test.go @@ -0,0 +1,41 @@ +package substrate + +import ( + "testing" +) + +func TestGatewayRouterTarget(t *testing.T) { + t.Parallel() + target, host, err := GatewayRouterTarget("", "ahr-kagent-my-claw") + if err != nil { + t.Fatal(err) + } + if target.String() != DefaultAtenetRouterURL { + t.Fatalf("target = %s, want %s", target, DefaultAtenetRouterURL) + } + if host != "ahr-kagent-my-claw.actors.resources.substrate.ate.dev" { + t.Fatalf("host = %q", host) + } +} + +func TestGatewayRouterTargetCustomURL(t *testing.T) { + t.Parallel() + target, host, err := GatewayRouterTarget("http://atenet-router.custom.svc:8080", "actor-1") + if err != nil { + t.Fatal(err) + } + if 
target.Host != "atenet-router.custom.svc:8080" { + t.Fatalf("target host = %q", target.Host) + } + if host != "actor-1.actors.resources.substrate.ate.dev" { + t.Fatalf("host = %q", host) + } +} + +func TestGatewayRouterTargetRejectsEmptyActor(t *testing.T) { + t.Parallel() + _, _, err := GatewayRouterTarget("", "") + if err == nil { + t.Fatal("expected error for empty actor id") + } +} diff --git a/go/core/pkg/sandboxbackend/substrate/gateway_token.go b/go/core/pkg/sandboxbackend/substrate/gateway_token.go new file mode 100644 index 0000000000..10eaca47c6 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/gateway_token.go @@ -0,0 +1,50 @@ +package substrate + +import ( + "context" + "fmt" + "strings" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// GatewayTokenSecretKey is the Secret data key used for per-harness OpenClaw gateway tokens. +const GatewayTokenSecretKey = "token" + +// ResolveGatewayToken returns the per-harness gateway token. +// Token source is validated at admission via AgentHarnessSubstrateSpec CEL rules. 
+func ResolveGatewayToken(ctx context.Context, kube client.Client, ah *v1alpha2.AgentHarness) (string, error) { + if ah == nil || ah.Spec.Substrate == nil { + return "", fmt.Errorf("spec.substrate is required") + } + sub := ah.Spec.Substrate + if sub.GatewayTokenSecretRef != nil && strings.TrimSpace(sub.GatewayTokenSecretRef.Name) != "" { + return resolveGatewayTokenSecret(ctx, kube, ah.Namespace, sub.GatewayTokenSecretRef) + } + return strings.TrimSpace(sub.GatewayToken), nil +} + +func resolveGatewayTokenSecret(ctx context.Context, kube client.Client, namespace string, ref *v1alpha2.TypedLocalReference) (string, error) { + if kube == nil { + return "", fmt.Errorf("kubernetes client is required to resolve gateway token secret") + } + var secret corev1.Secret + if err := kube.Get(ctx, types.NamespacedName{Namespace: namespace, Name: ref.Name}, &secret); err != nil { + return "", fmt.Errorf("get gateway token secret %s/%s: %w", namespace, ref.Name, err) + } + if secret.Data == nil { + return "", fmt.Errorf("gateway token secret %s/%s is empty", namespace, ref.Name) + } + val, ok := secret.Data[GatewayTokenSecretKey] + if !ok { + return "", fmt.Errorf("gateway token secret %s/%s missing key %q", namespace, ref.Name, GatewayTokenSecretKey) + } + token := strings.TrimSpace(string(val)) + if token == "" { + return "", fmt.Errorf("gateway token secret %s/%s key %q must not be empty", namespace, ref.Name, GatewayTokenSecretKey) + } + return token, nil +} diff --git a/go/core/pkg/sandboxbackend/substrate/gateway_token_test.go b/go/core/pkg/sandboxbackend/substrate/gateway_token_test.go new file mode 100644 index 0000000000..9cfb7dc57f --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/gateway_token_test.go @@ -0,0 +1,82 @@ +package substrate + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime 
"k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" +) + +func TestResolveGatewayTokenRejectsEmptySecretValue(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + const ns = "kagent" + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{Name: "claw", Namespace: ns}, + Spec: v1alpha2.AgentHarnessSpec{ + Substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayTokenSecretRef: &v1alpha2.TypedLocalReference{Name: "openclaw-token"}, + }, + }, + } + + for _, tt := range []struct { + name string + value []byte + }{ + {name: "empty", value: []byte{}}, + {name: "whitespace", value: []byte(" \t\n ")}, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "openclaw-token", Namespace: ns}, + Data: map[string][]byte{GatewayTokenSecretKey: tt.value}, + } + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret).Build() + + _, err := ResolveGatewayToken(context.Background(), kube, ah) + require.Error(t, err) + require.Contains(t, err.Error(), `key "token" must not be empty`) + }) + } +} + +func TestResolveGatewayTokenAcceptsNonemptySecretValue(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + const ns = "kagent" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "openclaw-token", Namespace: ns}, + Data: map[string][]byte{GatewayTokenSecretKey: []byte(" secret-token ")}, + } + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret).Build() + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{Name: "claw", Namespace: ns}, + Spec: v1alpha2.AgentHarnessSpec{ + Substrate: 
&v1alpha2.AgentHarnessSubstrateSpec{ + GatewayTokenSecretRef: &v1alpha2.TypedLocalReference{Name: "openclaw-token"}, + }, + }, + } + + token, err := ResolveGatewayToken(context.Background(), kube, ah) + require.NoError(t, err) + require.Equal(t, "secret-token", token) +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle.go b/go/core/pkg/sandboxbackend/substrate/lifecycle.go new file mode 100644 index 0000000000..51649f1118 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle.go @@ -0,0 +1,61 @@ +package substrate + +import ( + "context" + "fmt" + "strings" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "k8s.io/apimachinery/pkg/types" +) + +// EnsureGeneratedTemplate creates or updates the generated ActorTemplate and reports whether it is Ready. +func (p *Lifecycle) EnsureGeneratedTemplate(ctx context.Context, ah *v1alpha2.AgentHarness) (LifecycleState, error) { + if ah == nil || ah.Spec.Substrate == nil { + return LifecycleState{}, fmt.Errorf("spec.substrate is required") + } + + wpKey, err := p.resolveWorkerPoolRef(ctx, ah) + if err != nil { + return LifecycleState{}, err + } + + tmplKey, err := p.ensureActorTemplate(ctx, ah, wpKey) + if err != nil { + return LifecycleState{}, err + } + + ready, err := p.actorTemplateReady(ctx, tmplKey) + if err != nil { + return LifecycleState{}, err + } + + return LifecycleState{ + ActorTemplateReady: ready, + }, nil +} + +func (p *Lifecycle) resolveWorkerPoolRef(ctx context.Context, ah *v1alpha2.AgentHarness) (types.NamespacedName, error) { + if p == nil || p.Client == nil { + return types.NamespacedName{}, fmt.Errorf("substrate lifecycle kubernetes client is required") + } + key := p.Defaults.DefaultWorkerPool + if sub := ah.Spec.Substrate; sub != nil && sub.WorkerPoolRef != nil { + if name := strings.TrimSpace(sub.WorkerPoolRef.Name); name != "" { + key = types.NamespacedName{Namespace: ah.Namespace, Name: name} + } + } + if 
key.Name == "" { + return types.NamespacedName{}, fmt.Errorf("spec.substrate.workerPoolRef is required when no default substrate WorkerPool is configured") + } + if key.Namespace == "" { + key.Namespace = ah.Namespace + } + + var wp atev1alpha1.WorkerPool + if err := p.Client.Get(ctx, key, &wp); err != nil { + return types.NamespacedName{}, fmt.Errorf("get WorkerPool %s: %w", key, err) + } + return key, nil +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle_actortemplate.go b/go/core/pkg/sandboxbackend/substrate/lifecycle_actortemplate.go new file mode 100644 index 0000000000..8c8586fe58 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle_actortemplate.go @@ -0,0 +1,115 @@ +package substrate + +import ( + "context" + "fmt" + "maps" + "strings" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" +) + +func (p *Lifecycle) ensureActorTemplate(ctx context.Context, ah *v1alpha2.AgentHarness, wpKey types.NamespacedName) (types.NamespacedName, error) { + key := types.NamespacedName{Namespace: ah.Namespace, Name: actorTemplateName(ah)} + desired, err := p.buildActorTemplate(ctx, ah, wpKey) + if err != nil { + return types.NamespacedName{}, err + } + + existing := &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + }, + } + if _, err := controllerutil.CreateOrUpdate(ctx, p.Client, existing, func() error { + existing.Labels = mergeLabels(existing.Labels, desired.Labels) + existing.OwnerReferences = desired.OwnerReferences + existing.Spec = desired.Spec + return nil + }); err != nil { + return types.NamespacedName{}, fmt.Errorf("reconcile ActorTemplate %s: %w", key, err) + } + return key, 
nil +} + +func (p *Lifecycle) buildActorTemplate(ctx context.Context, ah *v1alpha2.AgentHarness, wpKey types.NamespacedName) (*atev1alpha1.ActorTemplate, error) { + key := types.NamespacedName{Namespace: ah.Namespace, Name: actorTemplateName(ah)} + workloadImage := strings.TrimSpace(ah.Spec.Substrate.WorkloadImage) + if workloadImage == "" { + workloadImage = strings.TrimSpace(p.Defaults.DefaultWorkloadImage) + } + if workloadImage == "" { + workloadImage = openclaw.NemoclawSandboxBaseImage + } else { + var err error + workloadImage, err = pinImageRef(workloadImage) + if err != nil { + return nil, err + } + } + startupScript, containerEnv, err := p.buildOpenClawActorStartup(ctx, ah) + if err != nil { + return nil, fmt.Errorf("build openclaw actor startup: %w", err) + } + + desired := &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + Labels: lifecycleLabels(ah), + }, + Spec: atev1alpha1.ActorTemplateSpec{ + PauseImage: p.Defaults.PauseImage, + Runsc: defaultRunscConfig(p.Defaults), + Containers: []atev1alpha1.Container{ + { + Name: defaultOpenClawContainer, + Image: workloadImage, + Ports: []corev1.ContainerPort{{ContainerPort: 80}}, + Command: []string{ + "/bin/sh", + "-c", + startupScript, + }, + Env: containerEnv, + }, + }, + WorkerPoolRef: corev1.ObjectReference{ + Name: wpKey.Name, + Namespace: wpKey.Namespace, + }, + SnapshotsConfig: atev1alpha1.SnapshotsConfig{ + Location: substrateSnapshotsLocation(ah), + }, + }, + } + if err := controllerutil.SetControllerReference(ah, desired, p.Client.Scheme()); err != nil { + return nil, fmt.Errorf("set ActorTemplate owner ref: %w", err) + } + return desired, nil +} + +func mergeLabels(existing, desired map[string]string) map[string]string { + if len(existing) == 0 && len(desired) == 0 { + return nil + } + merged := make(map[string]string, len(existing)+len(desired)) + maps.Copy(merged, existing) + maps.Copy(merged, desired) + return merged +} + +func (p 
*Lifecycle) actorTemplateReady(ctx context.Context, key types.NamespacedName) (bool, error) { + var tmpl atev1alpha1.ActorTemplate + if err := p.Client.Get(ctx, key, &tmpl); err != nil { + return false, fmt.Errorf("get ActorTemplate %s: %w", key, err) + } + return tmpl.Status.Phase == atev1alpha1.PhaseReady, nil +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle_delete.go b/go/core/pkg/sandboxbackend/substrate/lifecycle_delete.go new file mode 100644 index 0000000000..3b9a51b09f --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle_delete.go @@ -0,0 +1,68 @@ +package substrate + +import ( + "context" + "fmt" + "strings" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" +) + +// CleanupGeneratedTemplate removes external Substrate actors that Kubernetes garbage collection cannot see. +// The generated ActorTemplate CR is deleted by owner-reference garbage collection after the +// AgentHarness finalizer is removed. WorkerPools are externally owned and are never deleted here. 
+func (p *Lifecycle) CleanupGeneratedTemplate(ctx context.Context, ah *v1alpha2.AgentHarness) (bool, error) { + if ah == nil { + return true, nil + } + if p.Client == nil { + return true, nil + } + + tmplKey := types.NamespacedName{Namespace: ah.Namespace, Name: actorTemplateName(ah)} + goldenID, err := p.goldenActorID(ctx, tmplKey) + if err != nil { + return false, err + } + if goldenID == "" { + return true, nil + } + done, err := deleteGoldenActor(ctx, p.AteClient, goldenID) + if err != nil { + return false, fmt.Errorf("delete golden actor %q for ActorTemplate %s: %w", goldenID, tmplKey, err) + } + if !done { + return false, nil + } + + return true, nil +} + +func deleteGoldenActor(ctx context.Context, ateClient *Client, actorID string) (bool, error) { + return deleteActor(ctx, ateClient, actorID) +} + +func (p *Lifecycle) goldenActorID(ctx context.Context, tmplKey types.NamespacedName) (string, error) { + var tmpl atev1alpha1.ActorTemplate + if err := p.Client.Get(ctx, tmplKey, &tmpl); err != nil { + if apierrors.IsNotFound(err) { + return "", nil + } + return "", fmt.Errorf("get ActorTemplate %s for golden actor cleanup: %w", tmplKey, err) + } + return strings.TrimSpace(tmpl.Status.GoldenActorID), nil +} + +// HarnessLabelKey labels substrate lifecycle managed for an AgentHarness. +const HarnessLabelKey = "kagent.dev/agent-harness" + +// HarnessNameFromLabels returns the AgentHarness name from generated lifecycle labels. 
+func HarnessNameFromLabels(labels map[string]string) string { + if labels == nil { + return "" + } + return strings.TrimSpace(labels[HarnessLabelKey]) +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle_delete_test.go b/go/core/pkg/sandboxbackend/substrate/lifecycle_delete_test.go new file mode 100644 index 0000000000..4970d7ad40 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle_delete_test.go @@ -0,0 +1,110 @@ +package substrate + +import ( + "context" + "slices" + "testing" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/agent-substrate/substrate/proto/ateapipb" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +type recordingActorClient struct { + deleted []string +} + +func (r *recordingActorClient) GetActor(_ context.Context, in *ateapipb.GetActorRequest, _ ...grpc.CallOption) (*ateapipb.GetActorResponse, error) { + if slices.Contains(r.deleted, in.GetActorId()) { + return nil, status.Error(codes.NotFound, "actor deleted") + } + return &ateapipb.GetActorResponse{ + Actor: &ateapipb.Actor{ + ActorId: in.GetActorId(), + Status: ateapipb.Actor_STATUS_SUSPENDED, + }, + }, nil +} + +func (r *recordingActorClient) DeleteActor(_ context.Context, in *ateapipb.DeleteActorRequest, _ ...grpc.CallOption) (*ateapipb.DeleteActorResponse, error) { + r.deleted = append(r.deleted, in.GetActorId()) + return &ateapipb.DeleteActorResponse{}, nil +} + +func (r *recordingActorClient) CreateActor(context.Context, *ateapipb.CreateActorRequest, ...grpc.CallOption) (*ateapipb.CreateActorResponse, error) { 
+ panic("not used") +} + +func (r *recordingActorClient) SuspendActor(context.Context, *ateapipb.SuspendActorRequest, ...grpc.CallOption) (*ateapipb.SuspendActorResponse, error) { + panic("not used") +} + +func (r *recordingActorClient) ResumeActor(context.Context, *ateapipb.ResumeActorRequest, ...grpc.CallOption) (*ateapipb.ResumeActorResponse, error) { + panic("not used") +} + +func (r *recordingActorClient) ListWorkers(context.Context, *ateapipb.ListWorkersRequest, ...grpc.CallOption) (*ateapipb.ListWorkersResponse, error) { + panic("not used") +} + +func (r *recordingActorClient) ListActors(context.Context, *ateapipb.ListActorsRequest, ...grpc.CallOption) (*ateapipb.ListActorsResponse, error) { + panic("not used") +} + +func (r *recordingActorClient) DebugClear(context.Context, *ateapipb.DebugClearRequest, ...grpc.CallOption) (*ateapipb.DebugClearResponse, error) { + panic("not used") +} + +func TestLifecycleCleanupGeneratedTemplate_DeletesGoldenActor(t *testing.T) { + t.Parallel() + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + utilruntime.Must(atev1alpha1.AddToScheme(scheme)) + + ns := "kagent" + tmpl := &atev1alpha1.ActorTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "peterj-claw", Namespace: ns, Labels: map[string]string{ + HarnessLabelKey: "peterj-claw", + }}, + Status: atev1alpha1.ActorTemplateStatus{ + GoldenActorID: "golden-actor-uuid", + Phase: atev1alpha1.PhaseReady, + }, + } + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{ + Name: "peterj-claw", + Namespace: ns, + }, + } + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tmpl).Build() + rec := &recordingActorClient{} + p := &Lifecycle{Client: kube, AteClient: &Client{ControlClient: rec}} + + var complete bool + var err error + for range 5 { + complete, err = p.CleanupGeneratedTemplate(context.Background(), ah) + require.NoError(t, err) + if complete { + break + } + } + 
require.True(t, complete, "CleanupGeneratedTemplate should finish within a few reconcile passes") + require.Equal(t, []string{"golden-actor-uuid"}, rec.deleted) + + var got atev1alpha1.ActorTemplate + require.NoError(t, kube.Get(context.Background(), client.ObjectKeyFromObject(tmpl), &got)) +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle_openclaw.go b/go/core/pkg/sandboxbackend/substrate/lifecycle_openclaw.go new file mode 100644 index 0000000000..4bc5608a5b --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle_openclaw.go @@ -0,0 +1,93 @@ +package substrate + +import ( + "bytes" + "context" + _ "embed" + "encoding/base64" + "fmt" + "strings" + "text/template" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/internal/utils" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openclaw" + corev1 "k8s.io/api/core/v1" +) + +const defaultSubstrateOpenClawGatewayPort = 80 + +//go:embed templates/openclaw_startup.sh.tmpl +var openClawStartupScriptTmplContent string + +var openClawStartupScriptTmpl = template.Must(template.New("openclaw_startup").Parse(openClawStartupScriptTmplContent)) + +type openClawStartupScriptData struct { + OpenClawJSONBase64 string + GatewayPort int +} + +// buildOpenClawActorStartup returns the ateom workload startup script and container env for OpenClaw on Substrate. +// When spec.modelConfigRef is set, openclaw.json includes models/agents/channels like the OpenShell bootstrap path. 
+func (p *Lifecycle) buildOpenClawActorStartup(ctx context.Context, ah *v1alpha2.AgentHarness) (script string, env []corev1.EnvVar, err error) { + if ah == nil { + return "", nil, fmt.Errorf("AgentHarness is required") + } + if p.Client == nil { + return "", nil, fmt.Errorf("substrate lifecycle kubernetes client is required") + } + + token, err := ResolveGatewayToken(ctx, p.Client, ah) + if err != nil { + return "", nil, fmt.Errorf("resolve gateway token: %w", err) + } + gw := openclaw.SubstrateGatewayBootstrap(token, defaultSubstrateOpenClawGatewayPort, openClawControlUIBasePath(ah)) + + var jsonBytes []byte + var containerEnv []corev1.EnvVar + + ref := strings.TrimSpace(ah.Spec.ModelConfigRef) + if ref != "" { + mcRef, parseErr := utils.ParseRefString(ref, ah.Namespace) + if parseErr != nil { + return "", nil, fmt.Errorf("parse modelConfigRef %q: %w", ref, parseErr) + } + mc := &v1alpha2.ModelConfig{} + if getErr := p.Client.Get(ctx, mcRef, mc); getErr != nil { + return "", nil, fmt.Errorf("get ModelConfig %s: %w", mcRef, getErr) + } + jsonBytes, containerEnv, err = openclaw.BuildSubstrateBootstrapJSON(ctx, p.Client, ah.Namespace, ah, mc, gw) + if err != nil { + return "", nil, fmt.Errorf("build openclaw bootstrap json: %w", err) + } + } else { + jsonBytes, err = openclaw.BuildGatewayOnlyBootstrapJSON(gw) + if err != nil { + return "", nil, fmt.Errorf("build gateway-only openclaw json: %w", err) + } + containerEnv = []corev1.EnvVar{{Name: "HOME", Value: "/root"}} + } + script, err = openClawStartupScript(jsonBytes, gw.Port) + if err != nil { + return "", nil, err + } + return script, containerEnv, nil +} + +func openClawControlUIBasePath(ah *v1alpha2.AgentHarness) string { + if ah == nil { + return "" + } + return "/api/agentharnesses/" + ah.Namespace + "/" + ah.Name + "/gateway" +} + +func openClawStartupScript(jsonBytes []byte, gwPort int) (string, error) { + var buf bytes.Buffer + if err := openClawStartupScriptTmpl.Execute(&buf, openClawStartupScriptData{ + 
OpenClawJSONBase64: base64.StdEncoding.EncodeToString(jsonBytes), + GatewayPort: gwPort, + }); err != nil { + return "", fmt.Errorf("render openclaw startup script: %w", err) + } + return strings.TrimRight(buf.String(), "\n"), nil +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle_openclaw_test.go b/go/core/pkg/sandboxbackend/substrate/lifecycle_openclaw_test.go new file mode 100644 index 0000000000..6d8578f3c3 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle_openclaw_test.go @@ -0,0 +1,238 @@ +package substrate + +import ( + "context" + "encoding/base64" + "encoding/json" + "strings" + "testing" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestBuildOpenClawActorStartup_WithModelConfig(t *testing.T) { + t.Parallel() + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + ns := "kagent" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "openai-key", Namespace: ns}, + Data: map[string][]byte{"OPENAI_API_KEY": []byte("sk-test")}, + } + mc := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "default-model-config", Namespace: ns}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + APIKeySecret: "openai-key", + APIKeySecretKey: "OPENAI_API_KEY", + OpenAI: &v1alpha2.OpenAIConfig{}, + }, + } + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{Name: "peterj-claw", Namespace: ns}, + Spec: v1alpha2.AgentHarnessSpec{ + ModelConfigRef: "default-model-config", + Substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayToken: "some-token", + SnapshotsConfig: 
&v1alpha2.AgentHarnessSubstrateSnapshotsConfig{ + Location: "gs://bucket/prefix/", + }, + }, + }, + } + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret, mc).Build() + p := &Lifecycle{ + Client: kube, + } + + script, env, err := p.buildOpenClawActorStartup(context.Background(), ah) + require.NoError(t, err) + require.Contains(t, script, "base64 -d") + require.Contains(t, script, "openclaw gateway run --port 80") + + var foundKey bool + for _, e := range env { + if e.Name != "OPENAI_API_KEY" { + continue + } + require.NotNil(t, e.ValueFrom) + require.NotNil(t, e.ValueFrom.SecretKeyRef) + require.Equal(t, "openai-key", e.ValueFrom.SecretKeyRef.Name) + require.Equal(t, "OPENAI_API_KEY", e.ValueFrom.SecretKeyRef.Key) + require.Empty(t, e.Value, "API key must not be inlined in ActorTemplate env") + foundKey = true + } + require.True(t, foundKey, "expected OPENAI_API_KEY secretKeyRef in container env") + + // Decode embedded JSON from the base64 line in the startup script. 
+ var payload string + for line := range strings.SplitSeq(script, "\n") { + if !strings.Contains(line, "base64 -d") { + continue + } + start := strings.Index(line, `'`) + 1 + end := strings.LastIndex(line, `'`) + require.Greater(t, end, start) + payload = line[start:end] + break + } + require.NotEmpty(t, payload) + raw, decErr := base64.StdEncoding.DecodeString(payload) + require.NoError(t, decErr) + var root map[string]any + require.NoError(t, json.Unmarshal(raw, &root)) + gw := root["gateway"].(map[string]any) + require.Equal(t, "lan", gw["bind"]) + require.Equal(t, float64(80), gw["port"]) + auth := gw["auth"].(map[string]any) + require.Equal(t, "token", auth["mode"]) + require.Equal(t, "some-token", auth["token"]) + controlUI := gw["controlUi"].(map[string]any) + require.Equal(t, "/api/agentharnesses/kagent/peterj-claw/gateway", controlUI["basePath"]) + _, hasModels := root["models"] + require.False(t, hasModels, "substrate bootstrap should omit models unless ModelConfig sets an explicit baseUrl") + require.Contains(t, root, "agents") +} + +func TestBuildOpenClawActorStartup_WithHarnessGatewayToken(t *testing.T) { + t.Parallel() + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + ns := "kagent" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "openclaw-token", Namespace: ns}, + Data: map[string][]byte{GatewayTokenSecretKey: []byte("secret-token")}, + } + for _, tt := range []struct { + name string + substrate *v1alpha2.AgentHarnessSubstrateSpec + wantToken string + }{ + { + name: "inline token", + substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayToken: "inline-token", + }, + wantToken: "inline-token", + }, + { + name: "secret token", + substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayTokenSecretRef: &v1alpha2.TypedLocalReference{Name: "openclaw-token"}, + }, + wantToken: "secret-token", + }, + } { + t.Run(tt.name, func(t *testing.T) { + 
t.Parallel() + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret.DeepCopy()).Build() + p := &Lifecycle{ + Client: kube, + } + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{Name: "claw", Namespace: ns}, + Spec: v1alpha2.AgentHarnessSpec{ + Substrate: tt.substrate, + }, + } + + script, _, err := p.buildOpenClawActorStartup(context.Background(), ah) + require.NoError(t, err) + require.Equal(t, tt.wantToken, gatewayTokenFromStartup(t, script)) + }) + } +} + +func TestBuildOpenClawActorStartup_WithExplicitBaseURL(t *testing.T) { + t.Parallel() + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + + ns := "kagent" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "openai-key", Namespace: ns}, + Data: map[string][]byte{"OPENAI_API_KEY": []byte("sk-test")}, + } + mc := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "mc", Namespace: ns}, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o", + Provider: v1alpha2.ModelProviderOpenAI, + APIKeySecret: "openai-key", + APIKeySecretKey: "OPENAI_API_KEY", + OpenAI: &v1alpha2.OpenAIConfig{BaseURL: "https://api.example/v1"}, + }, + } + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{Name: "claw", Namespace: ns}, + Spec: v1alpha2.AgentHarnessSpec{ + ModelConfigRef: "mc", + Substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayToken: "some-token", + SnapshotsConfig: &v1alpha2.AgentHarnessSubstrateSnapshotsConfig{ + Location: "gs://bucket/prefix/", + }, + }, + }, + } + + kube := fake.NewClientBuilder().WithScheme(scheme).WithObjects(secret, mc).Build() + p := &Lifecycle{Client: kube, Defaults: LifecycleDefaults{}} + script, _, err := p.buildOpenClawActorStartup(context.Background(), ah) + require.NoError(t, err) + + var payload string + for line := range strings.SplitSeq(script, "\n") { + if strings.Contains(line, "base64 -d") { + start := strings.Index(line, `'`) + 1 + 
end := strings.LastIndex(line, `'`) + payload = line[start:end] + break + } + } + raw, decErr := base64.StdEncoding.DecodeString(payload) + require.NoError(t, decErr) + var root map[string]any + require.NoError(t, json.Unmarshal(raw, &root)) + openai := root["models"].(map[string]any)["providers"].(map[string]any)["openai"].(map[string]any) + require.Equal(t, "https://api.example/v1", openai["baseUrl"]) +} + +func gatewayTokenFromStartup(t *testing.T, script string) string { + t.Helper() + + var payload string + for line := range strings.SplitSeq(script, "\n") { + if strings.Contains(line, "base64 -d") { + start := strings.Index(line, `'`) + 1 + end := strings.LastIndex(line, `'`) + require.Greater(t, end, start) + payload = line[start:end] + break + } + } + require.NotEmpty(t, payload) + raw, decErr := base64.StdEncoding.DecodeString(payload) + require.NoError(t, decErr) + var root map[string]any + require.NoError(t, json.Unmarshal(raw, &root)) + gw := root["gateway"].(map[string]any) + auth := gw["auth"].(map[string]any) + token, _ := auth["token"].(string) + return token +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle_shared.go b/go/core/pkg/sandboxbackend/substrate/lifecycle_shared.go new file mode 100644 index 0000000000..24fb02f5d0 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle_shared.go @@ -0,0 +1,118 @@ +package substrate + +import ( + "context" + "fmt" + "strings" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + defaultSnapshotsBucket = "ate-snapshots" + defaultOpenClawContainer = "openclaw" +) + +// LifecycleDefaults are cluster-wide defaults for generated ActorTemplate lifecycle. 
+type LifecycleDefaults struct { + PauseImage string + RunscAMD64URL string + RunscAMD64SHA256 string + RunscARM64URL string + RunscARM64SHA256 string + DefaultWorkloadImage string + DefaultWorkerPool types.NamespacedName +} + +// Lifecycle reconciles the Kubernetes lifecycle that kagent owns for a substrate AgentHarness. +// WorkerPools are externally owned; this helper only resolves the selected WorkerPool. +type Lifecycle struct { + Client client.Client + Defaults LifecycleDefaults + AteClient *Client +} + +// AgentHarnessLifecycle is the substrate lifecycle surface used by the +// AgentHarness controller. +type AgentHarnessLifecycle interface { + EnsureGeneratedTemplate(ctx context.Context, ah *v1alpha2.AgentHarness) (LifecycleState, error) + CleanupGeneratedTemplate(ctx context.Context, ah *v1alpha2.AgentHarness) (bool, error) +} + +var _ AgentHarnessLifecycle = (*Lifecycle)(nil) + +func NewLifecycle(kube client.Client, defaults LifecycleDefaults, ateClient *Client) *Lifecycle { + return &Lifecycle{ + Client: kube, + Defaults: defaults, + AteClient: ateClient, + } +} + +// LifecycleState describes the generated Substrate lifecycle for an AgentHarness. 
+type LifecycleState struct { + ActorTemplateReady bool +} + +func defaultRunscConfig(d LifecycleDefaults) atev1alpha1.RunscConfig { + return atev1alpha1.RunscConfig{ + AMD64: &atev1alpha1.RunscPlatformConfig{ + URL: d.RunscAMD64URL, + SHA256Hash: d.RunscAMD64SHA256, + }, + ARM64: &atev1alpha1.RunscPlatformConfig{ + URL: d.RunscARM64URL, + SHA256Hash: d.RunscARM64SHA256, + }, + } +} + +func substrateSnapshotsLocation(ah *v1alpha2.AgentHarness) string { + if ah == nil { + return defaultSubstrateSnapshotsLocation("", "") + } + if sub := ah.Spec.Substrate; sub != nil && sub.SnapshotsConfig != nil { + if loc := strings.TrimSpace(sub.SnapshotsConfig.Location); loc != "" { + return loc + } + } + return defaultSubstrateSnapshotsLocation(ah.Namespace, ah.Name) +} + +func defaultSubstrateSnapshotsLocation(namespace, name string) string { + return fmt.Sprintf("gs://%s/%s/%s", defaultSnapshotsBucket, namespace, name) +} + +func lifecycleLabels(ah *v1alpha2.AgentHarness) map[string]string { + return map[string]string{ + "app.kubernetes.io/managed-by": "kagent", + "kagent.dev/agent-harness": ah.Name, + } +} + +func actorTemplateName(ah *v1alpha2.AgentHarness) string { + return truncateDNS1123(ah.Name) +} + +func truncateDNS1123(s string) string { + s = strings.ToLower(strings.ReplaceAll(s, "_", "-")) + if len(s) > 63 { + s = strings.TrimRight(s[:63], "-") + } + return s +} + +// pinImageRef ensures image refs satisfy Substrate ActorTemplate validation (must contain "@"). 
+func pinImageRef(image string) (string, error) { + image = strings.TrimSpace(image) + if image == "" { + return "", fmt.Errorf("workload image is required") + } + if !strings.Contains(image, "@") { + return "", fmt.Errorf("workload image %q must be pinned with a digest (@sha256:...)", image) + } + return image, nil +} diff --git a/go/core/pkg/sandboxbackend/substrate/lifecycle_test.go b/go/core/pkg/sandboxbackend/substrate/lifecycle_test.go new file mode 100644 index 0000000000..f2c3c0ac9e --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/lifecycle_test.go @@ -0,0 +1,146 @@ +package substrate + +import ( + "context" + "testing" + + atev1alpha1 "github.com/agent-substrate/substrate/api/v1alpha1" + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" +) + +func TestSubstrateSnapshotsLocationDefault(t *testing.T) { + t.Parallel() + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{Namespace: "kagent", Name: "claw"}, + Spec: v1alpha2.AgentHarnessSpec{ + Runtime: v1alpha2.AgentHarnessRuntimeSubstrate, + Substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayToken: "test-token", + }, + }, + } + if got := substrateSnapshotsLocation(ah); got != "gs://ate-snapshots/kagent/claw" { + t.Fatalf("got default snapshots location %q", got) + } +} + +func TestResolveWorkerPoolRef(t *testing.T) { + t.Parallel() + + for _, tt := range []struct { + name string + refName string + defaultRef types.NamespacedName + wantRef types.NamespacedName + }{ + { + name: "uses default workerpool", + defaultRef: types.NamespacedName{Namespace: "kagent", Name: "default-wp"}, + wantRef: 
types.NamespacedName{Namespace: "kagent", Name: "default-wp"}, + }, + { + name: "spec workerpool overrides default", + refName: "custom-wp", + defaultRef: types.NamespacedName{Namespace: "kagent", Name: "default-wp"}, + wantRef: types.NamespacedName{Namespace: "kagent", Name: "custom-wp"}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + utilruntime.Must(atev1alpha1.AddToScheme(scheme)) + + ah := &v1alpha2.AgentHarness{ + TypeMeta: metav1.TypeMeta{APIVersion: v1alpha2.GroupVersion.String(), Kind: "AgentHarness"}, + ObjectMeta: metav1.ObjectMeta{Namespace: "kagent", Name: "claw"}, + Spec: v1alpha2.AgentHarnessSpec{ + Runtime: v1alpha2.AgentHarnessRuntimeSubstrate, + Substrate: &v1alpha2.AgentHarnessSubstrateSpec{}, + }, + } + if tt.refName != "" { + ah.Spec.Substrate.WorkerPoolRef = &v1alpha2.TypedLocalReference{Name: tt.refName} + } + wp := &atev1alpha1.WorkerPool{ + ObjectMeta: metav1.ObjectMeta{Name: tt.wantRef.Name, Namespace: tt.wantRef.Namespace}, + Spec: atev1alpha1.WorkerPoolSpec{ + Replicas: 1, + AteomImage: "registry.example/ateom:default", + }, + } + p := &Lifecycle{ + Client: fake.NewClientBuilder().WithScheme(scheme).WithObjects(wp).Build(), + Defaults: LifecycleDefaults{ + DefaultWorkerPool: tt.defaultRef, + }, + } + + key, err := p.resolveWorkerPoolRef(context.Background(), ah) + require.NoError(t, err) + require.Equal(t, tt.wantRef, key) + }) + } +} + +func TestActorTemplateName(t *testing.T) { + t.Parallel() + ah := &v1alpha2.AgentHarness{ObjectMeta: metav1.ObjectMeta{Name: "my-claw"}} + if got := actorTemplateName(ah); got != "my-claw" { + t.Fatalf("got %q", got) + } +} + +func TestEnsureActorTemplateDoesNotUpdateWhenDesiredStateMatches(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + utilruntime.Must(v1alpha2.AddToScheme(scheme)) + utilruntime.Must(atev1alpha1.AddToScheme(scheme)) + + var updateCalls int + kube := 
fake.NewClientBuilder(). + WithScheme(scheme). + WithInterceptorFuncs(interceptor.Funcs{ + Update: func(ctx context.Context, c ctrlclient.WithWatch, obj ctrlclient.Object, opts ...ctrlclient.UpdateOption) error { + if _, ok := obj.(*atev1alpha1.ActorTemplate); ok { + updateCalls++ + } + return c.Update(ctx, obj, opts...) + }, + }). + Build() + + ah := &v1alpha2.AgentHarness{ + TypeMeta: metav1.TypeMeta{APIVersion: v1alpha2.GroupVersion.String(), Kind: "AgentHarness"}, + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kagent", + Name: "claw", + UID: "00000000-0000-0000-0000-000000000001", + }, + Spec: v1alpha2.AgentHarnessSpec{ + Runtime: v1alpha2.AgentHarnessRuntimeSubstrate, + Substrate: &v1alpha2.AgentHarnessSubstrateSpec{ + GatewayToken: "test-token", + }, + }, + } + lifecycle := &Lifecycle{Client: kube} + wpKey := types.NamespacedName{Namespace: "kagent", Name: "default-wp"} + + _, err := lifecycle.ensureActorTemplate(context.Background(), ah, wpKey) + require.NoError(t, err) + _, err = lifecycle.ensureActorTemplate(context.Background(), ah, wpKey) + require.NoError(t, err) + require.Zero(t, updateCalls, "matching desired ActorTemplate should not be updated") +} diff --git a/go/core/pkg/sandboxbackend/substrate/list.go b/go/core/pkg/sandboxbackend/substrate/list.go new file mode 100644 index 0000000000..976da7b27f --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/list.go @@ -0,0 +1,53 @@ +package substrate + +import ( + "context" + + "github.com/agent-substrate/substrate/proto/ateapipb" +) + +// ListActors returns all actors reflected in ate-api. +func (c *Client) ListActors(ctx context.Context) ([]*ateapipb.Actor, error) { + if c == nil { + return nil, nil + } + ctx, cancel := c.callCtx(ctx) + defer cancel() + resp, err := c.ControlClient.ListActors(ctx, &ateapipb.ListActorsRequest{}) + if err != nil { + return nil, err + } + return resp.GetActors(), nil +} + +// ListWorkers returns all workers reflected in ate-api. 
+func (c *Client) ListWorkers(ctx context.Context) ([]*ateapipb.Worker, error) { + if c == nil { + return nil, nil + } + ctx, cancel := c.callCtx(ctx) + defer cancel() + resp, err := c.ControlClient.ListWorkers(ctx, &ateapipb.ListWorkersRequest{}) + if err != nil { + return nil, err + } + return resp.GetWorkers(), nil +} + +// ActorStatusLabel returns a stable human-readable actor status. +func ActorStatusLabel(status ateapipb.Actor_Status) string { + switch status { + case ateapipb.Actor_STATUS_RESUMING: + return "Resuming" + case ateapipb.Actor_STATUS_RUNNING: + return "Running" + case ateapipb.Actor_STATUS_SUSPENDING: + return "Suspending" + case ateapipb.Actor_STATUS_SUSPENDED: + return "Suspended" + case ateapipb.Actor_STATUS_UNSPECIFIED: + return "Unknown" + default: + return status.String() + } +} diff --git a/go/core/pkg/sandboxbackend/substrate/openclaw.go b/go/core/pkg/sandboxbackend/substrate/openclaw.go new file mode 100644 index 0000000000..04a559c6a1 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/openclaw.go @@ -0,0 +1,193 @@ +package substrate + +import ( + "context" + "fmt" + "regexp" + "strings" + + "github.com/agent-substrate/substrate/proto/ateapipb" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" +) + +const ( + defaultActorHostSuffix = "actors.resources.substrate.ate.dev" + defaultSubstrateGWPort = int32(80) + actorIDPrefix = "ahr" +) + +var dns1123Label = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`) + +// ClawBackend implements AsyncBackend for OpenClaw/NemoClaw on Agent Substrate. 
+type ClawBackend struct { + client *Client + backend v1alpha2.AgentHarnessBackendType + recorder record.EventRecorder +} + +var _ sandboxbackend.AsyncBackend = (*ClawBackend)(nil) + +// NewOpenClawBackend returns a substrate backend for openclaw/nemoclaw harness types. +func NewOpenClawBackend(client *Client, backend v1alpha2.AgentHarnessBackendType, recorder record.EventRecorder) *ClawBackend { + return &ClawBackend{ + client: client, + backend: backend, + recorder: recorder, + } +} + +func (b *ClawBackend) Name() v1alpha2.AgentHarnessBackendType { + return b.backend +} + +func (b *ClawBackend) EnsureAgentHarness(ctx context.Context, ah *v1alpha2.AgentHarness) (sandboxbackend.EnsureResult, error) { + if ah == nil { + return sandboxbackend.EnsureResult{}, fmt.Errorf("AgentHarness is required") + } + if err := validateSubstrateSpec(ah); err != nil { + return sandboxbackend.EnsureResult{}, err + } + + actorID := ActorID(ah) + tmplNS, tmplName := generatedActorTemplateKey(ah) + + actor, err := b.client.GetActor(ctx, actorID) + if err != nil { + if status.Code(err) != codes.NotFound { + return sandboxbackend.EnsureResult{}, fmt.Errorf("substrate GetActor %q: %w", actorID, err) + } + actor, err = b.client.CreateActor(ctx, actorID, tmplNS, tmplName) + if err != nil { + return sandboxbackend.EnsureResult{}, fmt.Errorf("substrate CreateActor %q: %w", actorID, err) + } + } + + switch actor.GetStatus() { + case ateapipb.Actor_STATUS_RUNNING, ateapipb.Actor_STATUS_RESUMING: + // already active or waking + case ateapipb.Actor_STATUS_SUSPENDED, ateapipb.Actor_STATUS_UNSPECIFIED: + actor, err = b.client.ResumeActor(ctx, actorID) + if err != nil { + return sandboxbackend.EnsureResult{}, fmt.Errorf("substrate ResumeActor %q: %w", actorID, err) + } + default: + // suspending — wait for next reconcile + } + + endpoint := substrateConnectionEndpoint(ah.Namespace, ah.Name, actor) + + return sandboxbackend.EnsureResult{ + Handle: sandboxbackend.Handle{ID: actorID}, + Endpoint: 
endpoint, + }, nil +} + +func (b *ClawBackend) GetStatus(ctx context.Context, h sandboxbackend.Handle) (metav1.ConditionStatus, string, string) { + if h.ID == "" { + return metav1.ConditionUnknown, "ActorHandleMissing", "no substrate actor id recorded yet" + } + actor, err := b.client.GetActor(ctx, h.ID) + if err != nil { + if status.Code(err) == codes.NotFound { + return metav1.ConditionUnknown, "ActorNotFound", fmt.Sprintf("substrate actor %q not found", h.ID) + } + return metav1.ConditionUnknown, "ActorGetFailed", err.Error() + } + return actorStatusToCondition(actor) +} + +func (b *ClawBackend) DeleteAgentHarness(ctx context.Context, h sandboxbackend.Handle) (bool, error) { + if h.ID == "" { + return true, nil + } + done, err := deleteActor(ctx, b.client, h.ID) + if err != nil { + return false, fmt.Errorf("substrate delete actor %q: %w", h.ID, err) + } + return done, nil +} + +func (b *ClawBackend) OnAgentHarnessReady(_ context.Context, _ *v1alpha2.AgentHarness, _ sandboxbackend.Handle) error { + // OpenClaw config is baked into the ActorTemplate golden snapshot when the + // generated ActorTemplate is reconciled. + return nil +} + +// ActorID returns a stable DNS-1123 actor id for this harness. +func ActorID(ah *v1alpha2.AgentHarness) string { + raw := fmt.Sprintf("%s-%s-%s", actorIDPrefix, ah.Namespace, ah.Name) + raw = strings.ToLower(raw) + raw = strings.ReplaceAll(raw, "_", "-") + if len(raw) > 63 { + raw = raw[:63] + raw = strings.TrimRight(raw, "-") + } + if !dns1123Label.MatchString(raw) { + // fallback: hash-like trim + raw = fmt.Sprintf("%s-%s", actorIDPrefix, ah.UID) + if len(raw) > 63 { + raw = raw[:63] + } + } + return raw +} + +// ActorHost returns the atenet router Host header value for the actor. +func ActorHost(actorID string, suffix string) string { + if suffix == "" { + suffix = defaultActorHostSuffix + } + return actorID + "." 
+ suffix +} + +func generatedActorTemplateKey(ah *v1alpha2.AgentHarness) (string, string) { + return ah.Namespace, actorTemplateName(ah) +} + +func substrateConnectionEndpoint(namespace, name string, actor *ateapipb.Actor) string { + gw := fmt.Sprintf("/api/agentharnesses/%s/%s/gateway/", namespace, name) + if actor == nil { + return "kagent gateway: " + gw + } + if actorID := strings.TrimSpace(actor.GetActorId()); actorID != "" { + return fmt.Sprintf("atenet-router Host %s (UI via kagent %s)", ActorHost(actorID, ""), gw) + } + return fmt.Sprintf("kagent gateway: %s (actor status %s)", gw, actor.GetStatus()) +} + +func validateSubstrateSpec(ah *v1alpha2.AgentHarness) error { + runtime := ah.Spec.Runtime + if runtime == "" { + runtime = v1alpha2.AgentHarnessRuntimeOpenshell + } + if runtime != v1alpha2.AgentHarnessRuntimeSubstrate { + return fmt.Errorf("substrate backend called for runtime %q", runtime) + } + return nil +} + +func actorStatusToCondition(actor *ateapipb.Actor) (metav1.ConditionStatus, string, string) { + if actor == nil { + return metav1.ConditionUnknown, "ActorMissing", "empty actor response" + } + switch actor.GetStatus() { + case ateapipb.Actor_STATUS_RUNNING: + if ip := actor.GetAteomPodIp(); ip != "" { + return metav1.ConditionTrue, "ActorRunning", fmt.Sprintf("actor running on %s", ip) + } + return metav1.ConditionTrue, "ActorRunning", "actor is running" + case ateapipb.Actor_STATUS_RESUMING: + return metav1.ConditionFalse, "ActorResuming", "actor is resuming" + case ateapipb.Actor_STATUS_SUSPENDING: + return metav1.ConditionFalse, "ActorSuspending", "actor is suspending" + case ateapipb.Actor_STATUS_SUSPENDED: + return metav1.ConditionFalse, "ActorSuspended", "actor is suspended" + default: + return metav1.ConditionUnknown, "ActorStatusUnknown", actor.GetStatus().String() + } +} diff --git a/go/core/pkg/sandboxbackend/substrate/openclaw_test.go b/go/core/pkg/sandboxbackend/substrate/openclaw_test.go new file mode 100644 index 
0000000000..5e5b752f5f --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/openclaw_test.go @@ -0,0 +1,47 @@ +package substrate + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" +) + +func TestActorID(t *testing.T) { + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kagent", + Name: "my-claw", + UID: "00000000-0000-0000-0000-000000000001", + }, + } + id := ActorID(ah) + if !dns1123Label.MatchString(id) { + t.Fatalf("ActorID %q is not DNS-1123", id) + } + if id == "" { + t.Fatal("expected non-empty actor id") + } +} + +func TestActorHost(t *testing.T) { + got := ActorHost("ahr-kagent-my-claw", "") + if got != "ahr-kagent-my-claw.actors.resources.substrate.ate.dev" { + t.Fatalf("ActorHost = %q", got) + } +} + +func TestGeneratedActorTemplateKey(t *testing.T) { + t.Parallel() + ah := &v1alpha2.AgentHarness{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kagent", + Name: "peterj-claw", + }, + } + ns, name := generatedActorTemplateKey(ah) + if ns != "kagent" || name != "peterj-claw" { + t.Fatalf("got %s/%s, want kagent/peterj-claw", ns, name) + } +} diff --git a/go/core/pkg/sandboxbackend/substrate/pin_image_test.go b/go/core/pkg/sandboxbackend/substrate/pin_image_test.go new file mode 100644 index 0000000000..2580dc97f5 --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/pin_image_test.go @@ -0,0 +1,27 @@ +package substrate + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestPinImageRef(t *testing.T) { + t.Run("accepts digest pin", func(t *testing.T) { + ref := "ghcr.io/kagent-dev/nemoclaw/sandbox-base@sha256:abc" + got, err := pinImageRef(ref) + require.NoError(t, err) + require.Equal(t, ref, got) + }) + + t.Run("rejects tag", func(t *testing.T) { + _, err := pinImageRef("ghcr.io/kagent-dev/nemoclaw/sandbox-base:2026.5.4") + require.Error(t, err) + require.Contains(t, err.Error(), "must be pinned with a digest") + }) + + 
t.Run("rejects empty", func(t *testing.T) { + _, err := pinImageRef(" ") + require.Error(t, err) + }) +} diff --git a/go/core/pkg/sandboxbackend/substrate/templates/openclaw_startup.sh.tmpl b/go/core/pkg/sandboxbackend/substrate/templates/openclaw_startup.sh.tmpl new file mode 100644 index 0000000000..a082584ddf --- /dev/null +++ b/go/core/pkg/sandboxbackend/substrate/templates/openclaw_startup.sh.tmpl @@ -0,0 +1,4 @@ +set -e +mkdir -p "${HOME}/.openclaw" +echo '{{.OpenClawJSONBase64}}' | base64 -d > "${HOME}/.openclaw/openclaw.json" +openclaw gateway run --port {{.GatewayPort}} --allow-unconfigured diff --git a/go/go.mod b/go/go.mod index 94f2dd970e..1bd839cccf 100644 --- a/go/go.mod +++ b/go/go.mod @@ -61,10 +61,10 @@ require ( ) require ( + github.com/agent-substrate/substrate v0.0.0 github.com/aws/aws-sdk-go-v2 v1.41.7 github.com/aws/aws-sdk-go-v2/service/bedrockruntime v1.50.6 - github.com/golang/protobuf v1.5.4 - github.com/google/go-containerregistry v0.21.2 + github.com/google/go-containerregistry v0.21.5 github.com/google/jsonschema-go v0.4.3 github.com/jackc/pgx/v5 v5.9.2 github.com/ollama/ollama v0.24.0 @@ -86,7 +86,7 @@ require ( cel.dev/expr v0.25.1 // indirect charm.land/lipgloss/v2 v2.0.3 // indirect cloud.google.com/go v0.123.0 // indirect - cloud.google.com/go/auth v0.18.2 // indirect + cloud.google.com/go/auth v0.19.0 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect codeberg.org/chavacava/garif v0.2.0 // indirect @@ -178,8 +178,7 @@ require ( github.com/denis-tingaikin/go-header v0.5.0 // indirect github.com/distribution/reference v0.6.0 // indirect github.com/dlclark/regexp2 v1.12.0 // indirect - github.com/docker/cli v29.2.1+incompatible // indirect - github.com/docker/distribution v2.8.3+incompatible // indirect + github.com/docker/cli v29.4.0+incompatible // indirect github.com/docker/docker-credential-helpers v0.9.3 // indirect github.com/docker/go-connections v0.6.0 
// indirect github.com/docker/go-units v0.5.0 // indirect @@ -200,20 +199,20 @@ require ( github.com/go-critic/go-critic v0.14.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect - github.com/go-openapi/jsonpointer v0.22.1 // indirect - github.com/go-openapi/jsonreference v0.21.2 // indirect - github.com/go-openapi/swag v0.25.1 // indirect - github.com/go-openapi/swag/cmdutils v0.25.1 // indirect - github.com/go-openapi/swag/conv v0.25.1 // indirect - github.com/go-openapi/swag/fileutils v0.25.1 // indirect - github.com/go-openapi/swag/jsonname v0.25.1 // indirect - github.com/go-openapi/swag/jsonutils v0.25.1 // indirect - github.com/go-openapi/swag/loading v0.25.1 // indirect - github.com/go-openapi/swag/mangling v0.25.1 // indirect - github.com/go-openapi/swag/netutils v0.25.1 // indirect - github.com/go-openapi/swag/stringutils v0.25.1 // indirect - github.com/go-openapi/swag/typeutils v0.25.1 // indirect - github.com/go-openapi/swag/yamlutils v0.25.1 // indirect + github.com/go-openapi/jsonpointer v0.22.4 // indirect + github.com/go-openapi/jsonreference v0.21.4 // indirect + github.com/go-openapi/swag v0.25.4 // indirect + github.com/go-openapi/swag/cmdutils v0.25.4 // indirect + github.com/go-openapi/swag/conv v0.25.4 // indirect + github.com/go-openapi/swag/fileutils v0.25.4 // indirect + github.com/go-openapi/swag/jsonname v0.25.4 // indirect + github.com/go-openapi/swag/jsonutils v0.25.4 // indirect + github.com/go-openapi/swag/loading v0.25.4 // indirect + github.com/go-openapi/swag/mangling v0.25.4 // indirect + github.com/go-openapi/swag/netutils v0.25.4 // indirect + github.com/go-openapi/swag/stringutils v0.25.4 // indirect + github.com/go-openapi/swag/typeutils v0.25.4 // indirect + github.com/go-openapi/swag/yamlutils v0.25.4 // indirect github.com/go-toolsmith/astcast v1.1.0 // indirect github.com/go-toolsmith/astcopy v1.1.0 // indirect github.com/go-toolsmith/astequal v1.2.0 // indirect @@ -245,7 
+244,7 @@ require ( github.com/google/s2a-go v0.1.9 // indirect github.com/google/safehtml v0.1.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect - github.com/googleapis/gax-go/v2 v2.18.0 // indirect + github.com/googleapis/gax-go/v2 v2.21.0 // indirect github.com/gordonklaus/ineffassign v0.2.0 // indirect github.com/gostaticanalysis/analysisutil v0.7.1 // indirect github.com/gostaticanalysis/comment v1.5.0 // indirect @@ -397,7 +396,7 @@ require ( go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/bridges/prometheus v0.68.0 // indirect go.opentelemetry.io/contrib/detectors/gcp v1.42.0 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect @@ -423,7 +422,7 @@ require ( golang.org/x/time v0.15.0 // indirect golang.org/x/tools v0.45.0 // indirect gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect - google.golang.org/api v0.272.0 // indirect + google.golang.org/api v0.274.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260406210006-6f92a3bedf2d // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect diff --git a/go/go.sum b/go/go.sum index 03a15884dc..753a6fe935 100644 --- a/go/go.sum +++ b/go/go.sum @@ -8,8 +8,8 @@ charm.land/lipgloss/v2 v2.0.3 h1:yM2zJ4Cf5Y51b7RHIwioil4ApI/aypFXXVHSwlM6RzU= charm.land/lipgloss/v2 v2.0.3/go.mod h1:7myLU9iG/3xluAWzpY/fSxYYHCgoKTie7laxk6ATwXA= cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= cloud.google.com/go v0.123.0/go.mod 
h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= -cloud.google.com/go/auth v0.18.2 h1:+Nbt5Ev0xEqxlNjd6c+yYUeosQ5TtEUaNcN/3FozlaM= -cloud.google.com/go/auth v0.18.2/go.mod h1:xD+oY7gcahcu7G2SG2DsBerfFxgPAJz17zz2joOFF3M= +cloud.google.com/go/auth v0.19.0 h1:DGYwtbcsGsT1ywuxsIoWi1u/vlks0moIblQHgSDgQkQ= +cloud.google.com/go/auth v0.19.0/go.mod h1:2Aph7BT2KnaSFOM0JDPyiYgNh6PL9vGMiP8CUIXZ+IY= cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= @@ -70,6 +70,8 @@ github.com/abiosoft/ishell/v2 v2.0.2 h1:5qVfGiQISaYM8TkbBl7RFO6MddABoXpATrsFbVI+ github.com/abiosoft/ishell/v2 v2.0.2/go.mod h1:E4oTCXfo6QjoCart0QYa5m9w4S+deXs/P/9jA77A9Bs= github.com/abiosoft/readline v0.0.0-20180607040430-155bce2042db h1:CjPUSXOiYptLbTdr1RceuZgSFDQ7U15ITERUGrUORx8= github.com/abiosoft/readline v0.0.0-20180607040430-155bce2042db/go.mod h1:rB3B4rKii8V21ydCbIzH5hZiCQE7f5E9SzUb/ZZx530= +github.com/agent-substrate/substrate v0.0.0 h1:XEX4QAjzaIcv4amBqBvPE/f40WV5WHRWo7u04xvqv/g= +github.com/agent-substrate/substrate v0.0.0/go.mod h1:8Z4SJqPWDMPBa76JgIdpiX0jTY1JXcfLTXEAtkUv7go= github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= github.com/alecthomas/chroma/v2 v2.24.1 h1:m5ffpfZbIb++k8AqFEKy9uVgY12xIQtBsQlc6DfZJQM= @@ -245,12 +247,10 @@ github.com/dlclark/regexp2 v1.12.0 h1:0j4c5qQmnC6XOWNjP3PIXURXN2gWx76rd3KvgdPkCz github.com/dlclark/regexp2 v1.12.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= -github.com/docker/cli v29.2.1+incompatible 
h1:n3Jt0QVCN65eiVBoUTZQM9mcQICCJt3akW4pKAbKdJg= -github.com/docker/cli v29.2.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= -github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= -github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM= -github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/cli v29.4.0+incompatible h1:+IjXULMetlvWJiuSI0Nbor36lcJ5BTcVpUmB21KBoVM= +github.com/docker/cli v29.4.0+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI= +github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= @@ -308,36 +308,40 @@ github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-openapi/jsonpointer v0.22.1 h1:sHYI1He3b9NqJ4wXLoJDKmUmHkWy/L7rtEo92JUxBNk= -github.com/go-openapi/jsonpointer v0.22.1/go.mod h1:pQT9OsLkfz1yWoMgYFy4x3U5GY5nUlsOn1qSBH5MkCM= -github.com/go-openapi/jsonreference v0.21.2 h1:Wxjda4M/BBQllegefXrY/9aq1fxBA8sI5M/lFU6tSWU= -github.com/go-openapi/jsonreference v0.21.2/go.mod h1:pp3PEjIsJ9CZDGCNOyXIQxsNuroxm8FAJ/+quA0yKzQ= -github.com/go-openapi/swag 
v0.25.1 h1:6uwVsx+/OuvFVPqfQmOOPsqTcm5/GkBhNwLqIR916n8= -github.com/go-openapi/swag v0.25.1/go.mod h1:bzONdGlT0fkStgGPd3bhZf1MnuPkf2YAys6h+jZipOo= -github.com/go-openapi/swag/cmdutils v0.25.1 h1:nDke3nAFDArAa631aitksFGj2omusks88GF1VwdYqPY= -github.com/go-openapi/swag/cmdutils v0.25.1/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0= -github.com/go-openapi/swag/conv v0.25.1 h1:+9o8YUg6QuqqBM5X6rYL/p1dpWeZRhoIt9x7CCP+he0= -github.com/go-openapi/swag/conv v0.25.1/go.mod h1:Z1mFEGPfyIKPu0806khI3zF+/EUXde+fdeksUl2NiDs= -github.com/go-openapi/swag/fileutils v0.25.1 h1:rSRXapjQequt7kqalKXdcpIegIShhTPXx7yw0kek2uU= -github.com/go-openapi/swag/fileutils v0.25.1/go.mod h1:+NXtt5xNZZqmpIpjqcujqojGFek9/w55b3ecmOdtg8M= -github.com/go-openapi/swag/jsonname v0.25.1 h1:Sgx+qbwa4ej6AomWC6pEfXrA6uP2RkaNjA9BR8a1RJU= -github.com/go-openapi/swag/jsonname v0.25.1/go.mod h1:71Tekow6UOLBD3wS7XhdT98g5J5GR13NOTQ9/6Q11Zo= -github.com/go-openapi/swag/jsonutils v0.25.1 h1:AihLHaD0brrkJoMqEZOBNzTLnk81Kg9cWr+SPtxtgl8= -github.com/go-openapi/swag/jsonutils v0.25.1/go.mod h1:JpEkAjxQXpiaHmRO04N1zE4qbUEg3b7Udll7AMGTNOo= -github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.1 h1:DSQGcdB6G0N9c/KhtpYc71PzzGEIc/fZ1no35x4/XBY= -github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.1/go.mod h1:kjmweouyPwRUEYMSrbAidoLMGeJ5p6zdHi9BgZiqmsg= -github.com/go-openapi/swag/loading v0.25.1 h1:6OruqzjWoJyanZOim58iG2vj934TysYVptyaoXS24kw= -github.com/go-openapi/swag/loading v0.25.1/go.mod h1:xoIe2EG32NOYYbqxvXgPzne989bWvSNoWoyQVWEZicc= -github.com/go-openapi/swag/mangling v0.25.1 h1:XzILnLzhZPZNtmxKaz/2xIGPQsBsvmCjrJOWGNz/ync= -github.com/go-openapi/swag/mangling v0.25.1/go.mod h1:CdiMQ6pnfAgyQGSOIYnZkXvqhnnwOn997uXZMAd/7mQ= -github.com/go-openapi/swag/netutils v0.25.1 h1:2wFLYahe40tDUHfKT1GRC4rfa5T1B4GWZ+msEFA4Fl4= -github.com/go-openapi/swag/netutils v0.25.1/go.mod h1:CAkkvqnUJX8NV96tNhEQvKz8SQo2KF0f7LleiJwIeRE= -github.com/go-openapi/swag/stringutils v0.25.1 
h1:Xasqgjvk30eUe8VKdmyzKtjkVjeiXx1Iz0zDfMNpPbw= -github.com/go-openapi/swag/stringutils v0.25.1/go.mod h1:JLdSAq5169HaiDUbTvArA2yQxmgn4D6h4A+4HqVvAYg= -github.com/go-openapi/swag/typeutils v0.25.1 h1:rD/9HsEQieewNt6/k+JBwkxuAHktFtH3I3ysiFZqukA= -github.com/go-openapi/swag/typeutils v0.25.1/go.mod h1:9McMC/oCdS4BKwk2shEB7x17P6HmMmA6dQRtAkSnNb8= -github.com/go-openapi/swag/yamlutils v0.25.1 h1:mry5ez8joJwzvMbaTGLhw8pXUnhDK91oSJLDPF1bmGk= -github.com/go-openapi/swag/yamlutils v0.25.1/go.mod h1:cm9ywbzncy3y6uPm/97ysW8+wZ09qsks+9RS8fLWKqg= +github.com/go-openapi/jsonpointer v0.22.4 h1:dZtK82WlNpVLDW2jlA1YCiVJFVqkED1MegOUy9kR5T4= +github.com/go-openapi/jsonpointer v0.22.4/go.mod h1:elX9+UgznpFhgBuaMQ7iu4lvvX1nvNsesQ3oxmYTw80= +github.com/go-openapi/jsonreference v0.21.4 h1:24qaE2y9bx/q3uRK/qN+TDwbok1NhbSmGjjySRCHtC8= +github.com/go-openapi/jsonreference v0.21.4/go.mod h1:rIENPTjDbLpzQmQWCj5kKj3ZlmEh+EFVbz3RTUh30/4= +github.com/go-openapi/swag v0.25.4 h1:OyUPUFYDPDBMkqyxOTkqDYFnrhuhi9NR6QVUvIochMU= +github.com/go-openapi/swag v0.25.4/go.mod h1:zNfJ9WZABGHCFg2RnY0S4IOkAcVTzJ6z2Bi+Q4i6qFQ= +github.com/go-openapi/swag/cmdutils v0.25.4 h1:8rYhB5n6WawR192/BfUu2iVlxqVR9aRgGJP6WaBoW+4= +github.com/go-openapi/swag/cmdutils v0.25.4/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0= +github.com/go-openapi/swag/conv v0.25.4 h1:/Dd7p0LZXczgUcC/Ikm1+YqVzkEeCc9LnOWjfkpkfe4= +github.com/go-openapi/swag/conv v0.25.4/go.mod h1:3LXfie/lwoAv0NHoEuY1hjoFAYkvlqI/Bn5EQDD3PPU= +github.com/go-openapi/swag/fileutils v0.25.4 h1:2oI0XNW5y6UWZTC7vAxC8hmsK/tOkWXHJQH4lKjqw+Y= +github.com/go-openapi/swag/fileutils v0.25.4/go.mod h1:cdOT/PKbwcysVQ9Tpr0q20lQKH7MGhOEb6EwmHOirUk= +github.com/go-openapi/swag/jsonname v0.25.4 h1:bZH0+MsS03MbnwBXYhuTttMOqk+5KcQ9869Vye1bNHI= +github.com/go-openapi/swag/jsonname v0.25.4/go.mod h1:GPVEk9CWVhNvWhZgrnvRA6utbAltopbKwDu8mXNUMag= +github.com/go-openapi/swag/jsonutils v0.25.4 h1:VSchfbGhD4UTf4vCdR2F4TLBdLwHyUDTd1/q4i+jGZA= +github.com/go-openapi/swag/jsonutils 
v0.25.4/go.mod h1:7OYGXpvVFPn4PpaSdPHJBtF0iGnbEaTk8AvBkoWnaAY= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4 h1:IACsSvBhiNJwlDix7wq39SS2Fh7lUOCJRmx/4SN4sVo= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4/go.mod h1:Mt0Ost9l3cUzVv4OEZG+WSeoHwjWLnarzMePNDAOBiM= +github.com/go-openapi/swag/loading v0.25.4 h1:jN4MvLj0X6yhCDduRsxDDw1aHe+ZWoLjW+9ZQWIKn2s= +github.com/go-openapi/swag/loading v0.25.4/go.mod h1:rpUM1ZiyEP9+mNLIQUdMiD7dCETXvkkC30z53i+ftTE= +github.com/go-openapi/swag/mangling v0.25.4 h1:2b9kBJk9JvPgxr36V23FxJLdwBrpijI26Bx5JH4Hp48= +github.com/go-openapi/swag/mangling v0.25.4/go.mod h1:6dxwu6QyORHpIIApsdZgb6wBk/DPU15MdyYj/ikn0Hg= +github.com/go-openapi/swag/netutils v0.25.4 h1:Gqe6K71bGRb3ZQLusdI8p/y1KLgV4M/k+/HzVSqT8H0= +github.com/go-openapi/swag/netutils v0.25.4/go.mod h1:m2W8dtdaoX7oj9rEttLyTeEFFEBvnAx9qHd5nJEBzYg= +github.com/go-openapi/swag/stringutils v0.25.4 h1:O6dU1Rd8bej4HPA3/CLPciNBBDwZj9HiEpdVsb8B5A8= +github.com/go-openapi/swag/stringutils v0.25.4/go.mod h1:GTsRvhJW5xM5gkgiFe0fV3PUlFm0dr8vki6/VSRaZK0= +github.com/go-openapi/swag/typeutils v0.25.4 h1:1/fbZOUN472NTc39zpa+YGHn3jzHWhv42wAJSN91wRw= +github.com/go-openapi/swag/typeutils v0.25.4/go.mod h1:Ou7g//Wx8tTLS9vG0UmzfCsjZjKhpjxayRKTHXf2pTE= +github.com/go-openapi/swag/yamlutils v0.25.4 h1:6jdaeSItEUb7ioS9lFoCZ65Cne1/RZtPBZ9A56h92Sw= +github.com/go-openapi/swag/yamlutils v0.25.4/go.mod h1:MNzq1ulQu+yd8Kl7wPOut/YHAAU/H6hL91fF+E2RFwc= +github.com/go-openapi/testify/enable/yaml/v2 v2.0.2 h1:0+Y41Pz1NkbTHz8NngxTuAXxEodtNSI1WG1c/m5Akw4= +github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8UKX3IBZu6z8T5Dvy5+CW3NLUUg= +github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls= +github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54= github.com/go-pg/pg/v10 v10.11.0 h1:CMKJqLgTrfpE/aOVeLdybezR2om071Vh38OLZjsyMI0= github.com/go-pg/pg/v10 v10.11.0/go.mod 
h1:4BpHRoxE61y4Onpof3x1a2SQvi9c+q1dJnrNdMjsroA= github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4mU= @@ -377,6 +381,8 @@ github.com/godoc-lint/godoc-lint v0.11.2 h1:Bp0FkJWoSdNsBikdNgIcgtaoo+xz6I/Y9s5W github.com/godoc-lint/godoc-lint v0.11.2/go.mod h1:iVpGdL1JCikNH2gGeAn3Hh+AgN5Gx/I/cxV+91L41jo= github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw= github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA= @@ -417,8 +423,8 @@ github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/go-containerregistry v0.21.2 h1:vYaMU4nU55JJGFC9JR/s8NZcTjbE9DBBbvusTW9NeS0= -github.com/google/go-containerregistry v0.21.2/go.mod h1:ctO5aCaewH4AK1AumSF5DPW+0+R+d2FmylMJdp5G7p0= +github.com/google/go-containerregistry v0.21.5 h1:KTJG9Pn/jC0VdZR6ctV3/jcN+q6/Iqlx0sTVz3ywZlM= +github.com/google/go-containerregistry v0.21.5/go.mod h1:ySvMuiWg+dOsRW0Hw8GYwfMwBlNRTmpYBFJPlkco5zU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -434,8 +440,8 @@ github.com/google/uuid v1.6.0 
h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8= github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= -github.com/googleapis/gax-go/v2 v2.18.0 h1:jxP5Uuo3bxm3M6gGtV94P4lliVetoCB4Wk2x8QA86LI= -github.com/googleapis/gax-go/v2 v2.18.0/go.mod h1:uSzZN4a356eRG985CzJ3WfbFSpqkLTjsnhWGJR6EwrE= +github.com/googleapis/gax-go/v2 v2.21.0 h1:h45NjjzEO3faG9Lg/cFrBh2PgegVVgzqKzuZl/wMbiI= +github.com/googleapis/gax-go/v2 v2.21.0/go.mod h1:But/NJU6TnZsrLai/xBAQLLz+Hc7fHZJt/hsCz3Fih4= github.com/gordonklaus/ineffassign v0.2.0 h1:Uths4KnmwxNJNzq87fwQQDDnbNb7De00VOk9Nu0TySs= github.com/gordonklaus/ineffassign v0.2.0/go.mod h1:TIpymnagPSexySzs7F9FnO1XFTy8IT3a59vmZp5Y9Lw= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= @@ -879,8 +885,8 @@ go.opentelemetry.io/contrib/detectors/gcp v1.42.0 h1:kpt2PEJuOuqYkPcktfJqWWDjTEd go.opentelemetry.io/contrib/detectors/gcp v1.42.0/go.mod h1:W9zQ439utxymRrXsUOzZbFX4JhLxXU4+ZnCt8GG7yA8= go.opentelemetry.io/contrib/exporters/autoexport v0.68.0 h1:0D3GFvELGIwQGfC6agLsbrEYSGWZTRTxIXxcQUqrOuk= go.opentelemetry.io/contrib/exporters/autoexport v0.68.0/go.mod h1:DM2NV7Zb8CcGeVPt6glouY0FAiwZQ/iqgcWExhgWeN8= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 h1:XmiuHzgJt067+a6kwyAzkhXooYVv3/TOw9cM2VfJgUM= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0/go.mod h1:KDgtbWKTQs4bM+VPUr6WlL9m/WXcmkCcBlIzqxPGzmI= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0 h1:0Qx7VGBacMm9ZENQ7TnNObTYI4ShC+lHI16seduaxZo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.68.0/go.mod h1:Sje3i3MjSPKTSPvVWCaL8ugBzJwik3u4smCjUeuupqg= 
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0 h1:CqXxU8VOmDefoh0+ztfGaymYbhdB/tT3zs79QaZTNGY= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.68.0/go.mod h1:BuhAPThV8PBHBvg8ZzZ/Ok3idOdhWIodywz2xEcRbJo= go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= @@ -1037,10 +1043,12 @@ gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= google.golang.org/adk v1.2.0 h1:MfQD1/GqPfIsFNBcozNykkjdqNIdCrPH/SNqKPZF/yM= google.golang.org/adk v1.2.0/go.mod h1:6QY5jQI7awU4WYtJqvyIkJQheCvqsGWweU6BX63USEc= -google.golang.org/api v0.272.0 h1:eLUQZGnAS3OHn31URRf9sAmRk3w2JjMx37d2k8AjJmA= -google.golang.org/api v0.272.0/go.mod h1:wKjowi5LNJc5qarNvDCvNQBn3rVK8nSy6jg2SwRwzIA= +google.golang.org/api v0.274.0 h1:aYhycS5QQCwxHLwfEHRRLf9yNsfvp1JadKKWBE54RFA= +google.golang.org/api v0.274.0/go.mod h1:JbAt7mF+XVmWu6xNP8/+CTiGH30ofmCmk9nM8d8fHew= google.golang.org/genai v1.57.0 h1:qTyG2ynz5dQy2jF4CvZdLHHVslhR0heMue+zM1a4GNM= google.golang.org/genai v1.57.0/go.mod h1:A3kkl0nyBjyFlNjgxIwKq70julKbIxpSxqKO5gw/gmk= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 h1:XzmzkmB14QhVhgnawEVsOn6OFsnpyxNPRY9QV01dNB0= +google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:L43LFes82YgSonw6iTXTxXUX1OlULt4AQtkik4ULL/I= google.golang.org/genproto/googleapis/api v0.0.0-20260406210006-6f92a3bedf2d h1:/aDRtSZJjyLQzm75d+a1wOJaqyKBMvIAfeQmoa3ORiI= google.golang.org/genproto/googleapis/api v0.0.0-20260406210006-6f92a3bedf2d/go.mod h1:etfGUgejTiadZAUaEP14NP97xi1RGeawqkjDARA/UOs= google.golang.org/genproto/googleapis/rpc v0.0.0-20260406210006-6f92a3bedf2d h1:wT2n40TBqFY6wiwazVK9/iTWbsQrgk5ZfCSVFLO9LQA= diff --git a/helm/kagent-crds/templates/kagent.dev_agentharnesses.yaml b/helm/kagent-crds/templates/kagent.dev_agentharnesses.yaml index 308d7ba0f2..9c4c1ee2a6 100644 --- 
a/helm/kagent-crds/templates/kagent.dev_agentharnesses.yaml +++ b/helm/kagent-crds/templates/kagent.dev_agentharnesses.yaml @@ -19,6 +19,9 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: + - jsonPath: .spec.runtime + name: Runtime + type: string - jsonPath: .spec.backend name: Backend type: string @@ -511,6 +514,75 @@ spec: type: string type: array type: object + runtime: + default: openshell + description: Runtime selects the harness provisioning stack. Defaults + to openshell when unset. + enum: + - openshell + - substrate + type: string + substrate: + description: Substrate is required when runtime is substrate. + properties: + gatewayToken: + description: |- + GatewayToken is the OpenClaw gateway Bearer token for this harness. + Prefer gatewayTokenSecretRef for production secrets. + minLength: 1 + type: string + gatewayTokenSecretRef: + description: |- + GatewayTokenSecretRef references a Secret key holding the OpenClaw gateway Bearer token. + The Secret must contain a "token" key. + properties: + apiGroup: + type: string + kind: + type: string + name: + type: string + required: + - name + type: object + snapshotsConfig: + description: |- + SnapshotsConfig configures actor memory snapshots. Defaults to + gs://ate-snapshots// when unset. + properties: + location: + description: |- + Location is the GCS URI prefix for golden and incremental snapshots. + Example: gs://ate-snapshots/kagent/my-namespace/my-harness/ + pattern: ^gs:// + type: string + required: + - location + type: object + workerPoolRef: + description: |- + WorkerPoolRef references an existing ate.dev WorkerPool in the harness namespace. + When unset, the controller uses its configured default WorkerPool. + properties: + apiGroup: + type: string + kind: + type: string + name: + type: string + required: + - name + type: object + workloadImage: + description: WorkloadImage overrides the default nemoclaw/openclaw + sandbox image in the ActorTemplate. 
+ type: string + type: object + x-kubernetes-validations: + - message: Exactly one of gatewayToken or gatewayTokenSecretRef must + be specified + rule: (has(self.gatewayToken) && !has(self.gatewayTokenSecretRef)) + || (!has(self.gatewayToken) && has(self.gatewayTokenSecretRef)) required: - backend type: object @@ -520,6 +592,10 @@ spec: || (has(c.slack) && ((self.backend == ''hermes'' && has(c.slack.hermes) && !has(c.slack.openclaw)) || ((self.backend == ''openclaw'' || self.backend == ''nemoclaw'') && has(c.slack.openclaw) && !has(c.slack.hermes)))))' + - message: spec.substrate may only be set when runtime is substrate + rule: '!has(self.substrate) || self.runtime == ''substrate''' + - message: spec.substrate is required when runtime is substrate + rule: self.runtime != 'substrate' || has(self.substrate) status: description: AgentHarnessStatus is the observed state of an AgentHarness. properties: diff --git a/helm/kagent/templates/_helpers.tpl b/helm/kagent/templates/_helpers.tpl index 22c358bce3..761d225696 100644 --- a/helm/kagent/templates/_helpers.tpl +++ b/helm/kagent/templates/_helpers.tpl @@ -50,6 +50,17 @@ Allows overriding it for multi-namespace deployments in combined charts. {{- default .Release.Namespace .Values.namespaceOverride | trunc 63 | trimSuffix "-" -}} {{- end }} +{{/* +Namespaces where Substrate ate-api-server needs read access to Secrets and ConfigMaps +referenced by generated ActorTemplates (install namespace plus rbac.namespaces). +*/}} +{{- define "kagent.substrate.envSourceNamespaces" -}} +{{- $installNs := include "kagent.namespace" . -}} +{{- $extra := .Values.rbac.namespaces | default list -}} +{{- $all := append $extra $installNs | uniq | sortAlpha -}} +{{- join "," $all -}} +{{- end }} + {{/* Watch namespaces - transforms list of namespaces cached by the controller into comma-separated string. Precedence: controller.watchNamespaces (explicit override) > rbac.namespaces > empty (watch all). 
diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index 9d85f1066e..6ef3af61e0 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -87,6 +87,42 @@ spec: {{- with .Values.controller.env }} {{- toYaml . | nindent 12 }} {{- end }} + {{- if and .Values.controller.substrate .Values.controller.substrate.enabled }} + - name: SUBSTRATE_ATE_API_ENDPOINT + value: {{ .Values.controller.substrate.ateApiEndpoint | quote }} + {{- with .Values.controller.substrate.atenetRouterURL }} + - name: SUBSTRATE_ATENET_ROUTER_URL + value: {{ . | quote }} + {{- end }} + {{- if .Values.controller.substrate.ateApiInsecure }} + - name: SUBSTRATE_ATE_API_INSECURE + value: "true" + {{- end }} + - name: SUBSTRATE_DEFAULT_WORKERPOOL_NAMESPACE + value: {{ .Values.controller.substrate.defaultWorkerPool.namespace | default (include "kagent.namespace" .) | quote }} + - name: SUBSTRATE_DEFAULT_WORKERPOOL_NAME + value: {{ .Values.controller.substrate.defaultWorkerPool.name | default (ternary .Values.substrateWorkerPool.name "" .Values.substrateWorkerPool.create) | quote }} + {{- with .Values.controller.substrate.pauseImage }} + - name: SUBSTRATE_PAUSE_IMAGE + value: {{ . | quote }} + {{- end }} + {{- with .Values.controller.substrate.runscAMD64URL }} + - name: SUBSTRATE_RUNSC_AMD64_URL + value: {{ . | quote }} + {{- end }} + {{- with .Values.controller.substrate.runscAMD64SHA256 }} + - name: SUBSTRATE_RUNSC_AMD64_SHA256 + value: {{ . | quote }} + {{- end }} + {{- with .Values.controller.substrate.runscARM64URL }} + - name: SUBSTRATE_RUNSC_ARM64_URL + value: {{ . | quote }} + {{- end }} + {{- with .Values.controller.substrate.runscARM64SHA256 }} + - name: SUBSTRATE_RUNSC_ARM64_SHA256 + value: {{ . | quote }} + {{- end }} + {{- end }} envFrom: - configMapRef: name: {{ include "kagent.fullname" . 
}}-controller diff --git a/helm/kagent/templates/rbac/getter-role.yaml b/helm/kagent/templates/rbac/getter-role.yaml index f0ed9614fe..ceab5ec9aa 100644 --- a/helm/kagent/templates/rbac/getter-role.yaml +++ b/helm/kagent/templates/rbac/getter-role.yaml @@ -53,6 +53,21 @@ - get - list - watch +- apiGroups: + - ate.dev + resources: + - workerpools + - actortemplates + verbs: + - get + - list + - watch +- apiGroups: + - ate.dev + resources: + - actortemplates/status + verbs: + - get - apiGroups: - "apps" resources: diff --git a/helm/kagent/templates/rbac/writer-role.yaml b/helm/kagent/templates/rbac/writer-role.yaml index b735e159bd..551f52d5fc 100644 --- a/helm/kagent/templates/rbac/writer-role.yaml +++ b/helm/kagent/templates/rbac/writer-role.yaml @@ -75,6 +75,15 @@ - update - patch - delete +- apiGroups: + - ate.dev + resources: + - actortemplates + verbs: + - create + - update + - patch + - delete {{- end -}} {{- include "kagent.rbac.validate" . -}} @@ -101,4 +110,4 @@ metadata: {{- include "kagent.labels" . | nindent 4 }} rules: {{- include "kagent.writer.rules" . | nindent 2 }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/kagent/templates/substrate-ate-api-rbac.yaml b/helm/kagent/templates/substrate-ate-api-rbac.yaml new file mode 100644 index 0000000000..ea7665c3b4 --- /dev/null +++ b/helm/kagent/templates/substrate-ate-api-rbac.yaml @@ -0,0 +1,39 @@ +{{- if .Values.controller.substrate.enabled }} +{{- $namespaces := splitList "," (include "kagent.substrate.envSourceNamespaces" .) 
}} +{{- $ateNs := .Values.controller.substrate.ateApiServer.namespace | default "ate-system" }} +{{- $ateSA := .Values.controller.substrate.ateApiServer.serviceAccount | default "ate-api-server" }} +{{- range $namespace := $namespaces }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "kagent.fullname" $ }}-ate-api-env-sources + namespace: {{ $namespace }} + labels: + {{- include "kagent.labels" $ | nindent 4 }} +rules: +- apiGroups: + - "" + resources: + - secrets + - configmaps + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "kagent.fullname" $ }}-ate-api-env-sources + namespace: {{ $namespace }} + labels: + {{- include "kagent.labels" $ | nindent 4 }} +subjects: +- kind: ServiceAccount + name: {{ $ateSA | quote }} + namespace: {{ $ateNs | quote }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "kagent.fullname" $ }}-ate-api-env-sources +{{- end }} +{{- end }} diff --git a/helm/kagent/templates/substrate-workerpool.yaml b/helm/kagent/templates/substrate-workerpool.yaml new file mode 100644 index 0000000000..4cc12119c7 --- /dev/null +++ b/helm/kagent/templates/substrate-workerpool.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.controller.substrate.enabled .Values.substrateWorkerPool.create }} +{{- if not .Values.substrateWorkerPool.ateomImage }} +{{- fail "substrateWorkerPool.ateomImage is required when substrateWorkerPool.create=true" }} +{{- end }} +apiVersion: ate.dev/v1alpha1 +kind: WorkerPool +metadata: + name: {{ .Values.substrateWorkerPool.name | quote }} + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.labels" . 
| nindent 4 }} +spec: + replicas: {{ .Values.substrateWorkerPool.replicas }} + ateomImage: {{ .Values.substrateWorkerPool.ateomImage | quote }} +{{- end }} diff --git a/helm/kagent/tests/substrate-ate-api-rbac_test.yaml b/helm/kagent/tests/substrate-ate-api-rbac_test.yaml new file mode 100644 index 0000000000..2bcee7d040 --- /dev/null +++ b/helm/kagent/tests/substrate-ate-api-rbac_test.yaml @@ -0,0 +1,97 @@ +suite: test substrate ate-api rbac +templates: + - substrate-ate-api-rbac.yaml +tests: + - it: should not render when substrate is disabled + set: + controller.substrate.enabled: false + asserts: + - hasDocuments: + count: 0 + + - it: should render Role and RoleBinding in the release namespace when substrate is enabled + set: + controller.substrate.enabled: true + asserts: + - hasDocuments: + count: 2 + - isKind: + of: Role + documentIndex: 0 + - isKind: + of: RoleBinding + documentIndex: 1 + - equal: + path: metadata.namespace + value: NAMESPACE + documentIndex: 0 + - equal: + path: metadata.name + value: RELEASE-NAME-ate-api-env-sources + documentIndex: 0 + - contains: + path: rules + content: + apiGroups: [""] + resources: ["secrets", "configmaps"] + verbs: ["get"] + documentIndex: 0 + - equal: + path: subjects[0].name + value: ate-api-server + documentIndex: 1 + - equal: + path: subjects[0].namespace + value: ate-system + documentIndex: 1 + + - it: should render RBAC in each rbac.namespaces entry plus the release namespace + set: + controller.substrate.enabled: true + rbac: + namespaces: + - team-a + - team-b + asserts: + - hasDocuments: + count: 6 + - equal: + path: metadata.namespace + value: NAMESPACE + documentIndex: 0 + - equal: + path: metadata.namespace + value: NAMESPACE + documentIndex: 1 + - equal: + path: metadata.namespace + value: team-a + documentIndex: 2 + - equal: + path: metadata.namespace + value: team-a + documentIndex: 3 + - equal: + path: metadata.namespace + value: team-b + documentIndex: 4 + - equal: + path: metadata.namespace + 
value: team-b + documentIndex: 5 + + - it: should allow overriding ate-api-server service account identity + set: + controller.substrate.enabled: true + controller.substrate.ateApiServer: + namespace: custom-ate + serviceAccount: custom-api + asserts: + - equal: + path: subjects[0].name + value: custom-api + documentIndex: 1 + - equal: + path: subjects[0].namespace + value: custom-ate + documentIndex: 1 diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index ebd3f6987c..e354d25310 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -223,12 +223,40 @@ controller: ports: port: 8083 targetPort: 8083 - # TODO: NEED TO MAKE SURE THESE GET RENDERED IN controller-deployment.yaml - # Extra controller env. Examples — OpenShell: - # env: - # - name: OPENSHELL_GRPC_ADDR - # value: "openshell.my-namespace.svc.cluster.local:8080" - env: [] + # Extra controller env (mapped to flags via SUBSTRATE_* / OPENSHELL_* env names). + # OpenShell AgentHarness: set OPENSHELL_GATEWAY_URL (or leave defaults below). + env: + # - name: OPENSHELL_GATEWAY_URL + # value: openshell.openshell.svc.cluster.local:8080 + # - name: OPENSHELL_INSECURE + # value: "true" + + # Agent Substrate (OpenClaw harness runtime=substrate). Requires ate-system installed. + # kagent generates per-harness ActorTemplates and references an existing WorkerPool. + substrate: + enabled: false + ateApiEndpoint: "" + atenetRouterURL: "" + ateApiInsecure: false + # Substrate ate-api-server identity for env source resolution (secretKeyRef/configMapKeyRef on ActorTemplates). 
+ ateApiServer: + namespace: ate-system + serviceAccount: ate-api-server + defaultWorkerPool: + namespace: "" + name: "" + # pauseImage: "gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da" + # runscAMD64URL: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc" + # runscAMD64SHA256: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63" + # runscARM64URL: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc" + # runscARM64SHA256: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9" + # Example when enabled: + # enabled: true + # ateApiEndpoint: "dns:///api.ate-system.svc:443" + # atenetRouterURL: "http://atenet-router.ate-system.svc:80" + # defaultWorkerPool: + # name: "kagent-default" + envFrom: [] # Additional volumes on the output Deployment definition. @@ -256,6 +284,14 @@ controller: # @default -- httpGet /health on port http, periodSeconds=30 readinessProbe: {} +# -- Optional Agent Substrate WorkerPool installed by this chart. This is platform +# capacity and is not owned by individual AgentHarness resources. 
+substrateWorkerPool: + create: false + name: kagent-default + replicas: 1 + ateomImage: "" + # ============================================================================== # UI CONFIGURATION # ============================================================================== @@ -657,7 +693,7 @@ oauth2-proxy: # Skip authentication for kagent's branded login page, health checks, and static assets # This allows unauthenticated users to see the landing page and k8s probes to work skip-auth-route: "^/(health|login)$" - skip-auth-regex: "^/(login|_next/static|_next/image|login-bg\\.(jpg|png|webp)|logo-.*\\.png|favicon\\.ico).*$" + skip-auth-regex: "^/(login|_next/static|_next/image|login-bg\\.(jpg|png|webp)|logo-.*\\.png|favicon\\.ico|api/agentharnesses/.*/gateway).*$" # Use custom templates that redirect to kagent's branded /login page custom-templates-dir: "/templates" diff --git a/ui/next.config.ts b/ui/next.config.ts index e816991448..cc63e385f6 100644 --- a/ui/next.config.ts +++ b/ui/next.config.ts @@ -1,7 +1,22 @@ import type { NextConfig } from "next"; +const controllerDevURL = + process.env.KAGENT_DEV_CONTROLLER_URL ?? "http://127.0.0.1:8083"; + const nextConfig: NextConfig = { output: "standalone", + // Proxy /api to the controller in local dev (next dev :8001 → controller :8083). + async rewrites() { + if (process.env.NODE_ENV === "production") { + return []; + } + return [ + { + source: "/api/:path*", + destination: `${controllerDevURL}/api/:path*`, + }, + ]; + }, logging: { fetches: { fullUrl: true, diff --git a/ui/src/app/actions/substrate.ts b/ui/src/app/actions/substrate.ts new file mode 100644 index 0000000000..e638fdc961 --- /dev/null +++ b/ui/src/app/actions/substrate.ts @@ -0,0 +1,22 @@ +"use server"; + +import { fetchApi, createErrorResponse } from "./utils"; +import type { BaseResponse, SubstrateStatusResponse } from "@/types"; + +export async function getSubstrateStatus( + namespace?: string, +): Promise> { + try { + const qs = namespace?.trim() ? 
`?namespace=${encodeURIComponent(namespace.trim())}` : ""; + const response = await fetchApi>(`/substrate/status${qs}`); + if (!response?.data) { + throw new Error("Failed to load substrate status"); + } + return { + message: response.message ?? "Substrate status fetched", + data: response.data, + }; + } catch (error) { + return createErrorResponse(error, "Error loading substrate status"); + } +} diff --git a/ui/src/app/layout.tsx b/ui/src/app/layout.tsx index 175d0e4fba..4ce12532bc 100644 --- a/ui/src/app/layout.tsx +++ b/ui/src/app/layout.tsx @@ -4,6 +4,7 @@ import "./globals.css"; import { TooltipProvider } from "@/components/ui/tooltip"; import { AgentsProvider } from "@/components/AgentsProvider"; import { AuthProvider } from "@/contexts/AuthContext"; +import { SubstrateFeaturesProvider } from "@/contexts/SubstrateFeaturesContext"; import { Header } from "@/components/Header"; import { Footer } from "@/components/Footer"; import { ThemeProvider } from "@/components/ThemeProvider"; @@ -23,6 +24,7 @@ export default function RootLayout({ children }: { children: React.ReactNode }) return ( + @@ -37,6 +39,7 @@ export default function RootLayout({ children }: { children: React.ReactNode }) + ); diff --git a/ui/src/app/openshell/OpenshellTerminalPage.tsx b/ui/src/app/openshell/OpenshellTerminalPage.tsx index ab7a77a618..e937186f38 100644 --- a/ui/src/app/openshell/OpenshellTerminalPage.tsx +++ b/ui/src/app/openshell/OpenshellTerminalPage.tsx @@ -50,9 +50,9 @@ export function OpenshellTerminalPage() { : undefined; const clawHarnessSession = searchParams.get("clawHarness") === "1"; const harnessTerminalSession = clawHarnessSession || harnessBackend === "hermes"; - const autoConnect = Boolean(gatewaySandboxName); const namespace = searchParams.get("ns")?.trim() ?? ""; const crName = searchParams.get("name")?.trim() ?? ""; + const autoConnect = Boolean(gatewaySandboxName); const modelConfigRef = searchParams.get("modelConfigRef")?.trim() ?? 
""; const [plainShellOnly, setPlainShellOnly] = useState(() => searchParams.get("plainShell") === "1"); /** Plain-shell mode the active SSH session was opened with (null when disconnected). */ @@ -63,7 +63,7 @@ export function OpenshellTerminalPage() { const [termError, setTermError] = useState(null); const [sessionActive, setSessionActive] = useState(false); - const [connecting, setConnecting] = useState(() => Boolean(autoConnect && gatewaySandboxName)); + const [connecting, setConnecting] = useState(() => Boolean(autoConnect)); const termHostRef = useRef(null); const termRef = useRef(null); @@ -118,25 +118,28 @@ export function OpenshellTerminalPage() { wsRef.current?.close(); }, []); - const connectTerminal = useCallback( - (gatewayName: string) => { + const connectTerminal = useCallback(() => { const term = termRef.current; if (!term) { setConnecting(false); return; } - const name = gatewayName.trim(); - if (!name) { - setTermError("Missing gateway sandbox name."); - return; - } setTermError(null); setConnecting(true); setSessionActive(false); wsRef.current?.close(); - const url = sandboxSshWebSocketURL(terminalApiBase()); + const name = gatewaySandboxName.trim(); + if (!name) { + setConnecting(false); + setTermError("Missing gateway sandbox name."); + return; + } + + const apiBase = terminalApiBase(); + const url = sandboxSshWebSocketURL(apiBase); + let ws: WebSocket; try { ws = new WebSocket(url); @@ -210,24 +213,22 @@ export function OpenshellTerminalPage() { } }; }, - [plainShellOnly, harnessBackend], + [plainShellOnly, harnessBackend, gatewaySandboxName], ); const restartSession = useCallback(() => { - const name = gatewaySandboxName.trim(); - if (!name) return; wsRef.current?.close(); - window.setTimeout(() => connectTerminal(name), 120); - }, [gatewaySandboxName, connectTerminal]); + window.setTimeout(() => connectTerminal(), 120); + }, [connectTerminal]); useEffect(() => { - if (!autoConnect || !gatewaySandboxName) return; + if (!autoConnect) return; 
const t = window.setTimeout(() => { if (!termRef.current) return; - connectTerminal(gatewaySandboxName); + connectTerminal(); }, 400); return () => window.clearTimeout(t); - }, [autoConnect, gatewaySandboxName, connectTerminal]); + }, [autoConnect, connectTerminal]); const showReconnect = Boolean(gatewaySandboxName) && !sessionActive && !connecting; const plainShellPendingRestart = @@ -284,7 +285,7 @@ export function OpenshellTerminalPage() { ) : null}
{showReconnect ? ( - ) : null} @@ -304,8 +305,7 @@ export function OpenshellTerminalPage() { {!gatewaySandboxName ? (

- Open an OpenShell sandbox from the Agents list to start a terminal - session. + Open a harness from the Agents list to start a terminal session.

) : null} diff --git a/ui/src/app/substrate/SubstrateStatusPage.tsx b/ui/src/app/substrate/SubstrateStatusPage.tsx new file mode 100644 index 0000000000..2bfd081dcf --- /dev/null +++ b/ui/src/app/substrate/SubstrateStatusPage.tsx @@ -0,0 +1,72 @@ +"use client"; + +import { useCallback, useEffect, useState } from "react"; +import { useSearchParams, useRouter } from "next/navigation"; +import { AppPageFrame } from "@/components/layout/AppPageFrame"; +import { PageHeader } from "@/components/layout/PageHeader"; +import { SubstrateStatusView } from "@/components/substrate/SubstrateStatusView"; +import { getSubstrateStatus } from "@/app/actions/substrate"; +import type { SubstrateStatusResponse } from "@/types"; + +export function SubstrateStatusPage() { + const router = useRouter(); + const searchParams = useSearchParams(); + const namespace = searchParams.get("namespace") ?? ""; + + const [status, setStatus] = useState(null); + const [loading, setLoading] = useState(true); + const [loadError, setLoadError] = useState(null); + + const load = useCallback(async () => { + setLoading(true); + setLoadError(null); + const result = await getSubstrateStatus(namespace || undefined); + if (result.error || !result.data) { + setLoadError(result.error || "Failed to load substrate status"); + setStatus(null); + } else { + setStatus(result.data); + } + setLoading(false); + }, [namespace]); + + useEffect(() => { + const raf = requestAnimationFrame(() => { + void load(); + }); + return () => cancelAnimationFrame(raf); + }, [load]); + + const handleNamespaceChange = useCallback( + (ns: string) => { + const params = new URLSearchParams(searchParams.toString()); + if (ns) { + params.set("namespace", ns); + } else { + params.delete("namespace"); + } + const q = params.toString(); + router.replace(q ? 
`/substrate?${q}` : "/substrate"); + }, + [router, searchParams], + ); + + return ( + + + + + ); +} diff --git a/ui/src/app/substrate/page.tsx b/ui/src/app/substrate/page.tsx new file mode 100644 index 0000000000..a51600ca0d --- /dev/null +++ b/ui/src/app/substrate/page.tsx @@ -0,0 +1,19 @@ +import { Suspense } from "react"; +import { SubstratePageGuard } from "@/components/substrate/SubstratePageGuard"; +import { SubstrateStatusPage } from "./SubstrateStatusPage"; + +export default function SubstratePage() { + return ( + + + Loading substrate status… +
+ } + > + + + + ); +} diff --git a/ui/src/components/AgentCard.tsx b/ui/src/components/AgentCard.tsx index 42289faf0d..7a17f89573 100644 --- a/ui/src/components/AgentCard.tsx +++ b/ui/src/components/AgentCard.tsx @@ -20,11 +20,17 @@ import { Brain, MoreHorizontal, Pencil, Terminal, Trash2 } from "lucide-react"; import { k8sRefUtils } from "@/lib/k8sUtils"; import { agentHarnessIcon, + agentHarnessRuntimeLabel, agentHarnessTypeLabel, getAgentHarnessBackend, + getAgentHarnessRuntime, isAgentHarness, } from "@/lib/agentHarness"; -import { isOpenshellSandboxRow, openshellTerminalHref } from "@/lib/openshellSandboxAgents"; +import { + isOpenshellSandboxRow, + isSubstrateHarnessRow, + openshellTerminalHref, +} from "@/lib/openshellSandboxAgents"; import { cn } from "@/lib/utils"; interface AgentCardProps { @@ -39,8 +45,10 @@ export function AgentCard({ agentResponse, onAgentsChanged }: AgentCardProps) { const [deleteOpen, setDeleteOpen] = useState(false); const sshSandbox = isOpenshellSandboxRow(agentResponse); + const substrateHarness = isSubstrateHarnessRow(agentResponse); const agentHarness = isAgentHarness(agentResponse); const harnessBackend = getAgentHarnessBackend(agentResponse); + const harnessRuntime = getAgentHarnessRuntime(agentResponse); const agentRef = k8sRefUtils.toRef( agent.metadata.namespace || '', @@ -89,7 +97,11 @@ export function AgentCard({ agentResponse, onAgentsChanged }: AgentCardProps) { {harnessBackend ? agentHarnessIcon(harnessBackend) : "🦞"} @@ -173,16 +185,19 @@ export function AgentCard({ agentResponse, onAgentsChanged }: AgentCardProps) { ); + const substrateGatewayPath = agentResponse.substrateAgentHarness?.gatewayUIPath; const chatHref = - sshSandbox && agentResponse.openshellAgentHarness - ? 
openshellTerminalHref({ - gatewaySandboxName: agentResponse.openshellAgentHarness.gatewaySandboxName, - namespace: agent.metadata.namespace, - crName: agent.metadata.name, - modelConfigRef: agentResponse.modelConfigRef, - harnessBackend: harnessBackend, - }) - : `/agents/${agent.metadata.namespace}/${agent.metadata.name}/chat`; + substrateHarness && substrateGatewayPath + ? substrateGatewayPath + : sshSandbox && agentResponse.openshellAgentHarness + ? openshellTerminalHref({ + gatewaySandboxName: agentResponse.openshellAgentHarness.gatewaySandboxName, + namespace: agent.metadata.namespace, + crName: agent.metadata.name, + modelConfigRef: agentResponse.modelConfigRef, + harnessBackend, + }) + : `/agents/${agent.metadata.namespace}/${agent.metadata.name}/chat`; return ( <> diff --git a/ui/src/components/AgentListView.tsx b/ui/src/components/AgentListView.tsx index db7d47a60d..0072d7cfc6 100644 --- a/ui/src/components/AgentListView.tsx +++ b/ui/src/components/AgentListView.tsx @@ -24,7 +24,11 @@ import { getAgentHarnessBackend, isAgentHarness, } from "@/lib/agentHarness"; -import { isOpenshellSandboxRow, openshellTerminalHref } from "@/lib/openshellSandboxAgents"; +import { + isOpenshellSandboxRow, + isSubstrateHarnessRow, + openshellTerminalHref, +} from "@/lib/openshellSandboxAgents"; interface AgentListViewProps { agentResponse: AgentResponse[]; @@ -222,6 +226,7 @@ function AgentListRow({ item, onAgentsChanged }: { item: AgentResponse; onAgents const [deleteOpen, setDeleteOpen] = useState(false); const sshSandbox = isOpenshellSandboxRow(item); + const substrateHarness = isSubstrateHarnessRow(item); const agentHarness = isAgentHarness(item); const harnessBackend = getAgentHarnessBackend(item); @@ -233,26 +238,38 @@ function AgentListRow({ item, onAgentsChanged }: { item: AgentResponse; onAgents const nTools = countAgentToolBindings(item); const nSkills = countSkills(agent); + const substrateGatewayPath = item.substrateAgentHarness?.gatewayUIPath; const 
gatewaySandboxName = item.openshellAgentHarness?.gatewaySandboxName; const chatPath = useMemo( () => - sshSandbox && gatewaySandboxName - ? openshellTerminalHref({ - gatewaySandboxName, - namespace, - crName: name, - modelConfigRef: item.modelConfigRef, - harnessBackend, - }) - : `/agents/${encodeURIComponent(namespace)}/${encodeURIComponent(name)}/chat`, - [sshSandbox, gatewaySandboxName, namespace, name, item.modelConfigRef, harnessBackend], + substrateHarness && substrateGatewayPath + ? substrateGatewayPath + : sshSandbox && gatewaySandboxName + ? openshellTerminalHref({ + gatewaySandboxName, + namespace, + crName: name, + modelConfigRef: item.modelConfigRef, + harnessBackend, + }) + : `/agents/${encodeURIComponent(namespace)}/${encodeURIComponent(name)}/chat`, + [ + substrateHarness, + substrateGatewayPath, + sshSandbox, + gatewaySandboxName, + namespace, + name, + item.modelConfigRef, + harnessBackend, + ], ); - const goChat = () => { + const goChat = useCallback(() => { if (isReady) { router.push(chatPath); } - }; + }, [isReady, router, chatPath]); const handleEdit = (e: React.MouseEvent) => { e.preventDefault(); diff --git a/ui/src/components/Header.stories.tsx b/ui/src/components/Header.stories.tsx index 7813370811..76b89c92c5 100644 --- a/ui/src/components/Header.stories.tsx +++ b/ui/src/components/Header.stories.tsx @@ -1,6 +1,7 @@ import type { Meta, StoryObj } from "@storybook/nextjs-vite"; import { Header } from "./Header"; import { AuthProvider } from "@/contexts/AuthContext"; +import { SubstrateFeaturesTestProvider } from "@/contexts/SubstrateFeaturesContext"; const meta = { title: "Components/Header", @@ -10,9 +11,11 @@ const meta = { }, decorators: [ (Story) => ( - - - + + + + + ), ], } satisfies Meta; diff --git a/ui/src/components/Header.tsx b/ui/src/components/Header.tsx index 4ced9791e7..7f76bcfd3c 100644 --- a/ui/src/components/Header.tsx +++ b/ui/src/components/Header.tsx @@ -4,7 +4,7 @@ import Link from "next/link"; import { Button } from 
"./ui/button"; import KAgentLogoWithText from "./kagent-logo-text"; import KagentLogo from "./kagent-logo"; -import { Plus, Menu, X, ChevronDown, Brain, Server, Eye, Hammer, HomeIcon, ScrollText, Cable } from "lucide-react"; +import { Plus, Menu, X, ChevronDown, Brain, Server, Eye, Hammer, HomeIcon, ScrollText, Cable, Layers } from "lucide-react"; import { ThemeToggle } from "./ThemeToggle"; import { UserMenu } from "./UserMenu"; import { @@ -13,6 +13,7 @@ import { DropdownMenuItem, DropdownMenuTrigger, } from "@/components/ui/dropdown-menu"; +import { SubstrateFeatureGate } from "@/components/SubstrateFeatureGate"; export function Header() { const [isMenuOpen, setIsMenuOpen] = useState(false); @@ -131,6 +132,14 @@ export function Header() { Prompt Library + + + + + Substrate + + + @@ -194,6 +203,14 @@ export function Header() { Prompt Library + + + + + Substrate + + + diff --git a/ui/src/components/SubstrateFeatureGate.tsx b/ui/src/components/SubstrateFeatureGate.tsx new file mode 100644 index 0000000000..08e3ae6325 --- /dev/null +++ b/ui/src/components/SubstrateFeatureGate.tsx @@ -0,0 +1,32 @@ +"use client"; + +import type { ReactNode } from "react"; +import { useSubstrateFeatures } from "@/contexts/SubstrateFeaturesContext"; + +type SubstrateFeatureGateProps = { + children: ReactNode; + /** Shown while capabilities are loading. Defaults to nothing. */ + loadingFallback?: ReactNode; + /** Shown when substrate is disabled. Defaults to nothing. */ + fallback?: ReactNode; +}; + +/** + * Renders children only when Agent Substrate is enabled on the controller. + * Use for nav items, form sections, or any UI gated on cluster substrate config. 
+ */ +export function SubstrateFeatureGate({ + children, + loadingFallback = null, + fallback = null, +}: SubstrateFeatureGateProps) { + const { enabled, isLoading } = useSubstrateFeatures(); + + if (isLoading) { + return <>{loadingFallback}; + } + if (!enabled) { + return <>{fallback}; + } + return <>{children}; +} diff --git a/ui/src/components/agent-form/OpenClawSandboxFields.tsx b/ui/src/components/agent-form/OpenClawSandboxFields.tsx index 8af459e329..13e71d94e9 100644 --- a/ui/src/components/agent-form/OpenClawSandboxFields.tsx +++ b/ui/src/components/agent-form/OpenClawSandboxFields.tsx @@ -18,6 +18,7 @@ import type { OpenClawSandboxFormValidationError, } from "@/lib/openClawSandboxForm"; import { isClawHarnessBackend, newOpenClawChannelRow } from "@/lib/openClawSandboxForm"; +import { useSubstrateEnabled } from "@/contexts/SubstrateFeaturesContext"; const OPENCLAW_DOCS_ROOT = "https://docs.openclaw.ai"; @@ -151,17 +152,91 @@ export function OpenClawSandboxFields({ harnessBackend, validationError, }: OpenClawSandboxFieldsProps) { + const substrateEnabled = useSubstrateEnabled(); const clawBackend = isClawHarnessBackend(harnessBackend); const set = (patch: Partial) => onChange({ ...value, ...patch }); const [advancedOpen, setAdvancedOpen] = React.useState(false); const section = validationError?.section ?? null; + React.useEffect(() => { + if (!substrateEnabled && value.runtime === "substrate") { + set({ runtime: "openshell" }); + } + }, [substrateEnabled, value.runtime]); + return (
{section === "general" ? validationError?.message : null} + {substrateEnabled ? ( + + + Control plane + + + {value.runtime === "substrate" ? ( +
+ + Gateway token + set({ substrateGatewayToken: e.target.value })} + /> +

+ Bearer token used by kagent when proxying the generated OpenClaw gateway. +

+
+ + Snapshot location (GCS) + set({ substrateSnapshotsLocation: e.target.value })} + /> +

+ Substrate stores golden and incremental snapshots at this gs:// prefix (GCS only today). +

+
+ + WorkerPool name + set({ substrateWorkerPoolRefName: e.target.value })} + /> +

+ Leave empty to use the controller default WorkerPool. +

+
+
+ ) : null} +
+ ) : null} + { + if (!isLoading && !enabled) { + router.replace("/"); + } + }, [enabled, isLoading, router]); + + if (isLoading) { + return ( +
+ Loading… +
+ ); + } + + if (!enabled) { + return null; + } + + return <>{children}; +} diff --git a/ui/src/components/substrate/SubstrateStatusView.tsx b/ui/src/components/substrate/SubstrateStatusView.tsx new file mode 100644 index 0000000000..1720e80867 --- /dev/null +++ b/ui/src/components/substrate/SubstrateStatusView.tsx @@ -0,0 +1,352 @@ +"use client"; + +import { useCallback, useMemo, type ComponentType } from "react"; +import Link from "next/link"; +import { RefreshCw, AlertCircle, Cpu, FileStack, Users, Boxes } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"; +import { NamespaceCombobox } from "@/components/NamespaceCombobox"; +import type { + SubstrateActorEntry, + SubstrateActorTemplateEntry, + SubstrateStatusResponse, + SubstrateWorkerEntry, + SubstrateWorkerPoolEntry, +} from "@/types"; +import { cn } from "@/lib/utils"; + +type SubstrateStatusViewProps = { + status: SubstrateStatusResponse | null; + namespace: string; + onNamespaceChange: (ns: string) => void; + isLoading: boolean; + loadError: string | null; + onRefresh: () => Promise; +}; + +function statusTone(label: string): "ok" | "warn" | "idle" | "busy" | "neutral" { + const s = label.toLowerCase(); + if (s === "ready" || s === "running") return "ok"; + if (s === "failed" || s === "suspending") return "warn"; + if (s === "suspended" || s === "unknown" || s === "") return "idle"; + if (s.includes("resume") || s.includes("wait") || s.includes("golden")) return "busy"; + return "neutral"; +} + +function StatusChip({ label }: { label: string }) { + const tone = statusTone(label); + return ( + + {label || "—"} + + ); +} + +function SectionHeader({ + icon: Icon, + title, + count, + hint, +}: { + icon: ComponentType<{ className?: string }>; + title: string; + count: number; + hint?: string; +}) { + return ( +
+
+ +

{title}

+ {count} +
+ {hint ?

{hint}

: null} +
+ ); +} + +function EmptyRow({ message }: { message: string }) { + return ( +

{message}

+ ); +} + +function WorkerPoolsTable({ rows }: { rows: SubstrateWorkerPoolEntry[] }) { + if (rows.length === 0) { + return ; + } + return ( +
+ + + + + + + + + + {rows.map((wp) => ( + + + + + + ))} + +
PoolReplicasAteom image
+ {wp.namespace}/ + {wp.name} + {wp.replicas}{wp.ateomImage}
+
+ ); +} + +function ActorTemplatesTable({ rows }: { rows: SubstrateActorTemplateEntry[] }) { + if (rows.length === 0) { + return ; + } + return ( +
+ + + + + + + + + + + {rows.map((t) => ( + + + + + + + ))} + +
TemplatePhaseWorker poolHarness
+
+ {t.namespace}/ + {t.name} +
+ {t.goldenActorId ? ( +
golden: {t.goldenActorId}
+ ) : null} +
+ + {t.workerPoolRef ?? "—"} + {t.harnessName ? ( + + {t.harnessName} + + ) : ( + + )} +
+
+ ); +} + +function ActorsTable({ rows, enabled }: { rows: SubstrateActorEntry[]; enabled: boolean }) { + if (!enabled) { + return ( + + ); + } + if (rows.length === 0) { + return ; + } + return ( +
+ + + + + + + + + + + {rows.map((a) => ( + + + + + + + ))} + +
ActorStatusTemplateWorker pod
{a.actorId} + + + {a.actorTemplateNamespace && a.actorTemplateName + ? `${a.actorTemplateNamespace}/${a.actorTemplateName}` + : "—"} + + {a.ateomPodName ? `${a.ateomPodNamespace ?? ""}/${a.ateomPodName}` : "—"} + {a.ateomPodIp ? ` · ${a.ateomPodIp}` : ""} +
+
+ ); +} + +function WorkersTable({ rows, enabled }: { rows: SubstrateWorkerEntry[]; enabled: boolean }) { + if (!enabled) { + return ; + } + if (rows.length === 0) { + return ; + } + return ( +
+ + + + + + + + + + {rows.map((w) => ( + + + + + + ))} + +
PodPoolActor
+ {w.workerNamespace}/{w.workerPod} + {w.workerPool}{w.actorId || "idle"}
+
+ ); +} + +export function SubstrateStatusView({ + status, + namespace, + onNamespaceChange, + isLoading, + loadError, + onRefresh, +}: SubstrateStatusViewProps) { + const summary = useMemo(() => { + if (!status) return null; + const running = status.actors.filter((a) => a.status.toLowerCase() === "running").length; + const readyTemplates = status.actorTemplates.filter((t) => t.phase?.toLowerCase() === "ready").length; + return { + pools: status.workerPools.length, + templates: status.actorTemplates.length, + readyTemplates, + actors: status.actors.length, + running, + workers: status.workers.length, + busyWorkers: status.workers.filter((w) => w.actorId).length, + }; + }, [status]); + + const handleRefresh = useCallback(() => { + void onRefresh(); + }, [onRefresh]); + + return ( +
+
+
+ + +
+ +
+ + {loadError ? ( + + + Could not load substrate status + {loadError} + + ) : null} + + {status?.ateApiError ? ( + + + ate-api partial data + {status.ateApiError} + + ) : null} + + {summary ? ( +
+ {[ + { label: "Worker pools", value: summary.pools }, + { label: "Templates ready", value: `${summary.readyTemplates}/${summary.templates}` }, + { label: "Actors running", value: `${summary.running}/${summary.actors}` }, + { label: "Workers busy", value: `${summary.busyWorkers}/${summary.workers}` }, + { label: "ate-api", value: status?.enabled ? "connected" : "off" }, + { label: "Scope", value: namespace || "all" }, + ].map((item) => ( +
+
{item.label}
+
{item.value}
+
+ ))} +
+ ) : null} + +
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+
+ ); +} diff --git a/ui/src/contexts/SubstrateFeaturesContext.tsx b/ui/src/contexts/SubstrateFeaturesContext.tsx new file mode 100644 index 0000000000..8b572d36e3 --- /dev/null +++ b/ui/src/contexts/SubstrateFeaturesContext.tsx @@ -0,0 +1,100 @@ +"use client"; + +import React, { + createContext, + useCallback, + useContext, + useEffect, + useMemo, + useState, + type ReactNode, +} from "react"; +import { getSubstrateStatus } from "@/app/actions/substrate"; + +export interface SubstrateFeaturesContextValue { + /** True when the controller has Agent Substrate configured (ate-api endpoint set). */ + enabled: boolean; + isLoading: boolean; + error: string | null; + refetch: () => Promise; +} + +const SubstrateFeaturesContext = createContext( + undefined, +); + +export function SubstrateFeaturesProvider({ children }: { children: ReactNode }) { + const [enabled, setEnabled] = useState(false); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(null); + + const refetch = useCallback(async () => { + setIsLoading(true); + setError(null); + try { + const result = await getSubstrateStatus(); + if (result.error || !result.data) { + setEnabled(false); + setError(result.error ?? "Failed to load substrate features"); + return; + } + setEnabled(result.data.enabled); + } catch (e) { + setEnabled(false); + setError(e instanceof Error ? 
e.message : "Failed to load substrate features"); + } finally { + setIsLoading(false); + } + }, []); + + useEffect(() => { + void refetch(); + }, [refetch]); + + const value = useMemo( + () => ({ enabled, isLoading, error, refetch }), + [enabled, isLoading, error, refetch], + ); + + return ( + {children} + ); +} + +export function useSubstrateFeatures(): SubstrateFeaturesContextValue { + const context = useContext(SubstrateFeaturesContext); + if (context === undefined) { + throw new Error("useSubstrateFeatures must be used within a SubstrateFeaturesProvider"); + } + return context; +} + +/** True after the initial probe finishes and substrate is enabled on the cluster. */ +export function useSubstrateEnabled(): boolean { + const { enabled, isLoading } = useSubstrateFeatures(); + return !isLoading && enabled; +} + +/** For Storybook/tests: inject feature flags without calling the API. */ +export function SubstrateFeaturesTestProvider({ + children, + enabled, + isLoading = false, +}: { + children: ReactNode; + enabled: boolean; + isLoading?: boolean; +}) { + const value = useMemo( + () => ({ + enabled, + isLoading, + error: null, + refetch: async () => {}, + }), + [enabled, isLoading], + ); + return ( + {children} + ); +} diff --git a/ui/src/lib/__tests__/openClawSandboxForm.test.ts b/ui/src/lib/__tests__/openClawSandboxForm.test.ts index 7402d618c4..512f4c610c 100644 --- a/ui/src/lib/__tests__/openClawSandboxForm.test.ts +++ b/ui/src/lib/__tests__/openClawSandboxForm.test.ts @@ -33,18 +33,27 @@ describe("validateOpenClawSandboxForm sections", () => { expect(r?.message).toContain("not a valid hostname"); }); - it("tags channel credential failures as channels", () => { - const row = newOpenClawChannelRow(); - row.name = "slack1"; - row.channelType = "slack"; - row.botToken = ""; - const r = validateOpenClawSandboxForm({ - openClaw: { ...defaultOpenClawSandboxFormSlice(), channels: [row] }, - modelRef: "ns/m1", - }); - expect(r?.section).toBe("channels"); - 
expect(r?.message).toContain("slack1"); + it("tags missing substrate gateway token as general", () => { + const r = validateOpenClawSandboxForm({ + openClaw: { ...defaultOpenClawSandboxFormSlice(), runtime: "substrate" }, + modelRef: "ns/m1", }); + expect(r?.section).toBe("general"); + expect(r?.message).toContain("gateway token"); + }); + + it("tags channel credential failures as channels", () => { + const row = newOpenClawChannelRow(); + row.name = "slack1"; + row.channelType = "slack"; + row.botToken = ""; + const r = validateOpenClawSandboxForm({ + openClaw: { ...defaultOpenClawSandboxFormSlice(), channels: [row] }, + modelRef: "ns/m1", + }); + expect(r?.section).toBe("channels"); + expect(r?.message).toContain("slack1"); + }); it("rejects duplicate channel binding names", () => { const row = newOpenClawChannelRow(); @@ -183,6 +192,29 @@ describe("openClawSandboxForm allowedDomains", () => { expect(draft.spec.backend).toBe("openclaw"); }); + it("writes substrate config without creating a WorkerPool", () => { + const draft = buildSandboxCRDraft({ + name: "h1", + namespace: "ns", + description: "", + modelRef: "m1", + openClaw: { + ...defaultOpenClawSandboxFormSlice(), + runtime: "substrate", + substrateGatewayToken: "tok", + substrateWorkerPoolRefName: "default-wp", + }, + }); + expect("error" in draft).toBe(false); + if ("error" in draft) return; + expect(draft.spec.substrate).toEqual({ + gatewayToken: "tok", + snapshotsConfig: { location: "gs://ate-snapshots/kagent/" }, + workerPoolRef: { name: "default-wp" }, + }); + expect(draft.spec.substrate).not.toHaveProperty("workerPool"); + }); + it("writes Hermes slack allowedUserIDs and home channel fields", () => { const row = newOpenClawChannelRow(); row.name = "slack-main"; diff --git a/ui/src/lib/agentHarness.ts b/ui/src/lib/agentHarness.ts index 279ef70abd..7154e625f1 100644 --- a/ui/src/lib/agentHarness.ts +++ b/ui/src/lib/agentHarness.ts @@ -1,5 +1,5 @@ import type { AgentResponse } from "@/types"; -import { 
isOpenshellSandboxRow } from "@/lib/openshellSandboxAgents"; +import { isHarnessListRow, isOpenshellSandboxRow, isSubstrateHarnessRow } from "@/lib/openshellSandboxAgents"; /** * Sandbox CR backends that identify an **agent harness** (declarative harness UX: channels, harness create flow, etc.) @@ -15,19 +15,30 @@ export function isAgentHarnessBackend(value: string | undefined | null): value i return AGENT_HARNESS_BACKENDS.some((b) => b === value); } +export function getAgentHarnessRuntime(item: AgentResponse): "openshell" | "substrate" | undefined { + if (!isHarnessListRow(item)) { + return undefined; + } + if (isSubstrateHarnessRow(item)) { + return "substrate"; + } + return "openshell"; +} + /** * When this agent row represents an agent harness, returns the AgentHarness CR backend discriminator (e.g. openclaw vs nemoclaw). * Use {@link isAgentHarness} for a simple boolean check. */ export function getAgentHarnessBackend(item: AgentResponse): AgentHarnessBackend | undefined { - if (!isOpenshellSandboxRow(item)) { + if (!isHarnessListRow(item)) { return undefined; } - const backend = item.openshellAgentHarness?.backend; + const backend = + item.substrateAgentHarness?.backend ?? item.openshellAgentHarness?.backend; return isAgentHarnessBackend(backend) ? backend : undefined; } -/** True when the agents-list row is an agent harness (OpenShell sandbox whose backend is a known harness runtime). */ +/** True when the agents-list row is an agent harness. */ export function isAgentHarness(item: AgentResponse): boolean { return getAgentHarnessBackend(item) !== undefined; } @@ -80,3 +91,7 @@ export function agentHarnessTypeLabel(backend: AgentHarnessBackend): string { } } } + +export function agentHarnessRuntimeLabel(runtime: "openshell" | "substrate"): string { + return runtime === "substrate" ? 
"Substrate" : "OpenShell"; +} diff --git a/ui/src/lib/openClawSandboxForm.ts b/ui/src/lib/openClawSandboxForm.ts index 50b0c83cac..33cf3ce49b 100644 --- a/ui/src/lib/openClawSandboxForm.ts +++ b/ui/src/lib/openClawSandboxForm.ts @@ -65,7 +65,15 @@ export function isClawHarnessBackend(backend: AgentHarnessSandboxBackend | undef return backend === "openclaw" || backend === "nemoclaw"; } +export type HarnessRuntimeForm = "openshell" | "substrate"; + export interface OpenClawSandboxFormSlice { + /** Harness control plane: OpenShell (default) or Agent Substrate. */ + runtime: HarnessRuntimeForm; + substrateWorkerPoolRefName: string; + substrateGatewayToken: string; + /** GCS snapshot prefix (gs://bucket/path/) — required for generated templates. */ + substrateSnapshotsLocation: string; /** Optional override for Sandbox.spec.image (OpenShell VM template image). Empty → controller default. */ image: string; channels: OpenClawChannelRow[]; @@ -80,6 +88,10 @@ export interface OpenClawSandboxFormSlice { export function defaultOpenClawSandboxFormSlice(): OpenClawSandboxFormSlice { return { + runtime: "openshell", + substrateWorkerPoolRefName: "", + substrateGatewayToken: "", + substrateSnapshotsLocation: "gs://ate-snapshots/kagent/", image: "", channels: [], allowedDomains: "", @@ -181,6 +193,9 @@ export function validateOpenClawSandboxForm(args: { if (!mr) { return openClawValidationFail("general", "Please select a model config for this sandbox."); } + if (args.openClaw.runtime === "substrate" && !args.openClaw.substrateGatewayToken.trim()) { + return openClawValidationFail("general", "Substrate gateway token is required."); + } for (const entry of trimSplitList(args.openClaw.allowedDomains)) { if (!isPlausibleAllowedDomainHost(entry)) { @@ -361,11 +376,36 @@ export function buildSandboxCRDraft(args: { } const backend = resolveSandboxBackend(args.backend); + const runtime = args.openClaw.runtime?.trim() || "openshell"; + const spec: Record = { backend, + runtime, 
modelConfigRef, }; + if (runtime === "substrate") { + const snapshots = args.openClaw.substrateSnapshotsLocation?.trim(); + if (!snapshots) { + return { error: "Substrate snapshots location (gs://…) is required." }; + } + const gatewayToken = args.openClaw.substrateGatewayToken?.trim(); + if (!gatewayToken) { + return { error: "Substrate gateway token is required." }; + } + const substrate: Record = { + gatewayToken, + snapshotsConfig: { location: snapshots }, + }; + const wpName = args.openClaw.substrateWorkerPoolRefName?.trim(); + if (wpName) { + substrate.workerPoolRef = { + name: wpName, + }; + } + spec.substrate = substrate; + } + const desc = args.description.trim(); if (desc) { spec.description = desc; diff --git a/ui/src/lib/openshellSandboxAgents.ts b/ui/src/lib/openshellSandboxAgents.ts index 64c7e45d9d..64f79770c2 100644 --- a/ui/src/lib/openshellSandboxAgents.ts +++ b/ui/src/lib/openshellSandboxAgents.ts @@ -5,6 +5,14 @@ export function isOpenshellSandboxRow(item: AgentResponse): boolean { return Boolean(item.openshellAgentHarness?.gatewaySandboxName); } +export function isSubstrateHarnessRow(item: AgentResponse): boolean { + return Boolean(item.substrateAgentHarness?.gatewayUIPath); +} + +export function isHarnessListRow(item: AgentResponse): boolean { + return isOpenshellSandboxRow(item) || isSubstrateHarnessRow(item); +} + export type OpenshellTerminalLinkParams = { gatewaySandboxName: string; namespace?: string; diff --git a/ui/src/types/index.ts b/ui/src/types/index.ts index b4a441ce8e..08de6dd75b 100644 --- a/ui/src/types/index.ts +++ b/ui/src/types/index.ts @@ -427,6 +427,70 @@ export interface OpenshellAgentHarnessListEntry { endpoint?: string; } +/** Merged into GET /api/agents when AgentHarness.spec.runtime is substrate. */ +export interface SubstrateAgentHarnessListEntry { + backend: string; + runtime: "substrate"; + actorId?: string; + /** Same-origin path for OpenClaw UI (HTTP + WebSocket via kagent proxy to actor pod IP). 
*/ + gatewayUIPath?: string; + modelConfigRef?: string; + backendRefId?: string; + endpoint?: string; +} + +/** GET /api/substrate/status — WorkerPools, ActorTemplates, and ate-api actors/workers. */ +export interface SubstrateStatusResponse { + enabled: boolean; + ateApiError?: string; + workerPools: SubstrateWorkerPoolEntry[]; + actorTemplates: SubstrateActorTemplateEntry[]; + actors: SubstrateActorEntry[]; + workers: SubstrateWorkerEntry[]; +} + +export interface SubstrateWorkerPoolEntry { + namespace: string; + name: string; + replicas: number; + ateomImage: string; +} + +export interface SubstrateActorTemplateEntry { + namespace: string; + name: string; + phase?: string; + goldenActorId?: string; + goldenSnapshot?: string; + workerPoolRef?: string; + harnessName?: string; + managedByKagent: boolean; +} + +export interface SubstrateActorEntry { + actorId: string; + status: string; + actorTemplateNamespace?: string; + actorTemplateName?: string; + ateomPodNamespace?: string; + ateomPodName?: string; + ateomPodIp?: string; + lastSnapshot?: string; + inProgressSnapshot?: string; + version?: number; +} + +export interface SubstrateWorkerEntry { + workerNamespace: string; + workerPool: string; + workerPod: string; + actorNamespace?: string; + actorTemplate?: string; + actorId?: string; + ip?: string; + version?: number; +} + export interface AgentResponse { id: number | string; agent: Agent; @@ -438,6 +502,7 @@ export interface AgentResponse { accepted: boolean; workloadMode?: "deployment" | "sandbox"; openshellAgentHarness?: OpenshellAgentHarnessListEntry; + substrateAgentHarness?: SubstrateAgentHarnessListEntry; } export interface RemoteMCPServer {