Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,18 @@ SKILLS_INIT_IMAGE_NAME ?= skills-init
CONTROLLER_IMAGE_TAG ?= $(VERSION)
UI_IMAGE_TAG ?= $(VERSION)
APP_IMAGE_TAG ?= $(VERSION)
APP_FULL_IMAGE_TAG ?= $(VERSION)-full
Comment thread
jmhbh marked this conversation as resolved.
KAGENT_ADK_IMAGE_TAG ?= $(VERSION)
KAGENT_ADK_FULL_IMAGE_TAG ?= $(VERSION)-full
GOLANG_ADK_IMAGE_TAG ?= $(VERSION)
GOLANG_ADK_FULL_IMAGE_TAG ?= $(VERSION)-full
SKILLS_INIT_IMAGE_TAG ?= $(VERSION)
CONTROLLER_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(CONTROLLER_IMAGE_NAME):$(CONTROLLER_IMAGE_TAG)
UI_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(UI_IMAGE_NAME):$(UI_IMAGE_TAG)
APP_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(APP_IMAGE_NAME):$(APP_IMAGE_TAG)
APP_FULL_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(APP_IMAGE_NAME):$(APP_FULL_IMAGE_TAG)
KAGENT_ADK_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(KAGENT_ADK_IMAGE_NAME):$(KAGENT_ADK_IMAGE_TAG)
KAGENT_ADK_FULL_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(KAGENT_ADK_IMAGE_NAME):$(KAGENT_ADK_FULL_IMAGE_TAG)
GOLANG_ADK_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(GOLANG_ADK_IMAGE_NAME):$(GOLANG_ADK_IMAGE_TAG)
GOLANG_ADK_FULL_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(GOLANG_ADK_IMAGE_NAME):$(GOLANG_ADK_FULL_IMAGE_TAG)
SKILLS_INIT_IMG ?= $(DOCKER_REGISTRY)/$(DOCKER_REPO)/$(SKILLS_INIT_IMAGE_NAME):$(SKILLS_INIT_IMAGE_TAG)
Expand Down Expand Up @@ -197,12 +201,14 @@ build-all: buildx-create

.PHONY: build
build: ## Build and push all component images
build: buildx-create build-ui build-skills-init build-golang-adk build-golang-adk-full build-app build-controller
build: buildx-create build-ui build-skills-init build-golang-adk build-golang-adk-full build-app build-app-full build-controller
@echo "Build completed successfully."
@echo "Controller Image: $(CONTROLLER_IMG)"
@echo "UI Image: $(UI_IMG)"
@echo "App Image: $(APP_IMG)"
@echo "App Full Image: $(APP_FULL_IMG)"
@echo "Kagent ADK Image: $(KAGENT_ADK_IMG)"
@echo "Kagent ADK Full Image: $(KAGENT_ADK_FULL_IMG)"
@echo "Golang ADK Image: $(GOLANG_ADK_IMG)"
@echo "Golang ADK Full Image: $(GOLANG_ADK_FULL_IMG)"
@echo "Skills Init Image: $(SKILLS_INIT_IMG)"
Expand Down Expand Up @@ -230,7 +236,9 @@ build-img-versions: ## Print the fully-qualified image tags for all components
@echo controller=$(CONTROLLER_IMG)
@echo ui=$(UI_IMG)
@echo app=$(APP_IMG)
@echo app-full=$(APP_FULL_IMG)
@echo kagent-adk=$(KAGENT_ADK_IMG)
@echo kagent-adk-full=$(KAGENT_ADK_FULL_IMG)
@echo golang-adk=$(GOLANG_ADK_IMG)
@echo golang-adk-full=$(GOLANG_ADK_FULL_IMG)
@echo skills-init=$(SKILLS_INIT_IMG)
Expand All @@ -242,10 +250,11 @@ controller-manifests: ## Regenerate CRD manifests and copy them into the Helm ch

.PHONY: build-controller
build-controller: ## Build and push the controller image (embeds agent runtime digests via scripts/controller-digest-ldflags.sh)
build-controller: buildx-create controller-manifests build-app build-golang-adk build-golang-adk-full
build-controller: buildx-create controller-manifests build-app build-app-full build-golang-adk build-golang-adk-full
@set -e; \
DIGEST_LDFLAGS=$$(CONTAINER_RUNTIME=$(CONTAINER_RUNTIME) \
APP_IMG=$(APP_IMG) \
APP_FULL_IMG=$(APP_FULL_IMG) \
GOLANG_ADK_IMG=$(GOLANG_ADK_IMG) \
GOLANG_ADK_FULL_IMG=$(GOLANG_ADK_FULL_IMG) \
./scripts/controller-digest-ldflags.sh); \
Expand All @@ -268,11 +277,23 @@ build-kagent-adk: buildx-create
$(DOCKER_PUSH) $(KAGENT_ADK_IMG)

.PHONY: build-app
build-app: ## Build and push the app image (depends on kagent-adk)
build-app: ## Build and push the app image (distroless slim; depends on kagent-adk)
build-app: buildx-create build-kagent-adk
$(DOCKER_BUILDER) $(DOCKER_BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) --build-arg KAGENT_ADK_VERSION=$(KAGENT_ADK_IMAGE_TAG) --build-arg DOCKER_REGISTRY=$(DOCKER_REGISTRY) -t $(APP_IMG) -f python/Dockerfile.app ./python
$(DOCKER_PUSH) $(APP_IMG)

# Full Python kagent ADK image: built from python/Dockerfile.full with ./python as the
# build context, tagged as $(KAGENT_ADK_FULL_IMG), then handed to $(DOCKER_PUSH).
.PHONY: build-kagent-adk-full
build-kagent-adk-full: ## Build and push the full Python kagent ADK image (includes sandbox runtime)
build-kagent-adk-full: buildx-create
	$(DOCKER_BUILDER) $(DOCKER_BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) \
		-t $(KAGENT_ADK_FULL_IMG) \
		-f python/Dockerfile.full \
		./python
	$(DOCKER_PUSH) $(KAGENT_ADK_FULL_IMG)

# Full app image: same Dockerfile (python/Dockerfile.app) as build-app, but the
# KAGENT_ADK_VERSION build arg is set to the "-full" ADK tag and the result is tagged
# as $(APP_FULL_IMG). build-kagent-adk-full is a prerequisite, so it runs first.
.PHONY: build-app-full
build-app-full: ## Build and push the full app image (sandbox runtime; depends on kagent-adk-full)
build-app-full: buildx-create build-kagent-adk-full
	$(DOCKER_BUILDER) $(DOCKER_BUILD_ARGS) $(TOOLS_IMAGE_BUILD_ARGS) \
		--build-arg KAGENT_ADK_VERSION=$(KAGENT_ADK_FULL_IMAGE_TAG) \
		--build-arg DOCKER_REGISTRY=$(DOCKER_REGISTRY) \
		-t $(APP_FULL_IMG) \
		-f python/Dockerfile.app \
		./python
	$(DOCKER_PUSH) $(APP_FULL_IMG)

.PHONY: build-golang-adk
build-golang-adk: ## Build and push the Go ADK image
build-golang-adk: buildx-create
Expand Down
2 changes: 0 additions & 2 deletions go/api/config/crd/bases/kagent.dev_sandboxagents.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11339,8 +11339,6 @@ spec:
rule: '!has(self.skills) || self.platform != ''substrate'''
- message: spec.substrate may only be set when spec.platform is substrate
rule: '!has(self.substrate) || self.platform == ''substrate'''
- message: BYO agents are not supported when spec.platform is substrate
rule: '!has(self.type) || self.type != ''BYO'' || self.platform != ''substrate'''
- message: type must be specified
rule: has(self.type)
- message: type must be either Declarative or BYO
Expand Down
8 changes: 7 additions & 1 deletion go/api/v1alpha2/agent_runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@ func TestEffectiveDeclarativeRuntimeForAgent(t *testing.T) {
require.Equal(t, DeclarativeRuntime_Python, EffectiveDeclarativeRuntimeForAgent(agent))
})

t.Run("SandboxAgent on substrate uses Go", func(t *testing.T) {
t.Run("SandboxAgent on substrate honors configured runtime", func(t *testing.T) {
sa := &SandboxAgent{Spec: SandboxAgentSpec{AgentSpec: substrateSpec, Platform: SandboxPlatformSubstrate}}
require.Equal(t, DeclarativeRuntime_Python, EffectiveDeclarativeRuntimeForAgent(sa))
})

t.Run("SandboxAgent on substrate honors Go runtime when set", func(t *testing.T) {
goSpec := AgentSpec{Type: AgentType_Declarative, Declarative: &DeclarativeAgentSpec{Runtime: DeclarativeRuntime_Go}}
sa := &SandboxAgent{Spec: SandboxAgentSpec{AgentSpec: goSpec, Platform: SandboxPlatformSubstrate}}
require.Equal(t, DeclarativeRuntime_Go, EffectiveDeclarativeRuntimeForAgent(sa))
})

Expand Down
23 changes: 11 additions & 12 deletions go/api/v1alpha2/agent_spec_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ package v1alpha2
import "fmt"

const (
substrateSandboxSkillsUnsupportedMsg = "spec.skills is not supported when spec.platform is substrate"
substrateSandboxPythonRuntimeUnsupportedMsg = "spec.declarative.runtime must be \"go\" when spec.platform is substrate"
substrateSandboxBYOUnsupportedMsg = "BYO agents are not supported when spec.platform is substrate"
substrateSandboxSkillsUnsupportedMsg = "spec.skills is not supported when spec.platform is substrate"
substrateSandboxBYOMissingCommandMsg = "BYO agents on substrate must set spec.byo.deployment.cmd (substrate does not fall back to the image entrypoint)"
)

// AgentSpecHasSkills reports whether the spec configures any skill sources.
Expand All @@ -18,23 +17,23 @@ func AgentSpecHasSkills(spec *AgentSpec) bool {
}

// ValidateSubstrateSandboxAgentSpec rejects substrate sandbox configurations that kagent
// does not support yet (for example declarative skills on Agent Substrate).
// does not support yet (for example declarative skills on Agent Substrate). Declarative
// Python/Go and BYO (Go/Python) agents are supported; BYO agents must provide an explicit
// command because substrate copies the container Command verbatim with no image-entrypoint
// fallback.
func ValidateSubstrateSandboxAgentSpec(agent *SandboxAgent) error {
if agent == nil || AgentSandboxPlatform(agent) != SandboxPlatformSubstrate {
return nil
}
spec := agent.GetAgentSpec()
if spec.Type == AgentType_BYO {
return fmt.Errorf("%s", substrateSandboxBYOUnsupportedMsg)
}
if AgentSpecHasSkills(spec) {
return fmt.Errorf("%s", substrateSandboxSkillsUnsupportedMsg)
}
if spec.Type == AgentType_Declarative &&
spec.Declarative != nil &&
spec.Declarative.Runtime != "" &&
spec.Declarative.Runtime != DeclarativeRuntime_Go {
return fmt.Errorf("%s", substrateSandboxPythonRuntimeUnsupportedMsg)
if spec.Type == AgentType_BYO {
dep := spec.BYO
if dep == nil || dep.Deployment == nil || dep.Deployment.Cmd == nil || *dep.Deployment.Cmd == "" {
return fmt.Errorf("%s", substrateSandboxBYOMissingCommandMsg)
}
Comment thread
jmhbh marked this conversation as resolved.
}
return nil
}
26 changes: 19 additions & 7 deletions go/api/v1alpha2/agent_spec_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func TestValidateSubstrateSandboxAgentSpec(t *testing.T) {
require.Contains(t, err.Error(), substrateSandboxSkillsUnsupportedMsg)
})

t.Run("rejects python runtime on substrate platform", func(t *testing.T) {
t.Run("allows python runtime on substrate platform", func(t *testing.T) {
agent := &SandboxAgent{
Spec: SandboxAgentSpec{
Platform: SandboxPlatformSubstrate,
Expand All @@ -48,24 +48,36 @@ func TestValidateSubstrateSandboxAgentSpec(t *testing.T) {
},
},
}
err := ValidateSubstrateSandboxAgentSpec(agent)
require.Error(t, err)
require.Contains(t, err.Error(), substrateSandboxPythonRuntimeUnsupportedMsg)
require.NoError(t, ValidateSubstrateSandboxAgentSpec(agent))
})

t.Run("rejects BYO agents on substrate platform", func(t *testing.T) {
t.Run("rejects BYO agents without an explicit command on substrate platform", func(t *testing.T) {
agent := &SandboxAgent{
Spec: SandboxAgentSpec{
Platform: SandboxPlatformSubstrate,
AgentSpec: AgentSpec{
Type: AgentType_BYO,
BYO: &BYOAgentSpec{},
BYO: &BYOAgentSpec{Deployment: &ByoDeploymentSpec{Image: "example/agent:latest"}},
},
},
}
err := ValidateSubstrateSandboxAgentSpec(agent)
require.Error(t, err)
require.Contains(t, err.Error(), substrateSandboxBYOUnsupportedMsg)
require.Contains(t, err.Error(), substrateSandboxBYOMissingCommandMsg)
})

t.Run("allows BYO agents with an explicit command on substrate platform", func(t *testing.T) {
cmd := "/app"
agent := &SandboxAgent{
Spec: SandboxAgentSpec{
Platform: SandboxPlatformSubstrate,
AgentSpec: AgentSpec{
Type: AgentType_BYO,
BYO: &BYOAgentSpec{Deployment: &ByoDeploymentSpec{Image: "example/agent:latest", Cmd: &cmd}},
},
},
}
require.NoError(t, ValidateSubstrateSandboxAgentSpec(agent))
})

t.Run("allows BYO agents on agent-sandbox platform", func(t *testing.T) {
Expand Down
12 changes: 3 additions & 9 deletions go/api/v1alpha2/agent_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,16 +282,10 @@ func EffectiveDeclarativeRuntime(spec *AgentSpec) DeclarativeRuntime {
}

// EffectiveDeclarativeRuntimeForAgent returns the runtime for a reconciled agent object.
// Substrate SandboxAgents always use Go; regular Agents honor spec.declarative.runtime.
// All agents (including substrate SandboxAgents) honor spec.declarative.runtime, defaulting
// to Python when unset.
func EffectiveDeclarativeRuntimeForAgent(agent AgentObject) DeclarativeRuntime {
Comment thread
jmhbh marked this conversation as resolved.
Outdated
spec := agent.GetAgentSpec()
if agent.GetWorkloadMode() == WorkloadModeSandbox &&
AgentSandboxPlatform(agent) == SandboxPlatformSubstrate &&
spec != nil &&
spec.Type == AgentType_Declarative {
return DeclarativeRuntime_Go
}
return EffectiveDeclarativeRuntime(spec)
return EffectiveDeclarativeRuntime(agent.GetAgentSpec())
}

// NetworkConfig configures outbound network access for sandboxed execution paths.
Expand Down
1 change: 0 additions & 1 deletion go/api/v1alpha2/sandboxagent_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ type SandboxAgent struct {

// +kubebuilder:validation:XValidation:rule="!has(self.skills) || self.platform != 'substrate'",message="spec.skills is not supported when spec.platform is substrate"
// +kubebuilder:validation:XValidation:rule="!has(self.substrate) || self.platform == 'substrate'",message="spec.substrate may only be set when spec.platform is substrate"
// +kubebuilder:validation:XValidation:rule="!has(self.type) || self.type != 'BYO' || self.platform != 'substrate'",message="BYO agents are not supported when spec.platform is substrate"
type SandboxAgentSpec struct {
AgentSpec `json:",inline"`

Expand Down
36 changes: 36 additions & 0 deletions go/core/internal/controller/sandboxagent_substrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,45 @@ func (r *SandboxAgentController) reconcileSubstrateSandboxAgent(ctx context.Cont
}
return ctrl.Result{Requeue: true}, nil
}

if !r.reconcileSubstrateBlueGreen(ctx, sa) {
// Retiring superseded templates / their goldens advances one ate-api step per pass;
// requeue until the rollout converges so old templates and goldens are cleaned up.
return ctrl.Result{RequeueAfter: agentHarnessNotReadyRequeue}, nil
}
return ctrl.Result{}, nil
}

// reconcileSubstrateBlueGreen drives the cleanup half of a config-change rollout: it retires
// ActorTemplates superseded by a newer Ready one (deleting each old template with its now-Suspended
// golden), and best-effort reaps stale per-session actors. The new template keeps serving the old
// golden until its own golden is Ready (see ResolveCurrentActorTemplate), so this never causes
// downtime. Returns true when nothing more remains to retire. Errors are logged, not surfaced, so a
// transient ate-api failure doesn't wedge reconciliation.
func (r *SandboxAgentController) reconcileSubstrateBlueGreen(ctx context.Context, sa *v1alpha2.SandboxAgent) bool {
Comment thread
jmhbh marked this conversation as resolved.
Outdated
if r.SubstrateLifecycle == nil {
return true
}
retireDone, err := r.SubstrateLifecycle.RetireSupersededTemplates(ctx, sa)
if err != nil {
sandboxAgentControllerLog.Info("retiring superseded substrate templates failed (will retry)",
"sandboxagent", sa.Namespace+"/"+sa.Name, "err", err.Error())
return true
}

// Best-effort reap of stale session actors keyed to a previous config. Not required for
// correctness (config-hashed ids mean they're never reused), so failures don't requeue.
if r.SubstrateActorBackend != nil {
Comment thread
jmhbh marked this conversation as resolved.
Outdated
if active, err := substrate.ResolveCurrentActorTemplate(ctx, r.Client, sa.Namespace, sa.Name); err == nil && active != nil {
if _, err := r.SubstrateActorBackend.ReapStaleSessionActors(ctx, sa, active.Name); err != nil {
sandboxAgentControllerLog.Info("reap of stale substrate session actors failed (will retry)",
"sandboxagent", sa.Namespace+"/"+sa.Name, "err", err.Error())
}
}
}
return retireDone
}

func (r *SandboxAgentController) reconcileSubstrateSandboxAgentDelete(ctx context.Context, sa *v1alpha2.SandboxAgent) (ctrl.Result, error) {
if !controllerutil.ContainsFinalizer(sa, sandboxAgentSubstrateFinalizer) {
return ctrl.Result{}, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,11 @@ var DefaultImageConfig = ImageConfig{
Repository: "kagent-dev/kagent/app",
}

// PythonADKImageDigest, GoADKImageDigest, and GoADKFullImageDigest are set at
// controller link time from the pushed runtime image manifest digests.
// PythonADKImageDigest, PythonADKFullImageDigest, GoADKImageDigest, and GoADKFullImageDigest
// are set at controller link time from the pushed runtime image manifest digests. The "full"
// variants bundle the sandbox runtime (code execution / bash tools); the slim variants do not.
var PythonADKImageDigest string
var PythonADKFullImageDigest string
var GoADKImageDigest string
var GoADKFullImageDigest string

Expand Down
17 changes: 12 additions & 5 deletions go/core/internal/controller/translator/agent/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,20 @@ func validateExtraContainers(containers []corev1.Container) error {
return nil
}

func resolvePythonRuntimeImage(registry string) (string, error) {
func resolvePythonRuntimeImage(registry string, full bool) (string, error) {
repo := DefaultImageConfig.Repository
if d := normalizeImageDigest(PythonADKImageDigest); d != "" {
digest := PythonADKImageDigest
imageLabel := "app"
if full {
digest = PythonADKFullImageDigest
imageLabel = "app-full"
}
if d := normalizeImageDigest(digest); d != "" {
return fmt.Sprintf("%s/%s@%s", registry, repo, d), nil
}
return "", fmt.Errorf(
"app image digest is not set at link time; rebuild the controller after pushing agent runtime images",
"%s image digest is not set at link time; rebuild the controller after pushing agent runtime images",
imageLabel,
)
}

Expand Down Expand Up @@ -182,7 +189,7 @@ func resolveInlineDeployment(agent v1alpha2.AgentObject, mdd *modelDeploymentDat
}

var image string
full := runtime == v1alpha2.DeclarativeRuntime_Go && needsSRTSettings(agent, specRef.Sandbox)
full := needsSRTSettings(agent, specRef.Sandbox)
switch runtime {
case v1alpha2.DeclarativeRuntime_Go:
var err error
Expand All @@ -192,7 +199,7 @@ func resolveInlineDeployment(agent v1alpha2.AgentObject, mdd *modelDeploymentDat
}
default:
var err error
image, err = resolvePythonRuntimeImage(registry)
image, err = resolvePythonRuntimeImage(registry, full)
if err != nil {
return nil, err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

func TestMain(m *testing.M) {
translator.PythonADKImageDigest = "sha256:test-app"
translator.PythonADKFullImageDigest = "sha256:test-app-full"
translator.GoADKImageDigest = "sha256:test-go-base"
translator.GoADKFullImageDigest = "sha256:test-go-full"
os.Exit(m.Run())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,33 @@ func TestResolveGoRuntimeImageWithoutDigest(t *testing.T) {

func TestResolvePythonRuntimeImageWithDigest(t *testing.T) {
original := PythonADKImageDigest
originalFull := PythonADKFullImageDigest
t.Cleanup(func() {
PythonADKImageDigest = original
PythonADKFullImageDigest = originalFull
})
PythonADKImageDigest = "sha256:app-digest"
PythonADKFullImageDigest = "sha256:app-full-digest"

got, err := resolvePythonRuntimeImage("cr.kagent.dev")
got, err := resolvePythonRuntimeImage("cr.kagent.dev", false)
require.NoError(t, err)
require.Equal(t, "cr.kagent.dev/kagent-dev/kagent/app@sha256:app-digest", got)

gotFull, err := resolvePythonRuntimeImage("cr.kagent.dev", true)
require.NoError(t, err)
require.Equal(t, "cr.kagent.dev/kagent-dev/kagent/app@sha256:app-full-digest", gotFull)
}

// TestResolvePythonFullRuntimeImageWithoutDigest verifies that asking for the full Python
// runtime image while PythonADKFullImageDigest is empty yields an error that names "app-full".
func TestResolvePythonFullRuntimeImageWithoutDigest(t *testing.T) {
	// Clear the package-level digest for this test and put the previous value back afterwards.
	saved := PythonADKFullImageDigest
	PythonADKFullImageDigest = ""
	t.Cleanup(func() { PythonADKFullImageDigest = saved })

	const wantFull = true
	_, resolveErr := resolvePythonRuntimeImage("cr.kagent.dev", wantFull)

	require.Error(t, resolveErr)
	require.Contains(t, resolveErr.Error(), "app-full")
}

func TestPythonADKImageDigestSupportsLinkerFlag(t *testing.T) {
Expand All @@ -103,7 +122,7 @@ func TestResolvePythonRuntimeImageWithoutDigest(t *testing.T) {
})
PythonADKImageDigest = ""

_, err := resolvePythonRuntimeImage("cr.kagent.dev")
_, err := resolvePythonRuntimeImage("cr.kagent.dev", false)
require.Error(t, err)
require.Contains(t, err.Error(), "app")
}
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@
"value": "/config/srt-settings.json"
}
],
"image": "cr.kagent.dev/kagent-dev/kagent/app@sha256:test-app",
"image": "cr.kagent.dev/kagent-dev/kagent/app@sha256:test-app-full",
"imagePullPolicy": "IfNotPresent",
"name": "kagent",
"ports": [
Expand Down
Loading
Loading