-
Notifications
You must be signed in to change notification settings - Fork 1
PAAL-123 kubernetes health probes #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Sample Agent resource that declares a single OpenAI protocol on port 8000. | ||
apiVersion: runtime.agentic-layer.ai/v1alpha1 | ||
kind: Agent | ||
metadata: | ||
labels: | ||
app.kubernetes.io/name: agent-runtime-operator | ||
app.kubernetes.io/managed-by: kustomize | ||
name: openai-agent | ||
spec: | ||
framework: custom | ||
# NOTE(review): this image reference looks like a placeholder - confirm it is pullable before shipping it as a sample. | ||
image: ghcr.io/example/openai-agent:latest | ||
protocols: | ||
- type: OpenAI # Will generate TCP readiness probe on port 8000 | ||
port: 8000 | ||
replicas: 1 | ||
env: | ||
# The API key is read from the "api-key" entry of the "openai-secret" Secret. | ||
- name: OPENAI_API_KEY | ||
valueFrom: | ||
secretKeyRef: | ||
name: openai-secret | ||
key: api-key | ||
# PORT is set to the same value as the protocol port declared above. | ||
- name: PORT | ||
value: "8000" |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,8 @@ import ( | |
|
||
const ( | ||
// agentContainerName is the name assigned to the agent container in the Deployment pod spec. | ||
agentContainerName = "agent" | ||
// agentCardEndpoint is the agent card path used as the base of the A2A readiness check path. | ||
agentCardEndpoint = "/.well-known/agent-card.json" | ||
// a2AProtocol is the protocol Type value that marks a protocol entry as A2A. | ||
a2AProtocol = "A2A" | ||
) | ||
|
||
// AgentReconciler reconciles an Agent object | ||
|
@@ -356,6 +358,99 @@ func (r *AgentReconciler) mergeEnvironmentVariables(templateEnvVars, userEnvVars | |
return result | ||
} | ||
|
||
// hasA2AProtocol reports whether at least one entry in agent.Spec.Protocols has the A2A type. | ||
func (r *AgentReconciler) hasA2AProtocol(agent *runtimev1alpha1.Agent) bool { | ||
protocols := agent.Spec.Protocols | ||
for i := range protocols { | ||
if protocols[i].Type == a2AProtocol { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
// hasOpenAIProtocol reports whether at least one entry in agent.Spec.Protocols has the "OpenAI" type. | ||
func (r *AgentReconciler) hasOpenAIProtocol(agent *runtimev1alpha1.Agent) bool { | ||
protocols := agent.Spec.Protocols | ||
for i := range protocols { | ||
if protocols[i].Type == "OpenAI" { | ||
return true | ||
} | ||
} | ||
return false | ||
} | ||
|
||
// getA2AProtocol returns a pointer to a copy of the first A2A entry in agent.Spec.Protocols, | ||
// or nil when the agent declares no A2A protocol. | ||
func (r *AgentReconciler) getA2AProtocol(agent *runtimev1alpha1.Agent) *runtimev1alpha1.AgentProtocol { | ||
for i := range agent.Spec.Protocols { | ||
if agent.Spec.Protocols[i].Type != a2AProtocol { | ||
continue | ||
} | ||
// Copy the element so the returned pointer does not alias the spec slice. | ||
found := agent.Spec.Protocols[i] | ||
return &found | ||
} | ||
return nil | ||
} | ||
|
||
// getOpenAIProtocol returns a pointer to a copy of the first "OpenAI" entry in agent.Spec.Protocols, | ||
// or nil when the agent declares no OpenAI protocol. | ||
func (r *AgentReconciler) getOpenAIProtocol(agent *runtimev1alpha1.Agent) *runtimev1alpha1.AgentProtocol { | ||
for i := range agent.Spec.Protocols { | ||
if agent.Spec.Protocols[i].Type != "OpenAI" { | ||
continue | ||
} | ||
// Copy the element so the returned pointer does not alias the spec slice. | ||
found := agent.Spec.Protocols[i] | ||
return &found | ||
} | ||
return nil | ||
} | ||
|
||
// getProtocolPort returns protocol.Port, falling back to 8000 when protocol is nil | ||
// or its Port field is zero. | ||
func (r *AgentReconciler) getProtocolPort(protocol *runtimev1alpha1.AgentProtocol) int32 { | ||
if protocol == nil || protocol.Port == 0 { | ||
// No usable port configured: use the default. | ||
return int32(8000) | ||
} | ||
return protocol.Port | ||
} | ||
|
||
// getA2AHealthPath returns the A2A health check path based on protocol configuration | ||
func (r *AgentReconciler) getA2AHealthPath(protocol *runtimev1alpha1.AgentProtocol) string { | ||
basePath := agentCardEndpoint | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
if protocol != nil && protocol.Path != "" { | ||
// If path is explicitly specified, use it | ||
// Special case: "/" means root path (no prefix) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't see a need for a special case here. The default is an empty path. If a user explicitly configures |
||
if protocol.Path == "/" { | ||
return basePath | ||
} | ||
return protocol.Path + basePath | ||
} | ||
// Default for agents without protocol specification or path | ||
return "/a2a" + basePath | ||
qa-jil-kamerling marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// generateReadinessProbe selects the readiness probe for the agent container based on the | ||
// protocols declared on the agent. Priority: A2A > OpenAI > none. | ||
// | ||
// When an A2A protocol is present, the probe comes from buildA2AReadinessProbe using the | ||
// A2A health path and port. Otherwise, when an OpenAI protocol is present, the probe comes | ||
// from buildOpenAIReadinessProbe using the OpenAI port. With neither protocol it returns | ||
// nil, so no readiness probe is set on the container. | ||
func (r *AgentReconciler) generateReadinessProbe(agent *runtimev1alpha1.Agent) *corev1.Probe { | ||
switch { | ||
case r.hasA2AProtocol(agent): | ||
// Use the A2A agent card endpoint for the health check. | ||
a2aProtocol := r.getA2AProtocol(agent) | ||
return r.buildA2AReadinessProbe(r.getA2AHealthPath(a2aProtocol), r.getProtocolPort(a2aProtocol)) | ||
case r.hasOpenAIProtocol(agent): | ||
// Use a TCP probe for OpenAI-only agents. | ||
return r.buildOpenAIReadinessProbe(r.getProtocolPort(r.getOpenAIProtocol(agent))) | ||
default: | ||
// No recognized protocols - no readiness probe. | ||
return nil | ||
} | ||
} | ||
|
||
// createDeploymentForAgent creates a deployment for the given Agent | ||
func (r *AgentReconciler) createDeploymentForAgent(agent *runtimev1alpha1.Agent, deploymentName string) (*appsv1.Deployment, error) { | ||
replicas := agent.Spec.Replicas | ||
|
@@ -416,11 +511,12 @@ func (r *AgentReconciler) createDeploymentForAgent(agent *runtimev1alpha1.Agent, | |
Spec: corev1.PodSpec{ | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: agentContainerName, | ||
Image: agentImage, | ||
Ports: containerPorts, | ||
Env: allEnvVars, | ||
EnvFrom: agent.Spec.EnvFrom, | ||
Name: agentContainerName, | ||
Image: agentImage, | ||
Ports: containerPorts, | ||
Env: allEnvVars, | ||
EnvFrom: agent.Spec.EnvFrom, | ||
ReadinessProbe: r.generateReadinessProbe(agent), | ||
}, | ||
}, | ||
}, | ||
|
@@ -524,6 +620,11 @@ func (r *AgentReconciler) needsDeploymentUpdate(existing, desired *appsv1.Deploy | |
return true | ||
} | ||
|
||
// Check readiness probes | ||
if !r.probesEqual(existingContainer.ReadinessProbe, desiredContainer.ReadinessProbe) { | ||
return true | ||
} | ||
|
||
return false | ||
} | ||
|
||
|
@@ -559,6 +660,7 @@ func (r *AgentReconciler) updateAgentContainer(deployment, desiredDeployment *ap | |
agentContainer.Ports = desiredAgentContainer.Ports | ||
agentContainer.Env = desiredAgentContainer.Env | ||
agentContainer.EnvFrom = desiredAgentContainer.EnvFrom | ||
agentContainer.ReadinessProbe = desiredAgentContainer.ReadinessProbe | ||
|
||
return nil | ||
} | ||
|
@@ -663,6 +765,11 @@ func (r *AgentReconciler) servicePortsEqual(existing, desired []corev1.ServicePo | |
return true | ||
} | ||
|
||
// probesEqual compares two readiness probes for equality | ||
func (r *AgentReconciler) probesEqual(existing, desired *corev1.Probe) bool { | ||
qa-jil-kamerling marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return equality.ProbesEqual(existing, desired) | ||
} | ||
|
||
// sanitizeAgentName sanitizes the agent name to meet environment variable naming requirements. | ||
// Environment variable names should start with a letter (a-z, A-Z) or underscore (_), | ||
// and can only contain letters, digits (0-9), and underscores. | ||
|
@@ -709,7 +816,7 @@ func (r *AgentReconciler) sanitizeAgentName(name string) string { | |
func (r *AgentReconciler) buildA2AAgentCardUrl(agent *runtimev1alpha1.Agent) string { | ||
// Find the A2A protocol | ||
for _, protocol := range agent.Spec.Protocols { | ||
if protocol.Type == "A2A" { | ||
if protocol.Type == a2AProtocol { | ||
return fmt.Sprintf("http://%s.%s.svc.cluster.local:%d%s", | ||
agent.Name, agent.Namespace, protocol.Port, protocol.Path) | ||
} | ||
|
@@ -726,3 +833,36 @@ func (r *AgentReconciler) SetupWithManager(mgr ctrl.Manager) error { | |
Named("agent"). | ||
Complete(r) | ||
} | ||
|
||
// buildOpenAIReadinessProbe returns a readiness probe that opens a TCP socket on the given port. | ||
// Timing: 60s initial delay, 10s period, 3s timeout, 1 success to pass, 3 failures to fail. | ||
func (r *AgentReconciler) buildOpenAIReadinessProbe(port int32) *corev1.Probe { | ||
probe := &corev1.Probe{ | ||
InitialDelaySeconds: 60, | ||
PeriodSeconds: 10, | ||
TimeoutSeconds: 3, | ||
SuccessThreshold: 1, | ||
FailureThreshold: 3, | ||
} | ||
probe.ProbeHandler.TCPSocket = &corev1.TCPSocketAction{ | ||
Port: intstr.FromInt(int(port)), | ||
} | ||
return probe | ||
} | ||
|
||
// buildA2AReadinessProbe returns a readiness probe that issues an HTTP GET for healthPath on the given port. | ||
// Timing: 60s initial delay, 10s period, 3s timeout, 1 success to pass, 3 failures to fail. | ||
func (r *AgentReconciler) buildA2AReadinessProbe(healthPath string, port int32) *corev1.Probe { | ||
probe := &corev1.Probe{ | ||
InitialDelaySeconds: 60, | ||
PeriodSeconds: 10, | ||
TimeoutSeconds: 3, | ||
SuccessThreshold: 1, | ||
FailureThreshold: 3, | ||
} | ||
probe.ProbeHandler.HTTPGet = &corev1.HTTPGetAction{ | ||
Path: healthPath, | ||
Port: intstr.FromInt(int(port)), | ||
} | ||
return probe | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There it is! 🙈 I've looked for the source of the openai-agent recently in our cluster...
Please do not include invalid agents in the sample directory. It should only include working examples.