diff --git a/cmd/agent-sandbox-controller/main.go b/cmd/agent-sandbox-controller/main.go index 56c21cb23..cf627fd4c 100644 --- a/cmd/agent-sandbox-controller/main.go +++ b/cmd/agent-sandbox-controller/main.go @@ -61,6 +61,7 @@ func main() { var sandboxClaimConcurrentWorkers int var sandboxWarmPoolConcurrentWorkers int var sandboxTemplateConcurrentWorkers int + var clusterDomain string flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", true, @@ -86,6 +87,7 @@ func main() { flag.IntVar(&sandboxClaimConcurrentWorkers, "sandbox-claim-concurrent-workers", 1, "Max concurrent reconciles for the SandboxClaim controller") flag.IntVar(&sandboxWarmPoolConcurrentWorkers, "sandbox-warm-pool-concurrent-workers", 1, "Max concurrent reconciles for the SandboxWarmPool controller") flag.IntVar(&sandboxTemplateConcurrentWorkers, "sandbox-template-concurrent-workers", 1, "Max concurrent reconciles for the SandboxTemplate controller") + flag.StringVar(&clusterDomain, "cluster-domain", "cluster.local", "The Kubernetes cluster domain used to construct service FQDNs (e.g. cluster.local)") opts := zap.Options{ Development: true, } @@ -112,6 +114,11 @@ func main() { setupLog.Info("Warning: total concurrent workers exceeds 1000, which could lead to resource exhaustion", "total", totalWorkers) } + if clusterDomain == "" { + setupLog.Error(nil, "cluster-domain must not be empty") + os.Exit(1) + } + if kubeAPIBurst <= 0 { setupLog.Error(nil, "kube-api-burst must be greater than 0") os.Exit(1) @@ -213,9 +220,10 @@ func main() { asmetrics.RegisterSandboxCollector(mgr.GetClient(), mgr.GetLogger().WithName("sandbox-collector")) if err = (&controllers.SandboxReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Tracer: instrumenter, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Tracer: instrumenter, + ClusterDomain: clusterDomain, }).SetupWithManager(mgr, sandboxConcurrentWorkers); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Sandbox") os.Exit(1) diff --git a/controllers/sandbox_controller.go b/controllers/sandbox_controller.go index e788be924..54c40763f 100644 --- a/controllers/sandbox_controller.go +++ b/controllers/sandbox_controller.go @@ -62,6 +62,8 @@ type SandboxReconciler struct { client.Client Scheme *runtime.Scheme Tracer asmetrics.Instrumenter + + ClusterDomain string } //+kubebuilder:rbac:groups=agents.x-k8s.io,resources=sandboxes,verbs=get;list;watch;create;update;patch;delete @@ -290,7 +292,7 @@ func (r *SandboxReconciler) reconcileService(ctx context.Context, sandbox *sandb } } else { log.Info("Found Service", "Service.Namespace", service.Namespace, "Service.Name", service.Name) - setServiceStatus(sandbox, service) + r.setServiceStatus(sandbox, service) return service, nil } @@ -322,15 +324,14 @@ func (r *SandboxReconciler) reconcileService(ctx context.Context, sandbox *sandb return nil, err } - setServiceStatus(sandbox, service) + r.setServiceStatus(sandbox, service) return service, nil } // setServiceStatus updates the sandbox status with the service name and FQDN. -// TODO(barney-s): hardcoded to svc.cluster.local which is the default. Need a way to change it. -func setServiceStatus(sandbox *sandboxv1alpha1.Sandbox, service *corev1.Service) { +func (r *SandboxReconciler) setServiceStatus(sandbox *sandboxv1alpha1.Sandbox, service *corev1.Service) { sandbox.Status.Service = service.Name - sandbox.Status.ServiceFQDN = service.Name + "." + service.Namespace + ".svc.cluster.local" + sandbox.Status.ServiceFQDN = service.Name + "." + service.Namespace + ".svc." + r.ClusterDomain } func (r *SandboxReconciler) reconcilePod(ctx context.Context, sandbox *sandboxv1alpha1.Sandbox, nameHash string) (*corev1.Pod, error) { diff --git a/controllers/sandbox_controller_test.go b/controllers/sandbox_controller_test.go index 522872070..89f8478cd 100644 --- a/controllers/sandbox_controller_test.go +++ b/controllers/sandbox_controller_test.go @@ -505,9 +505,10 @@ func TestReconcile(t *testing.T) { sb.Generation = 1 sb.Spec = tc.sandboxSpec r := SandboxReconciler{ - Client: newFakeClient(append(tc.initialObjs, sb)...), - Scheme: Scheme, - Tracer: asmetrics.NewNoOp(), + Client: newFakeClient(append(tc.initialObjs, sb)...), + Scheme: Scheme, + Tracer: asmetrics.NewNoOp(), + ClusterDomain: "cluster.local", } _, err := r.Reconcile(t.Context(), ctrl.Request{ @@ -961,3 +962,37 @@ func TestSandboxExpiry(t *testing.T) { }) } } + +func TestSetServiceStatus(t *testing.T) { + tests := []struct { + name string + clusterDomain string + wantFQDN string + }{ + { + name: "default cluster domain", + clusterDomain: "cluster.local", + wantFQDN: "my-sandbox.default.svc.cluster.local", + }, + { + name: "custom cluster domain", + clusterDomain: "my-company.local", + wantFQDN: "my-sandbox.default.svc.my-company.local", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + r := &SandboxReconciler{ClusterDomain: tc.clusterDomain} + sandbox := &sandboxv1alpha1.Sandbox{} + service := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-sandbox", + Namespace: "default", + }, + } + r.setServiceStatus(sandbox, service) + require.Equal(t, "my-sandbox", sandbox.Status.Service) + require.Equal(t, tc.wantFQDN, sandbox.Status.ServiceFQDN) + }) + } +} diff --git a/docs/configuration.md b/docs/configuration.md index e2db63eb5..d89cc37ee 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -10,6 +10,12 @@ The `agent-sandbox-controller` supports several command-line flags to tune perfo * `--kube-api-qps` (default: -1 ; no rate limiting): The maximum Queries Per Second (QPS) sent to the Kubernetes API server from the controller. * `--kube-api-burst` (default: 10): The maximum burst for throttle requests to the Kubernetes API server. +## Cluster Settings + +* `--cluster-domain` (default: `cluster.local`): The Kubernetes cluster domain used to + construct service FQDNs. Only change this if your cluster is configured with a non-default + domain (e.g. `my-company.local`). + ## Deployment Example To deploy the controller with custom concurrency settings, modify the `args` of the `agent-sandbox-controller` container within the project's installation manifests.