Skip to content
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d251e6f
feat: label auth CMs on first interaction and reconcile Shoots on Gre…
Zaggy21 Apr 24, 2026
3748d84
add integration tests for auth CM labeling and Greenhouse CM watch-tr…
Zaggy21 Apr 24, 2026
3a213d6
add note on auth CM labeling and watch-triggered shoot reconciliation
Zaggy21 Apr 24, 2026
f9ac129
feat: trigger shoot reconciliation after OIDC updates (#44)
Zaggy21 Apr 24, 2026
9f5e844
chore: bump Go base image to 1.26.0 to match go.mod (#47)
mikolajkucinski Apr 24, 2026
62c40a9
wait for the greenhouse cache before making assertions
Zaggy21 Apr 27, 2026
b6f9e84
unify the prefix for labels to be shoot-grafter.cloudoperators.dev
Zaggy21 Apr 27, 2026
b519f20
patch the labels on auth configmap instead of updating
Zaggy21 Apr 27, 2026
543fa36
fix: skip auth CM relabel when already owned by another CareInstruction
Zaggy21 Apr 28, 2026
b97b538
fix suggestions
Zaggy21 Apr 28, 2026
af259b5
filter shoots not matching CEL for watch
Zaggy21 Apr 28, 2026
9b637cf
change the AuthConfigMapLabelKey to shoot-grafter.cloudoperators.dev/…
Zaggy21 Apr 28, 2026
13cf18b
restrict watch to specific configmaps, add a timeout to greenhouse ma…
Zaggy21 Apr 29, 2026
4b70d23
change predicate to fire only when Data in config map changed
Zaggy21 Apr 29, 2026
c9f9369
fix lint error
Zaggy21 Apr 29, 2026
1807100
feat: move auth ConfigMap watch from ShootController to CareInstructi…
Zaggy21 May 12, 2026
463bfde
Merge branch 'main' into feat/watch-authenticationconfig-cm-changes
Zaggy21 May 12, 2026
6d1192a
fix goconst issues
Zaggy21 May 12, 2026
a18aa61
fix more goconst issues
Zaggy21 May 12, 2026
a15cbb9
update go version to 1.26.3 and run go-makefile-maker
Zaggy21 May 12, 2026
09d36cc
update golang dependencies
Zaggy21 May 12, 2026
02d38f5
fix README, log CM fetch errors, add patch RBAC verb, handle CM creat…
Zaggy21 May 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
uses: actions/setup-go@v6
with:
check-latest: true
go-version: 1.26.2
go-version: 1.26.3
- name: Run golangci-lint
uses: golangci/golangci-lint-action@v9
with:
Expand All @@ -44,8 +44,6 @@ jobs:
uses: crate-ci/typos@v1
env:
CLICOLOR: "1"
- name: Delete typos binary
run: rm -f typos
- name: Check if source code files have license header
run: make check-addlicense
- name: REUSE Compliance Check
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
uses: actions/setup-go@v6
with:
check-latest: true
go-version: 1.26.2
go-version: 1.26.3
- name: Build all binaries
run: make build-all
code_coverage:
Expand Down Expand Up @@ -65,7 +65,7 @@ jobs:
uses: actions/setup-go@v6
with:
check-latest: true
go-version: 1.26.2
go-version: 1.26.3
- name: Run tests and generate coverage report
run: make test-with-envtest
- name: Archive code coverage results
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
uses: actions/setup-go@v6
with:
check-latest: true
go-version: 1.26.2
go-version: 1.26.3
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
with:
Expand Down
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ prepare-static-check: FORCE install-goimports install-golangci-lint install-shel
# To add additional flags or values (before the default ones), specify the variable in the environment, e.g. `GO_BUILDFLAGS='-tags experimental' make`.
# To override the default flags or values, specify the variable on the command line, e.g. `make GO_BUILDFLAGS='-tags experimental'`.
GO_BUILDFLAGS +=
GO_LDFLAGS +=
GO_TESTFLAGS +=
GO_TESTENV +=
GO_BUILDENV +=
GO_LDFLAGS +=
GO_TESTFLAGS +=
GO_TESTENV +=
GO_BUILDENV +=

build-all: build/shoot-grafter

Expand Down
1 change: 0 additions & 1 deletion Makefile.maker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# NOTE: After running go-makefile-maker, manually apply these changes:
# 1. Add 'branches: [main]' to container-registry-ghcr.yaml for main branch builds
# 2. Change 'make build/cover.out' to 'make test-with-envtest' in ci.yaml for envtest support
# 3. Change 'rm typos' to 'rm -f typos' in checks.yaml

metadata:
url: https://github.com/cloudoperators/shoot-grafter
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,11 @@ For each CareInstruction, a dedicated Shoot controller is dynamically created an
- Extracts cluster connection details (API server URL, CA certificate)
- Creates or updates corresponding Secret resources with OIDC configuration
- Generates Greenhouse Cluster resources with appropriate labels
- Optionally configures OIDC authentication on Shoot clusters for Greenhouse access. Also see respective [Greenhouse docs](https://cloudoperators.github.io/greenhouse/docs/user-guides/cluster/oidc_connectivity/) and [Gardener docs](https://gardener.cloud/docs/guides/administer-shoots/oidc-login/#configure-the-shoot-cluster)
- Optionally configures OIDC authentication on Shoot clusters for Greenhouse access. Also see respective [Greenhouse docs](https://cloudoperators.github.io/greenhouse/docs/user-guides/cluster/oidc-login/) and [Gardener docs](https://gardener.cloud/docs/guides/administer-shoots/oidc-login/#configure-the-shoot-cluster)
- Optionally configures RBAC on the Shoot cluster for Greenhouse access

> **Auth ConfigMap labeling & watch**: When `authenticationConfigMapName` is set, the shoot controller labels the referenced Greenhouse ConfigMap with `shoot-grafter.cloudoperators.dev/auth-configmap: "true"` and `shoot-grafter.cloudoperators.dev/careinstruction: <ci-name>` on first interaction (creation or update). The CareInstruction controller watches these labeled ConfigMaps on the Greenhouse cluster; when the `data` of an auth ConfigMap changes, the owning CareInstruction is re-enqueued and its Shoot controller is restarted with the updated data, so the Garden-side OIDC configuration stays in sync without waiting for the next Shoot event. Label-only changes to the ConfigMap do not trigger a reconciliation.
Comment thread
Zaggy21 marked this conversation as resolved.
Outdated
Comment thread
Zaggy21 marked this conversation as resolved.
Outdated

## Custom Resource: CareInstruction

A `CareInstruction` defines the configuration for onboarding Shoots from a specific Garden cluster.
Expand Down Expand Up @@ -208,7 +210,7 @@ spec:
| `shootSelector.expression` | string | No | CEL expression for filtering shoots by status or other fields (max 1024 chars). The shoot object is available as `object` |
| `propagateLabels` | []string | No | List of label keys to copy from Shoot to Greenhouse Cluster |
| `additionalLabels` | map[string]string | No | Additional labels to add to all created Greenhouse Clusters |
| `authenticationConfigMapName` | string | No | Name of ConfigMap in Greenhouse cluster containing AuthenticationConfiguration [(config.yaml with apiserver.config.k8s.io/v1beta1 content)](https://gardener.cloud/docs/guides/administer-shoots/oidc-login/#configure-the-shoot-cluster)|
| `authenticationConfigMapName` | string | No | Name of ConfigMap in Greenhouse cluster containing AuthenticationConfiguration [(config.yaml with apiserver.config.k8s.io/v1beta1 content)](https://gardener.cloud/docs/guides/administer-shoots/oidc-login/#configure-the-shoot-cluster). The ConfigMap is labeled by the shoot controller on first interaction and watched for changes to trigger automatic re-reconciliation of Shoots. |
| `enableRBAC` | bool | No | When false, skips automatic RBAC setup on Shoot clusters (default: true) |

*Note: Either `gardenClusterName` or `gardenClusterKubeConfigSecretName` must be provided (priority: kubeconfig secret > cluster name)
Expand Down
2 changes: 1 addition & 1 deletion api/v1alpha1/careinstruction_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const (
CareInstructionLabel = "shoot-grafter.cloudoperators.dev/careinstruction"

// AuthConfigMapLabel is the label used to identify AuthenticationConfiguration ConfigMaps
AuthConfigMapLabel = "shoot-grafter.cloudoperators/authconfigmap"
AuthConfigMapLabel = "shoot-grafter.cloudoperators.dev/auth-configmap"

Comment on lines 35 to 40
// ShootStatusOnboarded indicates the shoot has been onboarded as a Greenhouse Cluster.
ShootStatusOnboarded = "Onboarded"
Expand Down
100 changes: 91 additions & 9 deletions controller/careinstruction/careinstruction_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package careinstruction
import (
"context"
"errors"
"maps"
"reflect"
"sync"

Expand All @@ -18,9 +19,11 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/config"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/predicate"

greenhouseapis "github.com/cloudoperators/greenhouse/api"
greenhousemetav1alpha1 "github.com/cloudoperators/greenhouse/api/meta/v1alpha1"
Expand Down Expand Up @@ -52,6 +55,7 @@ type garden struct {
careInstructionSpec *v1alpha1.CareInstructionSpec // The CareInstruction object for the garden cluster
cancelFunc context.CancelFunc // Cancel function to stop the manager
stopChan chan bool // Channel to know if the manager is stopped
authConfigMapData map[string]string // In-memory cache of the latest auth ConfigMap Data
}

type careInstructionContextKey struct{}
Expand All @@ -64,11 +68,35 @@ type careInstructionContextKey struct{}
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch;delete

func (r *CareInstructionReconciler) SetupWithManager(mgr ctrl.Manager) error {
// Auth ConfigMap predicate: only re-enqueue the CareInstruction when ConfigMap Data changes.
// Label-only patches (e.g. from our own MergeFrom calls in auth.go) are ignored.
authCMDataChangedPredicate := predicate.Funcs{
CreateFunc: func(_ event.CreateEvent) bool { return false },
UpdateFunc: func(e event.UpdateEvent) bool {
Comment thread
Zaggy21 marked this conversation as resolved.
oldCM, ok1 := e.ObjectOld.(*corev1.ConfigMap)
newCM, ok2 := e.ObjectNew.(*corev1.ConfigMap)
if !ok1 || !ok2 {
return false
}
return !maps.Equal(oldCM.Data, newCM.Data)
},
DeleteFunc: func(_ event.DeleteEvent) bool { return false },
}

// Setup the controller with the manager
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.CareInstruction{}).
Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(r.enqueueCareInstructionForGardenCluster), builder.WithPredicates(clientutil.PredicateFilterBySecretTypes(greenhouseapis.SecretTypeKubeConfig, greenhouseapis.SecretTypeOIDCConfig))).
Watches(&greenhousev1alpha1.Cluster{}, handler.EnqueueRequestsFromMapFunc(r.enqueueCareInstructionForCreatedClusters), builder.WithPredicates(clientutil.PredicateHasLabel(v1alpha1.CareInstructionLabel))).
// Watch auth ConfigMaps on the Greenhouse cluster. When their Data changes, re-enqueue the owning
// CareInstruction so reconcileManager detects the change and restarts the ShootController with
// the updated CM data.
Watches(&corev1.ConfigMap{}, handler.EnqueueRequestsFromMapFunc(r.enqueueCareInstructionForAuthConfigMap),
builder.WithPredicates(
clientutil.PredicateHasLabel(v1alpha1.AuthConfigMapLabel),
authCMDataChangedPredicate,
),
).
Comment on lines +91 to +99
Complete(r)
}

Expand Down Expand Up @@ -172,6 +200,7 @@ func (r *CareInstructionReconciler) reconcileManager(ctx context.Context, careIn
gardenClient: nil,
careInstructionSpec: &careInstruction.Spec,
cancelFunc: nil,
authConfigMapData: nil,
}
}
r.gardensMu.Unlock()
Expand All @@ -197,6 +226,12 @@ func (r *CareInstructionReconciler) reconcileManager(ctx context.Context, careIn
),
)

// Fetch the current auth ConfigMap data so we can detect changes and pass it to the ShootController.
currentAuthCMData, authCMErr := r.fetchAuthConfigMapData(ctx, &careInstruction)
if authCMErr != nil && !apierrors.IsNotFound(authCMErr) {
r.Info("failed to fetch auth ConfigMap data, will proceed without it", "error", authCMErr)
}

// Now we check the following to see if we need to recreate and restart the manager (with read lock):
r.gardensMu.RLock()
garden := r.gardens[gardenKey]
Expand All @@ -221,9 +256,11 @@ func (r *CareInstructionReconciler) reconcileManager(ctx context.Context, careIn
channelOpen = true
}
}
// 6. If the auth ConfigMap data has changed, the ShootController must be restarted with the new data
authConfigMapDataChanged := !maps.Equal(garden.authConfigMapData, currentAuthCMData)
r.gardensMu.RUnlock()

if mgrExists && shootControllerStarted && !gardenConfigChanged && !careInstructionSpecChanged && channelExists && channelOpen {
if mgrExists && shootControllerStarted && !gardenConfigChanged && !careInstructionSpecChanged && channelExists && channelOpen && !authConfigMapDataChanged {
r.Info("Manager is running, garden cluster config & careInstruction.Spec is unchanged, skipping client and manager recreation", "careInstruction", careInstruction.Name)
return nil
}
Expand All @@ -241,6 +278,8 @@ func (r *CareInstructionReconciler) reconcileManager(ctx context.Context, careIn
reason = "stop channel is missing"
case !channelOpen:
reason = "manager stop channel is closed"
case authConfigMapDataChanged:
reason = "auth ConfigMap data has changed"
default:
reason = "unknown reason"
}
Expand Down Expand Up @@ -287,15 +326,17 @@ func (r *CareInstructionReconciler) reconcileManager(ctx context.Context, careIn
return err
}

// Register the ShootController with the garden manager
// Note: EventRecorder is obtained from the Greenhouse manager to emit events on the Greenhouse cluster
// Register the ShootController with the garden manager.
// Note: EventRecorder is obtained from the Greenhouse manager to emit events on the Greenhouse cluster.
// AuthConfigMapData is passed in-memory from the CareInstruction controller, which owns the auth CM watch.
sc := &shoot.ShootController{
GreenhouseClient: r.Client,
GardenClient: gardenClient,
Logger: r.WithValues("careInstruction", careInstruction.Name),
Name: shoot.GenerateName(careInstruction.Name),
CareInstruction: careInstruction.DeepCopy(),
EventRecorder: r.GetEventRecorderFor(shoot.GenerateName(careInstruction.Name)),
GreenhouseClient: r.Client,
GardenClient: gardenClient,
Logger: r.WithValues("careInstruction", careInstruction.Name),
Name: shoot.GenerateName(careInstruction.Name),
CareInstruction: careInstruction.DeepCopy(),
EventRecorder: r.GetEventRecorderFor(shoot.GenerateName(careInstruction.Name)),
AuthConfigMapData: currentAuthCMData,
}
if err := sc.SetupWithManager(shootControllerMgr); err != nil {
return err
Expand All @@ -313,6 +354,7 @@ func (r *CareInstructionReconciler) reconcileManager(ctx context.Context, careIn
r.gardens[gardenKey].cancelFunc = cancel
r.gardens[gardenKey].stopChan = make(chan bool)
r.gardens[gardenKey].careInstructionSpec = &careInstruction.Spec
r.gardens[gardenKey].authConfigMapData = currentAuthCMData
stopChan := r.gardens[gardenKey].stopChan
r.gardensMu.Unlock()

Expand Down Expand Up @@ -587,3 +629,43 @@ func (r *CareInstructionReconciler) enqueueCareInstructionForCreatedClusters(_ c
},
}
}

// enqueueCareInstructionForAuthConfigMap maps an auth ConfigMap event to a reconcile request for
// the CareInstruction that owns it. The owner is read from the CareInstructionLabel on the
// ConfigMap and is looked up in the ConfigMap's namespace. Objects that are not ConfigMaps, or
// that do not carry the label, produce no request.
func (r *CareInstructionReconciler) enqueueCareInstructionForAuthConfigMap(_ context.Context, obj client.Object) []ctrl.Request {
	configMap, isConfigMap := obj.(*corev1.ConfigMap)
	if !isConfigMap {
		return nil
	}

	owner, labeled := configMap.Labels[v1alpha1.CareInstructionLabel]
	if !labeled {
		return nil
	}

	r.Info("Enqueuing CareInstruction for auth ConfigMap change", "configMap", configMap.Name, "careInstruction", owner)

	request := ctrl.Request{
		NamespacedName: client.ObjectKey{Namespace: configMap.Namespace, Name: owner},
	}
	return []ctrl.Request{request}
}

// fetchAuthConfigMapData fetches the current Data of the auth ConfigMap referenced by the CareInstruction.
// Returns nil (no error) when no auth ConfigMap is configured or the CM does not exist yet.
func (r *CareInstructionReconciler) fetchAuthConfigMapData(ctx context.Context, careInstruction *v1alpha1.CareInstruction) (map[string]string, error) {
if careInstruction.Spec.AuthenticationConfigMapName == "" {
return nil, nil
}
var cm corev1.ConfigMap
if err := r.Get(ctx, client.ObjectKey{
Namespace: careInstruction.Namespace,
Name: careInstruction.Spec.AuthenticationConfigMapName,
}, &cm); err != nil {
Comment thread
Zaggy21 marked this conversation as resolved.
return nil, err
}
return cm.Data, nil
}
41 changes: 24 additions & 17 deletions controller/careinstruction/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,41 @@ import (
"shoot-grafter/api/v1alpha1"
)

// Prometheus label names shared by the gauges and the metric update helpers in this file.
const (
// labelCareInstruction is set to the CareInstruction name.
labelCareInstruction = "care_instruction"
// labelNamespace is set to the CareInstruction namespace.
labelNamespace = "namespace"
// labelGardenNamespace is set to Spec.GardenNamespace of the CareInstruction.
labelGardenNamespace = "garden_namespace"
// labelShootName is set to the name of an individual shoot from the CareInstruction status.
labelShootName = "shoot_name"
)

// Gauges exported for every CareInstruction. All of them are keyed by the CareInstruction name,
// its namespace and the garden namespace; ShootOnboardedGauge additionally carries the shoot name.
var (
	// TotalTargetShootsGauge reports the total number of shoots matching the CareInstruction label selector.
	TotalTargetShootsGauge = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "shoot_grafter_total_target_shoots",
			Help: "Total number of shoots matching the CareInstruction label selector",
		},
		[]string{labelCareInstruction, labelNamespace, labelGardenNamespace},
	)
	// CreatedClustersGauge reports the number of clusters created by the CareInstruction.
	CreatedClustersGauge = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "shoot_grafter_created_clusters",
			Help: "Number of clusters created by the CareInstruction",
		},
		[]string{labelCareInstruction, labelNamespace, labelGardenNamespace},
	)
	// FailedClustersGauge reports the number of clusters that failed to be created by the CareInstruction.
	FailedClustersGauge = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "shoot_grafter_failed_clusters",
			Help: "Number of clusters failed to be created by the CareInstruction",
		},
		[]string{labelCareInstruction, labelNamespace, labelGardenNamespace},
	)
	// ShootOnboardedGauge reports, per shoot, whether the shoot is onboarded by the CareInstruction.
	ShootOnboardedGauge = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "shoot_grafter_shoot_onboarded",
			Help: "Is shoot onboarded by the CareInstruction",
		},
		[]string{labelCareInstruction, labelNamespace, labelGardenNamespace, labelShootName},
	)
)

Expand All @@ -59,29 +66,29 @@ func UpdateCareInstructionMetrics(careInstruction *v1alpha1.CareInstruction) {

// updateTotalTargetShootsMetric publishes Status.TotalTargetShoots of the CareInstruction on
// TotalTargetShootsGauge, labeled with the CareInstruction name, its namespace and the garden
// namespace from its spec.
func updateTotalTargetShootsMetric(careInstruction *v1alpha1.CareInstruction) {
	// Each label name appears exactly once; a map literal with repeated constant keys does not compile.
	metricLabels := prometheus.Labels{
		labelCareInstruction: careInstruction.Name,
		labelNamespace:       careInstruction.Namespace,
		labelGardenNamespace: careInstruction.Spec.GardenNamespace,
	}
	totalTargetShoots := careInstruction.Status.TotalTargetShoots
	TotalTargetShootsGauge.With(metricLabels).Set(float64(totalTargetShoots))
}

// updateCreatedClustersMetric publishes Status.CreatedClusters of the CareInstruction on
// CreatedClustersGauge, labeled with the CareInstruction name, its namespace and the garden
// namespace from its spec.
func updateCreatedClustersMetric(careInstruction *v1alpha1.CareInstruction) {
	// Each label name appears exactly once; a map literal with repeated constant keys does not compile.
	metricLabels := prometheus.Labels{
		labelCareInstruction: careInstruction.Name,
		labelNamespace:       careInstruction.Namespace,
		labelGardenNamespace: careInstruction.Spec.GardenNamespace,
	}
	createdCount := careInstruction.Status.CreatedClusters
	CreatedClustersGauge.With(metricLabels).Set(float64(createdCount))
}

func updateFailedClustersMetric(careInstruction *v1alpha1.CareInstruction) {
metricLabels := prometheus.Labels{
"care_instruction": careInstruction.Name,
"namespace": careInstruction.Namespace,
"garden_namespace": careInstruction.Spec.GardenNamespace,
labelCareInstruction: careInstruction.Name,
labelNamespace: careInstruction.Namespace,
labelGardenNamespace: careInstruction.Spec.GardenNamespace,
}
failedCount := careInstruction.Status.FailedClusters
FailedClustersGauge.With(metricLabels).Set(float64(failedCount))
Expand All @@ -90,10 +97,10 @@ func updateFailedClustersMetric(careInstruction *v1alpha1.CareInstruction) {
func updateOnboardedShootsMetrics(careInstruction *v1alpha1.CareInstruction) {
for _, ss := range careInstruction.Status.Shoots {
metricLabels := prometheus.Labels{
"care_instruction": careInstruction.Name,
"namespace": careInstruction.Namespace,
"garden_namespace": careInstruction.Spec.GardenNamespace,
"shoot_name": ss.Name,
labelCareInstruction: careInstruction.Name,
labelNamespace: careInstruction.Namespace,
labelGardenNamespace: careInstruction.Spec.GardenNamespace,
labelShootName: ss.Name,
}
if ss.Status == v1alpha1.ShootStatusOnboarded {
ShootOnboardedGauge.With(metricLabels).Set(float64(1))
Expand Down
Loading
Loading