From 1a2bb1539081011b4e1d0cc229e044934d72b348 Mon Sep 17 00:00:00 2001 From: Jeff Mealo Date: Wed, 10 Dec 2025 12:23:26 -0500 Subject: [PATCH] Fix PVC storage validation and error messages Prevents silent reconciliation failures when override.statefulSet.spec.volumeClaimTemplates is provided with incomplete configuration (e.g., only metadata.annotations without spec.resources.requests.storage). Changes: - Add CRD validation requiring PVC spec field (rejected at admission time) - Detect and error on storage=0 with hints about override behavior - Show actual values in shrink errors: "(existing: 20Gi, desired: 5Gi)" Fixes #2023 --- api/v1beta1/rabbitmqcluster_types.go | 6 +++-- .../bases/rabbitmq.com_rabbitmqclusters.yaml | 2 ++ controllers/reconcile_persistence.go | 25 +++++++++++++++---- docs/api/rabbitmq.com.ref.asciidoc | 2 ++ internal/scaling/scaling.go | 3 ++- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/api/v1beta1/rabbitmqcluster_types.go b/api/v1beta1/rabbitmqcluster_types.go index 2f4b171bb..c069a6767 100644 --- a/api/v1beta1/rabbitmqcluster_types.go +++ b/api/v1beta1/rabbitmqcluster_types.go @@ -351,8 +351,10 @@ type PersistentVolumeClaim struct { // Spec defines the desired characteristics of a volume requested by a pod author. // More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims - // +optional - Spec corev1.PersistentVolumeClaimSpec `json:"spec,omitempty" protobuf:"bytes,2,opt,name=spec"` + // When using override.statefulSet.spec.volumeClaimTemplates, you must provide the complete + // template including spec.resources.requests.storage. Overrides replace the entire template. + // +kubebuilder:validation:Required + Spec corev1.PersistentVolumeClaimSpec `json:"spec" protobuf:"bytes,2,opt,name=spec"` } // TLSSpec allows for the configuration of TLS certificates to be used by RabbitMQ. Also allows for non-TLS traffic to be disabled. diff --git a/config/crd/bases/rabbitmq.com_rabbitmqclusters.yaml b/config/crd/bases/rabbitmq.com_rabbitmqclusters.yaml index d09a0874c..5c36c99f4 100644 --- a/config/crd/bases/rabbitmq.com_rabbitmqclusters.yaml +++ b/config/crd/bases/rabbitmq.com_rabbitmqclusters.yaml @@ -5059,6 +5059,8 @@ spec: volumeName: type: string type: object + required: + - spec type: object type: array type: object diff --git a/controllers/reconcile_persistence.go b/controllers/reconcile_persistence.go index 673b35684..3c5dca6ac 100644 --- a/controllers/reconcile_persistence.go +++ b/controllers/reconcile_persistence.go @@ -14,8 +14,14 @@ import ( func (r *RabbitmqClusterReconciler) reconcilePVC(ctx context.Context, rmq *rabbitmqv1beta1.RabbitmqCluster, desiredSts *appsv1.StatefulSet) error { logger := ctrl.LoggerFrom(ctx) - desiredCapacity := persistenceStorageCapacity(desiredSts.Spec.VolumeClaimTemplates) - err := scaling.NewPersistenceScaler(r.Clientset).Scale(ctx, *rmq, desiredCapacity) + desiredCapacity, err := persistenceStorageCapacity(desiredSts.Spec.VolumeClaimTemplates) + if err != nil { + msg := fmt.Sprintf("Failed to determine PVC capacity: %s", err.Error()) + logger.Error(err, msg) + r.Recorder.Event(rmq, corev1.EventTypeWarning, "FailedReconcilePersistence", msg) + return err + } + err = scaling.NewPersistenceScaler(r.Clientset).Scale(ctx, *rmq, desiredCapacity) if err != nil { msg := fmt.Sprintf("Failed to scale PVCs: %s", err.Error()) logger.Error(fmt.Errorf("hit an error while scaling PVC capacity: %w", err), msg) @@ -24,11 +30,20 @@ func (r *RabbitmqClusterReconciler) reconcilePVC(ctx context.Context, rmq *rabbi return err } -func persistenceStorageCapacity(templates []corev1.PersistentVolumeClaim) k8sresource.Quantity { +func persistenceStorageCapacity(templates []corev1.PersistentVolumeClaim) (k8sresource.Quantity, error) { for _, t := range templates { if t.Name == "persistence" { - return t.Spec.Resources.Requests[corev1.ResourceStorage] + storage := t.Spec.Resources.Requests[corev1.ResourceStorage] + if storage.IsZero() { + return storage, fmt.Errorf( + "PVC template 'persistence' has spec.resources.requests.storage=0 (or missing). " + + "If using override.statefulSet.spec.volumeClaimTemplates, you must provide " + + "the COMPLETE template including spec.resources.requests.storage. " + + "Overrides replace the entire volumeClaimTemplate, not merge with it") + } + return storage, nil } } - return k8sresource.MustParse("0") + // No persistence template found - this is valid for ephemeral storage (storage: "0Gi") + return k8sresource.MustParse("0"), nil } diff --git a/docs/api/rabbitmq.com.ref.asciidoc b/docs/api/rabbitmq.com.ref.asciidoc index 08bcc5daa..6751938a3 100644 --- a/docs/api/rabbitmq.com.ref.asciidoc +++ b/docs/api/rabbitmq.com.ref.asciidoc @@ -114,6 +114,8 @@ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api- | *`spec`* __link:https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#persistentvolumeclaimspec-v1-core[$$PersistentVolumeClaimSpec$$]__ | Spec defines the desired characteristics of a volume requested by a pod author. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims +When using override.statefulSet.spec.volumeClaimTemplates, you must provide the complete +template including spec.resources.requests.storage. Overrides replace the entire template. |=== diff --git a/internal/scaling/scaling.go b/internal/scaling/scaling.go index 3259c5724..8fc78d510 100644 --- a/internal/scaling/scaling.go +++ b/internal/scaling/scaling.go @@ -48,7 +48,8 @@ func (p PersistenceScaler) Scale(ctx context.Context, rmq rabbitmqv1beta1.Rabbit // desired storage capacity is smaller than the current capacity; we can't proceed lest we lose data if err == nil && existingCapacity.Cmp(desiredCapacity) == 1 { - msg := "shrinking persistent volumes is not supported" + msg := fmt.Sprintf("shrinking persistent volumes is not supported (existing: %s, desired: %s)", + existingCapacity.String(), desiredCapacity.String()) logger.Error(errors.New("unsupported operation"), msg) return errors.New(msg) }