Skip to content
Open
7 changes: 7 additions & 0 deletions api/v1/aerospikecluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,13 @@ type Rack struct { //nolint:govet // for readability
// Effective/operative PodSpec. The resultant is user input if specified else global PodSpec
// +optional
PodSpec RackPodSpec `json:"effectivePodSpec,omitempty"`

// ForceBlockFromRoster, when set to true, forcefully removes all nodes in this rack from the roster.
// With this enabled, the operator ignores unavailable or dead partitions and proceeds with node removal.
// Additionally, any failed or pending pods in this rack are also ignored during this process.
// It may result in data loss. Use this with caution.
// +optional
ForceBlockFromRoster *bool `json:"forceBlockFromRoster,omitempty"`
}

// ValidationPolicySpec controls validation of the Aerospike cluster resource.
Expand Down
5 changes: 5 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6816,6 +6816,13 @@ spec:
- name
x-kubernetes-list-type: map
type: object
forceBlockFromRoster:
description: |-
ForceBlockFromRoster, when set to true, forcefully removes all nodes in this rack from the roster.
With this enabled, the operator ignores unavailable or dead partitions and proceeds with node removal.
Additionally, any failed or pending pods in this rack are also ignored during this process.
It may result in data loss. Use this with caution.
type: boolean
id:
description: Identifier for the rack
type: integer
Expand Down Expand Up @@ -16088,6 +16095,13 @@ spec:
- name
x-kubernetes-list-type: map
type: object
forceBlockFromRoster:
description: |-
ForceBlockFromRoster, when set to true, forcefully removes all nodes in this rack from the roster.
With this enabled, the operator ignores unavailable or dead partitions and proceeds with node removal.
Additionally, any failed or pending pods in this rack are also ignored during this process.
It may result in data loss. Use this with caution.
type: boolean
id:
description: Identifier for the rack
type: integer
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.23.11
require (
github.com/aerospike/aerospike-backup-service/v3 v3.2.0
github.com/aerospike/aerospike-client-go/v8 v8.2.2
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250829065008-0363517825bc
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250924160556-d98795e04f2d
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/deckarep/golang-set/v2 v2.8.0
github.com/evanphx/json-patch v4.12.0+incompatible
Expand Down
12 changes: 10 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@ github.com/aerospike/aerospike-backup-service/v3 v3.2.0 h1:vqvT7OE1didb/R4vOTNWI
github.com/aerospike/aerospike-backup-service/v3 v3.2.0/go.mod h1:aSD1pSRjyMYQ1UW2Kkg/vinn+ngeobKzDIVj+UZIi2c=
github.com/aerospike/aerospike-client-go/v8 v8.2.2 h1:NV1GxB+ATUb1cQtwaIS731A/6EkwuAX4/heh8CpvQOI=
github.com/aerospike/aerospike-client-go/v8 v8.2.2/go.mod h1:H6CzKDoHxBj1yY/oQPci1bUIbEx2ATQtJ2GtZ+N64Wg=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250829065008-0363517825bc h1:/vlU9v4xNL/e0Lpa+OXixHLqZQedvsiK385H7rMWOlk=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250829065008-0363517825bc/go.mod h1:RNKlkSnppiaBaLEdAiCeBv68M+odPR5JR+ziO0NfaHQ=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250912062927-f5201cbc4c08 h1:AoY6nlBRt+FYnFo6RDu7BDGwOQ52HTI8+f5aW/ZHvKw=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250912062927-f5201cbc4c08/go.mod h1:RNKlkSnppiaBaLEdAiCeBv68M+odPR5JR+ziO0NfaHQ=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250923053621-e31d326700f1 h1:yESLHReb7BGWe0xTL99bPNCZpiTKDsPwVEQftQJM50E=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250923053621-e31d326700f1/go.mod h1:RNKlkSnppiaBaLEdAiCeBv68M+odPR5JR+ziO0NfaHQ=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250924080822-3f9700084ca1 h1:FdeS8gDa+CGMRi8xWRngTufba3eOU24+z29ZRGqHCkw=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250924080822-3f9700084ca1/go.mod h1:RNKlkSnppiaBaLEdAiCeBv68M+odPR5JR+ziO0NfaHQ=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250924083206-778ebe8d3cd4 h1:NGNU0FkRL8s3Sog7PP2c0zc3sB/IMdxAJfdOdJCgIlI=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250924083206-778ebe8d3cd4/go.mod h1:RNKlkSnppiaBaLEdAiCeBv68M+odPR5JR+ziO0NfaHQ=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250924160556-d98795e04f2d h1:wiUiFdCrWApczqiF8ihivyGshEZBkE57NbAJ5h0pRKY=
github.com/aerospike/aerospike-management-lib v1.8.1-0.20250924160556-d98795e04f2d/go.mod h1:RNKlkSnppiaBaLEdAiCeBv68M+odPR5JR+ziO0NfaHQ=
github.com/aerospike/backup-go v0.5.1 h1:8ryLV0nOoo3Zn7LRV1kQABFnYgg1V0r9ZJJOL22FBs0=
github.com/aerospike/backup-go v0.5.1/go.mod h1:+RJdOaVCG6jQlhoNIJrzQgZnJ/HEBPfAiPGdv3YYzyg=
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6816,6 +6816,13 @@ spec:
- name
x-kubernetes-list-type: map
type: object
forceBlockFromRoster:
description: |-
ForceBlockFromRoster, when set to true, forcefully removes all nodes in this rack from the roster.
With this enabled, the operator ignores unavailable or dead partitions and proceeds with node removal.
Additionally, any failed or pending pods in this rack are also ignored during this process.
It may result in data loss. Use this with caution.
type: boolean
id:
description: Identifier for the rack
type: integer
Expand Down Expand Up @@ -16088,6 +16095,13 @@ spec:
- name
x-kubernetes-list-type: map
type: object
forceBlockFromRoster:
description: |-
ForceBlockFromRoster, when set to true, forcefully removes all nodes in this rack from the roster.
With this enabled, the operator ignores unavailable or dead partitions and proceeds with node removal.
Additionally, any failed or pending pods in this rack are also ignored during this process.
It may result in data loss. Use this with caution.
type: boolean
id:
description: Identifier for the rack
type: integer
Expand Down
28 changes: 19 additions & 9 deletions internal/controller/cluster/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -908,10 +908,16 @@ func (r *SingleClusterReconciler) cleanupDanglingPodsRack(sts *appsv1.StatefulSe
func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack, configuredRacks []RackState) (
sets.Set[string], error,
) {
var ignorableRacks []asdbv1.Rack

ignorablePodNames := sets.Set[string]{}
ignorableRackIDs := sets.Set[int]{}

ignorableRacks = append(ignorableRacks, racksToDelete...)
ignorableRacks = append(ignorableRacks, getRacksToBeBlockedFromRoster(r.Log, configuredRacks)...)

for rackIdx := range racksToDelete {
rackPods, err := r.getRackPodList(racksToDelete[rackIdx].ID)
for idx := range ignorableRacks {
rackPods, err := r.getRackPodList(ignorableRacks[idx].ID)
if err != nil {
return nil, err
}
Expand All @@ -922,24 +928,27 @@ func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack,
ignorablePodNames.Insert(pod.Name)
}
}

ignorableRackIDs.Insert(ignorableRacks[idx].ID)
}

for idx := range configuredRacks {
rack := &configuredRacks[idx]
rackState := &configuredRacks[idx]
if ignorableRackIDs.Has(rackState.Rack.ID) {
// Already handled above
continue
}

failedAllowed, _ := intstr.GetScaledValueFromIntOrPercent(
r.aeroCluster.Spec.RackConfig.MaxIgnorablePods, int(rack.Size), false,
r.aeroCluster.Spec.RackConfig.MaxIgnorablePods, int(rackState.Size), false,
)

podList, err := r.getRackPodList(rack.Rack.ID)
podList, err := r.getRackPodList(rackState.Rack.ID)
if err != nil {
return nil, err
}

var (
failedPod []string
pendingPod []string
)
var failedPod, pendingPod []string

for podIdx := range podList.Items {
pod := &podList.Items[podIdx]
Expand Down Expand Up @@ -970,6 +979,7 @@ func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack,

return ignorablePodNames, nil
}

func (r *SingleClusterReconciler) getPodsPVCList(
podNames []string, rackID int,
) ([]corev1.PersistentVolumeClaim, error) {
Expand Down
17 changes: 17 additions & 0 deletions internal/controller/cluster/rack.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,23 @@ func (r *SingleClusterReconciler) createEmptyRack(rackState *RackState) (
return found, common.ReconcileSuccess()
}

// getRacksToBeBlockedFromRoster returns the racks whose nodes must be removed
// (blocked) from the Aerospike roster: every rack in rackStateList that has
// ForceBlockFromRoster set to true. Each selected rack is logged with its ID.
func getRacksToBeBlockedFromRoster(log logger, rackStateList []RackState) []asdbv1.Rack {
	var blocked []asdbv1.Rack

	for idx := range rackStateList {
		rack := rackStateList[idx].Rack
		// Skip racks that are not explicitly force-blocked.
		if !asdbv1.GetBool(rack.ForceBlockFromRoster) {
			continue
		}

		blocked = append(blocked, *rack)
		log.Info("Rack marked for roster blocking",
			"rackID", rack.ID)
	}

	return blocked
}

func (r *SingleClusterReconciler) getRacksToDelete(rackStateList []RackState) (
[]asdbv1.Rack, error,
) {
Expand Down
13 changes: 12 additions & 1 deletion internal/controller/cluster/strong_consistency.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package cluster

import (
"strconv"

gosets "github.com/deckarep/golang-set/v2"
"k8s.io/apimachinery/pkg/util/sets"

Expand All @@ -13,6 +15,14 @@ func (r *SingleClusterReconciler) getAndSetRoster(
policy *as.ClientPolicy, rosterNodeBlockList []string,
ignorablePodNames sets.Set[string],
) error {
rackStateList := getConfiguredRackStateList(r.aeroCluster)
blockedRackIDs := gosets.NewSet[string]()
blockedRacks := getRacksToBeBlockedFromRoster(r.Log, rackStateList)

for idx := range blockedRacks {
blockedRackIDs.Add(strconv.Itoa(blockedRacks[idx].ID))
}

allHostConns, err := r.newAllHostConnWithOption(ignorablePodNames)
if err != nil {
return err
Expand All @@ -23,7 +33,8 @@ func (r *SingleClusterReconciler) getAndSetRoster(
return err
}

return deployment.GetAndSetRoster(r.Log, allHostConns, policy, rosterNodeBlockList, ignorableNamespaces)
return deployment.ManageRoster(r.Log, allHostConns, policy, rosterNodeBlockList,
ignorableNamespaces, blockedRackIDs)
}

func (r *SingleClusterReconciler) validateSCClusterState(policy *as.ClientPolicy, ignorablePodNames sets.Set[string],
Expand Down
58 changes: 58 additions & 0 deletions internal/webhook/v1/aerospikecluster_validating_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,8 @@ func validateRackUpdate(
return nil
}

forceBlockFromRosterChanged := false

// Old racks cannot be updated
// Also need to exclude a default rack with default rack ID. No need to check here,
// user should not provide or update default rackID
Expand Down Expand Up @@ -510,11 +512,47 @@ func validateRackUpdate(
}
}

if oldRack.ForceBlockFromRoster != newRack.ForceBlockFromRoster {
forceBlockFromRosterChanged = true
}

break
}
}
}

if forceBlockFromRosterChanged && newObj.Status.AerospikeConfig == nil {
return fmt.Errorf("status is not updated yet, cannot change ForceBlockFromRoster in rack")
}

racksBlockedFromRosterInSpec := make(sets.Set[int])
racksBlockedFromRosterInStatus := make(sets.Set[int])

for idx := range newObj.Spec.RackConfig.Racks {
rack := newObj.Spec.RackConfig.Racks[idx]
if asdbv1.GetBool(rack.ForceBlockFromRoster) {
racksBlockedFromRosterInSpec.Insert(rack.ID)
}
}

for idx := range newObj.Status.RackConfig.Racks {
rack := newObj.Status.RackConfig.Racks[idx]
if asdbv1.GetBool(rack.ForceBlockFromRoster) {
racksBlockedFromRosterInStatus.Insert(rack.ID)
}
}

if len(newObj.Status.RackConfig.Racks)-len(racksBlockedFromRosterInSpec) == 1 &&
(newObj.Spec.RackConfig.RollingUpdateBatchSize != nil || newObj.Spec.RackConfig.ScaleDownBatchSize != nil) {
return fmt.Errorf("with only one rack remaining in roster, cannot use rollingUpdateBatchSize or scaleDownBatchSize")
}

desiredRacksBlockedFromRoster := racksBlockedFromRosterInSpec.Difference(racksBlockedFromRosterInStatus)

if len(desiredRacksBlockedFromRoster) > 1 {
return fmt.Errorf("only one rack can be force-blocked from the roster at a time using the forceBlockFromRoster flag")
}

return nil
}

Expand Down Expand Up @@ -594,6 +632,8 @@ func validateRackConfig(_ logr.Logger, cluster *asdbv1.AerospikeCluster) error {
rackMap := map[int]bool{}
migrateFillDelaySet := sets.Set[int]{}

var racksBlockedFromRoster int

for idx := range cluster.Spec.RackConfig.Racks {
rack := &cluster.Spec.RackConfig.Racks[idx]
// Check for duplicate
Expand Down Expand Up @@ -640,6 +680,24 @@ func validateRackConfig(_ logr.Logger, cluster *asdbv1.AerospikeCluster) error {
}

migrateFillDelaySet.Insert(migrateFillDelay)

if asdbv1.GetBool(rack.ForceBlockFromRoster) {
racksBlockedFromRoster++
}
}

if racksBlockedFromRoster > 0 {
if cluster.Spec.RackConfig.MaxIgnorablePods != nil {
return fmt.Errorf("forceBlockFromRoster cannot be enabled when maxIgnorablePods is set")
}

if len(cluster.Spec.RosterNodeBlockList) > 0 {
return fmt.Errorf("forceBlockFromRoster cannot be enabled with RosterNodeBlockList")
}

if racksBlockedFromRoster == len(cluster.Spec.RackConfig.Racks) {
return fmt.Errorf("all racks cannot have forceBlockFromRoster enabled. At least one rack must remain in the roster")
}
}

// If len of migrateFillDelaySet is more than 1, it means that different migrate-fill-delay is set across racks
Expand Down
Loading