diff --git a/api/redisfailover/v1/types.go b/api/redisfailover/v1/types.go index 181b59116..1e8572e2e 100644 --- a/api/redisfailover/v1/types.go +++ b/api/redisfailover/v1/types.go @@ -73,6 +73,7 @@ type RedisSettings struct { CustomStartupProbe *corev1.Probe `json:"customStartupProbe,omitempty"` DisablePodDisruptionBudget bool `json:"disablePodDisruptionBudget,omitempty"` PreventMasterEviction bool `json:"preventMasterEviction,omitempty"` + DisableIPMode bool `json:"disableIPMode,omitempty"` } // SentinelSettings defines the specification of the sentinel cluster diff --git a/charts/redisoperator/Chart.yaml b/charts/redisoperator/Chart.yaml index 5f0aea498..c62b15a36 100644 --- a/charts/redisoperator/Chart.yaml +++ b/charts/redisoperator/Chart.yaml @@ -4,7 +4,7 @@ appVersion: 1.3.0 apiVersion: v1 description: A Helm chart for the Spotahome Redis Operator name: redis-operator -version: 3.3.2 +version: 3.3.3 home: https://github.com/freshworks/redis-operator keywords: - "golang" diff --git a/example/redisfailover/basic.yaml b/example/redisfailover/basic.yaml index ccb3c6c9e..48348da89 100644 --- a/example/redisfailover/basic.yaml +++ b/example/redisfailover/basic.yaml @@ -13,6 +13,7 @@ spec: memory: 100Mi redis: replicas: 3 + disableIPMode: true resources: requests: cpu: 100m diff --git a/manifests/databases.spotahome.com_redisfailovers.yaml b/manifests/databases.spotahome.com_redisfailovers.yaml index 06c78e0bb..d6054c812 100644 --- a/manifests/databases.spotahome.com_redisfailovers.yaml +++ b/manifests/databases.spotahome.com_redisfailovers.yaml @@ -6836,6 +6836,8 @@ spec: type: integer preventMasterEviction: type: boolean + disableIPMode: + type: boolean priorityClassName: type: string replicas: diff --git a/manifests/kustomize/base/databases.spotahome.com_redisfailovers.yaml b/manifests/kustomize/base/databases.spotahome.com_redisfailovers.yaml index 37af69cb5..b4c1b455e 100644 --- a/manifests/kustomize/base/databases.spotahome.com_redisfailovers.yaml +++ b/manifests/kustomize/base/databases.spotahome.com_redisfailovers.yaml @@ -6833,6 +6833,8 @@ spec: type: integer preventMasterEviction: type: boolean + disableIPMode: + type: boolean priorityClassName: type: string replicas: diff --git a/mocks/operator/redisfailover/service/RedisFailoverCheck.go b/mocks/operator/redisfailover/service/RedisFailoverCheck.go index b1b60a181..693cc7357 100644 --- a/mocks/operator/redisfailover/service/RedisFailoverCheck.go +++ b/mocks/operator/redisfailover/service/RedisFailoverCheck.go @@ -5,6 +5,7 @@ package mocks import ( mock "github.com/stretchr/testify/mock" + corev1 "k8s.io/api/core/v1" time "time" v1 "github.com/freshworks/redis-operator/api/redisfailover/v1" @@ -442,6 +443,36 @@ func (_m *RedisFailoverCheck) GetRedisesSlavesPods(rFailover *v1.RedisFailover) return r0, r1 } +// GetRedisesPods provides a mock function with given fields: rFailover +func (_m *RedisFailoverCheck) GetRedisesPods(rFailover *v1.RedisFailover) (*corev1.PodList, error) { + ret := _m.Called(rFailover) + + if len(ret) == 0 { + panic("no return value specified for GetRedisesPods") + } + + var r0 *corev1.PodList + var r1 error + if rf, ok := ret.Get(0).(func(*v1.RedisFailover) (*corev1.PodList, error)); ok { + return rf(rFailover) + } + if rf, ok := ret.Get(0).(func(*v1.RedisFailover) *corev1.PodList); ok { + r0 = rf(rFailover) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*corev1.PodList) + } + } + + if rf, ok := ret.Get(1).(func(*v1.RedisFailover) error); ok { + r1 = rf(rFailover) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // GetSentinelsIPs provides a mock function with given fields: rFailover func (_m *RedisFailoverCheck) GetSentinelsIPs(rFailover *v1.RedisFailover) ([]string, error) { ret := _m.Called(rFailover) diff --git a/operator/redisfailover/checker.go b/operator/redisfailover/checker.go index b1e5f833a..3c1fe92e1 100644 --- a/operator/redisfailover/checker.go +++ b/operator/redisfailover/checker.go @@ -7,6 +7,7 @@ import ( redisfailoverv1 "github.com/freshworks/redis-operator/api/redisfailover/v1" "github.com/freshworks/redis-operator/metrics" + rfservice "github.com/freshworks/redis-operator/operator/redisfailover/service" ) // UpdateRedisesPods if the running version of pods are equal to the statefulset one @@ -216,13 +217,24 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e return err } + // Get pods to resolve DNS names to IPs for sentinel monitoring + // Sentinel only accepts IP addresses, not DNS names + // We need to get the pods to map DNS names back to IPs + pods, err := r.rfChecker.GetRedisesPods(rf) + if err != nil { + return err + } + + // Convert master address (which might be DNS) to IP for sentinel monitoring + masterIP := rfservice.GetPodIPFromAddress(master, rf, pods) + port := getRedisPort(rf.Spec.Redis.Port) for _, sip := range sentinels { - err = r.rfChecker.CheckSentinelMonitor(sip, rf.MasterName(), master, port) + err = r.rfChecker.CheckSentinelMonitor(sip, rf.MasterName(), masterIP, port) setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_WRONG_MASTER, sip, err) if err != nil { r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Fixing sentinel not monitoring expected master: %s", err.Error()) - if err := r.rfHealer.NewSentinelMonitor(sip, master, rf); err != nil { + if err := r.rfHealer.NewSentinelMonitor(sip, masterIP, rf); err != nil { return err } } diff --git a/operator/redisfailover/checker_test.go b/operator/redisfailover/checker_test.go index a6f6b35bb..a8dfa3276 100644 --- a/operator/redisfailover/checker_test.go +++ b/operator/redisfailover/checker_test.go @@ -635,6 +635,23 @@ func TestCheckAndHeal(t *testing.T) { if allowSentinels && !expErr && continueTests { mrfc.On("GetSentinelsIPs", rf).Once().Return([]string{sentinel}, nil) + // Create a PodList for DNS resolution (GetRedisesPods is called to resolve DNS names to IPs) + // Only needed in non-bootstrapping mode - bootstrapping mode uses external host directly + if !test.bootstrapping { + podList := &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "rfr-redisfailover-0", + }, + Status: corev1.PodStatus{ + PodIP: master, + }, + }, + }, + } + mrfc.On("GetRedisesPods", rf).Once().Return(podList, nil) + } if test.sentinelMonitorOK { if test.bootstrapping { mrfc.On("CheckSentinelMonitor", sentinel, rf.MasterName(), bootstrapMaster, bootstrapMasterPort).Once().Return(nil) diff --git a/operator/redisfailover/ensurer.go b/operator/redisfailover/ensurer.go index d5ffae530..5c38eece0 100644 --- a/operator/redisfailover/ensurer.go +++ b/operator/redisfailover/ensurer.go @@ -9,7 +9,8 @@ import ( // Ensure is called to ensure all of the resources associated with a RedisFailover are created func (w *RedisFailoverHandler) Ensure(rf *redisfailoverv1.RedisFailover, labels map[string]string, or []metav1.OwnerReference, metricsClient metrics.Recorder) error { - if rf.Spec.Redis.Exporter.Enabled { + // Create headless service if IP mode is disabled OR exporter is enabled + if rf.Spec.Redis.DisableIPMode || rf.Spec.Redis.Exporter.Enabled { if err := w.rfService.EnsureRedisService(rf, labels, or); err != nil { return err } diff --git a/operator/redisfailover/service/check.go b/operator/redisfailover/service/check.go index f4f776093..124389951 100644 --- a/operator/redisfailover/service/check.go +++ b/operator/redisfailover/service/check.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "strconv" + "strings" "time" appsv1 "k8s.io/api/apps/v1" @@ -40,6 +41,7 @@ type RedisFailoverCheck interface { IsRedisRunning(rFailover *redisfailoverv1.RedisFailover) bool IsSentinelRunning(rFailover *redisfailoverv1.RedisFailover) bool IsClusterRunning(rFailover *redisfailoverv1.RedisFailover) bool + GetRedisesPods(rFailover *redisfailoverv1.RedisFailover) (*corev1.PodList, error) } // RedisFailoverChecker is our implementation of RedisFailoverCheck interface @@ -156,7 +158,8 @@ func (r *RedisFailoverChecker) CheckAllSlavesFromMaster(master string, rf *redis rport := getRedisPort(rf.Spec.Redis.Port) for _, rp := range rps.Items { - if rp.Status.PodIP == master { + podAddress := GetPodAddress(&rp, rf) + if podAddress == master { err = r.setMasterLabelIfNecessary(rf.Namespace, rp) if err != nil { return err @@ -176,13 +179,27 @@ func (r *RedisFailoverChecker) CheckAllSlavesFromMaster(master string, rf *redis } } - slave, err := r.redisClient.GetSlaveOf(rp.Status.PodIP, rport, password) + slave, err := r.redisClient.GetSlaveOf(podAddress, rport, password) if err != nil { - r.logger.Errorf("Get slave of master failed, maybe this node is not ready, pod ip: %s", rp.Status.PodIP) + r.logger.Errorf("get slave of master failed, maybe this node is not ready, pod address: %s", podAddress) return err } + // Compare master with what Redis returns + // Redis may return either DNS name or IP depending on how it was configured if slave != "" && slave != master { - return fmt.Errorf("slave %s don't have the master %s, has %s", rp.Status.PodIP, master, slave) + // If they don't match directly, try resolving both to IPs for comparison + // (in case one is DNS and the other is IP, but they refer to the same pod) + masterIP := GetPodIPFromAddress(master, rf, rps) + slaveIP := GetPodIPFromAddress(slave, rf, rps) + if slaveIP != masterIP { + return fmt.Errorf("slave %s don't have the master %s, has %s", podAddress, master, slave) + } + // They resolve to the same IP, but formats differ + // If IP mode is disabled and master is DNS name but slave is IP, prefer DNS name + if rf.Spec.Redis.DisableIPMode && strings.Contains(master, ".svc.cluster.local") && !strings.Contains(slave, ".svc.cluster.local") { + // Master is DNS name, slave is IP - reconfigure to use DNS name + return fmt.Errorf("slave %s configured with IP %s but should use DNS name %s for DNS mode stability", podAddress, slave, master) + } } } return nil @@ -221,12 +238,13 @@ func (r *RedisFailoverChecker) CheckIfMasterLocalhost(rFailover *redisfailoverv1 for _, sip := range redisIps { master, err := r.redisClient.GetSlaveOf(sip, rport, password) if err != nil { - r.logger.Warningf("CheckIfMasterLocalhost -- GetSlaveOf Failed") + r.logger.Warningf("CheckIfMasterLocalhost -- GetSlaveOf Failed for address %s", sip) return false, err } else if master == "" { r.logger.Warningf("CheckIfMasterLocalhost -- Master already available ?? check manually") return false, errors.New("unexpected master state, fix manually") } else { + // Check if master is localhost (127.0.0.1) - this check works for both IP and DNS if master == "127.0.0.1" { lhmaster++ } @@ -312,12 +330,22 @@ func (r *RedisFailoverChecker) CheckSentinelMonitor(sentinel, masterName string, } // GetMasterIP connects to all redis and returns the master of the redis failover +// When IP mode is disabled, returns DNS name if the master pod is Ready, otherwise returns IP func (r *RedisFailoverChecker) GetMasterIP(rf *redisfailoverv1.RedisFailover) (string, error) { - rips, err := r.GetRedisesIPs(rf) + // Get pods first to avoid duplicate calls (GetRedisesIPs also calls GetStatefulSetPods) + pods, err := r.k8sService.GetStatefulSetPods(rf.Namespace, GetRedisName(rf)) if err != nil { return "", err } + // Build list of Redis addresses from pods (same logic as GetRedisesIPs) + rips := []string{} + for _, rp := range pods.Items { + if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running pods + rips = append(rips, GetPodAddress(&rp, rf)) + } + } + password, err := k8s.GetRedisPassword(r.k8sService, rf) if err != nil { return "", err @@ -328,11 +356,30 @@ func (r *RedisFailoverChecker) GetMasterIP(rf *redisfailoverv1.RedisFailover) (s for _, rip := range rips { master, err := r.redisClient.IsMaster(rip, rport, password) if err != nil { - r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod ip: %s", rip) + r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod address: %s", rip) continue } if master { - masters = append(masters, rip) + // If rip is already a DNS name, use it directly + // Otherwise, find the pod and get its DNS name if IP mode is disabled + if strings.Contains(rip, ".svc.cluster.local") { + masters = append(masters, rip) + } else { + // rip is an IP, find the matching pod and get DNS name if IP mode is disabled + foundPod := false + for _, pod := range pods.Items { + if pod.Status.PodIP == rip { + masterAddress := GetPodAddress(&pod, rf) + masters = append(masters, masterAddress) + foundPod = true + break + } + } + // If we didn't find a matching pod, use the IP as fallback + if !foundPod { + masters = append(masters, rip) + } + } } } @@ -361,7 +408,7 @@ func (r *RedisFailoverChecker) GetNumberMasters(rf *redisfailoverv1.RedisFailove for _, rip := range rips { master, err := r.redisClient.IsMaster(rip, rport, password) if err != nil { - r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod ip: %s", rip) + r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod address: %s", rip) continue } if master { @@ -371,7 +418,7 @@ func (r *RedisFailoverChecker) GetNumberMasters(rf *redisfailoverv1.RedisFailove return nMasters, nil } -// GetRedisesIPs returns the IPs of the Redis nodes +// GetRedisesIPs returns the addresses (IPs or DNS names) of the Redis nodes func (r *RedisFailoverChecker) GetRedisesIPs(rf *redisfailoverv1.RedisFailover) ([]string, error) { redises := []string{} rps, err := r.k8sService.GetStatefulSetPods(rf.Namespace, GetRedisName(rf)) @@ -380,7 +427,7 @@ func (r *RedisFailoverChecker) GetRedisesIPs(rf *redisfailoverv1.RedisFailover) } for _, rp := range rps.Items { if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running pods - redises = append(redises, rp.Status.PodIP) + redises = append(redises, GetPodAddress(&rp, rf)) } } return redises, nil @@ -414,7 +461,8 @@ func (r *RedisFailoverChecker) GetMaxRedisPodTime(rf *redisfailoverv1.RedisFailo } start := redisNode.Status.StartTime.Round(time.Second) alive := time.Since(start) - r.logger.Debugf("Pod %s has been alive for %.f seconds", redisNode.Status.PodIP, alive.Seconds()) + podAddress := GetPodAddress(&redisNode, rf) + r.logger.Debugf("Pod %s (address: %s) has been alive for %.f seconds", redisNode.Name, podAddress, alive.Seconds()) if alive > maxTime { maxTime = alive } @@ -438,7 +486,8 @@ func (r *RedisFailoverChecker) GetRedisesSlavesPods(rf *redisfailoverv1.RedisFai rport := getRedisPort(rf.Spec.Redis.Port) for _, rp := range rps.Items { if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running - master, err := r.redisClient.IsMaster(rp.Status.PodIP, rport, password) + podAddress := GetPodAddress(&rp, rf) + master, err := r.redisClient.IsMaster(podAddress, rport, password) if err != nil { return []string{}, err } @@ -465,7 +514,8 @@ func (r *RedisFailoverChecker) GetRedisesMasterPod(rFailover *redisfailoverv1.Re rport := getRedisPort(rFailover.Spec.Redis.Port) for _, rp := range rps.Items { if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running - master, err := r.redisClient.IsMaster(rp.Status.PodIP, rport, password) + podAddress := GetPodAddress(&rp, rFailover) + master, err := r.redisClient.IsMaster(podAddress, rport, password) if err != nil { return "", err } @@ -540,6 +590,11 @@ func (r *RedisFailoverChecker) IsClusterRunning(rFailover *redisfailoverv1.Redis return r.IsSentinelRunning(rFailover) && r.IsRedisRunning(rFailover) } +// GetRedisesPods returns the PodList of Redis pods +func (r *RedisFailoverChecker) GetRedisesPods(rFailover *redisfailoverv1.RedisFailover) (*corev1.PodList, error) { + return r.k8sService.GetStatefulSetPods(rFailover.Namespace, GetRedisName(rFailover)) +} + func getRedisPort(p int32) string { return strconv.Itoa(int(p)) } diff --git a/operator/redisfailover/service/disableipmode_test.go b/operator/redisfailover/service/disableipmode_test.go new file mode 100644 index 000000000..2baf3c358 --- /dev/null +++ b/operator/redisfailover/service/disableipmode_test.go @@ -0,0 +1,577 @@ +package service_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/freshworks/redis-operator/log" + "github.com/freshworks/redis-operator/metrics" + mK8SService "github.com/freshworks/redis-operator/mocks/service/k8s" + mRedisService "github.com/freshworks/redis-operator/mocks/service/redis" + rfservice "github.com/freshworks/redis-operator/operator/redisfailover/service" +) + +func TestGetPodAddress(t *testing.T) { + tests := []struct { + name string + disableIPMode bool + podReady bool + podIP string + podName string + expectedResult string + }{ + { + name: "IP mode enabled (default) - returns PodIP", + disableIPMode: false, + podReady: true, + podIP: "10.0.0.1", + podName: "rfr-redisfailover-0", + expectedResult: "10.0.0.1", + }, + { + name: "IP mode disabled, pod ready - returns DNS name", + disableIPMode: true, + podReady: true, + podIP: "10.0.0.1", + podName: "rfr-test-0", + expectedResult: "rfr-test-0.rfr-test.testns.svc.cluster.local", + }, + { + name: "IP mode disabled, pod not ready - returns PodIP", + disableIPMode: true, + podReady: false, + podIP: "10.0.0.1", + podName: "rfr-test-0", + expectedResult: "10.0.0.1", + }, + { + name: "IP mode disabled, no PodIP - returns empty", + disableIPMode: true, + podReady: true, + podIP: "", + podName: "rfr-test-0", + expectedResult: "", + }, + { + name: "IP mode disabled, pod ready, no PodIP - returns empty", + disableIPMode: true, + podReady: true, + podIP: "", + podName: "rfr-test-0", + expectedResult: "", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert := assert.New(t) + + rf := generateRF() + rf.Spec.Redis.DisableIPMode = test.disableIPMode + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: test.podName, + }, + Status: corev1.PodStatus{ + PodIP: test.podIP, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: func() corev1.ConditionStatus { + if test.podReady { + return corev1.ConditionTrue + } + return corev1.ConditionFalse + }(), + }, + }, + }, + } + + result := rfservice.GetPodAddress(pod, rf) + assert.Equal(test.expectedResult, result) + }) + } +} + +func TestGetPodIPFromAddress(t *testing.T) { + tests := []struct { + name string + disableIPMode bool + address string + pods *corev1.PodList + expectedResult string + }{ + { + name: "IP address input - returns as-is", + disableIPMode: false, + address: "10.0.0.1", + pods: &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "rfr-redisfailover-0"}, + Status: corev1.PodStatus{PodIP: "10.0.0.1"}, + }, + }, + }, + expectedResult: "10.0.0.1", + }, + { + name: "DNS name input, IP mode disabled - resolves to PodIP", + disableIPMode: true, + address: "rfr-redisfailover-0.rfr-redisfailover.testns.svc.cluster.local", + pods: &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "rfr-redisfailover-0"}, + Status: corev1.PodStatus{ + PodIP: "10.0.0.1", + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + }, + }, + expectedResult: "10.0.0.1", + }, + { + name: "DNS name input, pod not ready - resolves by ordinal", + disableIPMode: true, + address: "rfr-redisfailover-1.rfr-redisfailover.testns.svc.cluster.local", + pods: &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "rfr-redisfailover-1"}, + Status: corev1.PodStatus{ + PodIP: "10.0.0.2", + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionFalse, + }, + }, + }, + }, + }, + }, + expectedResult: "10.0.0.2", + }, + { + name: "Unknown DNS name - returns address as-is", + disableIPMode: true, + address: "unknown-pod.rfr-redisfailover.testns.svc.cluster.local", + pods: &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{Name: "rfr-redisfailover-0"}, + Status: corev1.PodStatus{PodIP: "10.0.0.1"}, + }, + }, + }, + expectedResult: "unknown-pod.rfr-redisfailover.testns.svc.cluster.local", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert := assert.New(t) + + rf := generateRF() + rf.Spec.Redis.DisableIPMode = test.disableIPMode + + result := rfservice.GetPodIPFromAddress(test.address, rf, test.pods) + assert.Equal(test.expectedResult, result) + }) + } +} + +func TestRedisServiceWithDisableIPMode(t *testing.T) { + tests := []struct { + name string + disableIPMode bool + exporterEnabled bool + expectedClusterIP string + expectedPorts int + shouldCreate bool + }{ + { + name: "IP mode disabled, exporter disabled - headless service with Redis port", + disableIPMode: true, + exporterEnabled: false, + expectedClusterIP: "None", + expectedPorts: 1, + shouldCreate: true, + }, + { + name: "IP mode disabled, exporter enabled - headless service with Redis and exporter ports", + disableIPMode: true, + exporterEnabled: true, + expectedClusterIP: "None", + expectedPorts: 2, + shouldCreate: true, + }, + { + name: "IP mode enabled, exporter enabled - headless service with exporter port only", + disableIPMode: false, + exporterEnabled: true, + expectedClusterIP: "None", + expectedPorts: 1, + shouldCreate: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert := assert.New(t) + + rf := generateRF() + rf.Spec.Redis.DisableIPMode = test.disableIPMode + rf.Spec.Redis.Exporter.Enabled = test.exporterEnabled + rf.Spec.Redis.Port = 6379 + + generatedService := corev1.Service{} + + ms := &mK8SService.Services{} + if test.shouldCreate { + ms.On("CreateOrUpdateService", rf.Namespace, mock.Anything).Once().Run(func(args mock.Arguments) { + s := args.Get(1).(*corev1.Service) + generatedService = *s + }).Return(nil) + } + + client := rfservice.NewRedisFailoverKubeClient(ms, log.Dummy, metrics.Dummy) + err := client.EnsureRedisService(rf, nil, []metav1.OwnerReference{{Name: "testing"}}) + + if test.shouldCreate { + assert.NoError(err) + assert.Equal(corev1.ClusterIPNone, generatedService.Spec.ClusterIP) + assert.Equal(test.expectedPorts, len(generatedService.Spec.Ports)) + } + }) + } +} + +func TestGetMasterIPWithDisableIPMode(t *testing.T) { + tests := []struct { + name string + disableIPMode bool + podReady bool + expectedDNS bool + }{ + { + name: "IP mode enabled - returns IP", + disableIPMode: false, + podReady: true, + expectedDNS: false, + }, + { + name: "IP mode disabled, pod ready - returns DNS name", + disableIPMode: true, + podReady: true, + expectedDNS: true, + }, + { + name: "IP mode disabled, pod not ready - returns IP", + disableIPMode: true, + podReady: false, + expectedDNS: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert := assert.New(t) + + rf := generateRF() + rf.Spec.Redis.DisableIPMode = test.disableIPMode + + pods := &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "rfr-test-0", // Pod name should match StatefulSet name from GetRedisName(rf) + }, + Status: corev1.PodStatus{ + PodIP: "10.0.0.1", + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: func() corev1.ConditionStatus { + if test.podReady { + return corev1.ConditionTrue + } + return corev1.ConditionFalse + }(), + }, + }, + }, + }, + }, + } + + ms := &mK8SService.Services{} + ms.On("GetStatefulSetPods", namespace, rfservice.GetRedisName(rf)).Once().Return(pods, nil) + mr := &mRedisService.Client{} + address := "10.0.0.1" + if test.expectedDNS { + address = "rfr-test-0.rfr-test.testns.svc.cluster.local" + } + mr.On("IsMaster", address, "0", "").Once().Return(true, nil) + + checker := rfservice.NewRedisFailoverChecker(ms, mr, log.DummyLogger{}, metrics.Dummy) + master, err := checker.GetMasterIP(rf) + + assert.NoError(err) + if test.expectedDNS { + assert.Contains(master, ".svc.cluster.local") + assert.Contains(master, "rfr-test") + } else { + assert.Equal("10.0.0.1", master) + } + }) + } +} + +func TestCheckAllSlavesFromMasterWithDisableIPMode(t *testing.T) { + tests := []struct { + name string + disableIPMode bool + master string + slave string + shouldError bool + errorContains string + }{ + { + name: "IP mode enabled - master and slave both IPs", + disableIPMode: false, + master: "10.0.0.1", + slave: "10.0.0.1", // slave pod (10.0.0.2) reports master as 10.0.0.1 + shouldError: false, + }, + { + name: "IP mode disabled - master DNS, slave DNS", + disableIPMode: true, + master: "rfr-test-0.rfr-test.testns.svc.cluster.local", + slave: "rfr-test-0.rfr-test.testns.svc.cluster.local", // Slave reports DNS name + shouldError: false, + }, + { + name: "IP mode disabled - master DNS, slave IP (should error for reconfiguration)", + disableIPMode: true, + master: "rfr-test-0.rfr-test.testns.svc.cluster.local", + slave: "10.0.0.1", // Slave reports IP even though master is DNS + shouldError: true, // Should error to force reconfiguration to use DNS + errorContains: "should use DNS name", + }, + { + name: "IP mode disabled - master DNS, slave different IP", + disableIPMode: true, + master: "rfr-test-0.rfr-test.testns.svc.cluster.local", + slave: "10.0.0.2", + shouldError: true, + errorContains: "don't have the master", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert := assert.New(t) + + rf := generateRF() + rf.Spec.Redis.DisableIPMode = test.disableIPMode + + // Create pods list - master pod and slave pod + // Master is always pod 0, slave is always pod 1 + pods := &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "rfr-test-0", + }, + Status: corev1.PodStatus{ + PodIP: "10.0.0.1", + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "rfr-test-1", + }, + Status: corev1.PodStatus{ + PodIP: "10.0.0.2", + Phase: corev1.PodRunning, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + }, + } + + ms := &mK8SService.Services{} + ms.On("GetStatefulSetPods", namespace, rfservice.GetRedisName(rf)).Once().Return(pods, nil) + ms.On("UpdatePodLabels", namespace, mock.AnythingOfType("string"), mock.Anything).Return(nil) + ms.On("UpdatePodAnnotations", namespace, mock.AnythingOfType("string"), mock.Anything).Return(nil) + ms.On("RemovePodAnnotation", namespace, mock.AnythingOfType("string"), mock.AnythingOfType("string")).Return(nil) + + mr := &mRedisService.Client{} + // The function iterates through ALL pods and calls GetSlaveOf for each + // The check `if podAddress == master` only affects label/annotation setting + // GetSlaveOf is always called for every pod + + // Determine addresses for pods based on disableIPMode + pod0Address := "10.0.0.1" + pod1Address := "10.0.0.2" + if test.disableIPMode { + pod0Address = "rfr-test-0.rfr-test.testns.svc.cluster.local" + pod1Address = "rfr-test-1.rfr-test.testns.svc.cluster.local" + } + + // GetSlaveOf is called for all pods + // Pod 0 (master) will return empty string or its own address + // Pod 1 (slave) will return the master address + mr.On("GetSlaveOf", pod0Address, "0", "").Once().Return("", nil) // Master has no slaveof + mr.On("GetSlaveOf", pod1Address, "0", "").Once().Return(test.slave, nil) // Slave points to master + + checker := rfservice.NewRedisFailoverChecker(ms, mr, log.DummyLogger{}, metrics.Dummy) + err := checker.CheckAllSlavesFromMaster(test.master, rf) + + if test.shouldError { + assert.Error(err) + if test.errorContains != "" { + assert.Contains(err.Error(), test.errorContains) + } + } else { + assert.NoError(err) + } + }) + } +} + +func TestSetRedisCustomConfigWithDisableIPMode(t *testing.T) { + tests := []struct { + name string + disableIPMode bool + podReady bool + address string + expectedConfigContains string + }{ + { + name: "IP mode enabled - no replica-announce-ip added", + disableIPMode: false, + podReady: true, + address: "10.0.0.1", + expectedConfigContains: "", + }, + { + name: "IP mode disabled, pod ready - replica-announce-ip added", + disableIPMode: true, + podReady: true, + address: "10.0.0.1", + expectedConfigContains: "replica-announce-ip rfr-test-0.rfr-test.testns.svc.cluster.local", + }, + { + name: "IP mode disabled, pod not ready - no replica-announce-ip added", + disableIPMode: true, + podReady: false, + address: "10.0.0.1", + expectedConfigContains: "", + }, + { + name: "IP mode disabled, address is DNS name, pod ready - replica-announce-ip added", + disableIPMode: true, + podReady: true, + address: "rfr-test-0.rfr-test.testns.svc.cluster.local", + expectedConfigContains: "replica-announce-ip rfr-test-0.rfr-test.testns.svc.cluster.local", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert := assert.New(t) + + rf := generateRF() + rf.Spec.Redis.DisableIPMode = test.disableIPMode + rf.Spec.Redis.CustomConfig = []string{"some-config"} + + pods := &corev1.PodList{ + Items: []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "rfr-test-0", + }, + Status: corev1.PodStatus{ + PodIP: "10.0.0.1", + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodReady, + Status: func() corev1.ConditionStatus { + if test.podReady { + return corev1.ConditionTrue + } + return corev1.ConditionFalse + }(), + }, + }, + }, + }, + }, + } + + ms := &mK8SService.Services{} + // getRedisPodMemoryUsage always calls GetStatefulSetPods + ms.On("GetStatefulSetPods", namespace, rfservice.GetRedisName(rf)).Once().Return(pods, nil) + // If disableIPMode is true, SetRedisCustomConfig also calls GetStatefulSetPods + if test.disableIPMode { + ms.On("GetStatefulSetPods", namespace, rfservice.GetRedisName(rf)).Once().Return(pods, nil) + } + ms.On("GetPod", namespace, mock.AnythingOfType("string")).Return(nil, nil).Maybe() + + mr := &mRedisService.Client{} + var capturedConfig []string + mr.On("SetCustomRedisConfig", test.address, "0", mock.MatchedBy(func(config []string) bool { + capturedConfig = config + return true + }), "").Once().Return(nil) + + healer := rfservice.NewRedisFailoverHealer(ms, mr, log.DummyLogger{}) + err := healer.SetRedisCustomConfig(test.address, rf) + + assert.NoError(err) + if test.expectedConfigContains != "" { + found := false + for _, config := range capturedConfig { + if config == test.expectedConfigContains { + found = true + break + } + } + assert.True(found, "Expected config '%s' not found in %v", test.expectedConfigContains, capturedConfig) + } else { + // Verify replica-announce-ip is NOT in the config + for _, config := range capturedConfig { + assert.NotContains(config, "replica-announce-ip", "replica-announce-ip should not be present when IP mode is enabled or pod is not ready") + } + } + }) + } +} diff --git a/operator/redisfailover/service/generator.go b/operator/redisfailover/service/generator.go index b9439f0a9..5c788208f 100644 --- a/operator/redisfailover/service/generator.go +++ b/operator/redisfailover/service/generator.go @@ -90,12 +90,51 @@ func generateRedisService(rf *redisfailoverv1.RedisFailover, labels map[string]s selectorLabels := generateSelectorLabels(redisRoleName, rf.Name) labels = util.MergeLabels(labels, selectorLabels) - defaultAnnotations := map[string]string{ - "prometheus.io/scrape": "true", - "prometheus.io/port": "http", - "prometheus.io/path": "/metrics", + + var annotations map[string]string + var ports []corev1.ServicePort + + if rf.Spec.Redis.DisableIPMode { + // Headless service (IP mode disabled): include Redis port, and exporter port if enabled + ports = []corev1.ServicePort{ + { + Name: "redis", + Port: rf.Spec.Redis.Port, + TargetPort: intstr.FromString("redis"), + Protocol: corev1.ProtocolTCP, + }, + } + if rf.Spec.Redis.Exporter.Enabled { + ports = append(ports, corev1.ServicePort{ + Port: exporterPort, + Protocol: corev1.ProtocolTCP, + Name: exporterPortName, + }) + defaultAnnotations := map[string]string{ + "prometheus.io/scrape": "true", + "prometheus.io/port": "http", + "prometheus.io/path": "/metrics", + } + annotations = util.MergeLabels(defaultAnnotations, rf.Spec.Redis.ServiceAnnotations) + } else { + annotations = rf.Spec.Redis.ServiceAnnotations + } + } else { + // Exporter-only service (non-headless) + defaultAnnotations := map[string]string{ + "prometheus.io/scrape": "true", + "prometheus.io/port": "http", + "prometheus.io/path": "/metrics", + } + annotations = util.MergeLabels(defaultAnnotations, rf.Spec.Redis.ServiceAnnotations) + ports = []corev1.ServicePort{ + { + Port: exporterPort, + Protocol: corev1.ProtocolTCP, + Name: exporterPortName, + }, + } } - annotations := util.MergeLabels(defaultAnnotations, rf.Spec.Redis.ServiceAnnotations) return &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ @@ -108,14 +147,8 @@ func generateRedisService(rf *redisfailoverv1.RedisFailover, labels map[string]s Spec: corev1.ServiceSpec{ Type: corev1.ServiceTypeClusterIP, ClusterIP: corev1.ClusterIPNone, - Ports: []corev1.ServicePort{ - { - Port: exporterPort, - Protocol: corev1.ProtocolTCP, - Name: exporterPortName, - }, - }, - Selector: selectorLabels, + Ports: ports, + Selector: selectorLabels, }, } } diff --git a/operator/redisfailover/service/heal.go b/operator/redisfailover/service/heal.go index d964f1f55..7b937f18c 100644 --- a/operator/redisfailover/service/heal.go +++ b/operator/redisfailover/service/heal.go @@ -120,7 +120,8 @@ func (r *RedisFailoverHealer) MakeMaster(ip string, rf *redisfailoverv1.RedisFai return err } for _, rp := range rps.Items { - if rp.Status.PodIP == ip { + podAddress := GetPodAddress(&rp, rf) + if podAddress == ip { err = r.setMasterLabelIfNecessary(rf.Namespace, rp) if err != nil { return err @@ -157,12 +158,13 @@ func (r *RedisFailoverHealer) SetOldestAsMaster(rf *redisfailoverv1.RedisFailove port := getRedisPort(rf.Spec.Redis.Port) newMasterIP := "" for _, pod := range ssp.Items { + podAddress := GetPodAddress(&pod, rf) if newMasterIP == "" { - newMasterIP = pod.Status.PodIP - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Infof("New master is %s with ip %s", pod.Name, newMasterIP) + newMasterIP = podAddress + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Infof("New master is %s with address %s", pod.Name, newMasterIP) if err := r.redisClient.MakeMaster(newMasterIP, port, password); err != nil { newMasterIP = "" - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Make new master failed, master ip: %s, error: %v", pod.Status.PodIP, err) + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Make new master failed, master address: %s, error: %v", podAddress, err) continue } @@ -175,11 +177,11 @@ func (r *RedisFailoverHealer) SetOldestAsMaster(rf *redisfailoverv1.RedisFailove return err } - newMasterIP = pod.Status.PodIP + newMasterIP = podAddress } else { r.logger.Infof("Making pod %s slave of %s", pod.Name, newMasterIP) - if err := r.redisClient.MakeSlaveOfWithPort(pod.Status.PodIP, newMasterIP, port, password); err != nil { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Make slave failed, slave pod ip: %s, master ip: %s, error: %v", pod.Status.PodIP, newMasterIP, err) + if err := r.redisClient.MakeSlaveOfWithPort(podAddress, newMasterIP, port, password); err != nil { + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Make slave failed, slave pod address: %s, master address: %s, error: %v", podAddress, newMasterIP, err) } err = r.setSlaveLabelIfNecessary(rf.Namespace, pod) @@ -214,18 +216,19 @@ func (r *RedisFailoverHealer) SetMasterOnAll(masterIP string, rf *redisfailoverv port := getRedisPort(rf.Spec.Redis.Port) for _, pod := range ssp.Items { + podAddress := GetPodAddress(&pod, rf) //During this configuration process if there is a new master selected , bailout isMaster, err := r.redisClient.IsMaster(masterIP, port, password) if err != nil || !isMaster { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("check master failed maybe this node is not ready(ip changed), or sentinel made a switch: %s", masterIP) + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("check master failed maybe this node is not ready(address changed), or sentinel made a switch: %s", masterIP) return err } else { - if pod.Status.PodIP == masterIP { + if podAddress == masterIP { continue } r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Infof("Making pod %s slave of %s", pod.Name, masterIP) - if err := r.redisClient.MakeSlaveOfWithPort(pod.Status.PodIP, masterIP, port, password); err != nil { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Make slave failed, slave ip: %s, master ip: %s, error: %v", pod.Status.PodIP, masterIP, err) + if err := r.redisClient.MakeSlaveOfWithPort(podAddress, masterIP, port, password); err != nil { + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("Make slave failed, slave address: %s, master address: %s, error: %v", podAddress, masterIP, err) return err } @@ -256,8 +259,9 @@ func (r *RedisFailoverHealer) SetExternalMasterOnAll(masterIP, masterPort string } for _, pod := range ssp.Items { + podAddress := GetPodAddress(&pod, rf) r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Infof("Making pod %s slave of %s:%s", pod.Name, masterIP, masterPort) - if err := r.redisClient.MakeSlaveOfWithPort(pod.Status.PodIP, masterIP, masterPort, password); err != nil { + if err := r.redisClient.MakeSlaveOfWithPort(podAddress, masterIP, masterPort, password); err != nil { return err } @@ -307,8 +311,8 @@ func (r *RedisFailoverHealer) SetSentinelCustomConfig(ip string, rf *redisfailov } // SetRedisCustomConfig will call redis to set the configuration given in config -func (r *RedisFailoverHealer) SetRedisCustomConfig(ip string, rf *redisfailoverv1.RedisFailover) error { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Setting the custom config on redis %s...", ip) +func (r *RedisFailoverHealer) SetRedisCustomConfig(address string, rf *redisfailoverv1.RedisFailover) error { + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Debugf("Setting the custom config on redis %s...", address) password, err := k8s.GetRedisPassword(r.k8sService, rf) if err != nil { @@ -316,36 +320,75 @@ func (r *RedisFailoverHealer) SetRedisCustomConfig(ip string, rf *redisfailoverv } // Get memory usage for this Redis pod - podMemory, err := r.getRedisPodMemoryUsage(ip, rf) + podMemory, err := r.getRedisPodMemoryUsage(address, rf) if err != nil { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Failed to get memory usage for Redis IP %s: %v", ip, err) + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Failed to get memory usage for Redis address %s: %v", address, err) // Continue with podMemory = 0, which will skip memory validation } // Validate and filter maxmemory configuration - validatedConfig, err := r.validateMaxMemoryConfig(rf.Spec.Redis.CustomConfig, podMemory, ip, rf) + validatedConfig, err := r.validateMaxMemoryConfig(rf.Spec.Redis.CustomConfig, podMemory, address, rf) if err != nil { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("maxmemory validation failed for Redis IP %s: %v", ip, err) + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("maxmemory validation failed for Redis address %s: %v", address, err) + } + + // If IP mode is disabled, add replica-announce-ip with the pod's DNS name + // This ensures the master sees replicas by their DNS names in INFO replication + if rf.Spec.Redis.DisableIPMode { + // Get pods to find the DNS name for this address + pods, err := r.k8sService.GetStatefulSetPods(rf.Namespace, GetRedisName(rf)) + if err == nil { + // Find the pod matching this address and get its DNS name + var replicaAnnounceIP string + for _, pod := range pods.Items { + podAddress := GetPodAddress(&pod, rf) + // Match by DNS name or by IP + if podAddress == address || pod.Status.PodIP == address { + // Get DNS name for this pod + if isPodReady(&pod) && pod.Status.PodIP != "" { + serviceName := GetRedisName(rf) + replicaAnnounceIP = fmt.Sprintf("%s.%s.%s.svc.cluster.local", pod.Name, serviceName, rf.Namespace) + } else { + // Pod not ready yet, skip setting replica-announce-ip + // It will be set on next reconciliation when pod is ready + break + } + break + } + } + // If we found a DNS name, add replica-announce-ip to the config + if replicaAnnounceIP != "" && strings.Contains(replicaAnnounceIP, ".svc.cluster.local") { + replicaAnnounceConfig := fmt.Sprintf("replica-announce-ip %s", replicaAnnounceIP) + validatedConfig = append(validatedConfig, replicaAnnounceConfig) + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Infof("Adding replica-announce-ip %s for Redis pod at %s", replicaAnnounceIP, address) + } + } } port := getRedisPort(rf.Spec.Redis.Port) - return r.redisClient.SetCustomRedisConfig(ip, port, validatedConfig, password) + return r.redisClient.SetCustomRedisConfig(address, port, validatedConfig, password) } -// getRedisPodMemoryUsage retrieves the memory limit or request for a Redis pod by its IP -func (r *RedisFailoverHealer) getRedisPodMemoryUsage(redisIP string, rf *redisfailoverv1.RedisFailover) (int64, error) { - // Get the specific pod by listing with field selector for IP - pods, err := r.k8sService.ListPodsWithFieldSelector(rf.Namespace, "status.podIP="+redisIP) +// getRedisPodMemoryUsage retrieves the memory limit or request for a Redis pod by its address (IP or DNS) +func (r *RedisFailoverHealer) getRedisPodMemoryUsage(redisAddress string, rf *redisfailoverv1.RedisFailover) (int64, error) { + // Get all Redis pods and find the one matching the address + rps, err := r.k8sService.GetStatefulSetPods(rf.Namespace, GetRedisName(rf)) if err != nil { - return 0, fmt.Errorf("failed to get pod with IP %s: %w", redisIP, err) + return 0, fmt.Errorf("failed to get pods: %w", err) } - if len(pods.Items) == 0 { - return 0, fmt.Errorf("no pod found with IP %s", redisIP) + var targetPod *v1.Pod + for _, pod := range rps.Items { + podAddress := GetPodAddress(&pod, rf) + if podAddress == redisAddress { + targetPod = &pod + break + } } - // Use the first pod (there should only be one with a specific IP) - targetPod := pods.Items[0] + if targetPod == nil { + return 0, fmt.Errorf("no pod found with address %s", redisAddress) + } // Check if the pod is running if targetPod.Status.Phase != v1.PodRunning { @@ -371,7 +414,7 @@ func (r *RedisFailoverHealer) getRedisPodMemoryUsage(redisIP string, rf *redisfa } // validateMaxMemoryConfig validates maxmemory configuration against pod memory using percentage-based threshold -func (r *RedisFailoverHealer) validateMaxMemoryConfig(customConfig []string, podMemory int64, ip string, rf *redisfailoverv1.RedisFailover) ([]string, error) { +func (r *RedisFailoverHealer) validateMaxMemoryConfig(customConfig []string, podMemory int64, address string, rf *redisfailoverv1.RedisFailover) ([]string, error) { validatedConfig := make([]string, 0, len(customConfig)) var validationErrors []error @@ -390,7 +433,7 @@ func (r *RedisFailoverHealer) validateMaxMemoryConfig(customConfig []string, pod maxMemoryStr := parts[1] maxMemoryBytes, err := ParseMemorySize(maxMemoryStr) if err != nil { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Failed to parse maxmemory value '%s' for Redis IP %s: %v, skipping this config line", maxMemoryStr, ip, err) + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Failed to parse maxmemory value '%s' for Redis address %s: %v, skipping this config line", maxMemoryStr, address, err) validationErrors = append(validationErrors, fmt.Errorf("invalid maxmemory configuration '%s': %w", configLine, err)) continue // Skip this invalid config line but continue with others } @@ -400,7 +443,7 @@ func (r *RedisFailoverHealer) validateMaxMemoryConfig(customConfig []string, pod if podMemory > 0 { allowedMemory := podMemory * int64(100-reservedPodMemoryPercent) / 100 if maxMemoryBytes > allowedMemory { - r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("maxmemory configuration %d bytes exceeds allowed limit %d bytes (%d%% of pod memory %d bytes, overhead: %d%%) for Redis IP %s, skipping this config line", maxMemoryBytes, allowedMemory, 100-reservedPodMemoryPercent, podMemory, reservedPodMemoryPercent, ip) + r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Errorf("maxmemory configuration %d bytes exceeds allowed limit %d bytes (%d%% of pod memory %d bytes, overhead: %d%%) for Redis address %s, skipping this config line", maxMemoryBytes, allowedMemory, 100-reservedPodMemoryPercent, podMemory, reservedPodMemoryPercent, address) validationErrors = append(validationErrors, fmt.Errorf("maxmemory %d bytes exceeds allowed limit %d bytes (%d%% of pod memory %d bytes, overhead: %d%%)", maxMemoryBytes, allowedMemory, 100-reservedPodMemoryPercent, podMemory, reservedPodMemoryPercent)) continue // Skip this invalid maxmemory line but continue with others } diff --git a/operator/redisfailover/service/names.go b/operator/redisfailover/service/names.go index a70751e98..8c4be673f 100644 --- a/operator/redisfailover/service/names.go +++ b/operator/redisfailover/service/names.go @@ -2,8 +2,10 @@ package service import ( "fmt" + "strings" redisfailoverv1 "github.com/freshworks/redis-operator/api/redisfailover/v1" + corev1 "k8s.io/api/core/v1" ) // GetRedisShutdownConfigMapName returns the name for redis configmap @@ -45,3 +47,71 @@ func GetRedisSlaveName(rf *redisfailoverv1.RedisFailover) string { func generateName(typeName, metaName string) string { return fmt.Sprintf("%s%s-%s", baseName, typeName, metaName) } + +// isPodReady checks if a pod is in Ready state +func isPodReady(pod *corev1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady { + return condition.Status == corev1.ConditionTrue + } + } + return false +} + +// GetPodAddress returns either the DNS name (if IP mode is disabled and pod is ready) or PodIP +// DNS names are only available when pods are Ready +func GetPodAddress(pod *corev1.Pod, rf *redisfailoverv1.RedisFailover) string { + if rf.Spec.Redis.DisableIPMode && isPodReady(pod) && pod.Status.PodIP != "" { + serviceName := GetRedisName(rf) + namespace := rf.Namespace + + return fmt.Sprintf("%s.%s.%s.svc.cluster.local", pod.Name, serviceName, namespace) + } + // Fall back to PodIP if IP mode is enabled (default), pod is not ready, or no IP yet + return pod.Status.PodIP +} + +// GetPodIPFromAddress takes an address (DNS name or IP) and returns the PodIP +// This is needed for Sentinel monitoring which only accepts IP addresses +func GetPodIPFromAddress(address string, rf *redisfailoverv1.RedisFailover, pods *corev1.PodList) string { + // If it's already an IP address (doesn't contain ".svc.cluster.local"), return it + if !strings.Contains(address, ".svc.cluster.local") { + // Check if it's a valid IP format (has 3 dots) + parts := strings.Split(address, ".") + if len(parts) == 4 { + // Likely an IP address, return as-is + return address + } + } + + // It's a DNS name, find the matching pod and return its IP + // We need to match by DNS name or by extracting the pod ordinal from the DNS name + for _, pod := range pods.Items { + // Try matching by the address we'd generate for this pod + podAddress := GetPodAddress(&pod, rf) + if podAddress == address { + return pod.Status.PodIP + } + + // Also try matching by DNS name directly (in case pod isn't ready yet) + // Extract ordinal from DNS name: rfr-redisfailover-0.rfr-redisfailover.basic.svc.cluster.local -> 0 + if strings.Contains(address, ".svc.cluster.local") { + dnsParts := strings.Split(address, ".") + if len(dnsParts) > 0 { + hostnamePart := dnsParts[0] // e.g., "rfr-redisfailover-0" + hostnameParts := strings.Split(hostnamePart, "-") + if len(hostnameParts) > 0 { + ordinalStr := hostnameParts[len(hostnameParts)-1] + // Extract ordinal from pod name + podNameParts := strings.Split(pod.Name, "-") + if len(podNameParts) > 0 && podNameParts[len(podNameParts)-1] == ordinalStr { + return pod.Status.PodIP + } + } + } + } + } + + // If we can't find it, return the address as-is (fallback) + return address +} diff --git a/service/redis/client.go b/service/redis/client.go index 124d18883..5ae705482 100644 --- a/service/redis/client.go +++ b/service/redis/client.go @@ -48,7 +48,7 @@ const ( sentinelsNumberREString = "sentinels=([0-9]+)" slaveNumberREString = "slaves=([0-9]+)" sentinelStatusREString = "status=([a-z]+)" - redisMasterHostREString = "master_host:([0-9.]+)" + redisMasterHostREString = "master_host:([^\\r\\n]+)" redisRoleMaster = "role:master" redisSyncing = "master_sync_in_progress:1" redisMasterSillPending = "master_host:127.0.0.1" diff --git a/test/integration/redisfailover/creation_test.go b/test/integration/redisfailover/creation_test.go index 0afff9b06..2d8db6d25 100644 --- a/test/integration/redisfailover/creation_test.go +++ b/test/integration/redisfailover/creation_test.go @@ -8,6 +8,7 @@ import ( "fmt" "net" "path/filepath" + "strings" "testing" "time" @@ -792,3 +793,275 @@ func (c *clients) testMaxMemoryHealingValidation(t *testing.T, currentNamespace err = c.rfClient.DatabasesV1().RedisFailovers(currentNamespace).Delete(context.Background(), rfName, metav1.DeleteOptions{}) require.NoError(err) } + +func TestRedisFailoverDisableIPMode(t *testing.T) { + require := require.New(t) + currentNamespace := "disableipmode-" + namespace + + // Create signal channels. + stopC := make(chan struct{}) + errC := make(chan error) + ctx, cancel := context.WithCancel(context.Background()) + + flags := &utils.CMDFlags{ + KubeConfig: filepath.Join(homedir.HomeDir(), ".kube", "config"), + Development: true, + } + + // Kubernetes clients. + k8sClient, customClient, aeClientset, err := utils.CreateKubernetesClients(flags) + require.NoError(err) + + // Create the redis clients + redisClient := redis.New(metrics.Dummy) + + clients := clients{ + k8sClient: k8sClient, + rfClient: customClient, + aeClient: aeClientset, + redisClient: redisClient, + } + + // Create kubernetes service. + k8sservice := k8s.New(k8sClient, customClient, aeClientset, log.Dummy, metrics.Dummy) + + // Prepare namespace + prepErr := clients.prepareNS(currentNamespace) + require.NoError(prepErr) + + // Give time to the namespace to be ready + time.Sleep(15 * time.Second) + + // Create operator and run. + redisfailoverOperator, err := redisfailover.New(redisfailover.Config{}, k8sservice, k8sClient, currentNamespace, redisClient, metrics.Dummy, log.Dummy) + require.NoError(err) + + go func() { + errC <- redisfailoverOperator.Run(ctx) + }() + + // Prepare cleanup for when the test ends + defer cancel() + defer clients.cleanup(stopC, currentNamespace) + + // Give time to the operator to start + time.Sleep(15 * time.Second) + + // Create secret + secret := &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: authSecretPath, + Namespace: currentNamespace, + }, + Data: map[string][]byte{ + "password": []byte(testPass), + }, + } + _, err = k8sClient.CoreV1().Secrets(currentNamespace).Create(context.Background(), secret, metav1.CreateOptions{}) + require.NoError(err) + + // Check that if we create a RedisFailover with disableIPMode, it is created + ok := t.Run("Check Custom Resource Creation with DisableIPMode", func(t *testing.T) { + clients.testCRCreationWithDisableIPMode(t, currentNamespace) + }) + require.True(ok, "the custom resource has to be created to continue") + + // Giving time to the operator to create the resources + time.Sleep(3 * time.Minute) + + // Check that headless service is created + t.Run("Check Headless Service Creation", func(t *testing.T) { + clients.testHeadlessService(t, currentNamespace) + }) + + // Check that DNS names are resolvable + t.Run("Check DNS Name Resolution", func(t *testing.T) { + clients.testDNSNameResolution(t, currentNamespace) + }) + + // Check that Redis replication uses DNS names + t.Run("Check Redis Replication Uses DNS Names", func(t *testing.T) { + clients.testRedisReplicationDNSNames(t, currentNamespace) + }) + +} + +func (c *clients) testCRCreationWithDisableIPMode(t *testing.T, currentNamespace string) { + assert := assert.New(t) + rfName := "disableipmode-test" + toCreate := &redisfailoverv1.RedisFailover{ + ObjectMeta: metav1.ObjectMeta{ + Name: rfName, + Namespace: currentNamespace, + }, + Spec: redisfailoverv1.RedisFailoverSpec{ + Redis: redisfailoverv1.RedisSettings{ + Replicas: int32(3), + DisableIPMode: true, // Enable DNS mode + Exporter: redisfailoverv1.Exporter{ + Enabled: true, + }, + }, + Sentinel: redisfailoverv1.SentinelSettings{ + Replicas: int32(3), + DisableMyMaster: true, + }, + Auth: redisfailoverv1.AuthSettings{ + SecretPath: authSecretPath, + }, + }, + } + + _, err := c.rfClient.DatabasesV1().RedisFailovers(currentNamespace).Create(context.Background(), toCreate, metav1.CreateOptions{}) + assert.NoError(err) + + gotRF, err := c.rfClient.DatabasesV1().RedisFailovers(currentNamespace).Get(context.Background(), rfName, metav1.GetOptions{}) + assert.NoError(err) + assert.True(gotRF.Spec.Redis.DisableIPMode, "DisableIPMode should be enabled") +} + +func (c *clients) testHeadlessService(t *testing.T, currentNamespace string) { + assert := assert.New(t) + require := require.New(t) + rfName := "disableipmode-test" + + // Get the Redis service + serviceName := fmt.Sprintf("rfr-%s", rfName) + svc, err := c.k8sClient.CoreV1().Services(currentNamespace).Get(context.Background(), serviceName, metav1.GetOptions{}) + require.NoError(err, "Redis service should exist") + + // Verify it's a headless service (ClusterIP: None) + assert.Equal(corev1.ClusterIPNone, svc.Spec.ClusterIP, "Service should be headless (ClusterIP: None)") + + // Verify Redis port is included + redisPortFound := false + for _, port := range svc.Spec.Ports { + if port.Name == "redis" { + redisPortFound = true + assert.Equal(int32(6379), port.Port, "Redis port should be 6379") + break + } + } + assert.True(redisPortFound, "Redis port should be included in headless service") +} + +func (c *clients) testDNSNameResolution(t *testing.T, currentNamespace string) { + assert := assert.New(t) + require := require.New(t) + rfName := "disableipmode-test" + + // Get Redis StatefulSet + redisSS, err := c.k8sClient.AppsV1().StatefulSets(currentNamespace).Get(context.Background(), fmt.Sprintf("rfr-%s", rfName), metav1.GetOptions{}) + require.NoError(err) + + // Get all Redis pods + listOptions := metav1.ListOptions{ + LabelSelector: labels.FormatLabels(redisSS.Spec.Selector.MatchLabels), + } + redisPodList, err := c.k8sClient.CoreV1().Pods(currentNamespace).List(context.Background(), listOptions) + require.NoError(err) + require.True(len(redisPodList.Items) > 0, "Should have Redis pods") + + // Verify DNS names can be constructed and pods are ready + serviceName := fmt.Sprintf("rfr-%s", rfName) + for _, pod := range redisPodList.Items { + // Check pod is ready + podReady := false + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + podReady = true + break + } + } + if !podReady { + t.Logf("Pod %s is not ready yet, skipping DNS check", pod.Name) + continue + } + + // Extract ordinal from pod name (e.g., "rfr-disableipmode-test-0" -> "0") + // Pod name format: - + parts := strings.Split(pod.Name, "-") + if len(parts) == 0 { + t.Logf("Cannot extract ordinal from pod name %s", pod.Name) + continue + } + ordinal := parts[len(parts)-1] + expectedDNS := fmt.Sprintf("%s-%s.%s.%s.svc.cluster.local", + serviceName, + ordinal, + serviceName, + currentNamespace) + + // Verify pod has an IP (required for DNS resolution) + assert.NotEmpty(pod.Status.PodIP, "Pod should have an IP address for DNS resolution") + t.Logf("Pod %s should be accessible at DNS name: %s", pod.Name, expectedDNS) + } +} + +func (c *clients) testRedisReplicationDNSNames(t *testing.T, currentNamespace string) { + assert := assert.New(t) + require := require.New(t) + rfName := "disableipmode-test" + + // Get Redis StatefulSet + redisSS, err := c.k8sClient.AppsV1().StatefulSets(currentNamespace).Get(context.Background(), fmt.Sprintf("rfr-%s", rfName), metav1.GetOptions{}) + require.NoError(err) + + // Get all Redis pods + listOptions := metav1.ListOptions{ + LabelSelector: labels.FormatLabels(redisSS.Spec.Selector.MatchLabels), + } + redisPodList, err := c.k8sClient.CoreV1().Pods(currentNamespace).List(context.Background(), listOptions) + require.NoError(err) + require.True(len(redisPodList.Items) > 0, "Should have Redis pods") + + // Find master and slaves + var masterPod *corev1.Pod + var slavePods []corev1.Pod + + for _, pod := range redisPodList.Items { + if pod.Status.PodIP == "" { + continue + } + isMaster, err := c.redisClient.IsMaster(pod.Status.PodIP, "6379", testPass) + require.NoError(err) + if isMaster { + masterPod = &pod + } else { + slavePods = append(slavePods, pod) + } + } + + require.NotNil(masterPod, "Should have a master pod") + require.True(len(slavePods) > 0, "Should have at least one slave pod") + + // Construct expected DNS name for master + serviceName := fmt.Sprintf("rfr-%s", rfName) + masterParts := strings.Split(masterPod.Name, "-") + masterOrdinal := masterParts[len(masterParts)-1] + expectedMasterDNS := fmt.Sprintf("%s-%s.%s.%s.svc.cluster.local", + serviceName, masterOrdinal, serviceName, currentNamespace) + + // Check that slaves are configured to replicate from master + // Note: Redis INFO replication shows resolved IPs, but we can verify the replication works + // and check operator logs would show DNS names being used + for _, slavePod := range slavePods { + slaveOf, err := c.redisClient.GetSlaveOf(slavePod.Status.PodIP, "6379", testPass) + require.NoError(err) + // The slave should be pointing to the master's IP (Redis resolves DNS to IP) + assert.Equal(masterPod.Status.PodIP, slaveOf, "Slave %s should replicate from master %s", slavePod.Name, masterPod.Name) + + // Verify replication is working + slaveClient := rediscli.NewClient(&rediscli.Options{ + Addr: net.JoinHostPort(slavePod.Status.PodIP, "6379"), + Password: testPass, + DB: 0, + }) + defer slaveClient.Close() + + info, err := slaveClient.Info(context.TODO(), "replication").Result() + require.NoError(err) + assert.Contains(info, fmt.Sprintf("master_host:%s", masterPod.Status.PodIP), "Slave should show master IP in INFO replication") + t.Logf("Slave %s is replicating from master %s (DNS: %s, IP: %s)", slavePod.Name, masterPod.Name, expectedMasterDNS, masterPod.Status.PodIP) + } +}