Skip to content
Open
1 change: 1 addition & 0 deletions api/redisfailover/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type RedisSettings struct {
CustomStartupProbe *corev1.Probe `json:"customStartupProbe,omitempty"`
DisablePodDisruptionBudget bool `json:"disablePodDisruptionBudget,omitempty"`
PreventMasterEviction bool `json:"preventMasterEviction,omitempty"`
Headless bool `json:"headless,omitempty"`
Comment thread
prashanna-frsh marked this conversation as resolved.
Outdated
}

// SentinelSettings defines the specification of the sentinel cluster
Expand Down
2 changes: 1 addition & 1 deletion charts/redisoperator/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ appVersion: 1.3.0
apiVersion: v1
description: A Helm chart for the Spotahome Redis Operator
name: redis-operator
version: 3.3.2
version: 3.3.3
home: https://github.com/freshworks/redis-operator
keywords:
- "golang"
Expand Down
1 change: 1 addition & 0 deletions example/redisfailover/basic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ spec:
memory: 100Mi
redis:
replicas: 3
headless: true
resources:
requests:
cpu: 100m
Expand Down
2 changes: 2 additions & 0 deletions manifests/databases.spotahome.com_redisfailovers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6836,6 +6836,8 @@ spec:
type: integer
preventMasterEviction:
type: boolean
headless:
type: boolean
priorityClassName:
type: string
replicas:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6833,6 +6833,8 @@ spec:
type: integer
preventMasterEviction:
type: boolean
headless:
type: boolean
priorityClassName:
type: string
replicas:
Expand Down
31 changes: 31 additions & 0 deletions mocks/operator/redisfailover/service/RedisFailoverCheck.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 14 additions & 2 deletions operator/redisfailover/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (

redisfailoverv1 "github.com/freshworks/redis-operator/api/redisfailover/v1"
"github.com/freshworks/redis-operator/metrics"
rfservice "github.com/freshworks/redis-operator/operator/redisfailover/service"
)

// UpdateRedisesPods if the running version of pods are equal to the statefulset one
Expand Down Expand Up @@ -216,13 +217,24 @@ func (r *RedisFailoverHandler) CheckAndHeal(rf *redisfailoverv1.RedisFailover) e
return err
}

// Get pods to resolve DNS names to IPs for sentinel monitoring
// Sentinel only accepts IP addresses, not DNS names
// We need to get the pods to map DNS names back to IPs
pods, err := r.rfChecker.GetRedisesPods(rf)
if err != nil {
return err
}

// Convert master address (which might be DNS) to IP for sentinel monitoring
masterIP := rfservice.GetPodIPFromAddress(master, rf, pods)

port := getRedisPort(rf.Spec.Redis.Port)
for _, sip := range sentinels {
err = r.rfChecker.CheckSentinelMonitor(sip, rf.MasterName(), master, port)
err = r.rfChecker.CheckSentinelMonitor(sip, rf.MasterName(), masterIP, port)
setRedisCheckerMetrics(r.mClient, "sentinel", rf.Namespace, rf.Name, metrics.SENTINEL_WRONG_MASTER, sip, err)
if err != nil {
r.logger.WithField("redisfailover", rf.ObjectMeta.Name).WithField("namespace", rf.ObjectMeta.Namespace).Warningf("Fixing sentinel not monitoring expected master: %s", err.Error())
if err := r.rfHealer.NewSentinelMonitor(sip, master, rf); err != nil {
if err := r.rfHealer.NewSentinelMonitor(sip, masterIP, rf); err != nil {
return err
}
}
Expand Down
17 changes: 17 additions & 0 deletions operator/redisfailover/checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,23 @@ func TestCheckAndHeal(t *testing.T) {

if allowSentinels && !expErr && continueTests {
mrfc.On("GetSentinelsIPs", rf).Once().Return([]string{sentinel}, nil)
// Create a PodList for DNS resolution (GetRedisesPods is called to resolve DNS names to IPs)
// Only needed in non-bootstrapping mode - bootstrapping mode uses external host directly
if !test.bootstrapping {
podList := &corev1.PodList{
Items: []corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "rfr-redisfailover-0",
},
Status: corev1.PodStatus{
PodIP: master,
},
},
},
}
mrfc.On("GetRedisesPods", rf).Once().Return(podList, nil)
}
if test.sentinelMonitorOK {
if test.bootstrapping {
mrfc.On("CheckSentinelMonitor", sentinel, rf.MasterName(), bootstrapMaster, bootstrapMasterPort).Once().Return(nil)
Expand Down
3 changes: 2 additions & 1 deletion operator/redisfailover/ensurer.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (

// Ensure is called to ensure all of the resources associated with a RedisFailover are created
func (w *RedisFailoverHandler) Ensure(rf *redisfailoverv1.RedisFailover, labels map[string]string, or []metav1.OwnerReference, metricsClient metrics.Recorder) error {
if rf.Spec.Redis.Exporter.Enabled {
// Create headless service if headless is enabled OR exporter is enabled
if rf.Spec.Redis.Headless || rf.Spec.Redis.Exporter.Enabled {
if err := w.rfService.EnsureRedisService(rf, labels, or); err != nil {
return err
}
Expand Down
83 changes: 69 additions & 14 deletions operator/redisfailover/service/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"strconv"
"strings"
"time"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -40,6 +41,7 @@ type RedisFailoverCheck interface {
IsRedisRunning(rFailover *redisfailoverv1.RedisFailover) bool
IsSentinelRunning(rFailover *redisfailoverv1.RedisFailover) bool
IsClusterRunning(rFailover *redisfailoverv1.RedisFailover) bool
GetRedisesPods(rFailover *redisfailoverv1.RedisFailover) (*corev1.PodList, error)
}

// RedisFailoverChecker is our implementation of RedisFailoverCheck interface
Expand Down Expand Up @@ -156,7 +158,8 @@ func (r *RedisFailoverChecker) CheckAllSlavesFromMaster(master string, rf *redis

rport := getRedisPort(rf.Spec.Redis.Port)
for _, rp := range rps.Items {
if rp.Status.PodIP == master {
podAddress := GetPodAddress(&rp, rf)
if podAddress == master {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we have this is a enum or static variable rather than a dynamic string?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

podAddress will be different for different clusters, don't think we'll be able to have it as ENUM or static variable

<statefulset-name>-<ordinal-number>.<service-name>.<namespace>.svc.cluster.local

err = r.setMasterLabelIfNecessary(rf.Namespace, rp)
if err != nil {
return err
Expand All @@ -176,13 +179,27 @@ func (r *RedisFailoverChecker) CheckAllSlavesFromMaster(master string, rf *redis
}
}

slave, err := r.redisClient.GetSlaveOf(rp.Status.PodIP, rport, password)
slave, err := r.redisClient.GetSlaveOf(podAddress, rport, password)
if err != nil {
r.logger.Errorf("Get slave of master failed, maybe this node is not ready, pod ip: %s", rp.Status.PodIP)
r.logger.Errorf("Get slave of master failed, maybe this node is not ready, pod address: %s", podAddress)
Comment thread
prashanna-frsh marked this conversation as resolved.
Outdated
return err
}
// Compare master with what Redis returns
// Redis may return either DNS name or IP depending on how it was configured
if slave != "" && slave != master {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: same as above comment

return fmt.Errorf("slave %s don't have the master %s, has %s", rp.Status.PodIP, master, slave)
// If they don't match directly, try resolving both to IPs for comparison
// (in case one is DNS and the other is IP, but they refer to the same pod)
masterIP := GetPodIPFromAddress(master, rf, rps)
slaveIP := GetPodIPFromAddress(slave, rf, rps)
if slaveIP != masterIP {
return fmt.Errorf("slave %s don't have the master %s, has %s", podAddress, master, slave)
}
// They resolve to the same IP, but formats differ
// If headless is enabled and master is DNS name but slave is IP, prefer DNS name
if rf.Spec.Redis.Headless && strings.Contains(master, ".svc.cluster.local") && !strings.Contains(slave, ".svc.cluster.local") {
// Master is DNS name, slave is IP - reconfigure to use DNS name
return fmt.Errorf("slave %s configured with IP %s but should use DNS name %s for headless mode stability", podAddress, slave, master)
}
}
}
return nil
Expand Down Expand Up @@ -221,12 +238,13 @@ func (r *RedisFailoverChecker) CheckIfMasterLocalhost(rFailover *redisfailoverv1
for _, sip := range redisIps {
master, err := r.redisClient.GetSlaveOf(sip, rport, password)
if err != nil {
r.logger.Warningf("CheckIfMasterLocalhost -- GetSlaveOf Failed")
r.logger.Warningf("CheckIfMasterLocalhost -- GetSlaveOf Failed for address %s", sip)
return false, err
} else if master == "" {
r.logger.Warningf("CheckIfMasterLocalhost -- Master already available ?? check manually")
return false, errors.New("unexpected master state, fix manually")
} else {
// Check if master is localhost (127.0.0.1) - this check works for both IP and DNS
if master == "127.0.0.1" {
lhmaster++
}
Expand Down Expand Up @@ -312,12 +330,22 @@ func (r *RedisFailoverChecker) CheckSentinelMonitor(sentinel, masterName string,
}

// GetMasterIP connects to all redis and returns the master of the redis failover
// When headless is enabled, returns DNS name if the master pod is Ready, otherwise returns IP
func (r *RedisFailoverChecker) GetMasterIP(rf *redisfailoverv1.RedisFailover) (string, error) {
rips, err := r.GetRedisesIPs(rf)
// Get pods first to avoid duplicate calls (GetRedisesIPs also calls GetStatefulSetPods)
pods, err := r.k8sService.GetStatefulSetPods(rf.Namespace, GetRedisName(rf))
if err != nil {
return "", err
}

// Build list of Redis addresses from pods (same logic as GetRedisesIPs)
rips := []string{}
for _, rp := range pods.Items {
if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running pods
rips = append(rips, GetPodAddress(&rp, rf))
}
}

password, err := k8s.GetRedisPassword(r.k8sService, rf)
if err != nil {
return "", err
Expand All @@ -328,11 +356,30 @@ func (r *RedisFailoverChecker) GetMasterIP(rf *redisfailoverv1.RedisFailover) (s
for _, rip := range rips {
master, err := r.redisClient.IsMaster(rip, rport, password)
if err != nil {
r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod ip: %s", rip)
r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod address: %s", rip)
continue
}
if master {
masters = append(masters, rip)
// If rip is already a DNS name, use it directly
// Otherwise, find the pod and get its DNS name if headless is enabled
if strings.Contains(rip, ".svc.cluster.local") {
masters = append(masters, rip)
} else {
// rip is an IP, find the matching pod and get DNS name if headless is enabled
foundPod := false
for _, pod := range pods.Items {
if pod.Status.PodIP == rip {
masterAddress := GetPodAddress(&pod, rf)
masters = append(masters, masterAddress)
foundPod = true
break
}
}
// If we didn't find a matching pod, use the IP as fallback
if !foundPod {
masters = append(masters, rip)
}
}
}
}

Expand Down Expand Up @@ -361,7 +408,7 @@ func (r *RedisFailoverChecker) GetNumberMasters(rf *redisfailoverv1.RedisFailove
for _, rip := range rips {
master, err := r.redisClient.IsMaster(rip, rport, password)
if err != nil {
r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod ip: %s", rip)
r.logger.Errorf("Get redis info failed, maybe this node is not ready, pod address: %s", rip)
continue
}
if master {
Expand All @@ -371,7 +418,7 @@ func (r *RedisFailoverChecker) GetNumberMasters(rf *redisfailoverv1.RedisFailove
return nMasters, nil
}

// GetRedisesIPs returns the IPs of the Redis nodes
// GetRedisesIPs returns the addresses (IPs or DNS names) of the Redis nodes
func (r *RedisFailoverChecker) GetRedisesIPs(rf *redisfailoverv1.RedisFailover) ([]string, error) {
redises := []string{}
rps, err := r.k8sService.GetStatefulSetPods(rf.Namespace, GetRedisName(rf))
Expand All @@ -380,7 +427,7 @@ func (r *RedisFailoverChecker) GetRedisesIPs(rf *redisfailoverv1.RedisFailover)
}
for _, rp := range rps.Items {
if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running pods
redises = append(redises, rp.Status.PodIP)
redises = append(redises, GetPodAddress(&rp, rf))
}
}
return redises, nil
Expand Down Expand Up @@ -414,7 +461,8 @@ func (r *RedisFailoverChecker) GetMaxRedisPodTime(rf *redisfailoverv1.RedisFailo
}
start := redisNode.Status.StartTime.Round(time.Second)
alive := time.Since(start)
r.logger.Debugf("Pod %s has been alive for %.f seconds", redisNode.Status.PodIP, alive.Seconds())
podAddress := GetPodAddress(&redisNode, rf)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we are doing getPodAddress() call at many places and doing computation each and every time. Is it possible to do it in a base funtion and pass it around?

r.logger.Debugf("Pod %s (address: %s) has been alive for %.f seconds", redisNode.Name, podAddress, alive.Seconds())
if alive > maxTime {
maxTime = alive
}
Expand All @@ -438,7 +486,8 @@ func (r *RedisFailoverChecker) GetRedisesSlavesPods(rf *redisfailoverv1.RedisFai
rport := getRedisPort(rf.Spec.Redis.Port)
for _, rp := range rps.Items {
if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running
master, err := r.redisClient.IsMaster(rp.Status.PodIP, rport, password)
podAddress := GetPodAddress(&rp, rf)
master, err := r.redisClient.IsMaster(podAddress, rport, password)
if err != nil {
return []string{}, err
}
Expand All @@ -465,7 +514,8 @@ func (r *RedisFailoverChecker) GetRedisesMasterPod(rFailover *redisfailoverv1.Re
rport := getRedisPort(rFailover.Spec.Redis.Port)
for _, rp := range rps.Items {
if rp.Status.Phase == corev1.PodRunning && rp.DeletionTimestamp == nil { // Only work with running
master, err := r.redisClient.IsMaster(rp.Status.PodIP, rport, password)
podAddress := GetPodAddress(&rp, rFailover)
Comment thread
prashanna-frsh marked this conversation as resolved.
master, err := r.redisClient.IsMaster(podAddress, rport, password)
if err != nil {
return "", err
}
Expand Down Expand Up @@ -540,6 +590,11 @@ func (r *RedisFailoverChecker) IsClusterRunning(rFailover *redisfailoverv1.Redis
return r.IsSentinelRunning(rFailover) && r.IsRedisRunning(rFailover)
}

// GetRedisesPods returns the PodList of Redis pods
func (r *RedisFailoverChecker) GetRedisesPods(rFailover *redisfailoverv1.RedisFailover) (*corev1.PodList, error) {
return r.k8sService.GetStatefulSetPods(rFailover.Namespace, GetRedisName(rFailover))
}

func getRedisPort(p int32) string {
return strconv.Itoa(int(p))
}
Expand Down
Loading
Loading