Add new states for MariaDB Jira Blocker and Critical issues #704

Draft
wants to merge 27 commits into base: develop

Commits (27)
48ebe45
add new alert for replication check
caffeinated92 Jul 9, 2024
8380ca3
replication check blocker
caffeinated92 Jul 9, 2024
7f99daf
Merge remote-tracking branch 'origin/develop' into alert
caffeinated92 Jul 11, 2024
cd3e503
mdev csv to json
caffeinated92 Jul 11, 2024
1253bd2
Merge remote-tracking branch 'origin/develop' into mdev
caffeinated92 Jul 13, 2024
9dc6ab0
parse and load csv to JSON
caffeinated92 Jul 13, 2024
bea3850
fix writing to JSON and add verbose option
caffeinated92 Jul 15, 2024
a5c3396
rename alerts.csv to mdev.csv
caffeinated92 Jul 15, 2024
ec96c9e
Init MDEV Issues
caffeinated92 Jul 15, 2024
efa83b9
populate mdev issues to ServerMonitor
caffeinated92 Jul 15, 2024
fbe37d1
Add state for MDEV
caffeinated92 Jul 15, 2024
e04d508
split bug to replication and service
caffeinated92 Jul 15, 2024
0dd96d0
fix trimmed code
caffeinated92 Jul 15, 2024
04a920d
use 0 than space
caffeinated92 Jul 15, 2024
c8d09f4
Merge remote-tracking branch 'origin/develop' into alert
caffeinated92 Jul 15, 2024
f4e2084
use MDEV as prefix for better readability
caffeinated92 Jul 15, 2024
2311238
Use MDEVIssues for checkblockerstate function
caffeinated92 Jul 15, 2024
937db5b
set default to false for compatibility with previous version
caffeinated92 Jul 15, 2024
631d6f7
Allow different server url send same ERR state
caffeinated92 Jul 16, 2024
358b4c6
per server state open/resolve
caffeinated92 Jul 16, 2024
4e246e6
set as comma separated string for serverURL
caffeinated92 Jul 16, 2024
b0af1f8
Merge remote-tracking branch 'origin/develop' into alert
caffeinated92 Jul 16, 2024
17a288b
log state mdev
caffeinated92 Jul 16, 2024
766720c
add failovercheckblocker
caffeinated92 Jul 16, 2024
0fd6220
enable check blocker as default
caffeinated92 Jul 16, 2024
779e724
Merge remote-tracking branch 'origin/develop' into alert
caffeinated92 Jul 16, 2024
c648b0f
Merge remote-tracking branch 'origin/develop' into alert
caffeinated92 Jul 16, 2024
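
Several of the commits above (mdev csv to json, parse and load csv to JSON, fix writing to JSON and add verbose option, rename alerts.csv to mdev.csv) describe turning a CSV list of MariaDB Jira issues into a JSON file the monitor can load. That loader is not part of the diff below, so the following is only a rough sketch of what it could look like; the MDevIssue fields, the mdev.csv column order, and the helper names are assumptions, not the PR's actual code.

// Hypothetical loader: parse mdev.csv (key, severity, affected versions, summary)
// and persist it as JSON. Column order, field names, and package placement are assumed;
// header-row handling is omitted.
package config

import (
	"encoding/csv"
	"encoding/json"
	"os"
	"strings"
)

type MDevIssue struct {
	Key      string   `json:"key"`      // e.g. "MDEV-28310"
	Severity string   `json:"severity"` // e.g. "Blocker" or "Critical"
	Versions []string `json:"versions"` // affected server versions
	Summary  string   `json:"summary"`
}

// LoadMDevCSV reads the CSV file and returns the issues it contains.
func LoadMDevCSV(path string) ([]MDevIssue, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	rows, err := csv.NewReader(f).ReadAll()
	if err != nil {
		return nil, err
	}

	issues := make([]MDevIssue, 0, len(rows))
	for _, r := range rows {
		if len(r) < 4 {
			continue // skip malformed rows
		}
		issues = append(issues, MDevIssue{
			Key:      r[0],
			Severity: r[1],
			Versions: strings.Split(r[2], ";"),
			Summary:  r[3],
		})
	}
	return issues, nil
}

// WriteMDevJSON writes the parsed issues to a JSON file next to the CSV.
func WriteMDevJSON(path string, issues []MDevIssue) error {
	out, err := json.MarshalIndent(issues, "", "  ")
	if err != nil {
		return err
	}
	return os.WriteFile(path, out, 0644)
}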
4 changes: 3 additions & 1 deletion cluster/cluster.go
@@ -217,7 +217,8 @@ type Cluster struct {
crcTable *crc64.Table
SlavesOldestMasterFile SlavesOldestMasterFile
SlavesConnected int
clog *clog.Logger `json:"-"`
clog *clog.Logger `json:"-"`
MDevIssues *config.MDevIssueMap `json:"-"`
*ClusterGraphite
}

@@ -321,6 +322,7 @@ func (cluster *Cluster) Init(confs *config.ConfVersion, cfgGroup string, tlog *s
cluster.runUUID = runUUID
cluster.repmgrHostname = repmgrHostname
cluster.repmgrVersion = repmgrVersion
cluster.MDevIssues = config.NewMDevIssueMap()

cluster.InitFromConf()
cluster.NewClusterGraphite()
30 changes: 28 additions & 2 deletions cluster/cluster_chk.go
@@ -90,6 +90,12 @@ func (cluster *Cluster) isSlaveElectableForSwitchover(sl *ServerMonitor, forcing
// }
return false
}

// Skip this slave if the cluster is affected by a known replication blocker bug
if !cluster.runOnceAfterTopology && cluster.Conf.FailoverCheckBlocker && !cluster.CheckBlockerState(sl, forcingLog) {
return false
}

if cluster.Conf.SwitchGtidCheck && cluster.IsCurrentGTIDSync(sl, cluster.master) == false && cluster.Conf.RplChecks == true {
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModGeneral, config.LvlWarn, "Equal-GTID option is enabled and GTID position on slave %s differs from master. Skipping", sl.URL)
@@ -201,7 +207,7 @@ func (cluster *Cluster) isOneSlaveHeartbeatIncreasing() bool {
cluster.LogModulePrintf(cluster.Conf.Verbose, config.ConstLogModGeneral, config.LvlDbg, "SLAVE_RECEIVED_HEARTBEATS %d", status2["SLAVE_RECEIVED_HEARTBEATS"])
// }
if status2["SLAVE_RECEIVED_HEARTBEATS"] > saveheartbeats {
cluster.SetState("ERR00028", state.State{ErrType: config.LvlErr, ErrDesc: fmt.Sprintf(clusterError["ERR00028"], s.URL), ErrFrom: "CHECK"})
cluster.SetState("ERR00028", state.State{ErrType: config.LvlErr, ErrDesc: clusterError["ERR00028"], ErrFrom: "CHECK", ServerUrl: s.URL})
return true
}
}
@@ -637,7 +643,7 @@ func (cluster *Cluster) CheckTableChecksum(schema string, table string) {
if slaveSeq >= masterSeq {
break
} else {
cluster.SetState("WARN0086", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0086"], s.URL), ErrFrom: "MON", ServerUrl: s.URL})
cluster.SetState("WARN0086", state.State{ErrType: "WARNING", ErrDesc: clusterError["WARN0086"], ErrFrom: "MON", ServerUrl: s.URL})
}
time.Sleep(1 * time.Second)
}
@@ -856,3 +862,23 @@ func (cluster *Cluster) CheckDefaultUser(i bool) {
cluster.SetState("WARN0108", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["WARN0108"], out), ErrFrom: "CLUSTER"})
}
}

// CheckBlockerState checks the slave for known replication blocker bugs; it returns true when no blocker applies or when the check is disabled
func (cluster *Cluster) CheckBlockerState(sl *ServerMonitor, forcingLog bool) bool {
// Blocker check disabled: treat the slave as valid
if !cluster.Conf.FailoverCheckBlocker {
return true
}

blockers := []string{
"MDEV-28310",
}

for _, mdev := range blockers {
if sl.MDevIssues.HasMdevBug(mdev) {
return false
}
}

return true
}
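
CheckBlockerState above depends on config.MDevIssueMap and its HasMdevBug method, and cluster.Init calls config.NewMDevIssueMap(); neither the type nor the method body is shown in this diff. Below is a minimal sketch of a shape that would satisfy those call sites, assuming a concurrency-safe map keyed by MDEV identifier; the sync.Map embedding and the Set helper are guesses, not the PR's implementation.

// Hypothetical sketch of config.MDevIssueMap; the real implementation in the
// config package may differ. MDevIssue is as sketched earlier.
package config

import "sync"

type MDevIssueMap struct {
	sync.Map // key: "MDEV-xxxxx", value: MDevIssue
}

func NewMDevIssueMap() *MDevIssueMap {
	return &MDevIssueMap{}
}

// Set records or updates an issue under its MDEV key.
func (m *MDevIssueMap) Set(key string, issue MDevIssue) {
	m.Store(key, issue)
}

// HasMdevBug reports whether the given MDEV key is currently flagged.
func (m *MDevIssueMap) HasMdevBug(key string) bool {
	_, ok := m.Load(key)
	return ok
}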
53 changes: 29 additions & 24 deletions cluster/cluster_fail.go
@@ -643,7 +643,7 @@ func (cluster *Cluster) electSwitchoverGroupReplicationCandidate(l []*ServerMoni
// Return one not ignored not full , not prefered
for i, sl := range l {
if sl.IsIgnored() {
cluster.SetState("ERR00037", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00037"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00037", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00037"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}
if cluster.IsInPreferedHosts(sl) {
@@ -671,35 +671,35 @@ func (cluster *Cluster) electSwitchoverCandidate(l []*ServerMonitor, forcingLog

/* If server is in the ignore list, do not elect it in switchover */
if sl.IsIgnored() {
cluster.SetState("ERR00037", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00037"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00037", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00037"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}
if sl.IsFull {
continue
}
//Need comment//
if sl.IsRelay {
cluster.SetState("ERR00036", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00036"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00036", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00036"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}
if !sl.HasBinlog() && !sl.IsIgnored() {
cluster.SetState("ERR00013", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00013"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00013", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00013"], ErrFrom: "CHECK", ServerUrl: sl.URL})
continue
}
if cluster.Conf.MultiMaster == true && sl.State == stateMaster {
cluster.SetState("ERR00035", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00035"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00035", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00035"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}

// The tests below should run only in case of a switchover as they require the master to be up.

if cluster.isSlaveElectableForSwitchover(sl, forcingLog) == false {
cluster.SetState("ERR00034", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00034"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00034", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00034"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}
/* binlog + ping */
if cluster.isSlaveElectable(sl, forcingLog) == false {
cluster.SetState("ERR00039", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00039"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00039", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00039"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}

@@ -712,14 +712,14 @@ func (cluster *Cluster) electSwitchoverCandidate(l []*ServerMonitor, forcingLog
return i
}
if sl.HaveNoMasterOnStart == true && cluster.Conf.FailRestartUnsafe == false {
cluster.SetState("ERR00084", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00084"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00084", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00084"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}
ss, errss := sl.GetSlaveStatus(sl.ReplicationSourceName)
// not a slave
if errss != nil && cluster.Conf.FailRestartUnsafe == false {
//Skip slave in election %s have no master log file, slave might have failed
cluster.SetState("ERR00033", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00033"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00033", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00033"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}
// Fake position if none as new slave
@@ -815,23 +815,23 @@ func (cluster *Cluster) electFailoverCandidate(l []*ServerMonitor, forcingLog bo

//Need comment//
if sl.IsRelay {
cluster.SetState("ERR00036", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00036"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00036", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00036"], ErrFrom: "CHECK", ServerUrl: sl.URL})
continue
}
if sl.IsFull {
continue
}
if cluster.Conf.MultiMaster == true && sl.State == stateMaster {
cluster.SetState("ERR00035", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00035"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00035", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00035"], ErrFrom: "CHECK", ServerUrl: sl.URL})
trackposList[i].Ignoredmultimaster = true
continue
}
if sl.HaveNoMasterOnStart == true && cluster.Conf.FailRestartUnsafe == false {
cluster.SetState("ERR00084", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00084"], sl.URL), ServerUrl: sl.URL, ErrFrom: "CHECK"})
cluster.SetState("ERR00084", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00084"], ServerUrl: sl.URL, ErrFrom: "CHECK"})
continue
}
if !sl.HasBinlog() && !sl.IsIgnored() {
cluster.SetState("ERR00013", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00013"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00013", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00013"], ErrFrom: "CHECK", ServerUrl: sl.URL})
continue
}
if cluster.GetTopology() == topoMultiMasterWsrep && cluster.vmaster != nil {
@@ -850,7 +850,7 @@ func (cluster *Cluster) electFailoverCandidate(l []*ServerMonitor, forcingLog bo
ss, errss := sl.GetSlaveStatus(sl.ReplicationSourceName)
// not a slave
if errss != nil && cluster.Conf.FailRestartUnsafe == false {
cluster.SetState("ERR00033", state.State{ErrType: config.LvlWarn, ErrDesc: fmt.Sprintf(clusterError["ERR00033"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00033", state.State{ErrType: config.LvlWarn, ErrDesc: clusterError["ERR00033"], ErrFrom: "CHECK", ServerUrl: sl.URL})
trackposList[i].Ignoredreplication = true
continue
}
@@ -1028,7 +1028,7 @@ func (cluster *Cluster) isSlaveElectable(sl *ServerMonitor, forcingLog bool) boo
}
//if master is alived and IO Thread stops then not a good candidate and not forced
if ss.SlaveIORunning.String == "No" && cluster.Conf.RplChecks && !cluster.IsMasterFailed() {
cluster.SetState("ERR00087", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00087"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00087", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00087"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlWarn, "Unsafe failover condition. Slave %s IO Thread is stopped %s. Skipping", sl.URL, ss.LastIOError.String)
// }
@@ -1037,14 +1037,14 @@ func (cluster *Cluster) isSlaveElectable(sl *ServerMonitor, forcingLog bool) boo

/* binlog + ping */
if dbhelper.CheckSlavePrerequisites(sl.Conn, sl.Host, sl.DBVersion) == false {
cluster.SetState("ERR00040", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00040"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00040", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00040"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlWarn, "Slave %s does not ping or has no binlogs. Skipping", sl.URL)
// }
return false
}
if sl.IsMaintenance {
cluster.SetState("ERR00047", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00047"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00047", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00047"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlWarn, "Slave %s is in maintenance. Skipping", sl.URL)
// }
@@ -1061,24 +1061,29 @@ func (cluster *Cluster) isSlaveElectable(sl *ServerMonitor, forcingLog bool) boo
}

if ss.SlaveSQLRunning.String == "No" && cluster.Conf.RplChecks {
cluster.SetState("ERR00042", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00042"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00042", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00042"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlWarn, "Unsafe failover condition. Slave %s SQL Thread is stopped. Skipping", sl.URL)
// }
return false
}

// Skip this slave if the cluster is affected by a known replication blocker bug
if !cluster.runOnceAfterTopology && cluster.Conf.FailoverCheckBlocker && !cluster.CheckBlockerState(sl, forcingLog) {
return false
}

//if master is alived and connection issues, we have to refetch password from vault
if ss.SlaveIORunning.String == "Connecting" && !cluster.IsMasterFailed() {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlDbg, "isSlaveElect lastIOErrno: %s", ss.LastIOErrno.String)
if ss.LastIOErrno.String == "1045" {
cluster.SetState("ERR00088", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00088"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00088", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00088"], ErrFrom: "CHECK", ServerUrl: sl.URL})
sl.SetReplicationCredentialsRotation(ss)
}
}

if sl.HaveSemiSync && sl.SemiSyncSlaveStatus == false && cluster.Conf.FailSync && cluster.Conf.RplChecks {
cluster.SetState("ERR00043", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00043"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00043", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00043"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlWarn, "Semi-sync slave %s is out of sync. Skipping", sl.URL)
// }
@@ -1101,7 +1106,7 @@ func (cluster *Cluster) isSlaveValidReader(sl *ServerMonitor, forcingLog bool) b
}

if sl.IsMaintenance {
cluster.SetState("ERR00047", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00047"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00047", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00047"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlWarn, "Slave %s is in maintenance. Skipping", sl.URL)
// }
@@ -1117,15 +1122,15 @@ func (cluster *Cluster) isSlaveValidReader(sl *ServerMonitor, forcingLog bool) b
return false
}
if sl.HaveSemiSync && sl.SemiSyncSlaveStatus == false && cluster.Conf.FailSync && cluster.Conf.RplChecks {
cluster.SetState("ERR00043", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00043"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00043", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00043"], ErrFrom: "CHECK", ServerUrl: sl.URL})
if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModGeneral,LvlWarn, "Semi-sync slave %s is out of sync. Skipping", sl.URL)
}
return false
}
*/
if ss.SlaveSQLRunning.String == "No" {
cluster.SetState("ERR00042", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00042"], sl.URL), ErrFrom: "CHECK", ServerUrl: sl.URL})
cluster.SetState("ERR00042", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00042"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModWriterElection, config.LvlWarn, "Unsafe failover condition. Slave %s SQL Thread is stopped. Skipping", sl.URL)
// }
@@ -1383,7 +1388,7 @@ func (cluster *Cluster) electVirtualCandidate(oldMaster *ServerMonitor, forcingL
for i, sl := range cluster.Servers {
/* If server is in the ignore list, do not elect it */
if sl.IsIgnored() {
cluster.SetState("ERR00037", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00037"], sl.URL), ErrFrom: "CHECK"})
cluster.SetState("ERR00037", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00037"], ErrFrom: "CHECK", ServerUrl: sl.URL})
// if cluster.Conf.LogLevel > 1 || forcingLog {
cluster.LogModulePrintf(forcingLog, config.ConstLogModGeneral, config.LvlDbg, "%s is in the ignore list. Skipping", sl.URL)
// }
17 changes: 14 additions & 3 deletions cluster/cluster_log.go
@@ -459,14 +459,21 @@ func (cluster *Cluster) LogPrintState(st state.State, resolved bool) int {

tag := config.GetTagsForLog(config.ConstLogModGeneral)
cliformat := format
format = "[" + cluster.Name + "] [" + tag + "] " + padright(level, " ", 5) + " - " + format
format = "[" + cluster.Name + "][" + tag + "] " + padright(level, " ", 5) + " - " + format
if st.ServerUrl != "" {
format = format + " [" + st.ServerUrl + "]"
}

if cluster.tlog != nil && cluster.tlog.Len > 0 {
cluster.tlog.Add(format)
}

if cluster.Conf.HttpServ {

httpformat := fmt.Sprintf("[%s] %s", tag, cliformat)
if st.ServerUrl != "" {
httpformat = fmt.Sprintf("[%s] %s. Servers: [%s]", tag, cliformat, st.ServerUrl)
}
msg := s18log.HttpMessage{
Group: cluster.Name,
Level: level,
@@ -478,11 +485,15 @@ func (cluster *Cluster) LogPrintState(st state.State, resolved bool) int {
}

if cluster.Conf.Daemon {
sURL := "none"
if st.ServerUrl != "" {
sURL = st.ServerUrl
}
// wrap logrus levels
if resolved {
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "state", "status": "RESOLV", "code": st.ErrKey, "channel": "StdOut"}).Warnf(st.ErrDesc)
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "state", "status": "RESOLV", "code": st.ErrKey, "channel": "StdOut", "server": sURL}).Warnf(st.ErrDesc)
} else {
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "state", "status": "OPENED", "code": st.ErrKey, "channel": "StdOut"}).Warnf(st.ErrDesc)
log.WithFields(log.Fields{"cluster": cluster.Name, "type": "state", "status": "OPENED", "code": st.ErrKey, "channel": "StdOut", "server": sURL}).Warnf(st.ErrDesc)
}

if cluster.Conf.TeamsUrl != "" && cluster.Conf.TeamsAlertState != "" {
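
With the ServerUrl now appended in LogPrintState, a state line in the terminal log would look roughly like the following, where the cluster name and server address are made-up placeholders and the tag, level, and description come from the running configuration:

[mycluster][<tag>] <level> - <state description> [db1:3306]

The HTTP message for the same state carries the URL as "... . Servers: [db1:3306]", per the format string above.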
6 changes: 6 additions & 0 deletions cluster/cluster_set.go
@@ -1767,6 +1767,12 @@ func (cluster *Cluster) SetMonitorCaptureTrigger(value string) {
cluster.Unlock()
}

func (cluster *Cluster) SetMDevList(value *config.MDevIssueMap) {
cluster.Lock()
cluster.MDevIssues = value
cluster.Unlock()
}

func (cluster *Cluster) SetMasterNil() {
cluster.master = nil
}
4 changes: 4 additions & 0 deletions cluster/cluster_tgl.go
@@ -609,3 +609,7 @@ func (cluster *Cluster) SwitchDynamicTopology() {
func (cluster *Cluster) SwitchReplicationNoRelay() {
cluster.Conf.ReplicationNoRelay = !cluster.Conf.ReplicationNoRelay
}

func (cluster *Cluster) SwitchFailoverCheckBlocker() {
cluster.Conf.FailoverCheckBlocker = !cluster.Conf.FailoverCheckBlocker
}
4 changes: 2 additions & 2 deletions cluster/cluster_topo.go
@@ -297,7 +297,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error {
if sl.HasCycling() {
hasCycling = true
if cluster.Conf.MultiMaster == false && len(cluster.Servers) == 2 {
cluster.SetState("ERR00011", state.State{ErrType: "WARNING", ErrDesc: fmt.Sprintf(clusterError["ERR00011"]), ErrFrom: "TOPO", ServerUrl: sl.URL})
cluster.SetState("ERR00011", state.State{ErrType: "WARNING", ErrDesc: clusterError["ERR00011"], ErrFrom: "TOPO", ServerUrl: sl.URL})
// if cluster.Conf.DynamicTopology {
cluster.Conf.MultiMaster = true
cluster.Topology = topoMultiMaster
@@ -435,7 +435,7 @@ func (cluster *Cluster) TopologyDiscover(wcg *sync.WaitGroup) error {
if cluster.master == nil {
// could not detect master
if cluster.GetMaster() == nil {
cluster.SetState("ERR00012", state.State{ErrType: "ERROR", ErrDesc: fmt.Sprintf(clusterError["ERR00012"]), ErrFrom: "TOPO"})
cluster.SetState("ERR00012", state.State{ErrType: "ERROR", ErrDesc: clusterError["ERR00012"], ErrFrom: "TOPO"})
}
} else {
cluster.master.HaveHealthyReplica = false