Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions example-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ default_alert_config:
# Severity threshold defines the minimum severity level at which the alerts are sent to this channel
severity_threshold: info

webhook:
# Send alerts to a generic webhook endpoint?
# The payload follows a Grafana-like format for broad compatibility
enabled: no
# The URL to send the webhook POST request to
url: https://your-webhook-endpoint.example.com/alerts
# Severity threshold defines the minimum severity level at which the alerts are sent to this channel
severity_threshold: info

# Alert defaults shared by all chains
# If the chain stops seeing new blocks, should an alert be sent?
stalled_enabled: yes
Expand Down Expand Up @@ -166,6 +175,10 @@ chains:
# an example of disabling the pagerduty alert channel, which is enabled by default
pagerduty:
enabled: no
# An example of overriding the webhook URL for this specific chain.
# You can also disable the webhook for this chain by setting enabled: no
webhook:
url: https://chain-specific-webhook.example.com/osmosis-alerts

# This section covers our RPC providers. No LCD (aka REST) endpoints are used, only TM's RPC endpoints
# Multiple hosts are encouraged, and will be tried sequentially until a working endpoint is discovered.
Expand Down
106 changes: 106 additions & 0 deletions td2/alert.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type alertMsg struct {
disc bool
tg bool
slk bool
wh bool

severity string
resolved bool
Expand All @@ -43,6 +44,8 @@ type alertMsg struct {
slkHook string
slkMentions string

whURL string

alertConfig *AlertConfig
}

Expand All @@ -53,6 +56,7 @@ const (
tg
di
slk
wh
)

type alertMsgCache struct {
Expand All @@ -67,6 +71,7 @@ type alarmCache struct {
SentTgAlarms map[string]alertMsgCache `json:"sent_tg_alarms"`
SentDiAlarms map[string]alertMsgCache `json:"sent_di_alarms"`
SentSlkAlarms map[string]alertMsgCache `json:"sent_slk_alarms"`
SentWHAlarms map[string]alertMsgCache `json:"sent_wh_alarms"`
AllAlarms map[string]map[string]alertMsgCache `json:"sent_all_alarms"`
flappingAlarms map[string]map[string]alertMsgCache
notifyMux sync.RWMutex
Expand Down Expand Up @@ -125,6 +130,7 @@ var alarms = &alarmCache{
SentTgAlarms: make(map[string]alertMsgCache),
SentDiAlarms: make(map[string]alertMsgCache),
SentSlkAlarms: make(map[string]alertMsgCache),
SentWHAlarms: make(map[string]alertMsgCache),
AllAlarms: make(map[string]map[string]alertMsgCache),
flappingAlarms: make(map[string]map[string]alertMsgCache),
notifyMux: sync.RWMutex{},
Expand Down Expand Up @@ -160,6 +166,12 @@ func shouldNotify(msg *alertMsg, dest notifyDest) bool {
}
whichMap = alarms.SentSlkAlarms
service = "Slack"
case wh:
if !slices.Contains(SeverityThresholdToSeverities(msg.alertConfig.Webhook.SeverityThreshold), msg.severity) {
return false
}
whichMap = alarms.SentWHAlarms
service = "Webhook"
}

switch {
Expand Down Expand Up @@ -397,6 +409,98 @@ func notifyPagerduty(msg *alertMsg) (err error) {
return
}

// WebhookPayload is the top-level JSON document POSTed to a generic webhook
// endpoint. Its layout is modelled on Grafana's webhook contact point format.
// Field order is significant: encoding/json emits keys in declaration order.
type WebhookPayload struct {
	// Status is the overall state of the notification.
	Status string `json:"status"`
	// Alerts holds the individual alerts carried by this notification.
	Alerts []WebhookAlert `json:"alerts"`
	// Version identifies the payload format revision.
	Version string `json:"version"`
	// GroupKey is the key under which the alerts are grouped.
	GroupKey string `json:"groupKey"`
}

// WebhookAlert describes one alert inside a webhook payload.
// Field order is significant: encoding/json emits keys in declaration order.
type WebhookAlert struct {
	// Status is the state of this particular alert.
	Status string `json:"status"`
	// Labels are the identifying key/value pairs of the alert.
	Labels map[string]string `json:"labels"`
	// Annotations carry the human-readable key/value pairs of the alert.
	Annotations map[string]string `json:"annotations"`
	// StartsAt is the alert's start timestamp, serialized as a string.
	StartsAt string `json:"startsAt"`
	// EndsAt is the alert's end timestamp, serialized as a string.
	EndsAt string `json:"endsAt"`
	// Fingerprint is an identifier for the alert.
	Fingerprint string `json:"fingerprint"`
	// GeneratorURL is optional and is left out of the JSON when empty.
	GeneratorURL string `json:"generatorURL,omitempty"`
}

// notifyWebhook delivers msg to the generic webhook endpoint at msg.whURL as a
// JSON document built by buildWebhookPayload.
//
// Nothing is sent and nil is returned when the webhook channel is off for this
// message (msg.wh is false) or when shouldNotify rejects it, for example
// because the severity is outside the webhook's severity threshold.
//
// Otherwise the payload is POSTed with Content-Type application/json and a
// 30 second client timeout. An error is returned when no URL is configured,
// the payload cannot be encoded, the request cannot be built or sent, or the
// endpoint replies with a status outside the 2xx range.
func notifyWebhook(msg *alertMsg) (err error) {
	if !msg.wh {
		return nil
	}
	if !shouldNotify(msg, wh) {
		return nil
	}
	// Fail with a message that names the misconfiguration instead of the
	// opaque "unsupported protocol scheme" error the HTTP client would give.
	if msg.whURL == "" {
		l("⚠️ Webhook is enabled but no URL is configured for", msg.chain)
		return fmt.Errorf("webhook enabled without a url for %s", msg.chain)
	}

	data, err := json.Marshal(buildWebhookPayload(msg, time.Now()))
	if err != nil {
		l("⚠️ Could not marshal webhook payload!", err)
		return err
	}

	req, err := http.NewRequest(http.MethodPost, msg.whURL, bytes.NewReader(data))
	if err != nil {
		l("⚠️ Could not create webhook request!", err)
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		l("⚠️ Could not send webhook!", err)
		return err
	}
	// The response body is never read; only the status code matters.
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		l("⚠️ Webhook returned non-success status:", resp.StatusCode)
		return fmt.Errorf("webhook returned status %d for %s", resp.StatusCode, msg.chain)
	}

	return nil
}

// buildWebhookPayload converts msg into the single-alert payload sent to the
// webhook, using now (converted to UTC, RFC 3339) as the timestamp.
func buildWebhookPayload(msg *alertMsg, now time.Time) WebhookPayload {
	stamp := now.UTC().Format(time.RFC3339)

	status := "firing"
	// The zero time in RFC 3339 form is used while the alert has not ended.
	endsAt := "0001-01-01T00:00:00Z"
	if msg.resolved {
		status = "resolved"
		endsAt = stamp
	}

	alert := WebhookAlert{
		Status: status,
		Labels: map[string]string{
			"alertname": msg.uniqueId,
			"chain":     msg.chain,
			"severity":  msg.severity,
			"source":    "tenderduty",
		},
		Annotations: map[string]string{
			"summary":     msg.message,
			"description": msg.message,
		},
		// NOTE(review): StartsAt is the send time, not the time the condition
		// began, so for resolved alerts it equals EndsAt.
		StartsAt:    stamp,
		EndsAt:      endsAt,
		Fingerprint: msg.uniqueId,
	}

	return WebhookPayload{
		Status:   status,
		Alerts:   []WebhookAlert{alert},
		Version:  "1",
		GroupKey: msg.chain,
	}
}

func getAlarms(chain string) string {
alarms.notifyMux.RLock()
defer alarms.notifyMux.RUnlock()
Expand All @@ -422,6 +526,7 @@ func (c *Config) alert(chainName, message, severity string, resolved bool, id *s
disc: boolVal(c.DefaultAlertConfig.Discord.Enabled) && boolVal(c.Chains[chainName].Alerts.Discord.Enabled),
tg: boolVal(c.DefaultAlertConfig.Telegram.Enabled) && boolVal(c.Chains[chainName].Alerts.Telegram.Enabled),
slk: boolVal(c.DefaultAlertConfig.Slack.Enabled) && boolVal(c.Chains[chainName].Alerts.Slack.Enabled),
wh: boolVal(c.DefaultAlertConfig.Webhook.Enabled) && boolVal(c.Chains[chainName].Alerts.Webhook.Enabled),
severity: severity,
resolved: resolved,
chain: fmt.Sprintf("%s (%s)", chainName, c.Chains[chainName].ChainId),
Expand All @@ -434,6 +539,7 @@ func (c *Config) alert(chainName, message, severity string, resolved bool, id *s
discHook: c.Chains[chainName].Alerts.Discord.Webhook,
discMentions: strings.Join(c.Chains[chainName].Alerts.Discord.Mentions, " "),
slkHook: c.Chains[chainName].Alerts.Slack.Webhook,
whURL: c.Chains[chainName].Alerts.Webhook.URL,
alertConfig: &c.Chains[chainName].Alerts,
}
c.alertChan <- a
Expand Down
111 changes: 111 additions & 0 deletions td2/alert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ func createTestConfig() *Config {
Slack: SlackConfig{
Enabled: &falseBool,
},
Webhook: WebhookConfig{
Enabled: &falseBool,
},
},
},
},
Expand All @@ -51,6 +54,9 @@ func createTestConfig() *Config {
Slack: SlackConfig{
Enabled: &falseBool,
},
Webhook: WebhookConfig{
Enabled: &falseBool,
},
},
alertChan: make(chan *alertMsg, 10),
}
Expand Down Expand Up @@ -174,6 +180,7 @@ func TestShouldNotify(t *testing.T) {
SentTgAlarms: make(map[string]alertMsgCache),
SentDiAlarms: make(map[string]alertMsgCache),
SentSlkAlarms: make(map[string]alertMsgCache),
SentWHAlarms: make(map[string]alertMsgCache),
AllAlarms: make(map[string]map[string]alertMsgCache),
flappingAlarms: make(map[string]map[string]alertMsgCache),
notifyMux: sync.RWMutex{},
Expand Down Expand Up @@ -270,6 +277,7 @@ func TestShouldNotify(t *testing.T) {
testAlarms.SentTgAlarms = make(map[string]alertMsgCache)
testAlarms.SentDiAlarms = make(map[string]alertMsgCache)
testAlarms.SentSlkAlarms = make(map[string]alertMsgCache)
testAlarms.SentWHAlarms = make(map[string]alertMsgCache)
testAlarms.flappingAlarms = make(map[string]map[string]alertMsgCache)

tt.setupAlarms()
Expand Down Expand Up @@ -452,6 +460,96 @@ func TestNotifySlack(t *testing.T) {
}
}

// TestNotifyWebhook runs notifyWebhook against a local HTTP test server.
//
// For enabled webhooks it checks the returned error against the server's
// status code and also verifies that exactly one POST request with a JSON
// content type reached the server. For a disabled webhook it only checks that
// no error is returned; no server is started in that case.
func TestNotifyWebhook(t *testing.T) {
	tests := []struct {
		name           string
		msg            *alertMsg
		serverResponse int
		expectError    bool
	}{
		{
			name: "successful notification",
			msg: &alertMsg{
				wh:       true,
				chain:    "test-chain",
				message:  "test message",
				severity: "critical",
				uniqueId: "test_alert_1",
				resolved: false,
				whURL:    "", // will be set to test server URL
				alertConfig: &AlertConfig{
					Webhook: WebhookConfig{SeverityThreshold: "info"},
				},
			},
			serverResponse: 200,
			expectError:    false,
		},
		{
			name: "server error",
			msg: &alertMsg{
				wh:       true,
				chain:    "test-chain",
				message:  "test message",
				severity: "critical",
				uniqueId: "test_alert_2",
				resolved: false,
				whURL:    "", // will be set to test server URL
				alertConfig: &AlertConfig{
					Webhook: WebhookConfig{SeverityThreshold: "info"},
				},
			},
			serverResponse: 500,
			expectError:    true,
		},
		{
			name: "webhook disabled",
			msg: &alertMsg{
				wh: false,
			},
			expectError: false,
		},
	}

	// Swap in an empty alarm cache for the duration of the test and restore
	// the package-level one afterwards.
	testAlarms := &alarmCache{
		SentPdAlarms:   make(map[string]alertMsgCache),
		SentTgAlarms:   make(map[string]alertMsgCache),
		SentDiAlarms:   make(map[string]alertMsgCache),
		SentSlkAlarms:  make(map[string]alertMsgCache),
		SentWHAlarms:   make(map[string]alertMsgCache),
		AllAlarms:      make(map[string]map[string]alertMsgCache),
		flappingAlarms: make(map[string]map[string]alertMsgCache),
		notifyMux:      sync.RWMutex{},
	}
	originalAlarms := alarms
	alarms = testAlarms
	defer func() { alarms = originalAlarms }()

	// received records what the test server saw for one request.
	type received struct {
		method      string
		contentType string
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Reset alarms for each test
			testAlarms.SentWHAlarms = make(map[string]alertMsgCache)

			// The handler runs on the server's goroutine, so guard the
			// recorded requests with a mutex.
			var (
				mu   sync.Mutex
				reqs []received
			)

			if tt.msg.wh {
				server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					mu.Lock()
					reqs = append(reqs, received{
						method:      r.Method,
						contentType: r.Header.Get("Content-Type"),
					})
					mu.Unlock()
					w.WriteHeader(tt.serverResponse)
				}))
				defer server.Close()
				tt.msg.whURL = server.URL
			}

			err := notifyWebhook(tt.msg)
			if tt.expectError && err == nil {
				t.Errorf("Expected error but got none")
			}
			if !tt.expectError && err != nil {
				t.Errorf("Expected no error but got: %v", err)
			}

			if !tt.msg.wh {
				return
			}

			mu.Lock()
			defer mu.Unlock()
			if len(reqs) != 1 {
				t.Fatalf("Expected exactly 1 webhook request, got %d", len(reqs))
			}
			if reqs[0].method != http.MethodPost {
				t.Errorf("Expected method POST, got %s", reqs[0].method)
			}
			if reqs[0].contentType != "application/json" {
				t.Errorf("Expected Content-Type 'application/json', got '%s'", reqs[0].contentType)
			}
		})
	}
}

func TestConfigAlert(t *testing.T) {
// Create test config
config := &Config{
Expand All @@ -477,6 +575,10 @@ func TestConfigAlert(t *testing.T) {
Slack: SlackConfig{
Enabled: &[]bool{false}[0],
},
Webhook: WebhookConfig{
Enabled: &[]bool{true}[0],
URL: "https://test-webhook.example.com",
},
},
},
},
Expand All @@ -493,6 +595,9 @@ func TestConfigAlert(t *testing.T) {
Slack: SlackConfig{
Enabled: &[]bool{true}[0],
},
Webhook: WebhookConfig{
Enabled: &[]bool{true}[0],
},
},
}

Expand Down Expand Up @@ -523,6 +628,12 @@ func TestConfigAlert(t *testing.T) {
if alertMsg.slk != false {
t.Errorf("Expected slack to be disabled")
}
if alertMsg.wh != true {
t.Errorf("Expected webhook to be enabled")
}
if alertMsg.whURL != "https://test-webhook.example.com" {
t.Errorf("Expected webhook URL 'https://test-webhook.example.com', got '%s'", alertMsg.whURL)
}
case <-time.After(time.Second):
t.Error("Alert was not sent to channel")
}
Expand Down
4 changes: 4 additions & 0 deletions td2/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ func Run(configFile, stateFile, chainConfigDirectory string, password *string, d
if e != nil {
l(msg.chain, "error sending alert to slack", e.Error())
}
e = notifyWebhook(msg)
if e != nil {
l(msg.chain, "error sending alert to webhook", e.Error())
}
}(alert)
case <-td.ctx.Done():
return
Expand Down
Loading
Loading