diff --git a/example-config.yml b/example-config.yml index aa3c440..4371962 100644 --- a/example-config.yml +++ b/example-config.yml @@ -67,6 +67,15 @@ default_alert_config: # Severity threshold defines the minimum severity level at which the alerts are sent to this channel severity_threshold: info + webhook: + # Send alerts to a generic webhook endpoint? + # The payload follows a Grafana-like format for broad compatibility + enabled: no + # The URL to send the webhook POST request to + url: https://your-webhook-endpoint.example.com/alerts + # Severity threshold defines the minimum severity level at which the alerts are sent to this channel + severity_threshold: info + # Alert defaults shared by all chains # If the chain stops seeing new blocks, should an alert be sent? stalled_enabled: yes @@ -166,6 +175,10 @@ chains: # an example for disabling the pagerduty alert channel, which is enabled by default pagerduty: enabled: no + # an example for overriding the webhook URL for this specific chain + # you can also disable the webhook for this chain by setting enabled: no + webhook: + url: https://chain-specific-webhook.example.com/osmosis-alerts # This section covers our RPC providers. No LCD (aka REST) endpoints are used, only TM's RPC endpoints # Multiple hosts are encouraged, and will be tried sequentially until a working endpoint is discovered. 
diff --git a/td2/alert.go b/td2/alert.go index 4cc9d32..b3fd7ab 100644 --- a/td2/alert.go +++ b/td2/alert.go @@ -25,6 +25,7 @@ type alertMsg struct { disc bool tg bool slk bool + wh bool severity string resolved bool @@ -43,6 +44,8 @@ type alertMsg struct { slkHook string slkMentions string + whURL string + alertConfig *AlertConfig } @@ -53,6 +56,7 @@ const ( tg di slk + wh ) type alertMsgCache struct { @@ -67,6 +71,7 @@ type alarmCache struct { SentTgAlarms map[string]alertMsgCache `json:"sent_tg_alarms"` SentDiAlarms map[string]alertMsgCache `json:"sent_di_alarms"` SentSlkAlarms map[string]alertMsgCache `json:"sent_slk_alarms"` + SentWHAlarms map[string]alertMsgCache `json:"sent_wh_alarms"` AllAlarms map[string]map[string]alertMsgCache `json:"sent_all_alarms"` flappingAlarms map[string]map[string]alertMsgCache notifyMux sync.RWMutex @@ -125,6 +130,7 @@ var alarms = &alarmCache{ SentTgAlarms: make(map[string]alertMsgCache), SentDiAlarms: make(map[string]alertMsgCache), SentSlkAlarms: make(map[string]alertMsgCache), + SentWHAlarms: make(map[string]alertMsgCache), AllAlarms: make(map[string]map[string]alertMsgCache), flappingAlarms: make(map[string]map[string]alertMsgCache), notifyMux: sync.RWMutex{}, @@ -160,6 +166,12 @@ func shouldNotify(msg *alertMsg, dest notifyDest) bool { } whichMap = alarms.SentSlkAlarms service = "Slack" + case wh: + if !slices.Contains(SeverityThresholdToSeverities(msg.alertConfig.Webhook.SeverityThreshold), msg.severity) { + return false + } + whichMap = alarms.SentWHAlarms + service = "Webhook" } switch { @@ -397,6 +409,98 @@ func notifyPagerduty(msg *alertMsg) (err error) { return } +// WebhookPayload represents the payload sent to a generic webhook endpoint +// The structure is inspired by Grafana's webhook contact point format +type WebhookPayload struct { + Status string `json:"status"` + Alerts []WebhookAlert `json:"alerts"` + Version string `json:"version"` + GroupKey string `json:"groupKey"` +} + +// WebhookAlert represents a 
single alert in the webhook payload +type WebhookAlert struct { + Status string `json:"status"` + Labels map[string]string `json:"labels"` + Annotations map[string]string `json:"annotations"` + StartsAt string `json:"startsAt"` + EndsAt string `json:"endsAt"` + Fingerprint string `json:"fingerprint"` + GeneratorURL string `json:"generatorURL,omitempty"` +} + +func notifyWebhook(msg *alertMsg) (err error) { + if !msg.wh { + return nil + } + if !shouldNotify(msg, wh) { + return nil + } + + status := "firing" + if msg.resolved { + status = "resolved" + } + + now := time.Now().UTC().Format(time.RFC3339) + endsAt := "0001-01-01T00:00:00Z" + if msg.resolved { + endsAt = now + } + + alert := WebhookAlert{ + Status: status, + Labels: map[string]string{ + "alertname": msg.uniqueId, + "chain": msg.chain, + "severity": msg.severity, + "source": "tenderduty", + }, + Annotations: map[string]string{ + "summary": msg.message, + "description": msg.message, + }, + StartsAt: now, + EndsAt: endsAt, + Fingerprint: msg.uniqueId, + } + + payload := WebhookPayload{ + Status: status, + Alerts: []WebhookAlert{alert}, + Version: "1", + GroupKey: msg.chain, + } + + data, err := json.Marshal(payload) + if err != nil { + l("⚠️ Could not marshal webhook payload!", err) + return err + } + + req, err := http.NewRequest("POST", msg.whURL, bytes.NewBuffer(data)) + if err != nil { + l("⚠️ Could not create webhook request!", err) + return err + } + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Do(req) + if err != nil { + l("⚠️ Could not send webhook!", err) + return err + } + _ = resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + l("⚠️ Webhook returned non-success status:", resp.StatusCode) + return fmt.Errorf("webhook returned status %d for %s", resp.StatusCode, msg.chain) + } + + return nil +} + func getAlarms(chain string) string { alarms.notifyMux.RLock() defer alarms.notifyMux.RUnlock() @@ 
-422,6 +526,7 @@ func (c *Config) alert(chainName, message, severity string, resolved bool, id *s disc: boolVal(c.DefaultAlertConfig.Discord.Enabled) && boolVal(c.Chains[chainName].Alerts.Discord.Enabled), tg: boolVal(c.DefaultAlertConfig.Telegram.Enabled) && boolVal(c.Chains[chainName].Alerts.Telegram.Enabled), slk: boolVal(c.DefaultAlertConfig.Slack.Enabled) && boolVal(c.Chains[chainName].Alerts.Slack.Enabled), + wh: boolVal(c.DefaultAlertConfig.Webhook.Enabled) && boolVal(c.Chains[chainName].Alerts.Webhook.Enabled), severity: severity, resolved: resolved, chain: fmt.Sprintf("%s (%s)", chainName, c.Chains[chainName].ChainId), @@ -434,6 +539,7 @@ func (c *Config) alert(chainName, message, severity string, resolved bool, id *s discHook: c.Chains[chainName].Alerts.Discord.Webhook, discMentions: strings.Join(c.Chains[chainName].Alerts.Discord.Mentions, " "), slkHook: c.Chains[chainName].Alerts.Slack.Webhook, + whURL: c.Chains[chainName].Alerts.Webhook.URL, alertConfig: &c.Chains[chainName].Alerts, } c.alertChan <- a diff --git a/td2/alert_test.go b/td2/alert_test.go index f79cdca..3f47bd2 100644 --- a/td2/alert_test.go +++ b/td2/alert_test.go @@ -35,6 +35,9 @@ func createTestConfig() *Config { Slack: SlackConfig{ Enabled: &falseBool, }, + Webhook: WebhookConfig{ + Enabled: &falseBool, + }, }, }, }, @@ -51,6 +54,9 @@ func createTestConfig() *Config { Slack: SlackConfig{ Enabled: &falseBool, }, + Webhook: WebhookConfig{ + Enabled: &falseBool, + }, }, alertChan: make(chan *alertMsg, 10), } @@ -174,6 +180,7 @@ func TestShouldNotify(t *testing.T) { SentTgAlarms: make(map[string]alertMsgCache), SentDiAlarms: make(map[string]alertMsgCache), SentSlkAlarms: make(map[string]alertMsgCache), + SentWHAlarms: make(map[string]alertMsgCache), AllAlarms: make(map[string]map[string]alertMsgCache), flappingAlarms: make(map[string]map[string]alertMsgCache), notifyMux: sync.RWMutex{}, @@ -270,6 +277,7 @@ func TestShouldNotify(t *testing.T) { testAlarms.SentTgAlarms = 
make(map[string]alertMsgCache) testAlarms.SentDiAlarms = make(map[string]alertMsgCache) testAlarms.SentSlkAlarms = make(map[string]alertMsgCache) + testAlarms.SentWHAlarms = make(map[string]alertMsgCache) testAlarms.flappingAlarms = make(map[string]map[string]alertMsgCache) tt.setupAlarms() @@ -452,6 +460,96 @@ func TestNotifySlack(t *testing.T) { } } +func TestNotifyWebhook(t *testing.T) { + tests := []struct { + name string + msg *alertMsg + serverResponse int + expectError bool + }{ + { + name: "successful notification", + msg: &alertMsg{ + wh: true, + chain: "test-chain", + message: "test message", + severity: "critical", + uniqueId: "test_alert_1", + resolved: false, + whURL: "", // will be set to test server URL + alertConfig: &AlertConfig{ + Webhook: WebhookConfig{SeverityThreshold: "info"}, + }, + }, + serverResponse: 200, + expectError: false, + }, + { + name: "server error", + msg: &alertMsg{ + wh: true, + chain: "test-chain", + message: "test message", + severity: "critical", + uniqueId: "test_alert_2", + resolved: false, + whURL: "", // will be set to test server URL + alertConfig: &AlertConfig{ + Webhook: WebhookConfig{SeverityThreshold: "info"}, + }, + }, + serverResponse: 500, + expectError: true, + }, + { + name: "webhook disabled", + msg: &alertMsg{ + wh: false, + }, + expectError: false, + }, + } + + // Setup test alarm cache + testAlarms := &alarmCache{ + SentPdAlarms: make(map[string]alertMsgCache), + SentTgAlarms: make(map[string]alertMsgCache), + SentDiAlarms: make(map[string]alertMsgCache), + SentSlkAlarms: make(map[string]alertMsgCache), + SentWHAlarms: make(map[string]alertMsgCache), + AllAlarms: make(map[string]map[string]alertMsgCache), + flappingAlarms: make(map[string]map[string]alertMsgCache), + notifyMux: sync.RWMutex{}, + } + originalAlarms := alarms + alarms = testAlarms + defer func() { alarms = originalAlarms }() + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Reset alarms for each test + 
testAlarms.SentWHAlarms = make(map[string]alertMsgCache) + + if tt.msg.wh { + // Create test server + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(tt.serverResponse) + })) + defer server.Close() + tt.msg.whURL = server.URL + } + + err := notifyWebhook(tt.msg) + if tt.expectError && err == nil { + t.Errorf("Expected error but got none") + } + if !tt.expectError && err != nil { + t.Errorf("Expected no error but got: %v", err) + } + }) + } +} + func TestConfigAlert(t *testing.T) { // Create test config config := &Config{ @@ -477,6 +575,10 @@ func TestConfigAlert(t *testing.T) { Slack: SlackConfig{ Enabled: &[]bool{false}[0], }, + Webhook: WebhookConfig{ + Enabled: &[]bool{true}[0], + URL: "https://test-webhook.example.com", + }, }, }, }, @@ -493,6 +595,9 @@ func TestConfigAlert(t *testing.T) { Slack: SlackConfig{ Enabled: &[]bool{true}[0], }, + Webhook: WebhookConfig{ + Enabled: &[]bool{true}[0], + }, }, } @@ -523,6 +628,12 @@ func TestConfigAlert(t *testing.T) { if alertMsg.slk != false { t.Errorf("Expected slack to be disabled") } + if alertMsg.wh != true { + t.Errorf("Expected webhook to be enabled") + } + if alertMsg.whURL != "https://test-webhook.example.com" { + t.Errorf("Expected webhook URL 'https://test-webhook.example.com', got '%s'", alertMsg.whURL) + } case <-time.After(time.Second): t.Error("Alert was not sent to channel") } diff --git a/td2/run.go b/td2/run.go index 73f2edc..a1dd5fe 100644 --- a/td2/run.go +++ b/td2/run.go @@ -53,6 +53,10 @@ func Run(configFile, stateFile, chainConfigDirectory string, password *string, d if e != nil { l(msg.chain, "error sending alert to slack", e.Error()) } + e = notifyWebhook(msg) + if e != nil { + l(msg.chain, "error sending alert to webhook", e.Error()) + } }(alert) case <-td.ctx.Done(): return diff --git a/td2/types.go b/td2/types.go index ad3da5e..2f72bfd 100644 --- a/td2/types.go +++ b/td2/types.go @@ -307,6 +307,8 @@ type AlertConfig struct { 
// WebhookConfig is the configuration for the generic webhook alert channel.
// Alerts are delivered as an HTTP POST whose JSON body follows a Grafana-like
// format for broad compatibility.
type WebhookConfig struct {
	// Enabled switches the channel on or off.
	// NOTE(review): presumably a pointer so that an unset value can be told
	// apart from an explicit "no" — confirm against the config-merging code.
	Enabled *bool `yaml:"enabled"`

	// URL is the endpoint that receives the webhook POST request.
	URL string `yaml:"url"`

	// SeverityThreshold is the minimum severity level at which alerts are
	// sent to this channel.
	SeverityThreshold string `yaml:"severity_threshold"`
}