diff --git a/alertmanager/alerts.go b/alertmanager/alerts.go
index 54f5a0f0e..a97703bc6 100644
--- a/alertmanager/alerts.go
+++ b/alertmanager/alerts.go
@@ -15,15 +15,18 @@ import (
 	"golang.org/x/xerrors"
 
 	"github.com/filecoin-project/go-address"
+	"github.com/filecoin-project/go-jsonrpc"
 	"github.com/filecoin-project/go-state-types/abi"
 	"github.com/filecoin-project/go-state-types/big"
 
+	"github.com/filecoin-project/curio/api"
 	"github.com/filecoin-project/curio/build"
 	"github.com/filecoin-project/curio/deps/config"
 	"github.com/filecoin-project/curio/harmony/harmonydb"
 
 	"github.com/filecoin-project/lotus/chain/actors/builtin/miner"
 	"github.com/filecoin-project/lotus/chain/types"
+	cliutil "github.com/filecoin-project/lotus/cli/util"
 )
 
 type AlertNow struct {
@@ -612,3 +615,116 @@ func wnPostCheck(al *alerts) {
 		}
 	}
 }
+
+// chainSyncCheck raises the "ChainSync" alert when a configured chain node's
+// head is stale.
+//
+// It reads every config layer from harmony_config, takes the first
+// Apis.ChainApiInfo entry of each layer that has one, and asks each distinct
+// node address for its chain head. A head 5 or more block delays old is
+// appended to the alert string; one between 1.5 and 5 block delays old is only
+// logged at debug level. Failures are stored in the alert's err field, a later
+// failure replacing an earlier one.
+func chainSyncCheck(al *alerts) {
+	Name := "ChainSync"
+	al.alertMap[Name] = &alertOut{}
+
+	type minimalApiInfo struct {
+		Apis struct {
+			ChainApiInfo []string
+		}
+	}
+
+	// Get all config from DB
+	rows, err := al.db.Query(al.ctx, `SELECT title, config FROM harmony_config`)
+	if err != nil {
+		al.alertMap[Name].err = xerrors.Errorf("getting db configs: %w", err)
+		return
+	}
+	// Release the query on every exit path, including the early return on a
+	// scan failure below.
+	defer rows.Close()
+
+	configs := make(map[string]string)
+	for rows.Next() {
+		var title, cfg string
+		if err := rows.Scan(&title, &cfg); err != nil {
+			al.alertMap[Name].err = xerrors.Errorf("scanning db configs: %w", err)
+			return
+		}
+		configs[title] = cfg
+	}
+	// Next also returns false when iteration fails part-way, so check before
+	// trusting the collected set.
+	if err := rows.Err(); err != nil {
+		al.alertMap[Name].err = xerrors.Errorf("iterating db configs: %w", err)
+		return
+	}
+
+	// Parse all configs minimal to get API
+	rpcInfos := map[string]minimalApiInfo{} // config name -> api info
+	for name, tomlStr := range configs {
+		var info minimalApiInfo
+		if err := toml.Unmarshal([]byte(tomlStr), &info); err != nil {
+			al.alertMap[Name].err = xerrors.Errorf("unmarshaling %s config: %w", name, err)
+			continue
+		}
+
+		if len(info.Apis.ChainApiInfo) == 0 {
+			continue
+		}
+
+		rpcInfos[name] = info
+	}
+
+	dedup := map[string]bool{} // for dedup by address
+
+	// For each unique API (chain), check if in sync
+	for _, info := range rpcInfos {
+		ai := cliutil.ParseApiInfo(info.Apis.ChainApiInfo[0])
+		if dedup[ai.Addr] {
+			continue
+		}
+		dedup[ai.Addr] = true
+
+		addr, err := ai.DialArgs("v1")
+		if err != nil {
+			al.alertMap[Name].err = xerrors.Errorf("could not get DialArgs: %w", err)
+			continue
+		}
+
+		var res api.ChainStruct
+		closer, err := jsonrpc.NewMergeClient(al.ctx, addr, "Filecoin",
+			api.GetInternalStructs(&res), ai.AuthHeader(), []jsonrpc.Option{jsonrpc.WithErrors(jsonrpc.NewErrors())}...)
+		if err != nil {
+			al.alertMap[Name].err = xerrors.Errorf("error creating jsonrpc client: %w", err)
+			continue
+		}
+
+		head, err := res.ChainHead(al.ctx)
+		// Close right away: a defer inside this loop would keep every client
+		// open until the whole check returns.
+		closer()
+		if err != nil {
+			al.alertMap[Name].err = xerrors.Errorf("ChainHead: %w", err)
+			continue
+		}
+
+		// Read the clock once so the thresholds and the reported lag agree.
+		now := time.Now()
+		headTime := time.Unix(int64(head.MinTimestamp()), 0)
+		lagSecs := now.Unix() - headTime.Unix()
+		behind := now.Sub(headTime).Truncate(time.Second)
+
+		switch {
+		case lagSecs < int64(build.BlockDelaySecs*3/2): // within 1.5 epochs
+			continue
+		case lagSecs < int64(build.BlockDelaySecs*5): // within 5 epochs
+			log.Debugf("Chain Sync status: %s: slow (%s behind)", addr, behind)
+		default:
+			// Name the node and terminate the entry so that several lagging
+			// nodes do not run together in one alert string.
+			al.alertMap[Name].alertString += fmt.Sprintf("%s: behind (%s behind). ", addr, behind)
+		}
+	}
+}
diff --git a/alertmanager/task_alert.go b/alertmanager/task_alert.go
index 9042ef2cb..474f57140 100644
--- a/alertmanager/task_alert.go
+++ b/alertmanager/task_alert.go
@@ -69,6 +69,7 @@ var AlertFuncs = []AlertFunc{
 	wdPostCheck,
 	wnPostCheck,
 	NowCheck,
+	chainSyncCheck,
 }
 
 func NewAlertTask(