Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docker/prometheus/rules/uptime.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
groups:
- name: UptimePackage
rules:
# - alert: UptimeNoCCVUpdate
# expr: |
# cvms_block_height - on(chain_id) cvms_uptime_last_ccv_update > 5000
# labels:
# severity: critical
# annotations:
# summary: The chain has not received a validator set update in the last 5000 blocks.
# - alert: IncreasingMissCounterOver30%During1h
# expr: |
# # 30mins ago, the validator is unjailed. but got jailed
Expand Down
16 changes: 12 additions & 4 deletions internal/packages/consensus/uptime/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,24 @@ func GetConsumserUptimeStatus(exporter *common.Exporter, chainID string) (types.
return types.CommonUptimeStatus{}, errors.Cause(err)
}
exporter.Debugf("got total consumer validator uptime: %d", len(validatorUptimeStatus))

// 5. get on-chain slashing parameter
signedBlocksWindow, minSignedPerWindow, err := getUptimeParams(consumerClient, exporter.ChainName)
if err != nil {
return types.CommonUptimeStatus{}, errors.Cause(err)
}

// 6. get consumer channel uptime status
consumerUptimeStatus := &types.ConsumerUptimeStatus{}
channelStatus, err := getLastCCVUpdate(consumerClient)
if err != nil {
return types.CommonUptimeStatus{}, errors.Wrap(err, "failed to get CCV channel status")
}
consumerUptimeStatus.LastCCVUpdate = channelStatus

return types.CommonUptimeStatus{
SignedBlocksWindow: signedBlocksWindow,
MinSignedPerWindow: minSignedPerWindow,
Validators: validatorUptimeStatus,
SignedBlocksWindow: signedBlocksWindow,
MinSignedPerWindow: minSignedPerWindow,
Validators: validatorUptimeStatus,
ConsumerUptimeStatus: consumerUptimeStatus,
}, nil
}
46 changes: 46 additions & 0 deletions internal/packages/consensus/uptime/api/uptime.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package api
import (
"context"
"encoding/hex"
"encoding/json"
"fmt"
"net/http"
"net/url"
"sort"
"strconv"
"sync"
Expand Down Expand Up @@ -300,3 +303,46 @@ func sliceStakingValidatorByVP(stakingValidators []commontypes.CosmosStakingVali
})
return stakingValidators[:totalConsensusValidators]
}

// getLastCCVUpdate looks up the height of the latest transaction on the
// consumer chain that matches the event query "ccv_packet.valset_update_id>1".
//
// It calls the tx search endpoint (/cosmos/tx/v1beta1/txs) asking for a single
// result in descending order, and the request context is bounded by
// common.Timeout.
//
// Returns:
//   - the parsed height of the first returned tx response, or
//   - 0 with a nil error when the search yields no tx responses (a warning is
//     logged), or
//   - 0 with an error when the request fails, the status is not 200, the body
//     cannot be decoded, or the height string is not a valid unsigned integer.
func getLastCCVUpdate(c common.CommonClient) (uint64, error) {
	ctx, cancel := context.WithTimeout(context.Background(), common.Timeout)
	defer cancel()

	req := c.APIClient.R().SetContext(ctx)

	// url.Values.Encode sorts by key, so the literal order here does not
	// affect the resulting query string.
	search := url.Values{
		"query":    {"ccv_packet.valset_update_id>1"},
		"order_by": {"ORDER_BY_DESC"},
		"page":     {"1"},
		"limit":    {"1"},
	}
	endpoint := "/cosmos/tx/v1beta1/txs?" + search.Encode()
	c.Infof("endpoint: %s", endpoint)

	resp, err := req.Get(endpoint)
	if err != nil {
		return 0, errors.Cause(err)
	}
	if code := resp.StatusCode(); code != http.StatusOK {
		return 0, errors.Errorf("api error: got %d code from %s", code, resp.Request.URL)
	}

	// Only the height of each tx response is needed; everything else in the
	// payload is ignored by the decoder.
	var payload struct {
		TxResponses []struct {
			Height string `json:"height"`
		} `json:"tx_responses"`
	}
	if err = json.Unmarshal(resp.Body(), &payload); err != nil {
		return 0, errors.Cause(err)
	}

	txs := payload.TxResponses
	if len(txs) < 1 {
		c.Warnf("No CCV update found in consumer chain")
		return 0, nil
	}

	// The API encodes heights as decimal strings.
	height, parseErr := strconv.ParseUint(txs[0].Height, 10, 64)
	if parseErr != nil {
		return 0, fmt.Errorf("failed to parse height from tx (%+v): %w", payload, parseErr)
	}

	return height, nil
}
12 changes: 12 additions & 0 deletions internal/packages/consensus/uptime/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const (
JailedMetricName = "jailed"
SignedBlocksWindowMetricName = "signed_blocks_window"
MinSignedPerWindowMetricName = "min_signed_per_window"
LastCCVUpdateMetricName = "last_ccv_update"
)

func Start(p common.Packager) error {
Expand Down Expand Up @@ -102,6 +103,12 @@ func loop(exporter *common.Exporter, p common.Packager) {
Name: MinSignedPerWindowMetricName,
ConstLabels: packageLabels,
})
lastCCVUpdateMetric := p.Factory.NewGauge(prometheus.GaugeOpts{
Namespace: common.Namespace,
Subsystem: Subsystem,
Name: LastCCVUpdateMetricName,
ConstLabels: packageLabels,
})

isUnhealth := false
for {
Expand Down Expand Up @@ -189,6 +196,11 @@ func loop(exporter *common.Exporter, p common.Packager) {
signedBlocksWindowMetric.Set(status.SignedBlocksWindow)
minSignedPerWindowMetric.Set(status.MinSignedPerWindow)

// Update consumer metrics
if status.ConsumerUptimeStatus != nil {
lastCCVUpdateMetric.Set(float64(status.ConsumerUptimeStatus.LastCCVUpdate))
}

exporter.Infof("updated metrics successfully and going to sleep %s ...", SubsystemSleep.String())

// update health and ops
Expand Down
12 changes: 9 additions & 3 deletions internal/packages/consensus/uptime/types/types_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,15 @@ var (

// CommonUptimeStatus is the result type returned by the uptime API (for
// example by GetConsumserUptimeStatus). It bundles the two slashing window
// parameters, the per-validator uptime entries and an optional
// consumer-specific status.
//
// NOTE(review): the JSON tags do not mirror the field names — "slash_winodw"
// looks like a misspelling of "slash_window", and "vote_period" is attached to
// SignedBlocksWindow. Confirm whether anything consumes this JSON before
// renaming, because changing a tag changes the serialized output.
type CommonUptimeStatus struct {
MinSignedPerWindow float64 `json:"slash_winodw"`
SignedBlocksWindow float64 `json:"vote_period"`
Validators []ValidatorUptimeStatus `json:"validators"`
MinSignedPerWindow float64 `json:"slash_winodw"`
SignedBlocksWindow float64 `json:"vote_period"`
Validators []ValidatorUptimeStatus `json:"validators"`
// ConsumerUptimeStatus is a pointer and may be nil; the collector checks for
// nil before reading it.
ConsumerUptimeStatus *ConsumerUptimeStatus `json:"consumer_uptime_status"`
}

// ConsumerUptimeStatus holds the consumer-specific part of an uptime result.
// CommonUptimeStatus refers to it through a pointer field.
type ConsumerUptimeStatus struct {
// LastCCVUpdate is filled from getLastCCVUpdate: the height of the tx
// returned by the CCV valset-update search, or 0 when the search found none.
// The collector exports it as the "last_ccv_update" gauge.
LastCCVUpdate uint64 `json:"last_ccv_update"`
}

// cosmos uptime status
Expand Down