Skip to content

Commit fbe5ab7

Browse files
committed
Add basic Prometheus metrics for binlog-collector
The following metrics have been added:
- pxc_binlog_collector_success_total
- pxc_binlog_collector_failure_total
- pxc_binlog_collector_last_processing_timestamp
- pxc_binlog_collector_last_upload_timestamp
- pxc_binlog_collector_gap_detected_total
1 parent 137e4f9 commit fbe5ab7

File tree

3 files changed

+87
-0
lines changed

3 files changed

+87
-0
lines changed

cmd/pitr/collector/collector.go

+51
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,53 @@ import (
1616

1717
"github.com/go-sql-driver/mysql"
1818
"github.com/pkg/errors"
19+
"github.com/prometheus/client_golang/prometheus"
1920

2021
"github.com/percona/percona-xtradb-cluster-operator/cmd/pitr/pxc"
2122
"github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/backup/storage"
2223
)
2324

25+
var (
26+
pxcBinlogCollectorBackupSuccess = prometheus.NewCounter(
27+
prometheus.CounterOpts{
28+
Name: "pxc_binlog_collector_success_total",
29+
Help: "Total number of successful binlog backups",
30+
},
31+
)
32+
pxcBinlogCollectorBackupFailure = prometheus.NewCounter(
33+
prometheus.CounterOpts{
34+
Name: "pxc_binlog_collector_failure_total",
35+
Help: "Total number of failed binlog backups",
36+
},
37+
)
38+
pxcBinlogCollectorLastProcessingTime = prometheus.NewGauge(
39+
prometheus.GaugeOpts{
40+
Name: "pxc_binlog_collector_last_processing_timestamp",
41+
Help: "Timestamp of the last successful binlog processing",
42+
},
43+
)
44+
pxcBinlogCollectorLastUploadTime = prometheus.NewGauge(
45+
prometheus.GaugeOpts{
46+
Name: "pxc_binlog_collector_last_upload_timestamp",
47+
Help: "Timestamp of the last successful binlog upload",
48+
},
49+
)
50+
pxcBinlogCollectorGapDetected = prometheus.NewCounter(
51+
prometheus.CounterOpts{
52+
Name: "pxc_binlog_collector_gap_detected_total",
53+
Help: "Total number of times the gap was detected in binlog",
54+
},
55+
)
56+
)
57+
58+
func init() {
59+
prometheus.MustRegister(pxcBinlogCollectorBackupSuccess)
60+
prometheus.MustRegister(pxcBinlogCollectorBackupFailure)
61+
prometheus.MustRegister(pxcBinlogCollectorLastProcessingTime)
62+
prometheus.MustRegister(pxcBinlogCollectorLastUploadTime)
63+
prometheus.MustRegister(pxcBinlogCollectorGapDetected)
64+
}
65+
2466
type Collector struct {
2567
db *pxc.PXC
2668
storage storage.Storage
@@ -103,6 +145,7 @@ func New(ctx context.Context, c Config) (*Collector, error) {
103145
func (c *Collector) Run(ctx context.Context) error {
104146
err := c.newDB(ctx)
105147
if err != nil {
148+
pxcBinlogCollectorBackupFailure.Inc()
106149
return errors.Wrap(err, "new db connection")
107150
}
108151
defer c.close()
@@ -113,9 +156,11 @@ func (c *Collector) Run(ctx context.Context) error {
113156

114157
err = c.CollectBinLogs(ctx)
115158
if err != nil {
159+
pxcBinlogCollectorBackupFailure.Inc()
116160
return errors.Wrap(err, "collect binlog files")
117161
}
118162

163+
pxcBinlogCollectorBackupSuccess.Inc()
119164
return nil
120165
}
121166

@@ -369,6 +414,7 @@ func (c *Collector) CollectBinLogs(ctx context.Context) error {
369414
if lastUploadedBinlogName == "" {
370415
log.Println("ERROR: Couldn't find the binlog that contains GTID set:", c.lastUploadedSet.Raw())
371416
log.Println("ERROR: Gap detected in the binary logs. Binary logs will be uploaded anyway, but full backup needed for consistent recovery.")
417+
pxcBinlogCollectorGapDetected.Inc()
372418
if err := createGapFile(c.lastUploadedSet); err != nil {
373419
return errors.Wrap(err, "create gap file")
374420
}
@@ -382,6 +428,7 @@ func (c *Collector) CollectBinLogs(ctx context.Context) error {
382428

383429
if len(list) == 0 {
384430
log.Println("No binlogs to upload")
431+
pxcBinlogCollectorLastProcessingTime.SetToCurrentTime()
385432
return nil
386433
}
387434

@@ -402,6 +449,8 @@ func (c *Collector) CollectBinLogs(ctx context.Context) error {
402449
return errors.Wrap(err, "manage binlog")
403450
}
404451

452+
pxcBinlogCollectorLastUploadTime.SetToCurrentTime()
453+
405454
lastTs, err := c.db.GetBinLogLastTimestamp(ctx, binlog.Name)
406455
if err != nil {
407456
return errors.Wrap(err, "get last timestamp")
@@ -411,6 +460,8 @@ func (c *Collector) CollectBinLogs(ctx context.Context) error {
411460
return errors.Wrap(err, "update timeline file")
412461
}
413462
}
463+
464+
pxcBinlogCollectorLastProcessingTime.SetToCurrentTime()
414465
return nil
415466
}
416467

cmd/pitr/main.go

+26
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"log"
8+
"net/http"
89
"os"
910
"os/signal"
1011
"syscall"
@@ -14,6 +15,7 @@ import (
1415
"github.com/percona/percona-xtradb-cluster-operator/cmd/pitr/recoverer"
1516

1617
"github.com/caarlos0/env"
18+
"github.com/prometheus/client_golang/prometheus/promhttp"
1719
)
1820

1921
func main() {
@@ -23,6 +25,23 @@ func main() {
2325
}
2426
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, os.Interrupt)
2527
defer stop()
28+
29+
srv := &http.Server{Addr: ":8080"}
30+
go func() {
31+
http.Handle("/metrics", promhttp.Handler())
32+
http.HandleFunc("/health", healthHandler)
33+
if err := srv.ListenAndServe(); err != http.ErrServerClosed {
34+
log.Printf("ERROR: HTTP server error: %v", err)
35+
}
36+
}()
37+
38+
go func() {
39+
<-ctx.Done()
40+
if err := srv.Shutdown(context.Background()); err != nil {
41+
log.Printf("ERROR: HTTP server shutdown: %v", err)
42+
}
43+
}()
44+
2645
switch command {
2746
case "collect":
2847
runCollector(ctx)
@@ -34,6 +53,13 @@ func main() {
3453
}
3554
}
3655

56+
// healthHandler answers every request with HTTP 200 and the body "ok".
// The request itself is not inspected. A failure to write the body is
// logged and otherwise ignored.
func healthHandler(w http.ResponseWriter, _ *http.Request) {
	w.WriteHeader(http.StatusOK)

	_, writeErr := w.Write([]byte("ok"))
	if writeErr == nil {
		return
	}
	log.Println("ERROR: writing health response:", writeErr)
}
62+
3763
func runCollector(ctx context.Context) {
3864
config, err := getCollectorConfig()
3965
if err != nil {

pkg/pxc/app/deployment/binlog-collector.go

+10
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ func GetBinlogCollectorDeployment(cr *api.PerconaXtraDBCluster, initImage string
9191
},
9292
},
9393
}
94+
95+
if cr.CompareVersionWith("1.17.0") >= 0 {
96+
container.Ports = []corev1.ContainerPort{
97+
{
98+
ContainerPort: 8080,
99+
Name: "metrics",
100+
},
101+
}
102+
}
103+
94104
replicas := int32(1)
95105

96106
var initContainers []corev1.Container

0 commit comments

Comments
 (0)