From 527da5c8697f0700572c5d31336c7fde9a2711c0 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 10 Oct 2024 16:22:40 +0100 Subject: [PATCH 1/4] Fix concurrent map write panic in monitoring middleware --- .../middleware/monitoring_middleware.go | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/internal/beater/middleware/monitoring_middleware.go b/internal/beater/middleware/monitoring_middleware.go index 67c0125c285..26855264ef4 100644 --- a/internal/beater/middleware/monitoring_middleware.go +++ b/internal/beater/middleware/monitoring_middleware.go @@ -20,6 +20,7 @@ package middleware import ( "context" "net/http" + "sync" "time" "go.opentelemetry.io/otel" @@ -36,9 +37,13 @@ const ( type monitoringMiddleware struct { meter metric.Meter - ints map[request.ResultID]*monitoring.Int - counters map[string]metric.Int64Counter - histograms map[string]metric.Int64Histogram + ints map[request.ResultID]*monitoring.Int + + counters map[string]metric.Int64Counter + countersRWMutex sync.RWMutex + + histograms map[string]metric.Int64Histogram + histogramsRWMutex sync.RWMutex } func (m *monitoringMiddleware) Middleware() Middleware { @@ -79,10 +84,19 @@ func (m *monitoringMiddleware) inc(id request.ResultID) { func (m *monitoringMiddleware) getCounter(n string) metric.Int64Counter { name := "http.server." + n + + m.countersRWMutex.RLock() if met, ok := m.counters[name]; ok { + m.countersRWMutex.RUnlock() return met } + m.countersRWMutex.RUnlock() + m.countersRWMutex.Lock() + defer m.countersRWMutex.Unlock() + if met, ok := m.counters[name]; ok { + return met + } nm, _ := m.meter.Int64Counter(name) m.counters[name] = nm return nm @@ -90,10 +104,20 @@ func (m *monitoringMiddleware) getCounter(n string) metric.Int64Counter { func (m *monitoringMiddleware) getHistogram(n string, opts ...metric.Int64HistogramOption) metric.Int64Histogram { name := "http.server." + n + + m.histogramsRWMutex.RLock() if met, ok := m.histograms[name]; ok { + m.histogramsRWMutex.RUnlock() return met } + m.histogramsRWMutex.RUnlock() + m.histogramsRWMutex.Lock() + defer m.histogramsRWMutex.Unlock() + + if met, ok := m.histograms[name]; ok { + return met + } nm, _ := m.meter.Int64Histogram(name, opts...) m.histograms[name] = nm return nm From 26496c15d33615a88f657106f620e327241e1ee0 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 10 Oct 2024 16:30:30 +0100 Subject: [PATCH 2/4] Add changelog --- changelogs/head.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/changelogs/head.asciidoc b/changelogs/head.asciidoc index ab1a5e60bcd..d1ce40a63ee 100644 --- a/changelogs/head.asciidoc +++ b/changelogs/head.asciidoc @@ -7,6 +7,7 @@ https://github.com/elastic/apm-server/compare/8.15\...main[View commits] ==== Bug fixes - Track all bulk request response status codes {pull}13574[13574] +- Fix a concurrent map write panic in monitoring middleware {pull}14335[14335] [float] ==== Breaking Changes From 06ff323c78279a5f16d35af9820faf1f7d4c3388 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 10 Oct 2024 16:43:08 +0100 Subject: [PATCH 3/4] Revert "Fix concurrent map write panic in monitoring middleware" This reverts commit 527da5c8697f0700572c5d31336c7fde9a2711c0. --- .../middleware/monitoring_middleware.go | 30 ++----------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/internal/beater/middleware/monitoring_middleware.go b/internal/beater/middleware/monitoring_middleware.go index 26855264ef4..67c0125c285 100644 --- a/internal/beater/middleware/monitoring_middleware.go +++ b/internal/beater/middleware/monitoring_middleware.go @@ -20,7 +20,6 @@ package middleware import ( "context" "net/http" - "sync" "time" "go.opentelemetry.io/otel" @@ -37,13 +36,9 @@ const ( type monitoringMiddleware struct { meter metric.Meter - ints map[request.ResultID]*monitoring.Int - - counters map[string]metric.Int64Counter - countersRWMutex sync.RWMutex - - histograms map[string]metric.Int64Histogram - histogramsRWMutex sync.RWMutex + ints map[request.ResultID]*monitoring.Int + counters map[string]metric.Int64Counter + histograms map[string]metric.Int64Histogram } func (m *monitoringMiddleware) Middleware() Middleware { @@ -84,19 +79,10 @@ func (m *monitoringMiddleware) inc(id request.ResultID) { func (m *monitoringMiddleware) getCounter(n string) metric.Int64Counter { name := "http.server." + n - - m.countersRWMutex.RLock() if met, ok := m.counters[name]; ok { - m.countersRWMutex.RUnlock() return met } - m.countersRWMutex.RUnlock() - m.countersRWMutex.Lock() - defer m.countersRWMutex.Unlock() - if met, ok := m.counters[name]; ok { - return met - } nm, _ := m.meter.Int64Counter(name) m.counters[name] = nm return nm @@ -104,20 +90,10 @@ func (m *monitoringMiddleware) getCounter(n string) metric.Int64Counter { func (m *monitoringMiddleware) getHistogram(n string, opts ...metric.Int64HistogramOption) metric.Int64Histogram { name := "http.server." + n - - m.histogramsRWMutex.RLock() if met, ok := m.histograms[name]; ok { - m.histogramsRWMutex.RUnlock() return met } - m.histogramsRWMutex.RUnlock() - m.histogramsRWMutex.Lock() - defer m.histogramsRWMutex.Unlock() - - if met, ok := m.histograms[name]; ok { - return met - } nm, _ := m.meter.Int64Histogram(name, opts...) m.histograms[name] = nm return nm From 00bbe477203584dd60ae9614f65f4f845748bbdf Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Thu, 10 Oct 2024 16:59:17 +0100 Subject: [PATCH 4/4] Use sync.Map instead --- .../middleware/monitoring_middleware.go | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/internal/beater/middleware/monitoring_middleware.go b/internal/beater/middleware/monitoring_middleware.go index 67c0125c285..e34b4b52ebc 100644 --- a/internal/beater/middleware/monitoring_middleware.go +++ b/internal/beater/middleware/monitoring_middleware.go @@ -20,6 +20,7 @@ package middleware import ( "context" "net/http" + "sync" "time" "go.opentelemetry.io/otel" @@ -37,8 +38,8 @@ type monitoringMiddleware struct { meter metric.Meter ints map[request.ResultID]*monitoring.Int - counters map[string]metric.Int64Counter - histograms map[string]metric.Int64Histogram + counters sync.Map + histograms sync.Map } func (m *monitoringMiddleware) Middleware() Middleware { @@ -79,23 +80,23 @@ func (m *monitoringMiddleware) inc(id request.ResultID) { func (m *monitoringMiddleware) getCounter(n string) metric.Int64Counter { name := "http.server." + n - if met, ok := m.counters[name]; ok { - return met + if met, ok := m.counters.Load(name); ok { + return met.(metric.Int64Counter) } nm, _ := m.meter.Int64Counter(name) - m.counters[name] = nm + m.counters.LoadOrStore(name, nm) return nm } func (m *monitoringMiddleware) getHistogram(n string, opts ...metric.Int64HistogramOption) metric.Int64Histogram { name := "http.server." + n - if met, ok := m.histograms[name]; ok { - return met + if met, ok := m.histograms.Load(name); ok { + return met.(metric.Int64Histogram) } nm, _ := m.meter.Int64Histogram(name, opts...) - m.histograms[name] = nm + m.histograms.LoadOrStore(name, nm) return nm } @@ -109,8 +110,8 @@ func MonitoringMiddleware(m map[request.ResultID]*monitoring.Int, mp metric.Mete mid := &monitoringMiddleware{ meter: mp.Meter("internal/beater/middleware"), ints: m, - counters: map[string]metric.Int64Counter{}, - histograms: map[string]metric.Int64Histogram{}, + counters: sync.Map{}, + histograms: sync.Map{}, } return mid.Middleware()