diff --git a/cmd/exporters/prometheus/cache.go b/cmd/exporters/prometheus/cache.go
index ae73864c8..bccceb91a 100644
--- a/cmd/exporters/prometheus/cache.go
+++ b/cmd/exporters/prometheus/cache.go
@@ -5,6 +5,7 @@
package prometheus
import (
+ "github.com/netapp/harvest/v2/pkg/set"
"sync"
"time"
)
@@ -28,7 +29,7 @@ func (c *cache) Get() map[string][][]byte {
return c.data
}
-func (c *cache) Put(key string, data [][]byte) {
+func (c *cache) Put(key string, data [][]byte, _ *set.Set) {
c.data[key] = data
c.timers[key] = time.Now()
}
diff --git a/cmd/exporters/prometheus/disk_cache.go b/cmd/exporters/prometheus/disk_cache.go
new file mode 100644
index 000000000..b08b9a849
--- /dev/null
+++ b/cmd/exporters/prometheus/disk_cache.go
@@ -0,0 +1,340 @@
+package prometheus
+
+import (
+ "bufio"
+ "context"
+ "github.com/netapp/harvest/v2/pkg/set"
+ "github.com/netapp/harvest/v2/pkg/slogx"
+ "io"
+ "log/slog"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+ "time"
+)
+
+// CacheStats summarizes the non-expired, non-metadata entries of a disk cache, as computed by diskCache.GetStats.
+type CacheStats struct {
+ NumCollectors int // number of distinct collectors (first "."-separated segment of the cache key)
+ NumObjects int // number of distinct collector/object pairs
+ NumMetrics int // sum of the metric-name set sizes over all counted cache keys
+ UniqueData map[string]map[string][]string // collector -> object -> metric names
+}
+
+type diskCache struct { // disk-backed metric cache: rendered metrics live in files under baseDir, bookkeeping stays in memory
+ *sync.Mutex // embedded lock; Clean takes it itself, the other methods leave locking to the caller
+ files map[string]string // key -> path of the file holding that key's rendered metrics
+ timers map[string]time.Time // key -> time of the last successful Put
+ metricNames map[string]*set.Set // key -> metric names passed to Put (nil when none were given)
+ metricCounts map[string]int // key -> number of metric lines passed to the last successful Put
+ expire time.Duration // age at which an entry is treated as expired
+ baseDir string // directory that holds the cache files
+ logger *slog.Logger
+ ctx context.Context // cancelled by Shutdown; ends the cleanup goroutine
+ cancel context.CancelFunc // cancels ctx
+ writerPool *sync.Pool // reusable 64 KiB bufio.Writers for writeToDisk
+ readerPool *sync.Pool // reusable 64 KiB bufio.Readers for streamFile
+ keyReplacer *strings.Replacer // maps "/", "\" and ":" in keys to "_" when building file names
+}
+
+// newDiskCache creates a disk-backed cache rooted at baseDir and starts its
+// background cleanup goroutine.
+//
+// d is the lifetime of a cache entry; a non-positive value is replaced by five
+// minutes. baseDir is removed and recreated, so anything already stored there
+// is lost. The function returns nil (after logging a warning) when baseDir is
+// empty or cannot be created, so callers must check the result. Call Shutdown
+// to stop the cleanup goroutine.
+func newDiskCache(d time.Duration, baseDir string, logger *slog.Logger) *diskCache {
+	if d <= 0 {
+		logger.Warn("invalid expire duration, using default 5 minutes", slog.Duration("provided", d))
+		d = 5 * time.Minute
+	}
+	if baseDir == "" {
+		logger.Warn("empty base directory provided")
+		return nil
+	}
+
+	// Start from an empty directory. A failed removal is reported but is not
+	// fatal: files the cache does not know about are never streamed, and Clean
+	// deletes them on its next pass.
+	if err := os.RemoveAll(baseDir); err != nil {
+		logger.Warn("failed to clear cache directory", slogx.Err(err), slog.String("dir", baseDir))
+	}
+	if err := os.MkdirAll(baseDir, 0750); err != nil {
+		logger.Warn("failed to create cache directory", slogx.Err(err), slog.String("dir", baseDir))
+		return nil
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	dc := &diskCache{
+		Mutex:        &sync.Mutex{},
+		files:        make(map[string]string),
+		timers:       make(map[string]time.Time),
+		metricNames:  make(map[string]*set.Set),
+		metricCounts: make(map[string]int),
+		expire:       d,
+		baseDir:      baseDir,
+		logger:       logger,
+		ctx:          ctx,
+		cancel:       cancel,
+		// The pooled readers and writers start detached (nil target); each
+		// user calls Reset with the real file before using them.
+		writerPool: &sync.Pool{
+			New: func() any {
+				return bufio.NewWriterSize(nil, 64*1024)
+			},
+		},
+		readerPool: &sync.Pool{
+			New: func() any {
+				return bufio.NewReaderSize(nil, 64*1024)
+			},
+		},
+		keyReplacer: strings.NewReplacer("/", "_", "\\", "_", ":", "_"),
+	}
+
+	go dc.cleanup()
+	return dc
+}
+
+// GetStats summarizes the non-expired cache entries.
+//
+// The collector and object are taken from the first two "."-separated segments
+// of each key. Keys without a ".", objects whose name starts with "metadata_",
+// and entries without metric names are skipped. The returned error is always
+// nil. The method reads the cache maps without taking the lock, so the caller
+// is expected to hold it.
+func (dc *diskCache) GetStats() (*CacheStats, error) {
+	unique := make(map[string]map[string][]string)
+	collectors := make(map[string]struct{})
+	objects := make(map[string]struct{})
+	total := 0
+
+	for key := range dc.files {
+		if dc.isExpired(key) {
+			continue
+		}
+
+		collector, rest, found := strings.Cut(key, ".")
+		if !found {
+			continue
+		}
+		// Only the segment up to the next "." names the object.
+		object, _, _ := strings.Cut(rest, ".")
+		if strings.HasPrefix(object, "metadata_") {
+			continue
+		}
+
+		// A missing key yields a nil set, which is skipped like an empty one.
+		names := dc.metricNames[key]
+		if names == nil || names.Size() == 0 {
+			continue
+		}
+		total += names.Size()
+
+		perObject, known := unique[collector]
+		if !known {
+			perObject = make(map[string][]string)
+			unique[collector] = perObject
+			collectors[collector] = struct{}{}
+		}
+
+		objects[collector+"."+object] = struct{}{}
+		perObject[object] = names.Values()
+	}
+
+	return &CacheStats{
+		NumCollectors: len(collectors),
+		NumObjects:    len(objects),
+		NumMetrics:    total,
+		UniqueData:    unique,
+	}, nil
+}
+
+// GetMetricCount sums the recorded metric line counts of all non-expired
+// entries. It does not take the lock; the caller is expected to hold it.
+func (dc *diskCache) GetMetricCount() int {
+	total := 0
+	for key := range dc.files {
+		if !dc.isExpired(key) {
+			// A key without a recorded count contributes zero.
+			total += dc.metricCounts[key]
+		}
+	}
+	return total
+}
+
+// Put stores metrics to disk and updates cache metadata.
+func (dc *diskCache) Put(key string, data [][]byte, metricNames *set.Set) {
+ filePath := dc.generateFilepath(key)
+
+ if err := dc.writeToDisk(filePath, data); err != nil {
+ dc.logger.Warn("failed to write cache file",
+ slogx.Err(err),
+ slog.String("key", key),
+ slog.String("file", filePath))
+ return
+ }
+
+ dc.files[key] = filePath
+ dc.timers[key] = time.Now()
+ if metricNames != nil && metricNames.Size() > 0 {
+ dc.metricNames[key] = metricNames
+ } else {
+ dc.metricNames[key] = nil
+ }
+ dc.metricCounts[key] = len(data)
+
+ dc.logger.Debug("cached metrics to disk",
+ slog.String("key", key),
+ slog.String("file", filePath),
+ slog.Int("metrics_count", len(data)))
+}
+
+// StreamToWriter copies every non-expired cache file to w.
+//
+// A failing file does not stop the loop: the remaining files are still
+// attempted, each failure is logged at debug level, and a summary warning is
+// logged at the end. The first error encountered is returned, or nil when all
+// files were streamed. The method does not take the lock; the caller is
+// expected to hold it.
+func (dc *diskCache) StreamToWriter(w io.Writer) error {
+	var (
+		firstErr  error
+		failed    int
+		attempted int
+	)
+
+	for key, path := range dc.files {
+		if dc.isExpired(key) {
+			continue
+		}
+		attempted++
+
+		err := dc.streamFile(path, w)
+		if err == nil {
+			continue
+		}
+
+		failed++
+		if firstErr == nil {
+			firstErr = err
+		}
+		dc.logger.Debug("failed to stream cache file",
+			slogx.Err(err), slog.String("file", path))
+	}
+
+	if firstErr == nil {
+		return nil
+	}
+
+	dc.logger.Warn("failed to stream some cache files",
+		slog.Int("failed_count", failed),
+		slog.Int("total_count", attempted))
+	return firstErr
+}
+
+// openFile opens filePath for reading. A missing file is reported as
+// (nil, nil) rather than as an error, so callers must check the returned file
+// for nil. Any other failure is returned as the error.
+func (dc *diskCache) openFile(filePath string) (*os.File, error) {
+	handle, err := os.Open(filePath)
+	switch {
+	case err == nil:
+		return handle, nil
+	case os.IsNotExist(err):
+		return nil, nil
+	default:
+		return nil, err
+	}
+}
+
+// closeFile closes file and logs a failure at debug level instead of
+// returning it.
+func (dc *diskCache) closeFile(file *os.File) {
+	err := file.Close()
+	if err == nil {
+		return
+	}
+	dc.logger.Debug("failed to close file", slogx.Err(err))
+}
+
+// streamFile copies the contents of the cache file at filePath to w through a
+// pooled buffered reader.
+//
+// A file that no longer exists is not an error: it is logged at debug level
+// and nil is returned. Any other open or copy error is returned.
+func (dc *diskCache) streamFile(filePath string, w io.Writer) error {
+	file, err := dc.openFile(filePath)
+	if err != nil {
+		return err
+	}
+	if file == nil {
+		dc.logger.Debug("file is nil", slog.String("filePath", filePath))
+		return nil
+	}
+	defer dc.closeFile(file)
+
+	reader := dc.readerPool.Get().(*bufio.Reader)
+	reader.Reset(file)
+	defer func() {
+		// Detach the reader from the file before pooling it, so an idle
+		// pooled reader does not keep the closed *os.File reachable.
+		reader.Reset(nil)
+		dc.readerPool.Put(reader)
+	}()
+
+	_, err = io.Copy(w, reader)
+	return err
+}
+
+// Clean removes expired entries and stray files from the cache directory.
+//
+// It takes the cache lock for its whole duration and works in two passes:
+// first every expired key is dropped from the bookkeeping maps and its file is
+// deleted, then every directory entry under baseDir that does not belong to a
+// remaining key is deleted. Failures are logged at debug level only.
+func (dc *diskCache) Clean() {
+	dc.Lock()
+	defer dc.Unlock()
+
+	for key := range dc.timers {
+		// Use the same expiry test as the read paths so an entry that readers
+		// already skip is also the one that gets removed here.
+		if !dc.isExpired(key) {
+			continue
+		}
+		filePath := dc.files[key]
+
+		delete(dc.files, key)
+		delete(dc.timers, key)
+		delete(dc.metricNames, key)
+		delete(dc.metricCounts, key)
+
+		if err := os.Remove(filePath); err != nil && !os.IsNotExist(err) {
+			dc.logger.Debug("failed to remove expired cache file",
+				slogx.Err(err),
+				slog.String("file", filePath))
+		}
+
+		dc.logger.Debug("expired cache entry", slog.String("key", key))
+	}
+
+	entries, err := os.ReadDir(dc.baseDir)
+	if err != nil {
+		dc.logger.Debug("failed to read cache directory", slogx.Err(err), slog.String("baseDir", dc.baseDir))
+		return
+	}
+
+	knownFiles := make(map[string]struct{}, len(dc.files))
+	for _, path := range dc.files {
+		knownFiles[path] = struct{}{}
+	}
+
+	// Anything in the directory that no live key points at is an orphan.
+	for _, entry := range entries {
+		fullPath := filepath.Join(dc.baseDir, entry.Name())
+		if _, found := knownFiles[fullPath]; found {
+			continue
+		}
+		if err := os.Remove(fullPath); err != nil && !os.IsNotExist(err) {
+			dc.logger.Debug("failed to remove orphaned cache file",
+				slogx.Err(err),
+				slog.String("file", fullPath))
+		}
+	}
+}
+
+// generateFilepath returns the path of the cache file for key: the key with
+// path-unsafe characters substituted by keyReplacer, suffixed with ".metrics",
+// inside baseDir.
+func (dc *diskCache) generateFilepath(key string) string {
+	fileName := dc.keyReplacer.Replace(key) + ".metrics"
+	return filepath.Join(dc.baseDir, fileName)
+}
+
+// writeToDisk writes data to filePath, one element per line, each followed by
+// a newline.
+//
+// The lines are written to a temporary file next to filePath, which is renamed
+// over filePath only after every line has been written, flushed, and the file
+// closed. A failure therefore leaves the previous contents of filePath intact
+// instead of a truncated or partial file. The first error from create, write,
+// flush, close, or rename is returned.
+func (dc *diskCache) writeToDisk(filePath string, data [][]byte) (err error) {
+	tmpPath := filePath + ".tmp"
+
+	file, err := os.Create(tmpPath)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if err != nil {
+			// Best effort: a leftover temp file is also swept by Clean.
+			_ = os.Remove(tmpPath)
+		}
+	}()
+
+	writer := dc.writerPool.Get().(*bufio.Writer)
+	writer.Reset(file)
+	defer func() {
+		// Detach the writer from the file before pooling it, so an idle
+		// pooled writer does not keep the closed *os.File reachable.
+		writer.Reset(nil)
+		dc.writerPool.Put(writer)
+	}()
+
+	for _, line := range data {
+		if _, err = writer.Write(line); err != nil {
+			break
+		}
+		if err = writer.WriteByte('\n'); err != nil {
+			break
+		}
+	}
+	if err == nil {
+		err = writer.Flush()
+	}
+
+	// Always close the file, but keep the earliest error.
+	if closeErr := file.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		return err
+	}
+
+	err = os.Rename(tmpPath, filePath)
+	return err
+}
+
+// isExpired reports whether key should be treated as expired: either it has
+// no recorded timestamp, or at least dc.expire has passed since that timestamp.
+func (dc *diskCache) isExpired(key string) bool {
+	stamp, known := dc.timers[key]
+	if !known {
+		return true
+	}
+	return time.Since(stamp) >= dc.expire
+}
+
+// cleanup runs Clean periodically until the cache's context is cancelled.
+// It blocks, so it is meant to run in its own goroutine.
+func (dc *diskCache) cleanup() {
+	// Clean twice per expiry period. time.NewTicker panics on a non-positive
+	// interval, which the integer division produces for an expiry below 2ns,
+	// so fall back to one second in that case.
+	interval := dc.expire / 2
+	if interval <= 0 {
+		interval = time.Second
+	}
+
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-dc.ctx.Done():
+			return
+		case <-ticker.C:
+			dc.Clean()
+		}
+	}
+}
+
+// Shutdown cancels the cache's context, which makes the cleanup loop return.
+// It is a no-op when no cancel function is set. Cache files are left in place.
+func (dc *diskCache) Shutdown() {
+	if dc.cancel == nil {
+		return
+	}
+	dc.cancel()
+}
diff --git a/cmd/exporters/prometheus/httpd.go b/cmd/exporters/prometheus/httpd.go
index 02f4a0ce8..4b6e62e6e 100644
--- a/cmd/exporters/prometheus/httpd.go
+++ b/cmd/exporters/prometheus/httpd.go
@@ -10,8 +10,6 @@ import (
"bytes"
"errors"
"fmt"
- "github.com/netapp/harvest/v2/pkg/set"
- "github.com/netapp/harvest/v2/pkg/slogx"
"io"
"log/slog"
"net"
@@ -22,6 +20,9 @@ import (
"strconv"
"strings"
"time"
+
+ "github.com/netapp/harvest/v2/pkg/set"
+ "github.com/netapp/harvest/v2/pkg/slogx"
)
func (p *Prometheus) startHTTPD(addr string, port int) {
@@ -138,14 +139,25 @@ func (p *Prometheus) ServeMetrics(w http.ResponseWriter, r *http.Request) {
return
}
- p.cache.Lock()
- tagsSeen := make(map[string]struct{})
-
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
w.Header().Set("X-Content-Type-Options", "nosniff")
- for _, metrics := range p.cache.Get() {
- count += p.writeMetrics(w, metrics, tagsSeen)
+ tagsSeen := make(map[string]struct{})
+
+ if p.useDiskCache {
+ p.diskCache.Lock()
+ count = p.diskCache.GetMetricCount()
+ err := p.diskCache.StreamToWriter(w)
+ p.diskCache.Unlock()
+ if err != nil {
+ p.Logger.Error("failed to stream metrics from disk cache", slogx.Err(err))
+ }
+ } else {
+ p.memoryCache.Lock()
+ for _, metrics := range p.memoryCache.Get() {
+ count += p.writeMetrics(w, metrics, tagsSeen)
+ }
+ p.memoryCache.Unlock()
}
// serve our own metadata
@@ -153,8 +165,6 @@ func (p *Prometheus) ServeMetrics(w http.ResponseWriter, r *http.Request) {
md, _ := p.render(p.Metadata)
count += p.writeMetrics(w, md, tagsSeen)
- p.cache.Unlock()
-
// update metadata
p.Metadata.Reset()
err := p.Metadata.LazySetValueInt64("time", "http", time.Since(start).Microseconds())
@@ -250,45 +260,63 @@ func (p *Prometheus) ServeInfo(w http.ResponseWriter, r *http.Request) {
uniqueData := map[string]map[string][]string{}
- // copy cache so we don't lock it
- p.cache.Lock()
- cache := make(map[string][][]byte)
- for key, data := range p.cache.Get() {
- cache[key] = make([][]byte, len(data))
- copy(cache[key], data)
- }
- p.cache.Unlock()
+ if p.useDiskCache {
+ p.diskCache.Lock()
+ stats, err := p.diskCache.GetStats()
+ p.diskCache.Unlock()
+ if err != nil {
+ p.Logger.Error("failed to get cache statistics", slogx.Err(err))
+ http.Error(w, "Failed to collect cache statistics", http.StatusInternalServerError)
+ return
+ }
- p.Logger.Debug("fetching cached elements", slog.Int("count", len(cache)))
+ numCollectors = stats.NumCollectors
+ numObjects = stats.NumObjects
+ numMetrics = stats.NumMetrics
+ uniqueData = stats.UniqueData
+ } else {
+ p.memoryCache.Lock()
+ cacheData := make(map[string][][]byte)
+ for key, data := range p.memoryCache.Get() {
+ cacheData[key] = make([][]byte, len(data))
+ copy(cacheData[key], data)
+ }
+ p.memoryCache.Unlock()
- for key, data := range cache {
- var collector, object string
+ p.Logger.Debug("fetching cached elements", slog.Int("count", len(cacheData)))
- if keys := strings.Split(key, "."); len(keys) == 3 {
- collector = keys[0]
- object = keys[1]
- } else {
- continue
- }
+ for key, data := range cacheData {
+ var collector, object string
- // skip metadata
- if strings.HasPrefix(object, "metadata_") {
- continue
- }
+ if keys := strings.Split(key, "."); len(keys) == 3 {
+ collector = keys[0]
+ object = keys[1]
+ } else {
+ continue
+ }
- metricNames := set.New()
- for _, m := range data {
- if x := strings.Split(string(m), "{"); len(x) >= 2 && x[0] != "" {
- metricNames.Add(x[0])
+ // skip metadata
+ if strings.HasPrefix(object, "metadata_") {
+ continue
}
- }
- numMetrics += metricNames.Size()
- if _, exists := uniqueData[collector]; !exists {
- uniqueData[collector] = make(map[string][]string)
- }
- uniqueData[collector][object] = metricNames.Values()
+ metricNames := set.New()
+ for _, m := range data {
+ if x := strings.Split(string(m), "{"); len(x) >= 2 && x[0] != "" {
+ metricNames.Add(x[0])
+ }
+ }
+ numMetrics += metricNames.Size()
+ if _, exists := uniqueData[collector]; !exists {
+ uniqueData[collector] = make(map[string][]string)
+ numCollectors++
+ }
+ if _, exists := uniqueData[collector][object]; !exists {
+ numObjects++
+ }
+ uniqueData[collector][object] = metricNames.Values()
+ }
}
for col, perObject := range uniqueData {
@@ -301,11 +329,9 @@ func (p *Prometheus) ServeInfo(w http.ResponseWriter, r *http.Request) {
}
}
objects = append(objects, fmt.Sprintf(objectTemplate, obj, strings.Join(metrics, "\n")))
- numObjects++
}
body = append(body, fmt.Sprintf(collectorTemplate, col, strings.Join(objects, "\n")))
- numCollectors++
}
poller := p.Options.Poller
diff --git a/cmd/exporters/prometheus/prometheus.go b/cmd/exporters/prometheus/prometheus.go
index 9cc69f57f..73fa275fa 100644
--- a/cmd/exporters/prometheus/prometheus.go
+++ b/cmd/exporters/prometheus/prometheus.go
@@ -30,6 +30,7 @@ import (
"github.com/netapp/harvest/v2/pkg/set"
"github.com/netapp/harvest/v2/pkg/slogx"
"log/slog"
+ "path/filepath"
"regexp"
"slices"
"sort"
@@ -48,7 +49,8 @@ const (
type Prometheus struct {
*exporter.AbstractExporter
- cache *cache
+ memoryCache *cache
+ diskCache *diskCache
allowAddrs []string
allowAddrsRegex []*regexp.Regexp
cacheAddrs map[string]bool
@@ -56,12 +58,39 @@ type Prometheus struct {
addMetaTags bool
globalPrefix string
replacer *strings.Replacer
+ useDiskCache bool
}
func New(abc *exporter.AbstractExporter) exporter.Exporter {
return &Prometheus{AbstractExporter: abc}
}
+// createCache sets up the cache selected by p.useDiskCache, using d as the
+// lifetime of cached entries.
+//
+// In memory mode it assigns p.memoryCache. In disk mode it requires a
+// configured path, appends the poller name (when set) so that pollers do not
+// share a directory, and assigns p.diskCache. On a missing path or a failed
+// disk cache creation p.diskCache stays nil; the caller detects that.
+func (p *Prometheus) createCache(d time.Duration) {
+	if !p.useDiskCache {
+		p.memoryCache = newCache(d)
+		return
+	}
+
+	// Path is mandatory when disk cache is enabled
+	diskCfg := p.Params.DiskCache
+	if diskCfg == nil || diskCfg.Path == "" {
+		p.Logger.Error("disk cache enabled but path is not specified")
+		return
+	}
+
+	// Include poller name in cache directory to avoid collisions between multiple pollers
+	cacheDir := diskCfg.Path
+	if poller := p.Options.Poller; poller != "" {
+		cacheDir = filepath.Join(cacheDir, poller)
+	}
+
+	p.diskCache = newDiskCache(d, cacheDir, p.Logger)
+	if p.diskCache == nil {
+		return
+	}
+
+	p.Logger.Debug("disk cache configured",
+		slog.String("cacheDir", cacheDir))
+}
+
func (p *Prometheus) Init() error {
if err := p.InitAbc(); err != nil {
@@ -99,25 +128,39 @@ func (p *Prometheus) Init() error {
p.addMetaTags = true
}
+ // Check if disk cache is enabled (path is mandatory)
+ if p.Params.DiskCache != nil && p.Params.DiskCache.Path != "" {
+ p.useDiskCache = true
+ p.Logger.Debug("disk cache enabled - will use disk-based caching for RSS optimization",
+ slog.String("path", p.Params.DiskCache.Path))
+ } else {
+ p.useDiskCache = false
+ p.Logger.Debug("disk cache disabled - using memory-based caching")
+ }
+
// all other parameters are only relevant to the HTTP daemon
if x := p.Params.CacheMaxKeep; x != nil {
if d, err := time.ParseDuration(*x); err == nil {
p.Logger.Debug("using custom cache_max_keep", slog.String("cacheMaxKeep", *x))
- p.cache = newCache(d)
+ p.createCache(d)
} else {
p.Logger.Error("cache_max_keep", slogx.Err(err), slog.String("x", *x))
}
}
- if p.cache == nil {
+ if p.memoryCache == nil && p.diskCache == nil {
p.Logger.Debug("using default cache_max_keep", slog.String("cacheMaxKeep", cacheMaxKeep))
if d, err := time.ParseDuration(cacheMaxKeep); err == nil {
- p.cache = newCache(d)
+ p.createCache(d)
} else {
return err
}
}
+ if p.memoryCache == nil && p.diskCache == nil {
+ return errs.New(errs.ErrInvalidParam, "cache initialization failed")
+ }
+
// allow access to metrics only from the given plain addresses
if x := p.Params.AllowedAddrs; x != nil {
p.allowAddrs = *x
@@ -223,13 +266,34 @@ func (p *Prometheus) Export(data *matrix.Matrix) (exporter.Stats, error) {
// fix render time for metadata
d := time.Since(start)
+ // Extract metric names from matrix for cache statistics
+ var prefix string
+ if data.Object == "" {
+ prefix = strings.TrimSuffix(p.globalPrefix, "_")
+ } else {
+ prefix = p.globalPrefix + data.Object
+ }
+
+ metricNames := set.New()
+ for _, metric := range data.GetMetrics() {
+ if metric.IsExportable() {
+ metricNames.Add(prefix + "_" + metric.GetName())
+ }
+ }
+
// store metrics in cache
key := data.UUID + "." + data.Object + "." + data.Identifier
// lock cache, to prevent HTTPd reading while we are mutating it
- p.cache.Lock()
- p.cache.Put(key, metrics)
- p.cache.Unlock()
+ if p.useDiskCache {
+ p.diskCache.Lock()
+ p.diskCache.Put(key, metrics, metricNames)
+ p.diskCache.Unlock()
+ } else {
+ p.memoryCache.Lock()
+ p.memoryCache.Put(key, metrics, metricNames)
+ p.memoryCache.Unlock()
+ }
// update metadata
p.AddExportCount(uint64(len(metrics)))
@@ -506,7 +570,21 @@ func (p *Prometheus) render(data *matrix.Matrix) ([][]byte, exporter.Stats) {
if p.Params.SortLabels {
sort.Strings(metricLabels)
}
- x := prefix + "_" + metric.GetName() + "{" + joinedKeys + "," + strings.Join(metricLabels, ",") + "} " + value
+
+ buf.Reset()
+ buf.WriteString(prefix)
+ buf.WriteString("_")
+ buf.WriteString(metric.GetName())
+ buf.WriteString("{")
+ buf.WriteString(joinedKeys)
+ buf.WriteString(",")
+ buf.WriteString(strings.Join(metricLabels, ","))
+ buf.WriteString("} ")
+ buf.WriteString(value)
+
+ xbr := buf.Bytes()
+ metricLine := make([]byte, len(xbr))
+ copy(metricLine, xbr)
prefixedName := prefix + "_" + metric.GetName()
if tagged != nil && !tagged.Has(prefixedName) {
@@ -517,8 +595,8 @@ func (p *Prometheus) render(data *matrix.Matrix) ([][]byte, exporter.Stats) {
renderedBytes += uint64(len(help)) + uint64(len(typeT))
}
- rendered = append(rendered, []byte(x))
- renderedBytes += uint64(len(x))
+ rendered = append(rendered, metricLine)
+ renderedBytes += uint64(len(metricLine))
// scalar metric
} else {
buf.Reset()
diff --git a/cmd/exporters/prometheus/prometheus_test.go b/cmd/exporters/prometheus/prometheus_test.go
index 16d268984..a38dcb244 100644
--- a/cmd/exporters/prometheus/prometheus_test.go
+++ b/cmd/exporters/prometheus/prometheus_test.go
@@ -5,15 +5,16 @@
package prometheus
import (
+ "slices"
+ "strings"
+ "testing"
+
"github.com/google/go-cmp/cmp"
"github.com/netapp/harvest/v2/assert"
"github.com/netapp/harvest/v2/cmd/poller/exporter"
"github.com/netapp/harvest/v2/cmd/poller/options"
"github.com/netapp/harvest/v2/pkg/conf"
"github.com/netapp/harvest/v2/pkg/matrix"
- "slices"
- "strings"
- "testing"
)
func TestFilterMetaTags(t *testing.T) {
@@ -147,7 +148,8 @@ net_app_bike_max_speed{} 3`, "bike"},
prom := p.(*Prometheus)
var lines []string
- for _, metrics := range prom.cache.Get() {
+
+ for _, metrics := range prom.memoryCache.Get() {
for _, metric := range metrics {
lines = append(lines, string(metric))
}
@@ -184,7 +186,8 @@ netapp_change_log{category="metric",cluster="umeng-aff300-01-02",object="volume"
prom := p.(*Prometheus)
var lines []string
- for _, metrics := range prom.cache.Get() {
+
+ for _, metrics := range prom.memoryCache.Get() {
for _, metric := range metrics {
lines = append(lines, string(metric))
}
@@ -258,7 +261,8 @@ func TestRenderHistogramExample(t *testing.T) {
prom := p.(*Prometheus)
var lines []string
- for _, metrics := range prom.cache.Get() {
+
+ for _, metrics := range prom.memoryCache.Get() {
for _, metricLine := range metrics {
sline := string(metricLine)
if !strings.HasPrefix(sline, "#") {
diff --git a/docs/prometheus-exporter.md b/docs/prometheus-exporter.md
index 683419bc5..b60738be2 100644
--- a/docs/prometheus-exporter.md
+++ b/docs/prometheus-exporter.md
@@ -58,6 +58,7 @@ An overview of all parameters:
| [`allow_addrs`](#allow_addrs) | list of strings, optional | allow access only if host matches any of the provided addresses | |
| [`allow_addrs_regex`](#allow_addrs_regex) | list of strings, optional | allow access only if host address matches at least one of the regular expressions | |
| `cache_max_keep` | string (Go duration format), optional | maximum amount of time metrics are cached (in case Prometheus does not timely collect the metrics) | `5m` |
+| [`disk_cache`](#disk_cache) | object, optional | disk-based cache configuration | |
| `global_prefix` | string, optional | add a prefix to all metrics (e.g. `netapp_`) | |
| `local_http_addr` | string, optional | address of the HTTP server Harvest starts for Prometheus to scrape:
use `localhost` to serve only on the local machine
use `0.0.0.0` (default) if Prometheus is scrapping from another machine | `0.0.0.0` |
| `port_range` | int-int (range), overrides `port` if specified | lower port to upper port (inclusive) of the HTTP end-point to create when a poller specifies this exporter. Starting at lower port, each free port will be tried sequentially up to the upper port. | |
@@ -221,6 +222,45 @@ Exporters:
Access will only be allowed from the IP4 range `192.168.0.0`-`192.168.0.255`.
+### disk_cache
+The `disk_cache` parameter enables disk-based staging of metrics before they are served to Prometheus. Instead of storing formatted metrics in memory, Harvest flushes them to disk files. When Prometheus scrapes the `/metrics` endpoint, Harvest reads these cached files from disk and streams them directly to Prometheus. This approach reduces memory overhead, making it ideal for large deployments with many metrics.
+
+**Configuration:**
+
+The `disk_cache` parameter requires a `path` field that specifies the directory where cache files will be stored. The path is **mandatory** when using disk cache.
+
+**Notes:**
+
+- The `path` is **required** when using `disk_cache`
+- Harvest will automatically create a subdirectory for each poller to avoid conflicts between multiple pollers
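+- The cache directory (the per-poller subdirectory under `path`) is deleted and recreated every time the poller starts, so do not keep other files in it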
+- Ensure the specified directory is writable by the Harvest process
+
+**Example:**
+
+```yaml
+Exporters:
+ prom_disk:
+ exporter: Prometheus
+ port_range: 13000-13100
+ disk_cache:
+ path: /var/lib/harvest/cache
+
+Pollers:
+ cluster-01:
+ addr: 10.0.1.1
+ exporters:
+ - prom_disk
+ cluster-02:
+ addr: 10.0.1.2
+ exporters:
+ - prom_disk
+```
+
+In this example, cache files will be created in:
+- `/var/lib/harvest/cache/cluster-01/`
+- `/var/lib/harvest/cache/cluster-02/`
+
## Configure Prometheus to scrape Harvest pollers
There are two ways to tell Prometheus how to scrape Harvest: using HTTP service discovery (SD) or listing each poller
diff --git a/pkg/conf/conf.go b/pkg/conf/conf.go
index 2a4949d96..c65fa96a0 100644
--- a/pkg/conf/conf.go
+++ b/pkg/conf/conf.go
@@ -785,6 +785,10 @@ func ZapiPoller(n *node.Node) *Poller {
return &p
}
+type DiskCacheConfig struct { // settings for the Prometheus exporter's disk-backed metric cache (the `disk_cache` exporter parameter)
+ Path string `yaml:"path"` // base directory for cache files; the exporter enables the disk cache only when this is non-empty
+}
+
type Exporter struct {
Port *int `yaml:"port,omitempty"`
PortRange *IntRange `yaml:"port_range,omitempty"`
@@ -804,12 +808,13 @@ type Exporter struct {
TLS TLS `yaml:"tls,omitempty"`
// InfluxDB specific
- Bucket *string `yaml:"bucket,omitempty"`
- Org *string `yaml:"org,omitempty"`
- Token *string `yaml:"token,omitempty"`
- Precision *string `yaml:"precision,omitempty"`
- ClientTimeout *string `yaml:"client_timeout,omitempty"`
- Version *string `yaml:"version,omitempty"`
+ Bucket *string `yaml:"bucket,omitempty"`
+ Org *string `yaml:"org,omitempty"`
+ Token *string `yaml:"token,omitempty"`
+ Precision *string `yaml:"precision,omitempty"`
+ ClientTimeout *string `yaml:"client_timeout,omitempty"`
+ Version *string `yaml:"version,omitempty"`
+ DiskCache *DiskCacheConfig `yaml:"disk_cache,omitempty"`
IsTest bool `yaml:"-"` // true when run from unit tests
IsEmbedded bool `yaml:"-"` // true when the exporter is embedded in a poller