Skip to content

Commit b780fe2

Browse files
committed
feat: add metric for non-evictable memory
The goal of this PR is to have a cAdvisor metric which (as accurately as possible) describes the amount of container memory which is not evictable by the kernel. This new metric can be used to accurately graph and alert on container memory usage regardless of its evictable memory usage patterns (e.g. a large active page cache). working_set_bytes today does not always align with non-evictable memory. For example, two containers in a pod sharing files in an emptyDir will increase total_active_file cache as one container writes and another container reads over time, dramatically increasing working_set_bytes. Under increasing non-evictable memory demands from the file-owning process, total_active_file will decrease, and working_set_bytes's value will hover around 90% of the cgroup memory limit. This makes alerting difficult, as working_set_bytes does not convey that the pod has evictable active page cache that the kernel is slowly draining. In other words, total_active_file memory can be evicted by the kernel, but is included in working_set_bytes. As an alternative to a new metric, working_set_bytes could be updated to represent non-evictable memory and exclude total_active_file (along with any other evictable fields).
1 parent 04006e5 commit b780fe2

File tree

13 files changed

+74
-15
lines changed

13 files changed

+74
-15
lines changed

cmd/internal/storage/bigquery/bigquery.go

+5
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ const (
5050
colMemoryUsage string = "memory_usage"
5151
// Working set size
5252
colMemoryWorkingSet string = "memory_working_set"
53+
// Non-evictable set size
54+
colMemoryNonEvictableSet string = "memory_non_evictable_set"
5355
// Container page fault
5456
colMemoryContainerPgfault string = "memory_container_pgfault"
5557
// Container major page fault
@@ -226,6 +228,9 @@ func (s *bigqueryStorage) containerStatsToRows(
226228
// Working set size
227229
row[colMemoryWorkingSet] = stats.Memory.WorkingSet
228230

231+
// Non-evictable set size
232+
row[colMemoryNonEvictableSet] = stats.Memory.NonEvictableSet
233+
229234
// container page fault
230235
row[colMemoryContainerPgfault] = stats.Memory.ContainerData.Pgfault
231236

cmd/internal/storage/influxdb/influxdb.go

+4
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ const (
7070
serMemoryMappedFile string = "memory_mapped_file"
7171
// Working set size
7272
serMemoryWorkingSet string = "memory_working_set"
73+
// Non-evictable set size
74+
serMemoryNonEvictableSet string = "memory_non_evictable_set"
7375
// Number of memory usage hits limits
7476
serMemoryFailcnt string = "memory_failcnt"
7577
// Cumulative count of memory allocation failures
@@ -256,6 +258,8 @@ func (s *influxdbStorage) memoryStatsToPoints(
256258
points = append(points, makePoint(serMemoryMappedFile, stats.Memory.MappedFile))
257259
// Working Set Size
258260
points = append(points, makePoint(serMemoryWorkingSet, stats.Memory.WorkingSet))
261+
// Non-evictable Set Size
262+
points = append(points, makePoint(serMemoryNonEvictableSet, stats.Memory.NonEvictableSet))
259263
// Number of memory usage hits limits
260264
points = append(points, makePoint(serMemoryFailcnt, stats.Memory.Failcnt))
261265

cmd/internal/storage/influxdb/influxdb_test.go

+6
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ func (self *influxDbTestStorageDriver) StatsEq(a, b *info.ContainerStats) bool {
7575
return false
7676
}
7777

78+
if a.Memory.NonEvictableSet != b.Memory.NonEvictableSet {
79+
return false
80+
}
81+
7882
if !reflect.DeepEqual(a.Network, b.Network) {
7983
return false
8084
}
@@ -253,6 +257,7 @@ func TestContainerStatsToPoints(t *testing.T) {
253257
assertContainsPointWithValue(t, points, serMemoryMappedFile, stats.Memory.MappedFile)
254258
assertContainsPointWithValue(t, points, serMemoryUsage, stats.Memory.Usage)
255259
assertContainsPointWithValue(t, points, serMemoryWorkingSet, stats.Memory.WorkingSet)
260+
assertContainsPointWithValue(t, points, serMemoryNonEvictableSet, stats.Memory.NonEvictableSet)
256261
assertContainsPointWithValue(t, points, serMemoryFailcnt, stats.Memory.Failcnt)
257262
assertContainsPointWithValue(t, points, serMemoryFailure, stats.Memory.ContainerData.Pgfault)
258263
assertContainsPointWithValue(t, points, serMemoryFailure, stats.Memory.ContainerData.Pgmajfault)
@@ -353,6 +358,7 @@ func createTestStats() (*info.ContainerInfo, *info.ContainerStats) {
353358
Swap: 1024,
354359
MappedFile: 1025327104,
355360
WorkingSet: 23630012416,
361+
NonEvictableSet: 29459246253,
356362
Failcnt: 1,
357363
ContainerData: info.MemoryStatsMemoryData{Pgfault: 100328455, Pgmajfault: 97},
358364
HierarchicalData: info.MemoryStatsMemoryData{Pgfault: 100328454, Pgmajfault: 96},

cmd/internal/storage/statsd/statsd.go

+4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ const (
5757
serMemoryMappedFile string = "memory_mapped_file"
5858
// Working set size
5959
serMemoryWorkingSet string = "memory_working_set"
60+
// Non-evictable set size
61+
serMemoryNonEvictableSet string = "memory_non_evictable_set"
6062
// Number of memory usage hits limits
6163
serMemoryFailcnt string = "memory_failcnt"
6264
// Cumulative count of memory allocation failures
@@ -159,6 +161,8 @@ func (s *statsdStorage) memoryStatsToValues(series *map[string]uint64, stats *in
159161
(*series)[serMemoryMappedFile] = stats.Memory.MappedFile
160162
// Working Set Size
161163
(*series)[serMemoryWorkingSet] = stats.Memory.WorkingSet
164+
// Non-evictable Set Size
165+
(*series)[serMemoryNonEvictableSet] = stats.Memory.NonEvictableSet
162166
// Number of memory usage hits limits
163167
(*series)[serMemoryFailcnt] = stats.Memory.Failcnt
164168

cmd/internal/storage/stdout/stdout.go

+4
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ const (
5959
serMemoryMappedFile string = "memory_mapped_file"
6060
// Working set size
6161
serMemoryWorkingSet string = "memory_working_set"
62+
// Non-evictable set size
63+
serMemoryNonEvictableSet string = "memory_non_evictable_set"
6264
// Number of memory usage hits limits
6365
serMemoryFailcnt string = "memory_failcnt"
6466
// Cumulative count of memory allocation failures
@@ -164,6 +166,8 @@ func (driver *stdoutStorage) memoryStatsToValues(series *map[string]uint64, stat
164166
(*series)[serMemoryMappedFile] = stats.Memory.MappedFile
165167
// Working Set Size
166168
(*series)[serMemoryWorkingSet] = stats.Memory.WorkingSet
169+
// Non-evictable Set Size
170+
(*series)[serMemoryNonEvictableSet] = stats.Memory.NonEvictableSet
167171
// Number of memory usage hits limits
168172
(*series)[serMemoryFailcnt] = stats.Memory.Failcnt
169173

container/libcontainer/handler.go

+18-7
Original file line numberDiff line numberDiff line change
@@ -834,15 +834,26 @@ func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
834834
inactiveFileKeyName = "inactive_file"
835835
}
836836

837-
workingSet := ret.Memory.Usage
838-
if v, ok := s.MemoryStats.Stats[inactiveFileKeyName]; ok {
839-
if workingSet < v {
840-
workingSet = 0
841-
} else {
842-
workingSet -= v
837+
activeFileKeyName := "total_active_file"
838+
if cgroups.IsCgroup2UnifiedMode() {
839+
activeFileKeyName = "active_file"
840+
}
841+
842+
ret.Memory.WorkingSet = subtractStats(ret.Memory.Usage, s.MemoryStats.Stats, []string{inactiveFileKeyName})
843+
ret.Memory.NonEvictableSet = subtractStats(ret.Memory.Usage, s.MemoryStats.Stats, []string{inactiveFileKeyName, activeFileKeyName})
844+
}
845+
846+
func subtractStats(value uint64, stats map[string]uint64, keys []string) uint64 {
847+
for _, key := range keys {
848+
if v, ok := stats[key]; ok {
849+
if value < v {
850+
value = 0
851+
} else {
852+
value -= v
853+
}
843854
}
844855
}
845-
ret.Memory.WorkingSet = workingSet
856+
return value
846857
}
847858

848859
func setCPUSetStats(s *cgroups.Stats, ret *info.ContainerStats) {

info/v1/container.go

+5
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,11 @@ type MemoryStats struct {
393393
// Units: Bytes.
394394
WorkingSet uint64 `json:"working_set"`
395395

396+
// The amount of non-evictable memory; this gives an approximate figure
397+
// to determine when a container is near OOM-ing.
398+
// Units: Bytes.
399+
NonEvictableSet uint64 `json:"non_evictable_set"`
400+
396401
Failcnt uint64 `json:"failcnt"`
397402

398403
// Size of kernel memory allocated in bytes.

info/v2/conversion_test.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,12 @@ func TestContainerStatsFromV1(t *testing.T) {
137137
v1Stats := v1.ContainerStats{
138138
Timestamp: timestamp,
139139
Memory: v1.MemoryStats{
140-
Usage: 1,
141-
Cache: 2,
142-
RSS: 3,
143-
WorkingSet: 4,
144-
Failcnt: 5,
140+
Usage: 1,
141+
Cache: 2,
142+
RSS: 3,
143+
WorkingSet: 4,
144+
Failcnt: 5,
145+
NonEvictableSet: 6,
145146
ContainerData: v1.MemoryStatsMemoryData{
146147
Pgfault: 1,
147148
Pgmajfault: 2,

integration/tests/api/test_utils.go

+4
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,12 @@ func checkMemoryStats(t *testing.T, stat info.MemoryStats) {
6969

7070
assert.NotEqual(0, stat.Usage, "Memory usage should not be zero")
7171
assert.NotEqual(0, stat.WorkingSet, "Memory working set should not be zero")
72+
assert.NotEqual(0, stat.NonEvictableSet, "Memory non-evictable set should not be zero")
7273
if stat.WorkingSet > stat.Usage {
7374
t.Errorf("Memory working set (%d) should be at most equal to memory usage (%d)", stat.WorkingSet, stat.Usage)
7475
}
76+
if stat.NonEvictableSet > stat.Usage {
77+
t.Errorf("Memory non-evictable set (%d) should be at most equal to memory usage (%d)", stat.NonEvictableSet, stat.Usage)
78+
}
7579
// TODO(vmarmol): Add checks for ContainerData and HierarchicalData
7680
}

metrics/prometheus.go

+8
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,14 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
431431
return metricValues{{value: float64(s.Memory.WorkingSet), timestamp: s.Timestamp}}
432432
},
433433
},
434+
{
435+
name: "container_memory_non_evictable_set_bytes",
436+
help: "Current non-evictable set in bytes.",
437+
valueType: prometheus.GaugeValue,
438+
getValues: func(s *info.ContainerStats) metricValues {
439+
return metricValues{{value: float64(s.Memory.NonEvictableSet), timestamp: s.Timestamp}}
440+
},
441+
},
434442
{
435443
name: "container_memory_failures_total",
436444
help: "Cumulative count of memory allocation failures.",

metrics/prometheus_fake.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -329,9 +329,10 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
329329
LoadAverage: 2,
330330
},
331331
Memory: info.MemoryStats{
332-
Usage: 8,
333-
MaxUsage: 8,
334-
WorkingSet: 9,
332+
Usage: 8,
333+
MaxUsage: 8,
334+
WorkingSet: 9,
335+
NonEvictableSet: 7,
335336
ContainerData: info.MemoryStatsMemoryData{
336337
Pgfault: 10,
337338
Pgmajfault: 11,

metrics/testdata/prometheus_metrics

+3
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ container_memory_usage_bytes{container_env_foo_env="prod",container_label_foo_la
186186
# HELP container_memory_working_set_bytes Current working set in bytes.
187187
# TYPE container_memory_working_set_bytes gauge
188188
container_memory_working_set_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 9 1395066363000
189+
# HELP container_memory_non_evictable_set_bytes Current non-evictable set in bytes.
190+
# TYPE container_memory_non_evictable_set_bytes gauge
191+
container_memory_non_evictable_set_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 7 1395066363000
189192
# HELP container_network_advance_tcp_stats_total advance tcp connections statistic for container
190193
# TYPE container_network_advance_tcp_stats_total gauge
191194
container_network_advance_tcp_stats_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="activeopens",zone_name="hello"} 1.1038621e+07 1395066363000

metrics/testdata/prometheus_metrics_whitelist_filtered

+3
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ container_memory_usage_bytes{container_env_foo_env="prod",id="testcontainer",ima
186186
# HELP container_memory_working_set_bytes Current working set in bytes.
187187
# TYPE container_memory_working_set_bytes gauge
188188
container_memory_working_set_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 9 1395066363000
189+
# HELP container_memory_non_evictable_set_bytes Current non-evictable set in bytes.
190+
# TYPE container_memory_non_evictable_set_bytes gauge
191+
container_memory_non_evictable_set_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 7 1395066363000
189192
# HELP container_network_advance_tcp_stats_total advance tcp connections statistic for container
190193
# TYPE container_network_advance_tcp_stats_total gauge
191194
container_network_advance_tcp_stats_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",tcp_state="activeopens",zone_name="hello"} 1.1038621e+07 1395066363000

0 commit comments

Comments
 (0)