Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmd/go.mod
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
module github.com/google/cadvisor/cmd

go 1.23.0
go 1.24.0

toolchain go1.24.0
toolchain go1.24.2

// Record that the cmd module requires the cadvisor library module.
// The github.com/google/cadvisor/cmd module is built using the Makefile
Expand All @@ -29,6 +29,7 @@ require (
golang.org/x/oauth2 v0.30.0
google.golang.org/api v0.235.0
gopkg.in/olivere/elastic.v2 v2.0.61
k8s.io/cri-api v0.34.1 // indirect
k8s.io/klog/v2 v2.130.1
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979
)
Expand Down
2 changes: 2 additions & 0 deletions cmd/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.2 h1:kG1BFyqVHuQoVQiR1bWGnfz/fmHvvuiSPIV7rvl360E=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
k8s.io/cri-api v0.34.1 h1:n2bU++FqqJq0CNjP/5pkOs0nIx7aNpb1Xa053TecQkM=
k8s.io/cri-api v0.34.1/go.mod h1:4qVUjidMg7/Z9YGZpqIDygbkPWkg3mkS1PvOx/kpHTE=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg=
Expand Down
14 changes: 14 additions & 0 deletions container/containerd/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/credentials/insecure"
emptypb "google.golang.org/protobuf/types/known/emptypb"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"

"github.com/google/cadvisor/container/containerd/containers"
"github.com/google/cadvisor/container/containerd/pkg/dialer"
Expand All @@ -40,12 +41,14 @@ type client struct {
containerService containersapi.ContainersClient
taskService tasksapi.TasksClient
versionService versionapi.VersionClient
runtimeService runtimeapi.RuntimeServiceClient
}

type ContainerdClient interface {
LoadContainer(ctx context.Context, id string) (*containers.Container, error)
TaskPid(ctx context.Context, id string) (uint32, error)
Version(ctx context.Context) (string, error)
ContainerStats(ctx context.Context, id string) (*runtimeapi.ContainerStats, error)
}

var (
Expand Down Expand Up @@ -104,6 +107,7 @@ func Client(address, namespace string) (ContainerdClient, error) {
containerService: containersapi.NewContainersClient(conn),
taskService: tasksapi.NewTasksClient(conn),
versionService: versionapi.NewVersionClient(conn),
runtimeService: runtimeapi.NewRuntimeServiceClient(conn),
}
})
return ctrdClient, retErr
Expand Down Expand Up @@ -140,6 +144,16 @@ func (c *client) Version(ctx context.Context) (string, error) {
return response.Version, nil
}

func (c *client) ContainerStats(ctx context.Context, id string) (*runtimeapi.ContainerStats, error) {
resp, err := c.runtimeService.ContainerStats(ctx, &runtimeapi.ContainerStatsRequest{
ContainerId: id,
})
if err != nil {
return nil, err
}
return resp.Stats, nil
}

func containerFromProto(containerpb *containersapi.Container) *containers.Container {
var runtime containers.RuntimeInfo
// TODO: is nil check required for containerpb
Expand Down
17 changes: 17 additions & 0 deletions container/containerd/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"fmt"

"github.com/google/cadvisor/container/containerd/containers"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
)

type containerdClientMock struct {
Expand All @@ -45,6 +46,22 @@ func (c *containerdClientMock) TaskPid(ctx context.Context, id string) (uint32,
return 2389, nil
}

func (c *containerdClientMock) ContainerStats(ctx context.Context, id string) (*runtimeapi.ContainerStats, error) {
if c.returnErr != nil {
return nil, c.returnErr
}
// Return mock stats with filesystem usage
return &runtimeapi.ContainerStats{
Attributes: &runtimeapi.ContainerAttributes{
Id: id,
},
WritableLayer: &runtimeapi.FilesystemUsage{
UsedBytes: &runtimeapi.UInt64Value{Value: 1024 * 1024}, // 1MB
InodesUsed: &runtimeapi.UInt64Value{Value: 100},
},
}, nil
}

func mockcontainerdClient(cntrs map[string]*containers.Container, returnErr error) ContainerdClient {
return &containerdClientMock{
cntrs: cntrs,
Expand Down
136 changes: 133 additions & 3 deletions container/containerd/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"strings"
"sync"
"time"

"github.com/containerd/errdefs"
Expand All @@ -34,6 +35,26 @@ import (
info "github.com/google/cadvisor/info/v1"
)

// fsUsageCache caches filesystem usage data to avoid excessive disk I/O
type fsUsageCache struct {
timestamp time.Time
usedBytes uint64
inodesUsed uint64
cacheDuration time.Duration
}

// newFsUsageCache creates a new filesystem usage cache with default 30s cache duration
func newFsUsageCache() *fsUsageCache {
return &fsUsageCache{
cacheDuration: 30 * time.Second,
}
}

// isValid checks if the cached data is still valid
func (c *fsUsageCache) isValid() bool {
return time.Since(c.timestamp) < c.cacheDuration
}

type containerdContainerHandler struct {
machineInfoFactory info.MachineInfoFactory
// Absolute path to the cgroup hierarchies of this container.
Expand All @@ -50,6 +71,16 @@ type containerdContainerHandler struct {
includedMetrics container.MetricSet

libcontainerHandler *containerlibcontainer.Handler

// Filesystem usage cache with timestamp to avoid excessive disk I/O
fsUsageCache *fsUsageCache
fsUsageCacheLock sync.RWMutex

// Container snapshot key for filesystem usage calculation
snapshotKey string
snapshotter string
// CRI client for stats collection
client ContainerdClient
}

var _ container.ContainerHandler = &containerdContainerHandler{}
Expand Down Expand Up @@ -143,6 +174,10 @@ func newContainerdContainerHandler(
includedMetrics: metrics,
reference: containerReference,
libcontainerHandler: libcontainerHandler,
fsUsageCache: newFsUsageCache(),
snapshotKey: cntr.SnapshotKey,
snapshotter: cntr.Snapshotter,
client: client,
}
// Add the name and bare ID as aliases of the container.
handler.image = cntr.Image
Expand Down Expand Up @@ -171,9 +206,8 @@ func (h *containerdContainerHandler) ContainerReference() (info.ContainerReferen
}

func (h *containerdContainerHandler) GetSpec() (info.ContainerSpec, error) {
// TODO: Since we dont collect disk usage stats for containerd, we set hasFilesystem
// to false. Revisit when we support disk usage stats for containerd
hasFilesystem := false
// Enable filesystem stats collection for containerd containers with disk usage metrics
hasFilesystem := h.includedMetrics.Has(container.DiskUsageMetrics) && h.canCollectFilesystemStats()
hasNet := h.includedMetrics.Has(container.NetworkUsageMetrics)
spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, hasNet, hasFilesystem)
spec.Labels = h.labels
Expand All @@ -183,6 +217,92 @@ func (h *containerdContainerHandler) GetSpec() (info.ContainerSpec, error) {
return spec, err
}

// canCollectFilesystemStats determines if filesystem stats can be collected for this container
func (h *containerdContainerHandler) canCollectFilesystemStats() bool {
// Only collect filesystem stats for regular containers (not pause/sandbox containers)
// and when we have snapshot information
if h.labels["io.cri-containerd.kind"] == "sandbox" {
return false
}
return h.snapshotKey != "" && h.snapshotter != ""
}

// collectFilesystemUsage collects filesystem usage statistics using CRI stats API
func (h *containerdContainerHandler) collectFilesystemUsage(stats *info.ContainerStats) error {
setStatsFromCache := func() {
stats.Filesystem = []info.FsStats{{
Device: h.snapshotter + ":" + h.snapshotKey,
Type: "containerd-snapshotter",
Usage: h.fsUsageCache.usedBytes,
HasInodes: true,
Inodes: h.fsUsageCache.inodesUsed,
}}
}

h.fsUsageCacheLock.RLock()
if h.fsUsageCache.isValid() {
// Use cached data
setStatsFromCache()
h.fsUsageCacheLock.RUnlock()
return nil
}
h.fsUsageCacheLock.RUnlock()

// Cache miss or expired, collect fresh data using CRI stats API
h.fsUsageCacheLock.Lock()
defer h.fsUsageCacheLock.Unlock()

// Double-check after acquiring write lock
if h.fsUsageCache.isValid() {
setStatsFromCache()
return nil
}

// Get filesystem usage from CRI stats API
usedBytes, inodesUsed, err := h.getCRIFilesystemUsage()
if err != nil {
return err
}

// Update cache
h.fsUsageCache.timestamp = time.Now()
h.fsUsageCache.usedBytes = usedBytes
h.fsUsageCache.inodesUsed = inodesUsed

// Set filesystem stats
setStatsFromCache()

return nil
}

// getCRIFilesystemUsage gets filesystem usage from CRI stats API
func (h *containerdContainerHandler) getCRIFilesystemUsage() (uint64, uint64, error) {
ctx := context.Background()
containerStats, err := h.client.ContainerStats(ctx, h.reference.Id)
if err != nil {
return 0, 0, fmt.Errorf("failed to get CRI container stats: %v", err)
}

if containerStats == nil {
return 0, 0, fmt.Errorf("container stats is nil")
}

// Extract filesystem usage from CRI stats
var usedBytes, inodesUsed uint64

// Get writable layer usage (container's filesystem usage)
if containerStats.WritableLayer != nil {
if containerStats.WritableLayer.UsedBytes != nil {
usedBytes = containerStats.WritableLayer.UsedBytes.Value
}
if containerStats.WritableLayer.InodesUsed != nil {
inodesUsed = containerStats.WritableLayer.InodesUsed.Value
}
}

return usedBytes, inodesUsed, nil
}

func (h *containerdContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := h.machineInfoFactory.GetMachineInfo()
if err != nil {
Expand All @@ -192,6 +312,16 @@ func (h *containerdContainerHandler) getFsStats(stats *info.ContainerStats) erro
if h.includedMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
}

// Collect filesystem usage stats if enabled and possible
if h.includedMetrics.Has(container.DiskUsageMetrics) && h.canCollectFilesystemStats() {
if err := h.collectFilesystemUsage(stats); err != nil {
// Log error but don't fail the entire stats collection
// This maintains backward compatibility
return nil
}
}

return nil
}

Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
module github.com/google/cadvisor

go 1.23.0
go 1.24.0

toolchain go1.24.0
toolchain go1.24.2

require (
cloud.google.com/go/compute/metadata v0.7.0
Expand Down Expand Up @@ -33,6 +33,7 @@ require (
golang.org/x/sys v0.33.0
google.golang.org/grpc v1.72.2
google.golang.org/protobuf v1.36.6
k8s.io/cri-api v0.34.1
k8s.io/klog/v2 v2.130.1
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979
)
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.0.2 h1:kG1BFyqVHuQoVQiR1bWGnfz/fmHvvuiSPIV7rvl360E=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
k8s.io/cri-api v0.34.1 h1:n2bU++FqqJq0CNjP/5pkOs0nIx7aNpb1Xa053TecQkM=
k8s.io/cri-api v0.34.1/go.mod h1:4qVUjidMg7/Z9YGZpqIDygbkPWkg3mkS1PvOx/kpHTE=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg=
Expand Down