Skip to content

Commit

Permalink
cache: add tracing to prune
Browse files Browse the repository at this point in the history
Adds tracing to the cache manager prune operation. The attribute will
show both explicit prunes from a user and background prunes initiated
after solve requests are processed.

Signed-off-by: Jonathan A. Sternberg <[email protected]>
  • Loading branch information
jsternberg committed Jan 13, 2025
1 parent 174fb8a commit 9f8eaf2
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 9 deletions.
53 changes: 51 additions & 2 deletions cache/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@ import (
"github.com/moby/buildkit/util/disk"
"github.com/moby/buildkit/util/flightcontrol"
"github.com/moby/buildkit/util/progress"
"github.com/moby/buildkit/util/tracing"
digest "github.com/opencontainers/go-digest"
imagespecidentity "github.com/opencontainers/image-spec/identity"
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
)

Expand All @@ -38,6 +41,17 @@ var (
errInvalid = errors.New("invalid")
)

var (
PruneFilterAttribute = attribute.Key("moby.buildkit.prune.filter")
PruneAllAttribute = attribute.Key("moby.buildkit.prune.all")
PruneGCPolicyKeepDuration = attribute.Key("moby.buildkit.prune.gcpolicy.keepduration")
PruneGCPolicyReservedSpace = attribute.Key("moby.buildkit.prune.gcpolicy.reservedspace")
PruneGCPolicyMaxUsedSpace = attribute.Key("moby.buildkit.prune.gcpolicy.maxusedspace")
PruneGCPolicyMinFreeSpace = attribute.Key("moby.buildkit.prune.gcpolicy.minfreespace")
PruneDeleteSizeAttribute = attribute.Key("moby.buildkit.prune.delete.size")
PruneDeleteCountAttribute = attribute.Key("moby.buildkit.prune.delete.count")
)

const maxPruneBatch = 10 // maximum number of refs to prune while holding the manager lock

type ManagerOpt struct {
Expand All @@ -51,6 +65,7 @@ type ManagerOpt struct {
MetadataStore *metadata.Store
Root string
MountPoolRoot string
TracerProvider trace.TracerProvider
}

type Accessor interface {
Expand Down Expand Up @@ -95,7 +110,8 @@ type cacheManager struct {
Differ diff.Comparer
MetadataStore *metadata.Store

root string
root string
tracer trace.Tracer

mountPool sharableMountPool

Expand All @@ -114,6 +130,7 @@ func NewManager(opt ManagerOpt) (Manager, error) {
Differ: opt.Differ,
MetadataStore: opt.MetadataStore,
root: opt.Root,
tracer: tracing.Tracer(opt.TracerProvider),
records: make(map[string]*cacheRecord),
}

Expand Down Expand Up @@ -1009,6 +1026,9 @@ func (cm *cacheManager) createDiffRef(ctx context.Context, parents parentRefs, d
}

func (cm *cacheManager) Prune(ctx context.Context, ch chan client.UsageInfo, opts ...client.PruneInfo) error {
ctx, span := cm.tracer.Start(ctx, "cacheManager.Prune")
defer span.End()

cm.muPrune.Lock()

for _, opt := range opts {
Expand All @@ -1030,6 +1050,18 @@ func (cm *cacheManager) Prune(ctx context.Context, ch chan client.UsageInfo, opt
}

func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt client.PruneInfo) error {
ctx, span := cm.tracer.Start(ctx, "cacheManager.prune",
trace.WithAttributes(
PruneFilterAttribute.StringSlice(opt.Filter),
PruneAllAttribute.Bool(opt.All),
PruneGCPolicyKeepDuration.Int64(int64(opt.KeepDuration)),
PruneGCPolicyReservedSpace.Int64(opt.ReservedSpace),
PruneGCPolicyMaxUsedSpace.Int64(opt.MaxUsedSpace),
PruneGCPolicyMinFreeSpace.Int64(opt.MinFreeSpace),
),
)
defer span.End()

filter, err := filters.ParseAll(opt.Filter...)
if err != nil {
return errors.Wrapf(err, "failed to parse prune filters %v", opt.Filter)
Expand Down Expand Up @@ -1066,6 +1098,17 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
}
}

var (
totalReleasedSize int64
totalReleasedCount int64
)
defer func() {
span.SetAttributes(
PruneDeleteSizeAttribute.Int64(totalReleasedSize),
PruneDeleteCountAttribute.Int64(totalReleasedCount),
)
}()

popt := pruneOpt{
filter: filter,
all: opt.All,
Expand All @@ -1076,9 +1119,15 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
}
for {
releasedSize, releasedCount, err := cm.pruneOnce(ctx, ch, popt)
if err != nil || releasedCount == 0 {
if err != nil {
return err
}
totalReleasedSize += releasedSize
totalReleasedCount += releasedCount

if releasedCount == 0 {
return nil
}
popt.totalSize -= releasedSize
}
}
Expand Down
7 changes: 5 additions & 2 deletions cmd/buildkitd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ import (
"go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
tracev1 "go.opentelemetry.io/proto/otlp/collector/trace/v1"
"golang.org/x/sync/errgroup"
"google.golang.org/grpc"
Expand All @@ -95,6 +96,7 @@ type workerInitializerOpt struct {
config *config.Config
sessionManager *session.Manager
traceSocket string
tracerProvider trace.TracerProvider
}

type workerInitializer struct {
Expand Down Expand Up @@ -340,7 +342,7 @@ func main() {
return err
}

controller, err := newController(ctx, c, &cfg)
controller, err := newController(ctx, c, &cfg, tp)
if err != nil {
return err
}
Expand Down Expand Up @@ -751,7 +753,7 @@ func serverCredentials(cfg config.TLSConfig) (*tls.Config, error) {
return tlsConf, nil
}

func newController(ctx context.Context, c *cli.Context, cfg *config.Config) (*control.Controller, error) {
func newController(ctx context.Context, c *cli.Context, cfg *config.Config, tp trace.TracerProvider) (*control.Controller, error) {
sessionManager, err := session.NewManager()
if err != nil {
return nil, err
Expand Down Expand Up @@ -780,6 +782,7 @@ func newController(ctx context.Context, c *cli.Context, cfg *config.Config) (*co
config: cfg,
sessionManager: sessionManager,
traceSocket: traceSocket,
tracerProvider: tp,
})
if err != nil {
return nil, err
Expand Down
1 change: 1 addition & 0 deletions cmd/buildkitd/main_containerd_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ func containerdWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([
opt.GCPolicy = getGCPolicy(cfg.GCConfig, common.config.Root)
opt.BuildkitVersion = getBuildkitVersion()
opt.RegistryHosts = resolverFunc(common.config)
opt.TracerProvider = common.tracerProvider

if platformsStr := cfg.Platforms; len(platformsStr) != 0 {
platforms, err := parsePlatforms(platformsStr)
Expand Down
1 change: 1 addition & 0 deletions cmd/buildkitd/main_oci_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ func ociWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([]worker
opt.GCPolicy = getGCPolicy(cfg.GCConfig, common.config.Root)
opt.BuildkitVersion = getBuildkitVersion()
opt.RegistryHosts = hosts
opt.TracerProvider = common.tracerProvider

if platformsStr := cfg.Platforms; len(platformsStr) != 0 {
platforms, err := parsePlatforms(platformsStr)
Expand Down
55 changes: 50 additions & 5 deletions util/tracing/tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@ import (
"fmt"
"net/http"
"net/http/httptrace"
"runtime"
"strings"
"sync"

"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/stack"
"github.com/pkg/errors"
"go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel/attribute"
Expand All @@ -14,18 +20,57 @@ import (
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"go.opentelemetry.io/otel/trace"
"go.opentelemetry.io/otel/trace/noop"
)

"github.com/pkg/errors"
var noopTracer = sync.OnceValue(func() trace.TracerProvider {
return noop.NewTracerProvider()
})

"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/stack"
)
// Tracer is a utility function for creating a tracer. It will create
// a tracer with the name matching the package of the caller with the given options.
//
// This function isn't meant to be called in a tight loop. It is intended to be
// called on initialization and the tracer is supposed to be retained for future use.
func Tracer(tp trace.TracerProvider, options ...trace.TracerOption) trace.Tracer {
if tp == nil {
// Potentially consider using otel.GetTracerProvider here, but
// we know of some issues where the default tracer provider can cause
// memory leaks if it is never initialized so just being cautious
// and using the noop tracer because buildkit itself doesn't use the global
// tracer provider.
return noopTracer().Tracer("", options...)
}

var (
name string
callers [1]uintptr
)

if runtime.Callers(2, callers[:]) > 0 {
frames := runtime.CallersFrames(callers[:])
frame, _ := frames.Next()

if frame.Function != "" {
lastSlash := strings.LastIndex(frame.Function, "/")
if lastSlash < 0 {
lastSlash = 0
}

if funcStart := strings.Index(frame.Function[lastSlash:], "."); funcStart >= 0 {
name = frame.Function[:lastSlash+funcStart]
} else {
name = frame.Function
}
}
}
return tp.Tracer(name, options...)
}

// StartSpan starts a new span as a child of the span in context.
// If there is no span in context then this is a no-op.
func StartSpan(ctx context.Context, operationName string, opts ...trace.SpanStartOption) (trace.Span, context.Context) {
parent := trace.SpanFromContext(ctx)
tracer := noop.NewTracerProvider().Tracer("")
tracer := noopTracer().Tracer("")
if parent != nil && parent.SpanContext().IsValid() {
tracer = parent.TracerProvider().Tracer("")
}
Expand Down
3 changes: 3 additions & 0 deletions worker/base/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import (
digest "github.com/opencontainers/go-digest"
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
)
Expand Down Expand Up @@ -82,6 +83,7 @@ type WorkerOpt struct {
MetadataStore *metadata.Store
MountPoolRoot string
ResourceMonitor *resources.Monitor
TracerProvider trace.TracerProvider
}

// Worker is a local worker instance with dedicated snapshotter, cache, and so on.
Expand Down Expand Up @@ -113,6 +115,7 @@ func NewWorker(ctx context.Context, opt WorkerOpt) (*Worker, error) {
MetadataStore: opt.MetadataStore,
Root: opt.Root,
MountPoolRoot: opt.MountPoolRoot,
TracerProvider: opt.TracerProvider,
})
if err != nil {
return nil, err
Expand Down

0 comments on commit 9f8eaf2

Please sign in to comment.