Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache: add tracing to prune #5627

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 51 additions & 2 deletions cache/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,13 @@ import (
"github.com/moby/buildkit/util/disk"
"github.com/moby/buildkit/util/flightcontrol"
"github.com/moby/buildkit/util/progress"
"github.com/moby/buildkit/util/tracing"
digest "github.com/opencontainers/go-digest"
imagespecidentity "github.com/opencontainers/image-spec/identity"
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
)

Expand All @@ -38,6 +41,17 @@ var (
errInvalid = errors.New("invalid")
)

var (
PruneFilterAttribute = attribute.Key("moby.buildkit.prune.filter")
PruneAllAttribute = attribute.Key("moby.buildkit.prune.all")
PruneGCPolicyKeepDuration = attribute.Key("moby.buildkit.prune.gcpolicy.keepduration")
PruneGCPolicyReservedSpace = attribute.Key("moby.buildkit.prune.gcpolicy.reservedspace")
PruneGCPolicyMaxUsedSpace = attribute.Key("moby.buildkit.prune.gcpolicy.maxusedspace")
PruneGCPolicyMinFreeSpace = attribute.Key("moby.buildkit.prune.gcpolicy.minfreespace")
PruneDeleteSizeAttribute = attribute.Key("moby.buildkit.prune.delete.size")
PruneDeleteCountAttribute = attribute.Key("moby.buildkit.prune.delete.count")
)

const maxPruneBatch = 10 // maximum number of refs to prune while holding the manager lock

type ManagerOpt struct {
Expand All @@ -51,6 +65,7 @@ type ManagerOpt struct {
MetadataStore *metadata.Store
Root string
MountPoolRoot string
TracerProvider trace.TracerProvider
}

type Accessor interface {
Expand Down Expand Up @@ -95,7 +110,8 @@ type cacheManager struct {
Differ diff.Comparer
MetadataStore *metadata.Store

root string
root string
tracer trace.Tracer

mountPool sharableMountPool

Expand All @@ -114,6 +130,7 @@ func NewManager(opt ManagerOpt) (Manager, error) {
Differ: opt.Differ,
MetadataStore: opt.MetadataStore,
root: opt.Root,
tracer: tracing.Tracer(opt.TracerProvider),
records: make(map[string]*cacheRecord),
}

Expand Down Expand Up @@ -1009,6 +1026,9 @@ func (cm *cacheManager) createDiffRef(ctx context.Context, parents parentRefs, d
}

func (cm *cacheManager) Prune(ctx context.Context, ch chan client.UsageInfo, opts ...client.PruneInfo) error {
ctx, span := cm.tracer.Start(ctx, "(*cacheManager).Prune")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The prunes coming from background GC should have a root span based on the GC function, not for individual prune calls for each rule. (Can even trace the reason for starting the GC).

For Prunes coming through grpc they should trace to the tracing context already in ctx.

So afaics these should use the tracing.StartSpan(ctx) like the rest of the tracers and control.gc() should have a way to inject root span to ctx before it calls into cm.Prune().

defer span.End()

cm.muPrune.Lock()

for _, opt := range opts {
Expand All @@ -1030,6 +1050,18 @@ func (cm *cacheManager) Prune(ctx context.Context, ch chan client.UsageInfo, opt
}

func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt client.PruneInfo) error {
ctx, span := cm.tracer.Start(ctx, "(*cacheManager).prune",
trace.WithAttributes(
PruneFilterAttribute.StringSlice(opt.Filter),
PruneAllAttribute.Bool(opt.All),
PruneGCPolicyKeepDuration.Int64(int64(opt.KeepDuration)),
PruneGCPolicyReservedSpace.Int64(opt.ReservedSpace),
PruneGCPolicyMaxUsedSpace.Int64(opt.MaxUsedSpace),
PruneGCPolicyMinFreeSpace.Int64(opt.MinFreeSpace),
),
)
defer span.End()

filter, err := filters.ParseAll(opt.Filter...)
if err != nil {
return errors.Wrapf(err, "failed to parse prune filters %v", opt.Filter)
Expand Down Expand Up @@ -1066,6 +1098,17 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
}
}

var (
totalReleasedSize int64
totalReleasedCount int64
)
defer func() {
span.SetAttributes(
PruneDeleteSizeAttribute.Int64(totalReleasedSize),
PruneDeleteCountAttribute.Int64(totalReleasedCount),
)
}()

popt := pruneOpt{
filter: filter,
all: opt.All,
Expand All @@ -1076,9 +1119,15 @@ func (cm *cacheManager) prune(ctx context.Context, ch chan client.UsageInfo, opt
}
for {
releasedSize, releasedCount, err := cm.pruneOnce(ctx, ch, popt)
if err != nil || releasedCount == 0 {
if err != nil {
return err
}
totalReleasedSize += releasedSize
totalReleasedCount += releasedCount

if releasedCount == 0 {
return nil
}
popt.totalSize -= releasedSize
}
}
Expand Down
7 changes: 5 additions & 2 deletions cmd/buildkitd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ import (
"go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/trace"
tracev1 "go.opentelemetry.io/proto/otlp/collector/trace/v1"
"golang.org/x/sync/errgroup"
"google.golang.org/grpc"
Expand All @@ -90,6 +91,7 @@ type workerInitializerOpt struct {
config *config.Config
sessionManager *session.Manager
traceSocket string
tracerProvider trace.TracerProvider
}

type workerInitializer struct {
Expand Down Expand Up @@ -336,7 +338,7 @@ func main() {
return err
}

controller, err := newController(ctx, c, &cfg)
controller, err := newController(ctx, c, &cfg, tp)
if err != nil {
return err
}
Expand Down Expand Up @@ -747,7 +749,7 @@ func serverCredentials(cfg config.TLSConfig) (*tls.Config, error) {
return tlsConf, nil
}

func newController(ctx context.Context, c *cli.Context, cfg *config.Config) (*control.Controller, error) {
func newController(ctx context.Context, c *cli.Context, cfg *config.Config, tp trace.TracerProvider) (*control.Controller, error) {
sessionManager, err := session.NewManager()
if err != nil {
return nil, err
Expand Down Expand Up @@ -776,6 +778,7 @@ func newController(ctx context.Context, c *cli.Context, cfg *config.Config) (*co
config: cfg,
sessionManager: sessionManager,
traceSocket: traceSocket,
tracerProvider: tp,
})
if err != nil {
return nil, err
Expand Down
1 change: 1 addition & 0 deletions cmd/buildkitd/main_containerd_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ func containerdWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([
opt.GCPolicy = getGCPolicy(cfg.GCConfig, common.config.Root)
opt.BuildkitVersion = getBuildkitVersion()
opt.RegistryHosts = resolverFunc(common.config)
opt.TracerProvider = common.tracerProvider

if platformsStr := cfg.Platforms; len(platformsStr) != 0 {
platforms, err := parsePlatforms(platformsStr)
Expand Down
1 change: 1 addition & 0 deletions cmd/buildkitd/main_oci_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ func ociWorkerInitializer(c *cli.Context, common workerInitializerOpt) ([]worker
opt.GCPolicy = getGCPolicy(cfg.GCConfig, common.config.Root)
opt.BuildkitVersion = getBuildkitVersion()
opt.RegistryHosts = hosts
opt.TracerProvider = common.tracerProvider

if platformsStr := cfg.Platforms; len(platformsStr) != 0 {
platforms, err := parsePlatforms(platformsStr)
Expand Down
55 changes: 50 additions & 5 deletions util/tracing/tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@ import (
"fmt"
"net/http"
"net/http/httptrace"
"runtime"
"strings"
"sync"

"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/stack"
"github.com/pkg/errors"
"go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel/attribute"
Expand All @@ -14,18 +20,57 @@ import (
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"go.opentelemetry.io/otel/trace"
"go.opentelemetry.io/otel/trace/noop"
)

"github.com/pkg/errors"
var noopTracer = sync.OnceValue(func() trace.TracerProvider {
return noop.NewTracerProvider()
})

"github.com/moby/buildkit/util/bklog"
"github.com/moby/buildkit/util/stack"
)
// Tracer is a utility function for creating a tracer. It will create
// a tracer with the name matching the package of the caller with the given options.
//
// This function isn't meant to be called in a tight loop. It is intended to be
// called on initialization and the tracer is supposed to be retained for future use.
func Tracer(tp trace.TracerProvider, options ...trace.TracerOption) trace.Tracer {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be called TracerForCurrentFrame or smth like that

if tp == nil {
// Potentially consider using otel.GetTracerProvider here, but
// we know of some issues where the default tracer provider can cause
// memory leaks if it is never initialized so just being cautious
// and using the noop tracer because buildkit itself doesn't use the global
// tracer provider.
return noopTracer().Tracer("", options...)
}

var (
name string
callers [1]uintptr
)

if runtime.Callers(2, callers[:]) > 0 {
frames := runtime.CallersFrames(callers[:])
frame, _ := frames.Next()

if frame.Function != "" {
lastSlash := strings.LastIndex(frame.Function, "/")
if lastSlash < 0 {
lastSlash = 0
}

if funcStart := strings.Index(frame.Function[lastSlash:], "."); funcStart >= 0 {
name = frame.Function[:lastSlash+funcStart]
} else {
name = frame.Function
}
}
}
return tp.Tracer(name, options...)
}

// StartSpan starts a new span as a child of the span in context.
// If there is no span in context then this is a no-op.
func StartSpan(ctx context.Context, operationName string, opts ...trace.SpanStartOption) (trace.Span, context.Context) {
parent := trace.SpanFromContext(ctx)
tracer := noop.NewTracerProvider().Tracer("")
tracer := noopTracer().Tracer("")
if parent != nil && parent.SpanContext().IsValid() {
tracer = parent.TracerProvider().Tracer("")
}
Expand Down
3 changes: 3 additions & 0 deletions worker/base/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import (
digest "github.com/opencontainers/go-digest"
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
)
Expand Down Expand Up @@ -82,6 +83,7 @@ type WorkerOpt struct {
MetadataStore *metadata.Store
MountPoolRoot string
ResourceMonitor *resources.Monitor
TracerProvider trace.TracerProvider
}

// Worker is a local worker instance with dedicated snapshotter, cache, and so on.
Expand Down Expand Up @@ -113,6 +115,7 @@ func NewWorker(ctx context.Context, opt WorkerOpt) (*Worker, error) {
MetadataStore: opt.MetadataStore,
Root: opt.Root,
MountPoolRoot: opt.MountPoolRoot,
TracerProvider: opt.TracerProvider,
})
if err != nil {
return nil, err
Expand Down
Loading