From 72a3b804f7e78d433e0d5e8b5c45408fe2b53edb Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:17:12 -0400 Subject: [PATCH 01/15] feat: Allow utf-8 characters in label names This feature is a relaxation in previous validation for label names. Given the backward incompatible nature of this change (for example, PromQL queries must be [updated to support utf-8 label names](https://prometheus.io/docs/guides/utf8/#querying)), it is gated behind a "client capability": a key/val that a client must set in the `Accept:` header. --- cmd/profilecli/client.go | 18 +++++++++ pkg/distributor/distributor.go | 53 +++++++++++++++++---------- pkg/distributor/model/push.go | 5 +++ pkg/featureflags/client_capability.go | 52 ++++++++++++++++++++++++++ pkg/validation/validate.go | 17 ++++++++- 5 files changed, 124 insertions(+), 21 deletions(-) create mode 100644 pkg/featureflags/client_capability.go diff --git a/cmd/profilecli/client.go b/cmd/profilecli/client.go index b23c7c0821..24e811ffee 100644 --- a/cmd/profilecli/client.go +++ b/cmd/profilecli/client.go @@ -3,6 +3,7 @@ package main import ( "fmt" "net/http" + "strings" "connectrpc.com/connect" "github.com/prometheus/common/version" @@ -17,7 +18,21 @@ const ( protocolTypeGRPCWeb = "grpc-web" ) +var acceptHeaderFeatureFlags = []string{ + "allow-utf8-labelnames=true", +} + var userAgentHeader = fmt.Sprintf("pyroscope/%s", version.Version) +var acceptHeaderMimeType = "*/*" + +func buildAcceptHeader(featureFlags []string) string { + acceptHeader := acceptHeaderMimeType + if len(acceptHeaderFeatureFlags) > 0 { + acceptHeader += "; " + strings.Join(featureFlags, "; ") + } + + return acceptHeader +} type phlareClient struct { TenantID string @@ -46,7 +61,10 @@ func (a *authRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) } } + acceptHeader := buildAcceptHeader(acceptHeaderFeatureFlags) + req.Header.Set("Accept", acceptHeader) req.Header.Set("User-Agent", userAgentHeader) + return a.next.RoundTrip(req) } diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 35a7e9788f..0ef3235b3a 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -46,6 +46,7 @@ import ( distributormodel "github.com/grafana/pyroscope/pkg/distributor/model" "github.com/grafana/pyroscope/pkg/distributor/sampling" "github.com/grafana/pyroscope/pkg/distributor/writepath" + "github.com/grafana/pyroscope/pkg/featureflags" phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/model/pprofsplit" "github.com/grafana/pyroscope/pkg/model/relabel" @@ -248,7 +249,14 @@ func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1. Series: make([]*distributormodel.ProfileSeries, 0, len(grpcReq.Msg.Series)), RawProfileType: distributormodel.RawProfileTypePPROF, } + allErrors := multierror.New() + + clientCapabilities, parseErr := featureflags.ParseClientCapabilities(grpcReq.Header()) + if parseErr != nil { + allErrors.Add(parseErr) + } + for _, grpcSeries := range grpcReq.Msg.Series { for _, grpcSample := range grpcSeries.Samples { profile, err := pprof.RawFromBytes(grpcSample.RawProfile) @@ -257,10 +265,11 @@ func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1. continue } series := &distributormodel.ProfileSeries{ - Labels: grpcSeries.Labels, - Profile: profile, - RawProfile: grpcSample.RawProfile, - ID: grpcSample.ID, + Labels: grpcSeries.Labels, + Profile: profile, + RawProfile: grpcSample.RawProfile, + ID: grpcSample.ID, + ClientCapabilities: clientCapabilities, } req.Series = append(req.Series, series) } @@ -630,10 +639,11 @@ func (d *Distributor) aggregate(ctx context.Context, req *distributormodel.Profi return handleErr } aggregated := &distributormodel.ProfileSeries{ - TenantID: req.TenantID, - Labels: labels, - Profile: pprof.RawFromProto(p.Profile()), - Annotations: annotations, + TenantID: req.TenantID, + Labels: labels, + Profile: pprof.RawFromProto(p.Profile()), + Annotations: annotations, + ClientCapabilities: req.ClientCapabilities, } return d.router.Send(localCtx, aggregated) })() @@ -1132,9 +1142,10 @@ func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit var result []*distributormodel.ProfileSeries usageGroups := d.usageGroupEvaluator.GetMatch(s.TenantID, usageConfig, s.Labels) visitor := &sampleSeriesVisitor{ - tenantID: s.TenantID, - limits: d.limits, - profile: s.Profile, + tenantID: s.TenantID, + limits: d.limits, + profile: s.Profile, + clientCapabilities: s.ClientCapabilities, } if err := visit(s.Profile.Profile, s.Labels, relabelingRules, visitor); err != nil { validation.DiscardedProfiles.WithLabelValues(string(validation.ReasonOf(err)), s.TenantID).Add(float64(s.TotalProfiles)) @@ -1164,24 +1175,28 @@ func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit } type sampleSeriesVisitor struct { - tenantID string - limits Limits - profile *pprof.Profile - exp *pprof.SampleExporter - series []*distributormodel.ProfileSeries + tenantID string + limits Limits + profile *pprof.Profile + exp *pprof.SampleExporter + series []*distributormodel.ProfileSeries + clientCapabilities []*featureflags.ClientCapability discardedBytes int discardedProfiles int } func (v *sampleSeriesVisitor) ValidateLabels(labels phlaremodel.Labels) error { - return validation.ValidateLabels(v.limits, v.tenantID, labels) + capability := featureflags.GetClientCapability(v.clientCapabilities, featureflags.AllowUtf8LabelNamesCapabilityName) + utf8LabelNamesEnabled := capability != nil && capability.Value == "true" + return validation.ValidateLabels(v.limits, v.tenantID, utf8LabelNamesEnabled, labels) } func (v *sampleSeriesVisitor) VisitProfile(labels phlaremodel.Labels) { v.series = append(v.series, &distributormodel.ProfileSeries{ - Profile: v.profile, - Labels: labels, + Profile: v.profile, + Labels: labels, + ClientCapabilities: v.clientCapabilities, }) } diff --git a/pkg/distributor/model/push.go b/pkg/distributor/model/push.go index 6b2e74b2f9..534a85aa64 100644 --- a/pkg/distributor/model/push.go +++ b/pkg/distributor/model/push.go @@ -5,6 +5,7 @@ import ( "github.com/grafana/pyroscope/pkg/distributor/annotation" "github.com/grafana/pyroscope/pkg/distributor/ingestlimits" "github.com/grafana/pyroscope/pkg/distributor/sampling" + "github.com/grafana/pyroscope/pkg/featureflags" phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/pprof" ) @@ -30,6 +31,9 @@ type ProfileSeries struct { RawProfile []byte // may be nil if the Profile is composed not from pprof ( e.g. jfr) ID string + // List of features the client supports + ClientCapabilities []*featureflags.ClientCapability + // todo split // Transient state TenantID string @@ -89,6 +93,7 @@ func (req *ProfileSeries) Clone() *ProfileSeries { ID: req.ID, Language: req.Language, Annotations: req.Annotations, + ClientCapabilities: req.ClientCapabilities, } return c } diff --git a/pkg/featureflags/client_capability.go b/pkg/featureflags/client_capability.go new file mode 100644 index 0000000000..0d37d30225 --- /dev/null +++ b/pkg/featureflags/client_capability.go @@ -0,0 +1,52 @@ +package featureflags + +import ( + "fmt" + "mime" + "net/http" +) + +const ( + // Capability names + AllowUtf8LabelNamesCapabilityName = "allow-utf8-labelnames" +) + +type ClientCapability struct { + Name string + Value string +} + +func ParseClientCapabilities(header http.Header) ([]*ClientCapability, error) { + acceptHeader := header.Get("Accept") + if acceptHeader != "" { + if _, params, err := mime.ParseMediaType(acceptHeader); err != nil { + return nil, err + } else { + capabilities := make([]*ClientCapability, 0, len(params)) + seenCapabilityNames := make(map[string]struct{}) + for k, v := range params { + // Check for duplicates + if _, ok := seenCapabilityNames[k]; ok { + return nil, fmt.Errorf("duplicate client capabilities parsed from `Accept:` header: '%s'", + params) + } + seenCapabilityNames[k] = struct{}{} + + capabilities = append(capabilities, &ClientCapability{Name: k, Value: v}) + } + + return capabilities, nil + } + } + + return []*ClientCapability{}, nil +} + +func GetClientCapability(capabilities []*ClientCapability, capabilityName string) *ClientCapability { + for _, capability := range capabilities { + if capability.Name == capabilityName { + return capability + } + } + return nil +} diff --git a/pkg/validation/validate.go b/pkg/validation/validate.go index a30bd49962..32762ab334 100644 --- a/pkg/validation/validate.go +++ b/pkg/validation/validate.go @@ -115,7 +115,7 @@ type LabelValidationLimits interface { } // ValidateLabels validates the labels of a profile. -func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1.LabelPair) error { +func ValidateLabels(limits LabelValidationLimits, tenantID string, utf8LabelNamesEnabled bool, ls []*typesv1.LabelPair) error { if len(ls) == 0 { return NewErrorf(MissingLabels, MissingLabelsErrorMsg) } @@ -146,7 +146,12 @@ func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1 if len(l.Value) > limits.MaxLabelValueLength(tenantID) { return NewErrorf(LabelValueTooLong, LabelValueTooLongErrorMsg, phlaremodel.LabelPairsString(ls), l.Value) } - if origName, newName, ok := SanitizeLabelName(l.Name); ok && origName != newName { + // Note this conditional falls back on legacy logic if not valid utf-8 label name + if ok := ValidateUtf8LabelName(l.Name); utf8LabelNamesEnabled && ok { + idx += 1 + continue + } else if origName, newName, ok := SanitizeLabelName(l.Name); ok && origName != newName { + // Legacy logic if client does not specify utf8LabelNamesEnabled var err error ls, idx, err = handleSanitizedLabel(ls, idx, origName, newName) if err != nil { @@ -211,6 +216,14 @@ func handleSanitizedLabel(ls []*typesv1.LabelPair, origIdx int, origName, newNam return ls[:len(newSlice)], finalIdx, nil } +// ValidateUtf8LabelName validates the name is not empty and is a valid utf-8 string +func ValidateUtf8LabelName(name string) bool { + if len(name) == 0 { + return false + } + return utf8.ValidString(name) +} + // SanitizeLabelName reports whether the label name is valid, // and returns the sanitized value. // From cbb8eb255a58ea9ac53eae97f563064f32f41e25 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Thu, 18 Sep 2025 14:42:26 -0400 Subject: [PATCH 02/15] Address feedback - Moved all client capability logic to its own module - Move legacy (i.e. non utf-8 enabled) validation logic to query path - Implemented in v2 LabelNames and Series APIs - Relax write path to allow any utf-8 valid label names - Created HTTP and gRPC middleware for client capability - Updates og UI to work with utf-8 label name selection - Removed utf8 label name from feature flags [since unrelated to feature flags now] --- pkg/clientcapability/client_capability.go | 128 ++++++++++++++++++ pkg/clientcapability/utf_label_names.go | 75 ++++++++++ pkg/distributor/distributor.go | 53 +++----- pkg/distributor/model/push.go | 5 - pkg/featureflags/client_capability.go | 52 ------- pkg/featureflags/feature_flags.go | 5 - .../queryfrontend/query_label_names.go | 14 +- .../queryfrontend/query_series_labels.go | 28 +++- pkg/pyroscope/feature_flags.go | 1 - pkg/pyroscope/modules.go | 7 +- pkg/validation/validate.go | 101 +------------- public/app/components/TagsBar.tsx | 2 +- public/app/services/base.ts | 4 + public/app/util/query.ts | 6 +- 14 files changed, 281 insertions(+), 200 deletions(-) create mode 100644 pkg/clientcapability/client_capability.go create mode 100644 pkg/clientcapability/utf_label_names.go delete mode 100644 pkg/featureflags/client_capability.go diff --git a/pkg/clientcapability/client_capability.go b/pkg/clientcapability/client_capability.go new file mode 100644 index 0000000000..19ee69ef8d --- /dev/null +++ b/pkg/clientcapability/client_capability.go @@ -0,0 +1,128 @@ +package clientcapability + +import ( + "context" + "mime" + "net/http" + "strings" + + "connectrpc.com/connect" + "github.com/grafana/dskit/middleware" + "google.golang.org/grpc" + "google.golang.org/grpc/metadata" +) + +const ( + // Capability names + AllowUtf8LabelNamesCapabilityName CapabilityName = "allow-utf8-labelnames" +) + +var capabilities = map[CapabilityName]bool{ + AllowUtf8LabelNamesCapabilityName: true, +} + +type CapabilityName string +type CapabilityValue string + +// Define a custom context key type to avoid collisions +type contextKey int + +const ( + clientCapabilitiesKey contextKey = iota + acceptHeader = "Accept" +) + +type ClientCapabilities map[CapabilityName]CapabilityValue + +func WithClientCapabilities(ctx context.Context, clientCapabilities ClientCapabilities) context.Context { + return context.WithValue(ctx, clientCapabilitiesKey, clientCapabilities) +} + +func GetClientCapabilities(ctx context.Context) (ClientCapabilities, bool) { + value, ok := ctx.Value(clientCapabilitiesKey).(ClientCapabilities) + return value, ok +} + +func ClientCapabilitiesGRPCMiddleware() grpc.UnaryServerInterceptor { + return func( + ctx context.Context, + req interface{}, + info *grpc.UnaryServerInfo, + handler grpc.UnaryHandler, + ) (interface{}, error) { + // Extract metadata from context + md, ok := metadata.FromIncomingContext(ctx) + if !ok { + return handler(ctx, req) + } + + // Convert metadata to http.Header for reuse of existing parsing logic + httpHeader := make(http.Header) + for key, values := range md { + // gRPC metadata keys are lowercase, HTTP headers are case-insensitive + httpHeader[http.CanonicalHeaderKey(key)] = values + } + + // Reuse existing HTTP header parsing + clientCapabilities, err := parseClientCapabilities(httpHeader) + if err != nil { + return nil, connect.NewError(connect.CodeInvalidArgument, err) + } + + if len(clientCapabilities) == 0 { + return handler(ctx, req) + } + + enhancedCtx := WithClientCapabilities(ctx, clientCapabilities) + return handler(enhancedCtx, req) + } +} + +// ClientCapabilitiesHttpMiddleware creates middleware that extracts and parses the +// `Accept` header for capabilities the client supports +func ClientCapabilitiesHttpMiddleware() middleware.Interface { + return middleware.Func(func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + clientCapabilities, err := parseClientCapabilities(r.Header) + if err != nil { + http.Error(w, "Invalid header format: "+err.Error(), http.StatusBadRequest) + return + } else if len(clientCapabilities) == 0 { + // If no capabilities parsed, continue without setting context + next.ServeHTTP(w, r) + return + } + + ctx := WithClientCapabilities(r.Context(), clientCapabilities) + next.ServeHTTP(w, r.WithContext(ctx)) + }) + }) +} + +type ClientCapability struct { + Name CapabilityName + Value string +} + +func parseClientCapabilities(header http.Header) (ClientCapabilities, error) { + acceptHeaderValue := header.Get(acceptHeader) + if acceptHeaderValue != "" { + accepts := strings.Split(acceptHeaderValue, ",") + + var clientCapabilities = make(ClientCapabilities) + for _, accept := range accepts { + if _, params, err := mime.ParseMediaType(accept); err != nil { + return nil, err + } else { + for k, v := range params { + if _, ok := capabilities[CapabilityName(k)]; ok { + clientCapabilities[CapabilityName(k)] = CapabilityValue(v) + } + } + } + } + return clientCapabilities, nil + } + + return ClientCapabilities{}, nil +} diff --git a/pkg/clientcapability/utf_label_names.go b/pkg/clientcapability/utf_label_names.go new file mode 100644 index 0000000000..c2c2beb1af --- /dev/null +++ b/pkg/clientcapability/utf_label_names.go @@ -0,0 +1,75 @@ +package clientcapability + +import ( + "context" + "fmt" + "slices" + "strings" + + "github.com/grafana/pyroscope/pkg/validation" +) + +func Utf8LabelNamesEnabled(ctx context.Context) bool { + if capabilities, ok := GetClientCapabilities(ctx); ok { + if val, ok := capabilities[AllowUtf8LabelNamesCapabilityName]; ok && val == "true" { + return true + } + } + return false +} + +// SanitizeLabelNames uses legacy logic to transform non utf-8 compliant label names +// (if possible). It will error on duplicate label names post-sanitization. +func SanitizeLabelNames(labelNames []string) ([]string, error) { + slices.Sort(labelNames) + var ( + lastLabelName = "" + ) + for idx, labelName := range labelNames { + if origName, newName, ok := sanitizeLabelName(labelName); ok && origName != newName { + labelNames[idx] = newName + lastLabelName = origName + continue + } else if !ok { + return nil, validation.NewErrorf(validation.InvalidLabels, validation.InvalidLabelsErrorMsg, labelName, + fmt.Sprintf("invalid label name '%s'. consider setting '%s=true' in the `Accept` header", origName, AllowUtf8LabelNamesCapabilityName)) + } + + if cmp := strings.Compare(lastLabelName, labelName); cmp == 0 { + return nil, validation.NewErrorf(validation.DuplicateLabelNames, validation.DuplicateLabelNamesErrorMsg, lastLabelName, labelName) + } + lastLabelName = labelName + } + + return labelNames, nil +} + +// sanitizeLabelName reports whether the label name is valid, +// and returns the sanitized value. +// +// The only change the function makes is replacing dots with underscores. +func sanitizeLabelName(ln string) (old, sanitized string, ok bool) { + if len(ln) == 0 { + return ln, ln, false + } + hasDots := false + for i, b := range ln { + if (b < 'a' || b > 'z') && (b < 'A' || b > 'Z') && b != '_' && (b < '0' || b > '9' || i == 0) { + if b == '.' { + hasDots = true + } else { + return ln, ln, false + } + } + } + if !hasDots { + return ln, ln, true + } + r := []rune(ln) + for i, b := range r { + if b == '.' { + r[i] = '_' + } + } + return ln, string(r), true +} diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 0ef3235b3a..35a7e9788f 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -46,7 +46,6 @@ import ( distributormodel "github.com/grafana/pyroscope/pkg/distributor/model" "github.com/grafana/pyroscope/pkg/distributor/sampling" "github.com/grafana/pyroscope/pkg/distributor/writepath" - "github.com/grafana/pyroscope/pkg/featureflags" phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/model/pprofsplit" "github.com/grafana/pyroscope/pkg/model/relabel" @@ -249,14 +248,7 @@ func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1. Series: make([]*distributormodel.ProfileSeries, 0, len(grpcReq.Msg.Series)), RawProfileType: distributormodel.RawProfileTypePPROF, } - allErrors := multierror.New() - - clientCapabilities, parseErr := featureflags.ParseClientCapabilities(grpcReq.Header()) - if parseErr != nil { - allErrors.Add(parseErr) - } - for _, grpcSeries := range grpcReq.Msg.Series { for _, grpcSample := range grpcSeries.Samples { profile, err := pprof.RawFromBytes(grpcSample.RawProfile) @@ -265,11 +257,10 @@ func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1. continue } series := &distributormodel.ProfileSeries{ - Labels: grpcSeries.Labels, - Profile: profile, - RawProfile: grpcSample.RawProfile, - ID: grpcSample.ID, - ClientCapabilities: clientCapabilities, + Labels: grpcSeries.Labels, + Profile: profile, + RawProfile: grpcSample.RawProfile, + ID: grpcSample.ID, } req.Series = append(req.Series, series) } @@ -639,11 +630,10 @@ func (d *Distributor) aggregate(ctx context.Context, req *distributormodel.Profi return handleErr } aggregated := &distributormodel.ProfileSeries{ - TenantID: req.TenantID, - Labels: labels, - Profile: pprof.RawFromProto(p.Profile()), - Annotations: annotations, - ClientCapabilities: req.ClientCapabilities, + TenantID: req.TenantID, + Labels: labels, + Profile: pprof.RawFromProto(p.Profile()), + Annotations: annotations, } return d.router.Send(localCtx, aggregated) })() @@ -1142,10 +1132,9 @@ func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit var result []*distributormodel.ProfileSeries usageGroups := d.usageGroupEvaluator.GetMatch(s.TenantID, usageConfig, s.Labels) visitor := &sampleSeriesVisitor{ - tenantID: s.TenantID, - limits: d.limits, - profile: s.Profile, - clientCapabilities: s.ClientCapabilities, + tenantID: s.TenantID, + limits: d.limits, + profile: s.Profile, } if err := visit(s.Profile.Profile, s.Labels, relabelingRules, visitor); err != nil { validation.DiscardedProfiles.WithLabelValues(string(validation.ReasonOf(err)), s.TenantID).Add(float64(s.TotalProfiles)) @@ -1175,28 +1164,24 @@ func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit } type sampleSeriesVisitor struct { - tenantID string - limits Limits - profile *pprof.Profile - exp *pprof.SampleExporter - series []*distributormodel.ProfileSeries - clientCapabilities []*featureflags.ClientCapability + tenantID string + limits Limits + profile *pprof.Profile + exp *pprof.SampleExporter + series []*distributormodel.ProfileSeries discardedBytes int discardedProfiles int } func (v *sampleSeriesVisitor) ValidateLabels(labels phlaremodel.Labels) error { - capability := featureflags.GetClientCapability(v.clientCapabilities, featureflags.AllowUtf8LabelNamesCapabilityName) - utf8LabelNamesEnabled := capability != nil && capability.Value == "true" - return validation.ValidateLabels(v.limits, v.tenantID, utf8LabelNamesEnabled, labels) + return validation.ValidateLabels(v.limits, v.tenantID, labels) } func (v *sampleSeriesVisitor) VisitProfile(labels phlaremodel.Labels) { v.series = append(v.series, &distributormodel.ProfileSeries{ - Profile: v.profile, - Labels: labels, - ClientCapabilities: v.clientCapabilities, + Profile: v.profile, + Labels: labels, }) } diff --git a/pkg/distributor/model/push.go b/pkg/distributor/model/push.go index 534a85aa64..6b2e74b2f9 100644 --- a/pkg/distributor/model/push.go +++ b/pkg/distributor/model/push.go @@ -5,7 +5,6 @@ import ( "github.com/grafana/pyroscope/pkg/distributor/annotation" "github.com/grafana/pyroscope/pkg/distributor/ingestlimits" "github.com/grafana/pyroscope/pkg/distributor/sampling" - "github.com/grafana/pyroscope/pkg/featureflags" phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/pprof" ) @@ -31,9 +30,6 @@ type ProfileSeries struct { RawProfile []byte // may be nil if the Profile is composed not from pprof ( e.g. jfr) ID string - // List of features the client supports - ClientCapabilities []*featureflags.ClientCapability - // todo split // Transient state TenantID string @@ -93,7 +89,6 @@ func (req *ProfileSeries) Clone() *ProfileSeries { ID: req.ID, Language: req.Language, Annotations: req.Annotations, - ClientCapabilities: req.ClientCapabilities, } return c } diff --git a/pkg/featureflags/client_capability.go b/pkg/featureflags/client_capability.go deleted file mode 100644 index 0d37d30225..0000000000 --- a/pkg/featureflags/client_capability.go +++ /dev/null @@ -1,52 +0,0 @@ -package featureflags - -import ( - "fmt" - "mime" - "net/http" -) - -const ( - // Capability names - AllowUtf8LabelNamesCapabilityName = "allow-utf8-labelnames" -) - -type ClientCapability struct { - Name string - Value string -} - -func ParseClientCapabilities(header http.Header) ([]*ClientCapability, error) { - acceptHeader := header.Get("Accept") - if acceptHeader != "" { - if _, params, err := mime.ParseMediaType(acceptHeader); err != nil { - return nil, err - } else { - capabilities := make([]*ClientCapability, 0, len(params)) - seenCapabilityNames := make(map[string]struct{}) - for k, v := range params { - // Check for duplicates - if _, ok := seenCapabilityNames[k]; ok { - return nil, fmt.Errorf("duplicate client capabilities parsed from `Accept:` header: '%s'", - params) - } - seenCapabilityNames[k] = struct{}{} - - capabilities = append(capabilities, &ClientCapability{Name: k, Value: v}) - } - - return capabilities, nil - } - } - - return []*ClientCapability{}, nil -} - -func GetClientCapability(capabilities []*ClientCapability, capabilityName string) *ClientCapability { - for _, capability := range capabilities { - if capability.Name == capabilityName { - return capability - } - } - return nil -} diff --git a/pkg/featureflags/feature_flags.go b/pkg/featureflags/feature_flags.go index d73ab26018..ca8368614f 100644 --- a/pkg/featureflags/feature_flags.go +++ b/pkg/featureflags/feature_flags.go @@ -15,7 +15,6 @@ const ( V2StorageLayer = "v2StorageLayer" PyroscopeRuler = "pyroscopeRuler" PyroscopeRulerFunctions = "pyroscopeRulerFunctions" - UTF8LabelNames = "utf8LabelNames" ) func stringPtr(s string) *string { @@ -37,10 +36,6 @@ var ( Enabled: false, Description: stringPtr("Enables function support for the Pyroscope ruler, which allows to export resource usage on a per function level."), }, - UTF8LabelNames: { - Enabled: false, - Description: stringPtr("Supports UTF-8 label names for Pyroscope read/write APIs."), - }, } ) diff --git a/pkg/frontend/readpath/queryfrontend/query_label_names.go b/pkg/frontend/readpath/queryfrontend/query_label_names.go index c430ebb310..1363795776 100644 --- a/pkg/frontend/readpath/queryfrontend/query_label_names.go +++ b/pkg/frontend/readpath/queryfrontend/query_label_names.go @@ -5,6 +5,7 @@ import ( "connectrpc.com/connect" "github.com/grafana/dskit/tenant" + "github.com/grafana/pyroscope/pkg/clientcapability" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" @@ -46,5 +47,16 @@ func (q *QueryFrontend) LabelNames( if report == nil { return connect.NewResponse(&typesv1.LabelNamesResponse{}), nil } - return connect.NewResponse(&typesv1.LabelNamesResponse{Names: report.LabelNames.LabelNames}), nil + + labelNames := report.LabelNames.LabelNames + if enabled := clientcapability.Utf8LabelNamesEnabled(ctx); !enabled { + // Use legacy label name sanitization if utf8 label names not enabled + if labelNames, err := clientcapability.SanitizeLabelNames(labelNames); err != nil { + return nil, connect.NewError(connect.CodeInvalidArgument, err) + } else { + return connect.NewResponse(&typesv1.LabelNamesResponse{Names: labelNames}), nil + } + } + + return connect.NewResponse(&typesv1.LabelNamesResponse{Names: labelNames}), nil } diff --git a/pkg/frontend/readpath/queryfrontend/query_series_labels.go b/pkg/frontend/readpath/queryfrontend/query_series_labels.go index 41589017da..443cd5a53e 100644 --- a/pkg/frontend/readpath/queryfrontend/query_series_labels.go +++ b/pkg/frontend/readpath/queryfrontend/query_series_labels.go @@ -6,6 +6,7 @@ import ( "connectrpc.com/connect" "github.com/go-kit/log/level" "github.com/grafana/dskit/tenant" + "github.com/grafana/pyroscope/pkg/clientcapability" querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" @@ -54,5 +55,30 @@ func (q *QueryFrontend) Series( if report == nil { return connect.NewResponse(&querierv1.SeriesResponse{}), nil } - return connect.NewResponse(&querierv1.SeriesResponse{LabelsSet: report.SeriesLabels.SeriesLabels}), nil + + seriesLabels := report.SeriesLabels.SeriesLabels + if enabled := clientcapability.Utf8LabelNamesEnabled(ctx); !enabled { + // Use legacy label name sanitization if utf8 label names not enabled + for _, seriesLabel := range seriesLabels { + labelNames := make([]string, len(seriesLabel.Labels)) + for i, label := range seriesLabel.Labels { + labelNames[i] = label.Name + } + + // Sanitize the label names + sanitizedNames, err := clientcapability.SanitizeLabelNames(labelNames) + if err != nil { + return nil, connect.NewError(connect.CodeInvalidArgument, err) + } + + // Update the original labels with sanitized names + for i, label := range seriesLabel.Labels { + if i < len(sanitizedNames) { + label.Name = sanitizedNames[i] + } + } + } + } + + return connect.NewResponse(&querierv1.SeriesResponse{LabelsSet: seriesLabels}), nil } diff --git a/pkg/pyroscope/feature_flags.go b/pkg/pyroscope/feature_flags.go index 7e8872d1b4..41b11c625d 100644 --- a/pkg/pyroscope/feature_flags.go +++ b/pkg/pyroscope/feature_flags.go @@ -12,7 +12,6 @@ func (c *Config) getFeatureFlags() map[string]bool { featureflags.V2StorageLayer: c.V2, featureflags.PyroscopeRuler: rulerEnabled, featureflags.PyroscopeRulerFunctions: rulerEnabled, - featureflags.UTF8LabelNames: false, // not supported yet } } diff --git a/pkg/pyroscope/modules.go b/pkg/pyroscope/modules.go index b2bf670924..4725cf0656 100644 --- a/pkg/pyroscope/modules.go +++ b/pkg/pyroscope/modules.go @@ -11,6 +11,7 @@ import ( "time" "connectrpc.com/connect" + "github.com/grafana/pyroscope/pkg/clientcapability" "google.golang.org/genproto/googleapis/api/httpbody" "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/protobuf/encoding/protojson" @@ -464,7 +465,10 @@ func (f *Pyroscope) initServer() (services.Service, error) { // see https://github.com/grafana/pyroscope/issues/231 f.Cfg.Server.DoNotAddDefaultHTTPMiddleware = true f.Cfg.Server.ExcludeRequestInLog = true // gRPC-specific. - f.Cfg.Server.GRPCMiddleware = append(f.Cfg.Server.GRPCMiddleware, util.RecoveryInterceptorGRPC) + f.Cfg.Server.GRPCMiddleware = append(f.Cfg.Server.GRPCMiddleware, + util.RecoveryInterceptorGRPC, + clientcapability.ClientCapabilitiesGRPCMiddleware(), + ) if f.Cfg.V2 { f.Cfg.Server.MetricsNativeHistogramFactor = 1.1 // 10% increase from bucket to bucket @@ -523,6 +527,7 @@ func (f *Pyroscope) initServer() (services.Service, error) { httpMetric, objstoreTracerMiddleware, httputil.K6Middleware(), + clientcapability.ClientCapabilitiesHttpMiddleware(), } if f.Cfg.SelfProfiling.UseK6Middleware { defaultHTTPMiddleware = append(defaultHTTPMiddleware, httputil.K6Middleware()) diff --git a/pkg/validation/validate.go b/pkg/validation/validate.go index 32762ab334..6802fd6e47 100644 --- a/pkg/validation/validate.go +++ b/pkg/validation/validate.go @@ -3,7 +3,6 @@ package validation import ( "encoding/hex" "fmt" - "slices" "sort" "strings" "time" @@ -115,7 +114,7 @@ type LabelValidationLimits interface { } // ValidateLabels validates the labels of a profile. -func ValidateLabels(limits LabelValidationLimits, tenantID string, utf8LabelNamesEnabled bool, ls []*typesv1.LabelPair) error { +func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1.LabelPair) error { if len(ls) == 0 { return NewErrorf(MissingLabels, MissingLabelsErrorMsg) } @@ -146,24 +145,10 @@ func ValidateLabels(limits LabelValidationLimits, tenantID string, utf8LabelName if len(l.Value) > limits.MaxLabelValueLength(tenantID) { return NewErrorf(LabelValueTooLong, LabelValueTooLongErrorMsg, phlaremodel.LabelPairsString(ls), l.Value) } - // Note this conditional falls back on legacy logic if not valid utf-8 label name - if ok := ValidateUtf8LabelName(l.Name); utf8LabelNamesEnabled && ok { - idx += 1 - continue - } else if origName, newName, ok := SanitizeLabelName(l.Name); ok && origName != newName { - // Legacy logic if client does not specify utf8LabelNamesEnabled - var err error - ls, idx, err = handleSanitizedLabel(ls, idx, origName, newName) - if err != nil { - return err - } - lastLabelName = "" - if idx > 0 && idx <= len(ls) { - lastLabelName = ls[idx-1].Name - } - continue - } else if !ok { - return NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, phlaremodel.LabelPairsString(ls), "invalid label name '"+origName+"'") + // We are permissive to all utf-8 label names at write; but backward compatible + // for clients that do not support utf-8 label names at read. + if !model.LabelName(l.Name).IsValid() { + return NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, phlaremodel.LabelPairsString(ls), "invalid label name '"+l.Name+"'") } if !model.LabelValue(l.Value).IsValid() { return NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, phlaremodel.LabelPairsString(ls), "invalid label value '"+l.Value+"'") @@ -178,82 +163,6 @@ func ValidateLabels(limits LabelValidationLimits, tenantID string, utf8LabelName return nil } -// handleSanitizedLabel handles the case where a label name is sanitized. It ensures that the label name is unique and fails if the value is distinct. -func handleSanitizedLabel(ls []*typesv1.LabelPair, origIdx int, origName, newName string) ([]*typesv1.LabelPair, int, error) { - newLabel := &typesv1.LabelPair{Name: newName, Value: ls[origIdx].Value} - - // Create new slice without the original element - newSlice := make([]*typesv1.LabelPair, 0, len(ls)) - newSlice = append(newSlice, ls[:origIdx]...) - newSlice = append(newSlice, ls[origIdx+1:]...) - - insertIdx, found := slices.BinarySearchFunc(newSlice, newLabel, - func(a, b *typesv1.LabelPair) int { - return strings.Compare(a.Name, b.Name) - }) - - if found { - if newSlice[insertIdx].Value == newLabel.Value { - // Same name and value we are done and can just return newSlice - return newSlice, origIdx, nil - } else { - // Same name, different value - error - return nil, 0, NewErrorf(DuplicateLabelNames, - DuplicateLabelNamesAfterSanitizationErrorMsg, - phlaremodel.LabelPairsString(ls), newName, origName) - } - } - - // Insert the new label at correct position - newSlice = slices.Insert(newSlice, insertIdx, newLabel) - - finalIdx := insertIdx - if insertIdx >= origIdx { - finalIdx = origIdx - } - - copy(ls, newSlice) - return ls[:len(newSlice)], finalIdx, nil -} - -// ValidateUtf8LabelName validates the name is not empty and is a valid utf-8 string -func ValidateUtf8LabelName(name string) bool { - if len(name) == 0 { - return false - } - return utf8.ValidString(name) -} - -// SanitizeLabelName reports whether the label name is valid, -// and returns the sanitized value. -// -// The only change the function makes is replacing dots with underscores. -func SanitizeLabelName(ln string) (old, sanitized string, ok bool) { - if len(ln) == 0 { - return ln, ln, false - } - hasDots := false - for i, b := range ln { - if (b < 'a' || b > 'z') && (b < 'A' || b > 'Z') && b != '_' && (b < '0' || b > '9' || i == 0) { - if b == '.' { - hasDots = true - } else { - return ln, ln, false - } - } - } - if !hasDots { - return ln, ln, true - } - r := []rune(ln) - for i, b := range r { - if b == '.' { - r[i] = '_' - } - } - return ln, string(r), true -} - type ProfileValidationLimits interface { MaxProfileSizeBytes(tenantID string) int MaxProfileStacktraceSamples(tenantID string) int diff --git a/public/app/components/TagsBar.tsx b/public/app/components/TagsBar.tsx index decd6b51f0..f2a4225748 100644 --- a/public/app/components/TagsBar.tsx +++ b/public/app/components/TagsBar.tsx @@ -230,7 +230,7 @@ function LabelsSubmenu({ // Identifies whether a label is in a query or not function isLabelInQuery(query: string, label: string, labelValue: string) { - return query.includes(`${label}="${labelValue}"`); + return query.includes(`"${label}=${labelValue}"`); } export default TagsBar; diff --git a/public/app/services/base.ts b/public/app/services/base.ts index 07367e2c6a..591af56b4d 100644 --- a/public/app/services/base.ts +++ b/public/app/services/base.ts @@ -32,6 +32,8 @@ export async function request( headers = { ...config?.headers, ...(tenantID && { 'X-Scope-OrgID': tenantID }), + // Specify client capabilities in `Accept` header + 'Accept': 'application/json; allow-utf8-labelnames=true', }; } @@ -53,6 +55,8 @@ export async function downloadWithOrgID( headers = { ...config?.headers, ...(tenantID && { 'X-Scope-OrgID': tenantID }), + // Specify client capabilities in `Accept` header + 'Accept': 'application/json; allow-utf8-labelnames=true', }; } diff --git a/public/app/util/query.ts b/public/app/util/query.ts index 2e1df80698..13cf9d1d24 100644 --- a/public/app/util/query.ts +++ b/public/app/util/query.ts @@ -5,13 +5,13 @@ export function appendLabelToQuery( ) { const case1Regexp = new RegExp(`${label}=.+?(\\}|,)`); if (query.match(case1Regexp)) { - return query.replace(case1Regexp, `${label}="${labelValue}"$1`); + return query.replace(case1Regexp, `"${label}=${labelValue}"$1`); } if (query.indexOf('{}') !== -1) { - return query.replace('}', `${label}="${labelValue}"}`); + return query.replace('}', `"${label}=${labelValue}"}`); } if (query.indexOf('}') !== -1) { - return query.replace('}', `, ${label}="${labelValue}"}`); + return query.replace('}', `, "${label}=${labelValue}"}`); } console.warn('TODO: handle this case'); From f651740d6e5b1c6ae88d1b7ccfe578a65185b1a0 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:14:33 -0400 Subject: [PATCH 03/15] Prettier format --- public/app/services/base.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/app/services/base.ts b/public/app/services/base.ts index 591af56b4d..9f55695aef 100644 --- a/public/app/services/base.ts +++ b/public/app/services/base.ts @@ -33,7 +33,7 @@ export async function request( ...config?.headers, ...(tenantID && { 'X-Scope-OrgID': tenantID }), // Specify client capabilities in `Accept` header - 'Accept': 'application/json; allow-utf8-labelnames=true', + Accept: 'application/json; allow-utf8-labelnames=true', }; } @@ -56,7 +56,7 @@ export async function downloadWithOrgID( ...config?.headers, ...(tenantID && { 'X-Scope-OrgID': tenantID }), // Specify client capabilities in `Accept` header - 'Accept': 'application/json; allow-utf8-labelnames=true', + Accept: 'application/json; allow-utf8-labelnames=true', }; } From 6e8c8957e5ff50526cb54f0ad84ad2a3685ebb5b Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:14:40 -0400 Subject: [PATCH 04/15] Handle multiple values set for `Accept` header --- pkg/clientcapability/client_capability.go | 31 ++++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pkg/clientcapability/client_capability.go b/pkg/clientcapability/client_capability.go index 19ee69ef8d..f8a7c9db83 100644 --- a/pkg/clientcapability/client_capability.go +++ b/pkg/clientcapability/client_capability.go @@ -105,24 +105,25 @@ type ClientCapability struct { } func parseClientCapabilities(header http.Header) (ClientCapabilities, error) { - acceptHeaderValue := header.Get(acceptHeader) - if acceptHeaderValue != "" { - accepts := strings.Split(acceptHeaderValue, ",") - - var clientCapabilities = make(ClientCapabilities) - for _, accept := range accepts { - if _, params, err := mime.ParseMediaType(accept); err != nil { - return nil, err - } else { - for k, v := range params { - if _, ok := capabilities[CapabilityName(k)]; ok { - clientCapabilities[CapabilityName(k)] = CapabilityValue(v) + acceptHeaderValues := header.Values(acceptHeader) + + var clientCapabilities = make(ClientCapabilities) + for _, acceptHeaderValue := range acceptHeaderValues { + if acceptHeaderValue != "" { + accepts := strings.Split(acceptHeaderValue, ",") + + for _, accept := range accepts { + if _, params, err := mime.ParseMediaType(accept); err != nil { + return nil, err + } else { + for k, v := range params { + if _, ok := capabilities[CapabilityName(k)]; ok { + clientCapabilities[CapabilityName(k)] = CapabilityValue(v) + } } } } } - return clientCapabilities, nil } - - return ClientCapabilities{}, nil + return clientCapabilities, nil } From a2b48a152434870ac7f8f5742fd1487107f566b2 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Wed, 24 Sep 2025 14:42:15 -0400 Subject: [PATCH 05/15] Address comments - General cleanup - Removed clientcapabilities package, moved back in to frontend package - Using struct for client capabilities in the context - Added many tests --- cmd/profilecli/client.go | 33 +- cmd/profilecli/client_test.go | 79 ++++ pkg/clientcapability/utf_label_names.go | 75 --- .../client_capability.go | 66 ++- pkg/featureflags/client_capability_test.go | 239 ++++++++++ .../queryfrontend/query_label_names.go | 6 +- .../queryfrontend/query_series_labels.go | 6 +- pkg/pyroscope/modules.go | 7 +- pkg/validation/validate.go | 57 +++ pkg/validation/validate_test.go | 441 ++++++++++++++++-- 10 files changed, 831 insertions(+), 178 deletions(-) create mode 100644 cmd/profilecli/client_test.go delete mode 100644 pkg/clientcapability/utf_label_names.go rename pkg/{clientcapability => featureflags}/client_capability.go (67%) create mode 100644 pkg/featureflags/client_capability_test.go diff --git a/cmd/profilecli/client.go b/cmd/profilecli/client.go index 24e811ffee..cbc543499d 100644 --- a/cmd/profilecli/client.go +++ b/cmd/profilecli/client.go @@ -16,22 +16,38 @@ const ( protocolTypeConnect = "connect" protocolTypeGRPC = "grpc" protocolTypeGRPCWeb = "grpc-web" + + acceptHeaderMimeType = "*/*" ) -var acceptHeaderFeatureFlags = []string{ +var acceptHeaderClientCapabilities = []string{ "allow-utf8-labelnames=true", } var userAgentHeader = fmt.Sprintf("pyroscope/%s", version.Version) -var acceptHeaderMimeType = "*/*" -func buildAcceptHeader(featureFlags []string) string { - acceptHeader := acceptHeaderMimeType - if len(acceptHeaderFeatureFlags) > 0 { - acceptHeader += "; " + strings.Join(featureFlags, "; ") +func addClientCapabilitiesHeader(r *http.Request, mime string, clientCapabilities []string) { + missingClientCapabilities := make([]string, 0, len(clientCapabilities)) + for _, capability := range clientCapabilities { + found := false + // Check if any header value already contains this capability + for _, value := range r.Header.Values("Accept") { + if strings.Contains(value, capability) { + found = true + break + } + } + + if !found { + missingClientCapabilities = append(missingClientCapabilities, capability) + } } - return acceptHeader + if len(missingClientCapabilities) > 0 { + acceptHeader := mime + acceptHeader += "; " + strings.Join(missingClientCapabilities, "; ") + r.Header.Add("Accept", acceptHeader) + } } type phlareClient struct { @@ -61,8 +77,7 @@ func (a *authRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) } } - acceptHeader := buildAcceptHeader(acceptHeaderFeatureFlags) - req.Header.Set("Accept", acceptHeader) + addClientCapabilitiesHeader(req, acceptHeaderMimeType, acceptHeaderClientCapabilities) req.Header.Set("User-Agent", userAgentHeader) return a.next.RoundTrip(req) diff --git a/cmd/profilecli/client_test.go b/cmd/profilecli/client_test.go new file mode 100644 index 0000000000..e4f3f99363 --- /dev/null +++ b/cmd/profilecli/client_test.go @@ -0,0 +1,79 @@ +package main + +import ( + "net/http" + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_AcceptHeader(t *testing.T) { + tests := []struct { + Name string + Header http.Header + ClientCapabilities []string + Want []string + }{ + { + Name: "empty header adds capability", + Header: http.Header{}, + ClientCapabilities: []string{ + "allow-utf8-labelnames=true", + }, + Want: []string{"*/*; allow-utf8-labelnames=true"}, + }, + { + Name: "existing header appends capability", + Header: http.Header{ + "Accept": []string{"application/json"}, + }, + ClientCapabilities: []string{ + "allow-utf8-labelnames=true", + }, + Want: []string{"application/json", "*/*; allow-utf8-labelnames=true"}, + }, + { + Name: "multiple existing values appends capability", + Header: http.Header{ + "Accept": []string{"application/json", "text/plain"}, + }, + ClientCapabilities: []string{ + "allow-utf8-labelnames=true", + }, + Want: []string{"application/json", "text/plain", "*/*; allow-utf8-labelnames=true"}, + }, + { + Name: "existing capability is not duplicated", + Header: http.Header{ + "Accept": []string{"*/*; allow-utf8-labelnames=true"}, + }, + ClientCapabilities: []string{ + "allow-utf8-labelnames=true", + }, + Want: []string{"*/*; allow-utf8-labelnames=true"}, + }, + { + Name: "multiple client capabilities appends capability", + Header: http.Header{ + "Accept": []string{"*/*; allow-utf8-labelnames=true"}, + }, + ClientCapabilities: []string{ + "allow-utf8-labelnames=true", + "capability2=false", + }, + Want: []string{"*/*; allow-utf8-labelnames=true", "*/*; capability2=false"}, + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + req, _ := http.NewRequest("GET", "example.com", nil) + req.Header = tt.Header + clientCapabilities := tt.ClientCapabilities + + addClientCapabilitiesHeader(req, acceptHeaderMimeType, clientCapabilities) + require.Equal(t, tt.Want, req.Header.Values("Accept")) + }) + } +} diff --git a/pkg/clientcapability/utf_label_names.go b/pkg/clientcapability/utf_label_names.go deleted file mode 100644 index c2c2beb1af..0000000000 --- a/pkg/clientcapability/utf_label_names.go +++ /dev/null @@ -1,75 +0,0 @@ -package clientcapability - -import ( - "context" - "fmt" - "slices" - "strings" - - "github.com/grafana/pyroscope/pkg/validation" -) - -func Utf8LabelNamesEnabled(ctx context.Context) bool { - if capabilities, ok := GetClientCapabilities(ctx); ok { - if val, ok := capabilities[AllowUtf8LabelNamesCapabilityName]; ok && val == "true" { - return true - } - } - return false -} - -// SanitizeLabelNames uses legacy logic to transform non utf-8 compliant label names -// (if possible). It will error on duplicate label names post-sanitization. -func SanitizeLabelNames(labelNames []string) ([]string, error) { - slices.Sort(labelNames) - var ( - lastLabelName = "" - ) - for idx, labelName := range labelNames { - if origName, newName, ok := sanitizeLabelName(labelName); ok && origName != newName { - labelNames[idx] = newName - lastLabelName = origName - continue - } else if !ok { - return nil, validation.NewErrorf(validation.InvalidLabels, validation.InvalidLabelsErrorMsg, labelName, - fmt.Sprintf("invalid label name '%s'. consider setting '%s=true' in the `Accept` header", origName, AllowUtf8LabelNamesCapabilityName)) - } - - if cmp := strings.Compare(lastLabelName, labelName); cmp == 0 { - return nil, validation.NewErrorf(validation.DuplicateLabelNames, validation.DuplicateLabelNamesErrorMsg, lastLabelName, labelName) - } - lastLabelName = labelName - } - - return labelNames, nil -} - -// sanitizeLabelName reports whether the label name is valid, -// and returns the sanitized value. -// -// The only change the function makes is replacing dots with underscores. -func sanitizeLabelName(ln string) (old, sanitized string, ok bool) { - if len(ln) == 0 { - return ln, ln, false - } - hasDots := false - for i, b := range ln { - if (b < 'a' || b > 'z') && (b < 'A' || b > 'Z') && b != '_' && (b < '0' || b > '9' || i == 0) { - if b == '.' { - hasDots = true - } else { - return ln, ln, false - } - } - } - if !hasDots { - return ln, ln, true - } - r := []rune(ln) - for i, b := range r { - if b == '.' { - r[i] = '_' - } - } - return ln, string(r), true -} diff --git a/pkg/clientcapability/client_capability.go b/pkg/featureflags/client_capability.go similarity index 67% rename from pkg/clientcapability/client_capability.go rename to pkg/featureflags/client_capability.go index f8a7c9db83..82226c0d3a 100644 --- a/pkg/clientcapability/client_capability.go +++ b/pkg/featureflags/client_capability.go @@ -1,4 +1,4 @@ -package clientcapability +package featureflags import ( "context" @@ -7,39 +7,31 @@ import ( "strings" "connectrpc.com/connect" + "github.com/go-kit/log/level" "github.com/grafana/dskit/middleware" + "github.com/grafana/pyroscope/pkg/util" "google.golang.org/grpc" "google.golang.org/grpc/metadata" ) const ( - // Capability names - AllowUtf8LabelNamesCapabilityName CapabilityName = "allow-utf8-labelnames" + // Capability names - update parseClientCapabilities below when new capabilities added + allowUtf8LabelNamesCapabilityName string = "allow-utf8-labelnames" ) -var capabilities = map[CapabilityName]bool{ - AllowUtf8LabelNamesCapabilityName: true, -} - -type CapabilityName string -type CapabilityValue string - // Define a custom context key type to avoid collisions -type contextKey int +type contextKey struct{} -const ( - clientCapabilitiesKey contextKey = iota - acceptHeader = "Accept" -) - -type ClientCapabilities map[CapabilityName]CapabilityValue +type ClientCapabilities struct { + AllowUtf8LabelNames bool +} func WithClientCapabilities(ctx context.Context, clientCapabilities ClientCapabilities) context.Context { - return context.WithValue(ctx, clientCapabilitiesKey, clientCapabilities) + return context.WithValue(ctx, contextKey{}, clientCapabilities) } func GetClientCapabilities(ctx context.Context) (ClientCapabilities, bool) { - value, ok := ctx.Value(clientCapabilitiesKey).(ClientCapabilities) + value, ok := ctx.Value(contextKey{}).(ClientCapabilities) return value, ok } @@ -64,15 +56,13 @@ func ClientCapabilitiesGRPCMiddleware() grpc.UnaryServerInterceptor { } // Reuse existing HTTP header parsing + // TODO add metrics = # requests like this and # clients [need + // labels for requests and clients/tenet and user agent(?)] clientCapabilities, err := parseClientCapabilities(httpHeader) if err != nil { return nil, connect.NewError(connect.CodeInvalidArgument, err) } - if len(clientCapabilities) == 0 { - return handler(ctx, req) - } - enhancedCtx := WithClientCapabilities(ctx, clientCapabilities) return handler(enhancedCtx, req) } @@ -87,10 +77,6 @@ func ClientCapabilitiesHttpMiddleware() middleware.Interface { if err != nil { http.Error(w, "Invalid header format: "+err.Error(), http.StatusBadRequest) return - } else if len(clientCapabilities) == 0 { - // If no capabilities parsed, continue without setting context - next.ServeHTTP(w, r) - return } ctx := WithClientCapabilities(r.Context(), clientCapabilities) @@ -99,31 +85,35 @@ func ClientCapabilitiesHttpMiddleware() middleware.Interface { }) } -type ClientCapability struct { - Name CapabilityName - Value string -} - func parseClientCapabilities(header http.Header) (ClientCapabilities, error) { - acceptHeaderValues := header.Values(acceptHeader) + acceptHeaderValues := header.Values("Accept") + + var capabilities ClientCapabilities - var clientCapabilities = make(ClientCapabilities) for _, acceptHeaderValue := range acceptHeaderValues { if acceptHeaderValue != "" { accepts := strings.Split(acceptHeaderValue, ",") for _, accept := range accepts { if _, params, err := mime.ParseMediaType(accept); err != nil { - return nil, err + return capabilities, err } else { for k, v := range params { - if _, ok := capabilities[CapabilityName(k)]; ok { - clientCapabilities[CapabilityName(k)] = CapabilityValue(v) + switch k { + case allowUtf8LabelNamesCapabilityName: + if v == "true" { + capabilities.AllowUtf8LabelNames = true + } + default: + level.Debug(util.Logger).Log( + "msg", "unknown capability parsed from Accept header", + "acceptHeaderKey", k, + "acceptHeaderValue", v) } } } } } } - return clientCapabilities, nil + return capabilities, nil } diff --git a/pkg/featureflags/client_capability_test.go b/pkg/featureflags/client_capability_test.go new file mode 100644 index 0000000000..85d284deca --- /dev/null +++ b/pkg/featureflags/client_capability_test.go @@ -0,0 +1,239 @@ +package featureflags + +import ( + "net/http" + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_parseClientCapabilities(t *testing.T) { + tests := []struct { + Name string + Header http.Header + Want ClientCapabilities + WantError bool + ErrorMessage string + }{ + { + Name: "empty header returns default capabilities", + Header: http.Header{}, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + { + Name: "no Accept header returns default capabilities", + Header: http.Header{ + "Content-Type": []string{"application/json"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + { + Name: "empty Accept header value returns default capabilities", + Header: http.Header{ + "Accept": []string{""}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + { + Name: "simple Accept header without capabilities", + Header: http.Header{ + "Accept": []string{"application/json"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + { + Name: "Accept header with utf8 label names capability true", + Header: http.Header{ + "Accept": []string{"*/*; allow-utf8-labelnames=true"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "Accept header with utf8 label names capability false", + Header: http.Header{ + "Accept": []string{"*/*; allow-utf8-labelnames=false"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + { + Name: "Accept header with utf8 label names capability invalid value", + Header: http.Header{ + "Accept": []string{"*/*; allow-utf8-labelnames=invalid"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + { + Name: "Accept header with unknown capability", + Header: http.Header{ + "Accept": []string{"*/*; unknown-capability=true"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + { + Name: "Accept header with multiple capabilities", + Header: http.Header{ + "Accept": []string{"*/*; allow-utf8-labelnames=true; unknown-capability=false"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "multiple Accept header values", + Header: http.Header{ + "Accept": []string{"application/json", "*/*; allow-utf8-labelnames=true"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "multiple Accept header values with different capabilities", + Header: http.Header{ + "Accept": []string{ + "application/json; allow-utf8-labelnames=false", + "*/*; allow-utf8-labelnames=true", + }, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "Accept header with quality values", + Header: http.Header{ + "Accept": []string{"text/html; q=0.9; allow-utf8-labelnames=true"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "complex Accept header", + Header: http.Header{ + "Accept": []string{ + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8;allow-utf8-labelnames=true", + }, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "multiple Accept header entries", + Header: http.Header{ + "Accept": []string{ + "application/json", + "text/plain; allow-utf8-labelnames=true", + "*/*; q=0.1", + }, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "invalid mime type in Accept header", + Header: http.Header{ + "Accept": []string{"invalid/mime/type/format"}, + }, + Want: ClientCapabilities{}, + WantError: true, + ErrorMessage: "mime: unexpected content after media subtype", + }, + { + Name: "Accept header with invalid syntax", + Header: http.Header{ + "Accept": []string{"text/html; invalid-parameter-syntax"}, + }, + Want: ClientCapabilities{}, + WantError: true, + ErrorMessage: "mime: invalid media parameter", + }, + { + Name: "mixed valid and invalid Accept header values", + Header: http.Header{ + "Accept": []string{ + "application/json", + "invalid/mime/type/format", + }, + }, + Want: ClientCapabilities{}, + WantError: true, + ErrorMessage: "mime: unexpected content after media subtype", + }, + { + // Parameter names are case-insensitive in mime.ParseMediaType + Name: "case sensitivity test for capability name", + Header: http.Header{ + "Accept": []string{"*/*; Allow-Utf8-Labelnames=true"}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "whitespace handling in Accept header", + Header: http.Header{ + "Accept": []string{" application/json ; allow-utf8-labelnames=true "}, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + + got, err := parseClientCapabilities(tt.Header) + + if tt.WantError { + require.Error(t, err) + if tt.ErrorMessage != "" { + require.Contains(t, err.Error(), tt.ErrorMessage) + } + return + } + + require.NoError(t, err) + require.Equal(t, tt.Want, got) + }) + } +} + +func Test_parseClientCapabilities_MultipleCapabilities(t *testing.T) { + // This test specifically checks that when the same capability appears + // multiple times with different values, the last "true" value wins + tests := []struct { + Name string + Header http.Header + Want ClientCapabilities + }{ + { + Name: "capability appears multiple times - last true wins", + Header: http.Header{ + "Accept": []string{ + "application/json; allow-utf8-labelnames=false", + "text/plain; allow-utf8-labelnames=true", + }, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "capability appears multiple times - last false loses to earlier true", + Header: http.Header{ + "Accept": []string{ + "application/json; allow-utf8-labelnames=true", + "text/plain; allow-utf8-labelnames=false", + }, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: true}, + }, + { + Name: "capability appears multiple times - all false", + Header: http.Header{ + "Accept": []string{ + "application/json; allow-utf8-labelnames=false", + "text/plain; allow-utf8-labelnames=false", + }, + }, + Want: ClientCapabilities{AllowUtf8LabelNames: false}, + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + + got, err := parseClientCapabilities(tt.Header) + require.NoError(t, err) + require.Equal(t, tt.Want, got) + }) + } +} diff --git a/pkg/frontend/readpath/queryfrontend/query_label_names.go b/pkg/frontend/readpath/queryfrontend/query_label_names.go index 1363795776..2affd6ae06 100644 --- a/pkg/frontend/readpath/queryfrontend/query_label_names.go +++ b/pkg/frontend/readpath/queryfrontend/query_label_names.go @@ -5,10 +5,10 @@ import ( "connectrpc.com/connect" "github.com/grafana/dskit/tenant" - "github.com/grafana/pyroscope/pkg/clientcapability" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" + "github.com/grafana/pyroscope/pkg/featureflags" "github.com/grafana/pyroscope/pkg/validation" ) @@ -49,9 +49,9 @@ func (q *QueryFrontend) LabelNames( } labelNames := report.LabelNames.LabelNames - if enabled := clientcapability.Utf8LabelNamesEnabled(ctx); !enabled { + if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { // Use legacy label name sanitization if utf8 label names not enabled - if labelNames, err := clientcapability.SanitizeLabelNames(labelNames); err != nil { + if labelNames, err := validation.SanitizeLabelNames(labelNames); err != nil { return nil, connect.NewError(connect.CodeInvalidArgument, err) } else { return connect.NewResponse(&typesv1.LabelNamesResponse{Names: labelNames}), nil diff --git a/pkg/frontend/readpath/queryfrontend/query_series_labels.go b/pkg/frontend/readpath/queryfrontend/query_series_labels.go index 443cd5a53e..0d4031706c 100644 --- a/pkg/frontend/readpath/queryfrontend/query_series_labels.go +++ b/pkg/frontend/readpath/queryfrontend/query_series_labels.go @@ -6,10 +6,10 @@ import ( "connectrpc.com/connect" "github.com/go-kit/log/level" "github.com/grafana/dskit/tenant" - "github.com/grafana/pyroscope/pkg/clientcapability" querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" + "github.com/grafana/pyroscope/pkg/featureflags" "github.com/grafana/pyroscope/pkg/validation" ) @@ -57,7 +57,7 @@ func (q *QueryFrontend) Series( } seriesLabels := report.SeriesLabels.SeriesLabels - if enabled := clientcapability.Utf8LabelNamesEnabled(ctx); !enabled { + if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { // Use legacy label name sanitization if utf8 label names not enabled for _, seriesLabel := range seriesLabels { labelNames := make([]string, len(seriesLabel.Labels)) @@ -66,7 +66,7 @@ func (q *QueryFrontend) Series( } // Sanitize the label names - sanitizedNames, err := clientcapability.SanitizeLabelNames(labelNames) + sanitizedNames, err := validation.SanitizeLabelNames(labelNames) if err != nil { return nil, connect.NewError(connect.CodeInvalidArgument, err) } diff --git a/pkg/pyroscope/modules.go b/pkg/pyroscope/modules.go index 4725cf0656..0a2db5e030 100644 --- a/pkg/pyroscope/modules.go +++ b/pkg/pyroscope/modules.go @@ -11,7 +11,6 @@ import ( "time" "connectrpc.com/connect" - "github.com/grafana/pyroscope/pkg/clientcapability" "google.golang.org/genproto/googleapis/api/httpbody" "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/protobuf/encoding/protojson" @@ -43,6 +42,7 @@ import ( "github.com/grafana/pyroscope/pkg/compactor" "github.com/grafana/pyroscope/pkg/distributor" "github.com/grafana/pyroscope/pkg/embedded/grafana" + "github.com/grafana/pyroscope/pkg/featureflags" "github.com/grafana/pyroscope/pkg/ingester" objstoreclient "github.com/grafana/pyroscope/pkg/objstore/client" "github.com/grafana/pyroscope/pkg/objstore/providers/filesystem" @@ -467,7 +467,7 @@ func (f *Pyroscope) initServer() (services.Service, error) { f.Cfg.Server.ExcludeRequestInLog = true // gRPC-specific. f.Cfg.Server.GRPCMiddleware = append(f.Cfg.Server.GRPCMiddleware, util.RecoveryInterceptorGRPC, - clientcapability.ClientCapabilitiesGRPCMiddleware(), + featureflags.ClientCapabilitiesGRPCMiddleware(), ) if f.Cfg.V2 { @@ -487,7 +487,6 @@ func (f *Pyroscope) initServer() (services.Service, error) { f.Cfg.Server.HTTPServerReadTimeout = 2 * f.Cfg.Server.HTTPServerReadTimeout f.Cfg.Server.HTTPServerWriteTimeout = 2 * f.Cfg.Server.HTTPServerWriteTimeout } - var err error if f.Server, err = server.New(f.Cfg.Server); err != nil { return nil, err @@ -527,7 +526,7 @@ func (f *Pyroscope) initServer() (services.Service, error) { httpMetric, objstoreTracerMiddleware, httputil.K6Middleware(), - clientcapability.ClientCapabilitiesHttpMiddleware(), + featureflags.ClientCapabilitiesHttpMiddleware(), } if f.Cfg.SelfProfiling.UseK6Middleware { defaultHTTPMiddleware = append(defaultHTTPMiddleware, httputil.K6Middleware()) diff --git a/pkg/validation/validate.go b/pkg/validation/validate.go index 6802fd6e47..d923f02159 100644 --- a/pkg/validation/validate.go +++ b/pkg/validation/validate.go @@ -3,6 +3,7 @@ package validation import ( "encoding/hex" "fmt" + "slices" "sort" "strings" "time" @@ -163,6 +164,62 @@ func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1 return nil } +// SanitizeLabelNames uses legacy logic to transform non utf-8 compliant label names +// (if possible). It will error on duplicate label names post-sanitization. +func SanitizeLabelNames(labelNames []string) ([]string, error) { + slices.Sort(labelNames) + var ( + lastLabelName = "" + ) + for idx, labelName := range labelNames { + if origName, newName, ok := sanitizeLabelName(labelName); ok && origName != newName { + labelNames[idx] = newName + lastLabelName = origName + continue + } else if !ok { + return nil, NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, labelName, + fmt.Sprintf("invalid label name '%s'. consider setting 'allow-utf8-labelnames=true' in the `Accept` header", origName)) + } + + if cmp := strings.Compare(lastLabelName, labelName); cmp == 0 { + return nil, NewErrorf(DuplicateLabelNames, DuplicateLabelNamesErrorMsg, lastLabelName, labelName) + } + lastLabelName = labelName + } + + return labelNames, nil +} + +// sanitizeLabelName reports whether the label name is valid, +// and returns the sanitized value. +// +// The only change the function makes is replacing dots with underscores. +func sanitizeLabelName(ln string) (old, sanitized string, ok bool) { + if len(ln) == 0 { + return ln, ln, false + } + hasDots := false + for i, b := range ln { + if (b < 'a' || b > 'z') && (b < 'A' || b > 'Z') && b != '_' && (b < '0' || b > '9' || i == 0) { + if b == '.' { + hasDots = true + } else { + return ln, ln, false + } + } + } + if !hasDots { + return ln, ln, true + } + r := []rune(ln) + for i, b := range r { + if b == '.' { + r[i] = '_' + } + } + return ln, string(r), true +} + type ProfileValidationLimits interface { MaxProfileSizeBytes(tenantID string) int MaxProfileStacktraceSamples(tenantID string) int diff --git a/pkg/validation/validate_test.go b/pkg/validation/validate_test.go index 52c0a449dc..f04011c038 100644 --- a/pkg/validation/validate_test.go +++ b/pkg/validation/validate_test.go @@ -5,7 +5,6 @@ import ( "time" "github.com/prometheus/common/model" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" @@ -124,7 +123,7 @@ func TestValidateLabels(t *testing.T) { {Name: phlaremodel.LabelNameServiceName, Value: "svc"}, }, expectedReason: DuplicateLabelNames, - expectedErr: "profile with labels '{__name__=\"qux\", label.name=\"bar\", label_name=\"foo\", service_name=\"svc\"}' has duplicate label name 'label_name' after label name sanitization from 'label.name'", + expectedErr: "profile with labels '{__name__=\"qux\", label.name=\"foo\", label.name=\"bar\", service_name=\"svc\"}' has duplicate label name: 'label.name'", }, { name: "duplicates once sanitized with matching values", @@ -135,17 +134,6 @@ func TestValidateLabels(t *testing.T) { {Name: "service_name", Value: "svc0"}, }, }, - { - name: "duplicates once sanitized with conflicting values", - lbs: []*typesv1.LabelPair{ - {Name: model.MetricNameLabel, Value: "qux"}, - {Name: "service.name", Value: "svc1"}, - {Name: "service_abc", Value: "def"}, - {Name: "service_name", Value: "svc0"}, - }, - expectedReason: DuplicateLabelNames, - expectedErr: "profile with labels '{__name__=\"qux\", service.name=\"svc1\", service_abc=\"def\", service_name=\"svc0\"}' has duplicate label name 'service_name' after label name sanitization from 'service.name'", - }, } { t.Run(tt.name, func(t *testing.T) { err := ValidateLabels(MockLimits{ @@ -527,40 +515,401 @@ func TestValidateFlamegraphMaxNodes(t *testing.T) { } } -func Test_SanitizeLabelName(t *testing.T) { - for _, tc := range []struct { - input string - expected string - valid bool +func Test_sanitizeLabelName(t *testing.T) { + tests := []struct { + Name string + LabelName string + WantOld string + WantSanitized string + WantOk bool }{ - {"", "", false}, - {".", "_", true}, - {".a", "_a", true}, - {"a.", "a_", true}, - {"..", "__", true}, - {"..a", "__a", true}, - {"a..", "a__", true}, - {"a.a", "a_a", true}, - {".a.", "_a_", true}, - {"..a..", "__a__", true}, - {"世界", "世界", false}, - {"界世_a", "界世_a", false}, - {"界世__a", "界世__a", false}, - {"a_世界", "a_世界", false}, - {"0.a", "0.a", false}, - {"0a", "0a", false}, - {"a.0", "a_0", true}, - {"a0", "a0", true}, - {"_", "_", true}, - {"__a", "__a", true}, - {"__a__", "__a__", true}, - } { - tc := tc - t.Run("", func(t *testing.T) { - origName, actual, valid := SanitizeLabelName(tc.input) - assert.Equal(t, tc.input, origName) - assert.Equal(t, tc.expected, actual) - assert.Equal(t, tc.valid, valid) + { + Name: "empty string is invalid", + LabelName: "", + WantOld: "", + WantSanitized: "", + WantOk: false, + }, + { + Name: "valid simple label name", + LabelName: "service", + WantOld: "service", + WantSanitized: "service", + WantOk: true, + }, + { + Name: "valid label with underscores", + LabelName: "service_name", + WantOld: "service_name", + WantSanitized: "service_name", + WantOk: true, + }, + { + Name: "valid label with numbers", + LabelName: "service123", + WantOld: "service123", + WantSanitized: "service123", + WantOk: true, + }, + { + Name: "valid mixed case label", + LabelName: "ServiceName", + WantOld: "ServiceName", + WantSanitized: "ServiceName", + WantOk: true, + }, + { + Name: "label with dots gets sanitized", + LabelName: "service.name", + WantOld: "service.name", + WantSanitized: "service_name", + WantOk: true, + }, + { + Name: "label with multiple dots gets sanitized", + LabelName: "service.name.type", + WantOld: "service.name.type", + WantSanitized: "service_name_type", + WantOk: true, + }, + { + Name: "label starting with number is invalid", + LabelName: "123service", + WantOld: "123service", + WantSanitized: "123service", + WantOk: false, + }, + { + Name: "label with hyphen is invalid", + LabelName: "service-name", + WantOld: "service-name", + WantSanitized: "service-name", + WantOk: false, + }, + { + Name: "label with space is invalid", + LabelName: "service name", + WantOld: "service name", + WantSanitized: "service name", + WantOk: false, + }, + { + Name: "label with special characters is invalid", + LabelName: "service@name", + WantOld: "service@name", + WantSanitized: "service@name", + WantOk: false, + }, + { + Name: "label with dots and invalid characters is invalid", + LabelName: "service.name@host", + WantOld: "service.name@host", + WantSanitized: "service.name@host", + WantOk: false, + }, + { + Name: "label starting with underscore", + LabelName: "_service", + WantOld: "_service", + WantSanitized: "_service", + WantOk: true, + }, + { + Name: "label with only underscores", + LabelName: "___", + WantOld: "___", + WantSanitized: "___", + WantOk: true, + }, + { + Name: "label ending with dot", + LabelName: "service.", + WantOld: "service.", + WantSanitized: "service_", + WantOk: true, + }, + { + Name: "label starting with dot gets sanitized", + LabelName: ".service", + WantOld: ".service", + WantSanitized: "_service", + WantOk: true, + }, + { + Name: "single dot", + LabelName: ".", + WantOld: ".", + WantSanitized: "_", + WantOk: true, + }, + { + Name: "double dots", + LabelName: "..", + WantOld: "..", + WantSanitized: "__", + WantOk: true, + }, + { + Name: "double dots with letter at end", + LabelName: "..a", + WantOld: "..a", + WantSanitized: "__a", + WantOk: true, + }, + { + Name: "letter with double dots at end", + LabelName: "a..", + WantOld: "a..", + WantSanitized: "a__", + WantOk: true, + }, + { + Name: "letter surrounded by dots", + LabelName: ".a.", + WantOld: ".a.", + WantSanitized: "_a_", + WantOk: true, + }, + { + Name: "letter surrounded by double dots", + LabelName: "..a..", + WantOld: "..a..", + WantSanitized: "__a__", + WantOk: true, + }, + { + Name: "letter with dot and number", + LabelName: "a.0", + WantOld: "a.0", + WantSanitized: "a_0", + WantOk: true, + }, + { + Name: "number with dot is invalid", + LabelName: "0.a", + WantOld: "0.a", + WantSanitized: "0.a", + WantOk: false, + }, + { + Name: "single underscore", + LabelName: "_", + WantOld: "_", + WantSanitized: "_", + WantOk: true, + }, + { + Name: "double underscore with letter", + LabelName: "__a", + WantOld: "__a", + WantSanitized: "__a", + WantOk: true, + }, + { + Name: "letter surrounded by double underscores", + LabelName: "__a__", + WantOld: "__a__", + WantSanitized: "__a__", + WantOk: true, + }, + { + Name: "unicode characters are invalid", + LabelName: "世界", + WantOld: "世界", + WantSanitized: "世界", + WantOk: false, + }, + { + Name: "mixed unicode with valid characters is invalid", + LabelName: "界世_a", + WantOld: "界世_a", + WantSanitized: "界世_a", + WantOk: false, + }, + { + Name: "mixed unicode with underscores is invalid", + LabelName: "界世__a", + WantOld: "界世__a", + WantSanitized: "界世__a", + WantOk: false, + }, + { + Name: "valid characters with unicode suffix is invalid", + LabelName: "a_世界", + WantOld: "a_世界", + WantSanitized: "a_世界", + WantOk: false, + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + + gotOld, gotSanitized, gotOk := sanitizeLabelName(tt.LabelName) + require.Equal(t, tt.WantOld, gotOld) + require.Equal(t, tt.WantSanitized, gotSanitized) + require.Equal(t, tt.WantOk, gotOk) + }) + } +} + +func Test_SanitizeLabelNames(t *testing.T) { + tests := []struct { + Name string + LabelNames []string + Want []string + WantError bool + WantErrorType Reason + }{ + { + Name: "empty slice returns empty slice", + LabelNames: []string{}, + Want: []string{}, + }, + { + Name: "single valid label", + LabelNames: []string{"service"}, + Want: []string{"service"}, + }, + { + Name: "multiple valid labels", + LabelNames: []string{"service", "environment", "version"}, + Want: []string{"environment", "service", "version"}, // sorted + }, + { + Name: "labels with dots get sanitized", + LabelNames: []string{"service.name", "app.version"}, + Want: []string{"app_version", "service_name"}, // sorted and sanitized + }, + { + Name: "mixed valid and sanitizable labels", + LabelNames: []string{"service", "app.version", "environment"}, + Want: []string{"app_version", "environment", "service"}, // sorted + }, + { + Name: "label with invalid characters causes error", + LabelNames: []string{"service", "invalid-name"}, + WantError: true, + WantErrorType: InvalidLabels, + }, + { + Name: "label starting with number causes error", + LabelNames: []string{"service", "123invalid"}, + WantError: true, + WantErrorType: InvalidLabels, + }, + { + Name: "empty label name causes error", + LabelNames: []string{"service", ""}, + WantError: true, + WantErrorType: InvalidLabels, + }, + { + Name: "duplicate labels after sanitization - implementation allows this", + LabelNames: []string{"service_name", "service.name"}, + Want: []string{"service_name", "service_name"}, // Both become service_name but no error in current implementation + }, + { + Name: "duplicate original labels cause error", + LabelNames: []string{"service", "service"}, + WantError: true, + WantErrorType: DuplicateLabelNames, + }, + { + Name: "complex valid case with mixed sanitization", + LabelNames: []string{"z_last", "a.first", "m_middle", "b.second"}, + Want: []string{"a_first", "b_second", "m_middle", "z_last"}, // sorted and sanitized + }, + { + Name: "label with space character", + LabelNames: []string{"service name"}, + WantError: true, + WantErrorType: InvalidLabels, + }, + { + Name: "label with special characters", + LabelNames: []string{"service@host"}, + WantError: true, + WantErrorType: InvalidLabels, + }, + { + Name: "single character labels", + LabelNames: []string{"a", "Z", "x"}, + Want: []string{"Z", "a", "x"}, // sorted + }, + { + Name: "labels with underscores", + LabelNames: []string{"_private", "public_"}, + Want: []string{"_private", "public_"}, // sorted + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + + got, err := SanitizeLabelNames(tt.LabelNames) + + if tt.WantError { + require.Error(t, err) + var validationErr *Error + require.ErrorAs(t, err, &validationErr) + require.Equal(t, tt.WantErrorType, validationErr.Reason) + return + } + + require.NoError(t, err) + require.Equal(t, tt.Want, got) + }) + } +} + +func Test_SanitizeLabelNames_ErrorMessages(t *testing.T) { + tests := []struct { + Name string + LabelNames []string + WantErrorContains string + }{ + { + Name: "invalid label error contains capability suggestion", + LabelNames: []string{"invalid-name"}, + WantErrorContains: "consider setting 'allow-utf8-labelnames=true' in the `Accept` header", + }, + { + Name: "duplicate label error contains both names", + LabelNames: []string{"service", "service"}, + WantErrorContains: "service", + }, + { + Name: "invalid character error shows label name", + LabelNames: []string{"service@host"}, + WantErrorContains: "invalid label name 'service@host'", + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + t.Parallel() + + _, err := SanitizeLabelNames(tt.LabelNames) + require.Error(t, err) + require.Contains(t, err.Error(), tt.WantErrorContains) }) } } + +func Test_SanitizeLabelNames_PreservesOriginalSlice(t *testing.T) { + // Test that the original slice is not modified + original := []string{"service.name", "app.version"} + originalCopy := make([]string, len(original)) + copy(originalCopy, original) + + result, err := SanitizeLabelNames(original) + + require.NoError(t, err) + require.Equal(t, []string{"app_version", "service_name"}, result) + // Original slice should be modified since the function sorts and modifies in place + // This is the actual behavior based on the implementation + require.NotEqual(t, originalCopy, original) +} From 8b0a87ee2cd600f82e25bc508dc0dfa398f168f3 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Mon, 6 Oct 2025 11:08:49 -0400 Subject: [PATCH 06/15] Filter (instead of sanitize) on read - Removes profilecli and OG front end changes (will go in separate PR's) - Updates read path to filter instead of sanitize on read - Reverts utf8 featureflag removal (will be used in separate PR) --- cmd/profilecli/client.go | 33 ---- cmd/profilecli/client_test.go | 79 --------- pkg/featureflags/client_capability.go | 2 - pkg/featureflags/feature_flags.go | 5 + .../queryfrontend/query_label_names.go | 15 +- .../queryfrontend/query_series_labels.go | 62 ++++--- pkg/pyroscope/feature_flags.go | 1 + pkg/validation/validate.go | 79 ++++++--- pkg/validation/validate_test.go | 166 +----------------- public/app/components/TagsBar.tsx | 2 +- public/app/services/base.ts | 4 - public/app/util/query.ts | 6 +- 12 files changed, 110 insertions(+), 344 deletions(-) delete mode 100644 cmd/profilecli/client_test.go diff --git a/cmd/profilecli/client.go b/cmd/profilecli/client.go index f173d0cde9..9aa835cdbf 100644 --- a/cmd/profilecli/client.go +++ b/cmd/profilecli/client.go @@ -3,7 +3,6 @@ package main import ( "fmt" "net/http" - "strings" "connectrpc.com/connect" "github.com/prometheus/common/version" @@ -16,40 +15,10 @@ const ( protocolTypeConnect = "connect" protocolTypeGRPC = "grpc" protocolTypeGRPCWeb = "grpc-web" - - acceptHeaderMimeType = "*/*" ) -var acceptHeaderClientCapabilities = []string{ - "allow-utf8-labelnames=true", -} - var userAgentHeader = fmt.Sprintf("pyroscope/%s", version.Version) -func addClientCapabilitiesHeader(r *http.Request, mime string, clientCapabilities []string) { - missingClientCapabilities := make([]string, 0, len(clientCapabilities)) - for _, capability := range clientCapabilities { - found := false - // Check if any header value already contains this capability - for _, value := range r.Header.Values("Accept") { - if strings.Contains(value, capability) { - found = true - break - } - } - - if !found { - missingClientCapabilities = append(missingClientCapabilities, capability) - } - } - - if len(missingClientCapabilities) > 0 { - acceptHeader := mime - acceptHeader += "; " + strings.Join(missingClientCapabilities, "; ") - r.Header.Add("Accept", acceptHeader) - } -} - type phlareClient struct { TenantID string URL string @@ -80,9 +49,7 @@ func (a *authRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) } } - addClientCapabilitiesHeader(req, acceptHeaderMimeType, acceptHeaderClientCapabilities) req.Header.Set("User-Agent", userAgentHeader) - return a.next.RoundTrip(req) } diff --git a/cmd/profilecli/client_test.go b/cmd/profilecli/client_test.go deleted file mode 100644 index e4f3f99363..0000000000 --- a/cmd/profilecli/client_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package main - -import ( - "net/http" - "testing" - - "github.com/stretchr/testify/require" -) - -func Test_AcceptHeader(t *testing.T) { - tests := []struct { - Name string - Header http.Header - ClientCapabilities []string - Want []string - }{ - { - Name: "empty header adds capability", - Header: http.Header{}, - ClientCapabilities: []string{ - "allow-utf8-labelnames=true", - }, - Want: []string{"*/*; allow-utf8-labelnames=true"}, - }, - { - Name: "existing header appends capability", - Header: http.Header{ - "Accept": []string{"application/json"}, - }, - ClientCapabilities: []string{ - "allow-utf8-labelnames=true", - }, - Want: []string{"application/json", "*/*; allow-utf8-labelnames=true"}, - }, - { - Name: "multiple existing values appends capability", - Header: http.Header{ - "Accept": []string{"application/json", "text/plain"}, - }, - ClientCapabilities: []string{ - "allow-utf8-labelnames=true", - }, - Want: []string{"application/json", "text/plain", "*/*; allow-utf8-labelnames=true"}, - }, - { - Name: "existing capability is not duplicated", - Header: http.Header{ - "Accept": []string{"*/*; allow-utf8-labelnames=true"}, - }, - ClientCapabilities: []string{ - "allow-utf8-labelnames=true", - }, - Want: []string{"*/*; allow-utf8-labelnames=true"}, - }, - { - Name: "multiple client capabilities appends capability", - Header: http.Header{ - "Accept": []string{"*/*; allow-utf8-labelnames=true"}, - }, - ClientCapabilities: []string{ - "allow-utf8-labelnames=true", - "capability2=false", - }, - Want: []string{"*/*; allow-utf8-labelnames=true", "*/*; capability2=false"}, - }, - } - - for _, tt := range tests { - t.Run(tt.Name, func(t *testing.T) { - t.Parallel() - req, _ := http.NewRequest("GET", "example.com", nil) - req.Header = tt.Header - clientCapabilities := tt.ClientCapabilities - - addClientCapabilitiesHeader(req, acceptHeaderMimeType, clientCapabilities) - require.Equal(t, tt.Want, req.Header.Values("Accept")) - }) - } -} diff --git a/pkg/featureflags/client_capability.go b/pkg/featureflags/client_capability.go index 82226c0d3a..f8f5c1c7f0 100644 --- a/pkg/featureflags/client_capability.go +++ b/pkg/featureflags/client_capability.go @@ -56,8 +56,6 @@ func ClientCapabilitiesGRPCMiddleware() grpc.UnaryServerInterceptor { } // Reuse existing HTTP header parsing - // TODO add metrics = # requests like this and # clients [need - // labels for requests and clients/tenet and user agent(?)] clientCapabilities, err := parseClientCapabilities(httpHeader) if err != nil { return nil, connect.NewError(connect.CodeInvalidArgument, err) diff --git a/pkg/featureflags/feature_flags.go b/pkg/featureflags/feature_flags.go index ca8368614f..d73ab26018 100644 --- a/pkg/featureflags/feature_flags.go +++ b/pkg/featureflags/feature_flags.go @@ -15,6 +15,7 @@ const ( V2StorageLayer = "v2StorageLayer" PyroscopeRuler = "pyroscopeRuler" PyroscopeRulerFunctions = "pyroscopeRulerFunctions" + UTF8LabelNames = "utf8LabelNames" ) func stringPtr(s string) *string { @@ -36,6 +37,10 @@ var ( Enabled: false, Description: stringPtr("Enables function support for the Pyroscope ruler, which allows to export resource usage on a per function level."), }, + UTF8LabelNames: { + Enabled: false, + Description: stringPtr("Supports UTF-8 label names for Pyroscope read/write APIs."), + }, } ) diff --git a/pkg/frontend/readpath/queryfrontend/query_label_names.go b/pkg/frontend/readpath/queryfrontend/query_label_names.go index 2affd6ae06..18e2f0b777 100644 --- a/pkg/frontend/readpath/queryfrontend/query_label_names.go +++ b/pkg/frontend/readpath/queryfrontend/query_label_names.go @@ -4,6 +4,7 @@ import ( "context" "connectrpc.com/connect" + "github.com/go-kit/log/level" "github.com/grafana/dskit/tenant" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" @@ -50,12 +51,16 @@ func (q *QueryFrontend) LabelNames( labelNames := report.LabelNames.LabelNames if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { - // Use legacy label name sanitization if utf8 label names not enabled - if labelNames, err := validation.SanitizeLabelNames(labelNames); err != nil { - return nil, connect.NewError(connect.CodeInvalidArgument, err) - } else { - return connect.NewResponse(&typesv1.LabelNamesResponse{Names: labelNames}), nil + // Filter out label names not passing legacy validation if utf8 label names not enabled + filteredLabelNames := make([]string, 0, len(labelNames)) + for _, labelName := range labelNames { + if _, _, ok := validation.SanitizeLegacyLabelName(labelName); !ok { + level.Debug(q.logger).Log("msg", "filtering out label", "label_name", labelName) + continue + } + filteredLabelNames = append(filteredLabelNames, labelName) } + labelNames = filteredLabelNames } return connect.NewResponse(&typesv1.LabelNamesResponse{Names: labelNames}), nil diff --git a/pkg/frontend/readpath/queryfrontend/query_series_labels.go b/pkg/frontend/readpath/queryfrontend/query_series_labels.go index 0d4031706c..036227f874 100644 --- a/pkg/frontend/readpath/queryfrontend/query_series_labels.go +++ b/pkg/frontend/readpath/queryfrontend/query_series_labels.go @@ -38,6 +38,40 @@ func (q *QueryFrontend) Series( if err != nil { return nil, connect.NewError(connect.CodeInvalidArgument, err) } + + labelNames := c.Msg.LabelNames + if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { + if len(labelNames) == 0 { + // Querying for all label names; must retrieve all label names to then filter out + report, err := q.querySingle(ctx, &queryv1.QueryRequest{ + StartTime: c.Msg.Start, + EndTime: c.Msg.End, + LabelSelector: labelSelector, + Query: []*queryv1.Query{{ + QueryType: queryv1.QueryType_QUERY_LABEL_NAMES, + LabelNames: &queryv1.LabelNamesQuery{}, + }}, + }) + if err != nil { + return nil, err + } + if report != nil { + labelNames = report.LabelNames.LabelNames + } + } + + // Filter out label names not passing legacy validation if utf8 label names not enabled + filteredLabelNames := make([]string, 0, len(labelNames)) + for _, labelName := range labelNames { + if _, _, ok := validation.SanitizeLegacyLabelName(labelName); !ok { + level.Debug(q.logger).Log("msg", "filtering out label", "label_name", labelName) + continue + } + filteredLabelNames = append(filteredLabelNames, labelName) + } + labelNames = filteredLabelNames + } + report, err := q.querySingle(ctx, &queryv1.QueryRequest{ StartTime: c.Msg.Start, EndTime: c.Msg.End, @@ -45,7 +79,7 @@ func (q *QueryFrontend) Series( Query: []*queryv1.Query{{ QueryType: queryv1.QueryType_QUERY_SERIES_LABELS, SeriesLabels: &queryv1.SeriesLabelsQuery{ - LabelNames: c.Msg.LabelNames, + LabelNames: labelNames, }, }}, }) @@ -56,29 +90,5 @@ func (q *QueryFrontend) Series( return connect.NewResponse(&querierv1.SeriesResponse{}), nil } - seriesLabels := report.SeriesLabels.SeriesLabels - if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { - // Use legacy label name sanitization if utf8 label names not enabled - for _, seriesLabel := range seriesLabels { - labelNames := make([]string, len(seriesLabel.Labels)) - for i, label := range seriesLabel.Labels { - labelNames[i] = label.Name - } - - // Sanitize the label names - sanitizedNames, err := validation.SanitizeLabelNames(labelNames) - if err != nil { - return nil, connect.NewError(connect.CodeInvalidArgument, err) - } - - // Update the original labels with sanitized names - for i, label := range seriesLabel.Labels { - if i < len(sanitizedNames) { - label.Name = sanitizedNames[i] - } - } - } - } - - return connect.NewResponse(&querierv1.SeriesResponse{LabelsSet: seriesLabels}), nil + return connect.NewResponse(&querierv1.SeriesResponse{LabelsSet: report.SeriesLabels.SeriesLabels}), nil } diff --git a/pkg/pyroscope/feature_flags.go b/pkg/pyroscope/feature_flags.go index 41b11c625d..7e8872d1b4 100644 --- a/pkg/pyroscope/feature_flags.go +++ b/pkg/pyroscope/feature_flags.go @@ -12,6 +12,7 @@ func (c *Config) getFeatureFlags() map[string]bool { featureflags.V2StorageLayer: c.V2, featureflags.PyroscopeRuler: rulerEnabled, featureflags.PyroscopeRulerFunctions: rulerEnabled, + featureflags.UTF8LabelNames: false, // not supported yet } } diff --git a/pkg/validation/validate.go b/pkg/validation/validate.go index d923f02159..0984ea76d1 100644 --- a/pkg/validation/validate.go +++ b/pkg/validation/validate.go @@ -146,10 +146,19 @@ func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1 if len(l.Value) > limits.MaxLabelValueLength(tenantID) { return NewErrorf(LabelValueTooLong, LabelValueTooLongErrorMsg, phlaremodel.LabelPairsString(ls), l.Value) } - // We are permissive to all utf-8 label names at write; but backward compatible - // for clients that do not support utf-8 label names at read. - if !model.LabelName(l.Name).IsValid() { - return NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, phlaremodel.LabelPairsString(ls), "invalid label name '"+l.Name+"'") + if origName, newName, ok := SanitizeLegacyLabelName(l.Name); ok && origName != newName { + var err error + ls, idx, err = handleSanitizedLabel(ls, idx, origName, newName) + if err != nil { + return err + } + lastLabelName = "" + if idx > 0 && idx <= len(ls) { + lastLabelName = ls[idx-1].Name + } + continue + } else if !ok { + return NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, phlaremodel.LabelPairsString(ls), "invalid label name '"+origName+"'") } if !model.LabelValue(l.Value).IsValid() { return NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, phlaremodel.LabelPairsString(ls), "invalid label value '"+l.Value+"'") @@ -164,37 +173,51 @@ func ValidateLabels(limits LabelValidationLimits, tenantID string, ls []*typesv1 return nil } -// SanitizeLabelNames uses legacy logic to transform non utf-8 compliant label names -// (if possible). It will error on duplicate label names post-sanitization. -func SanitizeLabelNames(labelNames []string) ([]string, error) { - slices.Sort(labelNames) - var ( - lastLabelName = "" - ) - for idx, labelName := range labelNames { - if origName, newName, ok := sanitizeLabelName(labelName); ok && origName != newName { - labelNames[idx] = newName - lastLabelName = origName - continue - } else if !ok { - return nil, NewErrorf(InvalidLabels, InvalidLabelsErrorMsg, labelName, - fmt.Sprintf("invalid label name '%s'. consider setting 'allow-utf8-labelnames=true' in the `Accept` header", origName)) +// handleSanitizedLabel handles the case where a label name is sanitized. It ensures that the label name is unique and fails if the value is distinct. +func handleSanitizedLabel(ls []*typesv1.LabelPair, origIdx int, origName, newName string) ([]*typesv1.LabelPair, int, error) { + newLabel := &typesv1.LabelPair{Name: newName, Value: ls[origIdx].Value} + + // Create new slice without the original element + newSlice := make([]*typesv1.LabelPair, 0, len(ls)) + newSlice = append(newSlice, ls[:origIdx]...) + newSlice = append(newSlice, ls[origIdx+1:]...) + + insertIdx, found := slices.BinarySearchFunc(newSlice, newLabel, + func(a, b *typesv1.LabelPair) int { + return strings.Compare(a.Name, b.Name) + }) + + if found { + if newSlice[insertIdx].Value == newLabel.Value { + // Same name and value we are done and can just return newSlice + return newSlice, origIdx, nil + } else { + // Same name, different value - error + return nil, 0, NewErrorf(DuplicateLabelNames, + DuplicateLabelNamesAfterSanitizationErrorMsg, + phlaremodel.LabelPairsString(ls), newName, origName) } + } - if cmp := strings.Compare(lastLabelName, labelName); cmp == 0 { - return nil, NewErrorf(DuplicateLabelNames, DuplicateLabelNamesErrorMsg, lastLabelName, labelName) - } - lastLabelName = labelName + // Insert the new label at correct position + newSlice = slices.Insert(newSlice, insertIdx, newLabel) + + finalIdx := insertIdx + if insertIdx >= origIdx { + finalIdx = origIdx } - return labelNames, nil + copy(ls, newSlice) + return ls[:len(newSlice)], finalIdx, nil } -// sanitizeLabelName reports whether the label name is valid, -// and returns the sanitized value. +// SanitizeLegacyLabelName reports whether the label name is a valid legacy label name, +// and returns the sanitized value. Legacy label names are non utf-8 and contain characters +// [a-zA-Z0-9_-]. +// // -// The only change the function makes is replacing dots with underscores. -func sanitizeLabelName(ln string) (old, sanitized string, ok bool) { +// The only sanitization the function makes is replacing dots with underscores. +func SanitizeLegacyLabelName(ln string) (old, sanitized string, ok bool) { if len(ln) == 0 { return ln, ln, false } diff --git a/pkg/validation/validate_test.go b/pkg/validation/validate_test.go index f04011c038..5baa5ab2fe 100644 --- a/pkg/validation/validate_test.go +++ b/pkg/validation/validate_test.go @@ -123,7 +123,7 @@ func TestValidateLabels(t *testing.T) { {Name: phlaremodel.LabelNameServiceName, Value: "svc"}, }, expectedReason: DuplicateLabelNames, - expectedErr: "profile with labels '{__name__=\"qux\", label.name=\"foo\", label.name=\"bar\", service_name=\"svc\"}' has duplicate label name: 'label.name'", + expectedErr: "profile with labels '{__name__=\"qux\", label.name=\"bar\", label_name=\"foo\", service_name=\"svc\"}' has duplicate label name 'label_name' after label name sanitization from 'label.name'", }, { name: "duplicates once sanitized with matching values", @@ -515,7 +515,7 @@ func TestValidateFlamegraphMaxNodes(t *testing.T) { } } -func Test_sanitizeLabelName(t *testing.T) { +func Test_SanitizeLegacyLabelName(t *testing.T) { tests := []struct { Name string LabelName string @@ -746,170 +746,10 @@ func Test_sanitizeLabelName(t *testing.T) { t.Run(tt.Name, func(t *testing.T) { t.Parallel() - gotOld, gotSanitized, gotOk := sanitizeLabelName(tt.LabelName) + gotOld, gotSanitized, gotOk := SanitizeLegacyLabelName(tt.LabelName) require.Equal(t, tt.WantOld, gotOld) require.Equal(t, tt.WantSanitized, gotSanitized) require.Equal(t, tt.WantOk, gotOk) }) } } - -func Test_SanitizeLabelNames(t *testing.T) { - tests := []struct { - Name string - LabelNames []string - Want []string - WantError bool - WantErrorType Reason - }{ - { - Name: "empty slice returns empty slice", - LabelNames: []string{}, - Want: []string{}, - }, - { - Name: "single valid label", - LabelNames: []string{"service"}, - Want: []string{"service"}, - }, - { - Name: "multiple valid labels", - LabelNames: []string{"service", "environment", "version"}, - Want: []string{"environment", "service", "version"}, // sorted - }, - { - Name: "labels with dots get sanitized", - LabelNames: []string{"service.name", "app.version"}, - Want: []string{"app_version", "service_name"}, // sorted and sanitized - }, - { - Name: "mixed valid and sanitizable labels", - LabelNames: []string{"service", "app.version", "environment"}, - Want: []string{"app_version", "environment", "service"}, // sorted - }, - { - Name: "label with invalid characters causes error", - LabelNames: []string{"service", "invalid-name"}, - WantError: true, - WantErrorType: InvalidLabels, - }, - { - Name: "label starting with number causes error", - LabelNames: []string{"service", "123invalid"}, - WantError: true, - WantErrorType: InvalidLabels, - }, - { - Name: "empty label name causes error", - LabelNames: []string{"service", ""}, - WantError: true, - WantErrorType: InvalidLabels, - }, - { - Name: "duplicate labels after sanitization - implementation allows this", - LabelNames: []string{"service_name", "service.name"}, - Want: []string{"service_name", "service_name"}, // Both become service_name but no error in current implementation - }, - { - Name: "duplicate original labels cause error", - LabelNames: []string{"service", "service"}, - WantError: true, - WantErrorType: DuplicateLabelNames, - }, - { - Name: "complex valid case with mixed sanitization", - LabelNames: []string{"z_last", "a.first", "m_middle", "b.second"}, - Want: []string{"a_first", "b_second", "m_middle", "z_last"}, // sorted and sanitized - }, - { - Name: "label with space character", - LabelNames: []string{"service name"}, - WantError: true, - WantErrorType: InvalidLabels, - }, - { - Name: "label with special characters", - LabelNames: []string{"service@host"}, - WantError: true, - WantErrorType: InvalidLabels, - }, - { - Name: "single character labels", - LabelNames: []string{"a", "Z", "x"}, - Want: []string{"Z", "a", "x"}, // sorted - }, - { - Name: "labels with underscores", - LabelNames: []string{"_private", "public_"}, - Want: []string{"_private", "public_"}, // sorted - }, - } - - for _, tt := range tests { - t.Run(tt.Name, func(t *testing.T) { - t.Parallel() - - got, err := SanitizeLabelNames(tt.LabelNames) - - if tt.WantError { - require.Error(t, err) - var validationErr *Error - require.ErrorAs(t, err, &validationErr) - require.Equal(t, tt.WantErrorType, validationErr.Reason) - return - } - - require.NoError(t, err) - require.Equal(t, tt.Want, got) - }) - } -} - -func Test_SanitizeLabelNames_ErrorMessages(t *testing.T) { - tests := []struct { - Name string - LabelNames []string - WantErrorContains string - }{ - { - Name: "invalid label error contains capability suggestion", - LabelNames: []string{"invalid-name"}, - WantErrorContains: "consider setting 'allow-utf8-labelnames=true' in the `Accept` header", - }, - { - Name: "duplicate label error contains both names", - LabelNames: []string{"service", "service"}, - WantErrorContains: "service", - }, - { - Name: "invalid character error shows label name", - LabelNames: []string{"service@host"}, - WantErrorContains: "invalid label name 'service@host'", - }, - } - - for _, tt := range tests { - t.Run(tt.Name, func(t *testing.T) { - t.Parallel() - - _, err := SanitizeLabelNames(tt.LabelNames) - require.Error(t, err) - require.Contains(t, err.Error(), tt.WantErrorContains) - }) - } -} - -func Test_SanitizeLabelNames_PreservesOriginalSlice(t *testing.T) { - // Test that the original slice is not modified - original := []string{"service.name", "app.version"} - originalCopy := make([]string, len(original)) - copy(originalCopy, original) - - result, err := SanitizeLabelNames(original) - - require.NoError(t, err) - require.Equal(t, []string{"app_version", "service_name"}, result) - // Original slice should be modified since the function sorts and modifies in place - // This is the actual behavior based on the implementation - require.NotEqual(t, originalCopy, original) -} diff --git a/public/app/components/TagsBar.tsx b/public/app/components/TagsBar.tsx index f2a4225748..decd6b51f0 100644 --- a/public/app/components/TagsBar.tsx +++ b/public/app/components/TagsBar.tsx @@ -230,7 +230,7 @@ function LabelsSubmenu({ // Identifies whether a label is in a query or not function isLabelInQuery(query: string, label: string, labelValue: string) { - return query.includes(`"${label}=${labelValue}"`); + return query.includes(`${label}="${labelValue}"`); } export default TagsBar; diff --git a/public/app/services/base.ts b/public/app/services/base.ts index 9f55695aef..07367e2c6a 100644 --- a/public/app/services/base.ts +++ b/public/app/services/base.ts @@ -32,8 +32,6 @@ export async function request( headers = { ...config?.headers, ...(tenantID && { 'X-Scope-OrgID': tenantID }), - // Specify client capabilities in `Accept` header - Accept: 'application/json; allow-utf8-labelnames=true', }; } @@ -55,8 +53,6 @@ export async function downloadWithOrgID( headers = { ...config?.headers, ...(tenantID && { 'X-Scope-OrgID': tenantID }), - // Specify client capabilities in `Accept` header - Accept: 'application/json; allow-utf8-labelnames=true', }; } diff --git a/public/app/util/query.ts b/public/app/util/query.ts index 13cf9d1d24..2e1df80698 100644 --- a/public/app/util/query.ts +++ b/public/app/util/query.ts @@ -5,13 +5,13 @@ export function appendLabelToQuery( ) { const case1Regexp = new RegExp(`${label}=.+?(\\}|,)`); if (query.match(case1Regexp)) { - return query.replace(case1Regexp, `"${label}=${labelValue}"$1`); + return query.replace(case1Regexp, `${label}="${labelValue}"$1`); } if (query.indexOf('{}') !== -1) { - return query.replace('}', `"${label}=${labelValue}"}`); + return query.replace('}', `${label}="${labelValue}"}`); } if (query.indexOf('}') !== -1) { - return query.replace('}', `, "${label}=${labelValue}"}`); + return query.replace('}', `, ${label}="${labelValue}"}`); } console.warn('TODO: handle this case'); From 12c20e7ac99d5850b0223d60abde3c7b1f6404de Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:07:15 -0400 Subject: [PATCH 07/15] Finish v2 read path filtering --- .../queryfrontend/query_series_labels.go | 78 +++++++++++-------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/pkg/frontend/readpath/queryfrontend/query_series_labels.go b/pkg/frontend/readpath/queryfrontend/query_series_labels.go index 036227f874..fa0d303f78 100644 --- a/pkg/frontend/readpath/queryfrontend/query_series_labels.go +++ b/pkg/frontend/readpath/queryfrontend/query_series_labels.go @@ -13,6 +13,50 @@ import ( "github.com/grafana/pyroscope/pkg/validation" ) +func (q *QueryFrontend) filterLabelNames( + ctx context.Context, + c *connect.Request[querierv1.SeriesRequest], + labelNames []string, + labelSelector string, +) ([]string, error) { + if capabilities, ok := featureflags.GetClientCapabilities(ctx); ok && capabilities.AllowUtf8LabelNames { + return labelNames, nil + } + + toFilter := make([]string, len(labelNames)) + copy(toFilter, labelNames) + + // Filter out label names not passing legacy validation if utf8 label names not enabled + if len(labelNames) == 0 { + // Querying for all label names; must retrieve all label names to then filter out + report, err := q.querySingle(ctx, &queryv1.QueryRequest{ + StartTime: c.Msg.Start, + EndTime: c.Msg.End, + LabelSelector: labelSelector, + Query: []*queryv1.Query{{ + QueryType: queryv1.QueryType_QUERY_LABEL_NAMES, + LabelNames: &queryv1.LabelNamesQuery{}, + }}, + }) + if err != nil { + return nil, err + } + if report != nil { + toFilter = report.LabelNames.LabelNames + } + } + + filtered := make([]string, 0, len(labelNames)) + for _, name := range toFilter { + if _, _, ok := validation.SanitizeLegacyLabelName(name); !ok { + level.Debug(q.logger).Log("msg", "filtering out label", "label_name", name) + continue + } + filtered = append(filtered, name) + } + return filtered, nil +} + func (q *QueryFrontend) Series( ctx context.Context, c *connect.Request[querierv1.SeriesRequest], @@ -39,37 +83,9 @@ func (q *QueryFrontend) Series( return nil, connect.NewError(connect.CodeInvalidArgument, err) } - labelNames := c.Msg.LabelNames - if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { - if len(labelNames) == 0 { - // Querying for all label names; must retrieve all label names to then filter out - report, err := q.querySingle(ctx, &queryv1.QueryRequest{ - StartTime: c.Msg.Start, - EndTime: c.Msg.End, - LabelSelector: labelSelector, - Query: []*queryv1.Query{{ - QueryType: queryv1.QueryType_QUERY_LABEL_NAMES, - LabelNames: &queryv1.LabelNamesQuery{}, - }}, - }) - if err != nil { - return nil, err - } - if report != nil { - labelNames = report.LabelNames.LabelNames - } - } - - // Filter out label names not passing legacy validation if utf8 label names not enabled - filteredLabelNames := make([]string, 0, len(labelNames)) - for _, labelName := range labelNames { - if _, _, ok := validation.SanitizeLegacyLabelName(labelName); !ok { - level.Debug(q.logger).Log("msg", "filtering out label", "label_name", labelName) - continue - } - filteredLabelNames = append(filteredLabelNames, labelName) - } - labelNames = filteredLabelNames + labelNames, err := q.filterLabelNames(ctx, c, c.Msg.LabelNames, labelSelector) + if err != nil { + return nil, err } report, err := q.querySingle(ctx, &queryv1.QueryRequest{ From 52d3ed3ac7a152d5590ed56e3cd10fb26039d7fb Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:07:45 -0400 Subject: [PATCH 08/15] v1 read path filtering --- pkg/querier/querier.go | 76 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 3 deletions(-) diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 2397092e71..19a6d27d80 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -17,6 +17,7 @@ import ( ring_client "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/grafana/dskit/tenant" + "github.com/grafana/pyroscope/pkg/featureflags" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" "github.com/pkg/errors" @@ -271,6 +272,18 @@ func (q *Querier) LabelValues(ctx context.Context, req *connect.Request[typesv1. }), nil } +func filterLabelNames(labelNames []string) []string { + filtered := make([]string, 0, len(labelNames)) + // Filter out label names not passing legacy validation if utf8 label names not enabled + for _, labelName := range labelNames { + if _, _, ok := validation.SanitizeLegacyLabelName(labelName); !ok { + continue + } + filtered = append(filtered, labelName) + } + return filtered +} + func (q *Querier) LabelNames(ctx context.Context, req *connect.Request[typesv1.LabelNamesRequest]) (*connect.Response[typesv1.LabelNamesResponse], error) { sp, ctx := opentracing.StartSpanFromContext(ctx, "LabelNames") defer sp.Finish() @@ -288,8 +301,15 @@ func (q *Querier) LabelNames(ctx context.Context, req *connect.Request[typesv1.L if err != nil { return nil, err } + + labelNames := uniqueSortedStrings(responses) + if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { + level.Debug(q.logger).Log("msg", "filtering out non-valid labels") + labelNames = filterLabelNames(labelNames) + } + return connect.NewResponse(&typesv1.LabelNamesResponse{ - Names: uniqueSortedStrings(responses), + Names: labelNames, }), nil } @@ -336,8 +356,14 @@ func (q *Querier) LabelNames(ctx context.Context, req *connect.Request[typesv1.L return nil, err } + labelNames := uniqueSortedStrings(responses) + if capabilities, ok := featureflags.GetClientCapabilities(ctx); !ok || !capabilities.AllowUtf8LabelNames { + level.Debug(q.logger).Log("msg", "filtering out non-valid labels") + labelNames = filterLabelNames(labelNames) + } + return connect.NewResponse(&typesv1.LabelNamesResponse{ - Names: uniqueSortedStrings(responses), + Names: labelNames, }), nil } @@ -378,6 +404,42 @@ func (q *Querier) blockSelect(ctx context.Context, start, end model.Time) (block return results.blockPlan(ctx), nil } +func (q *Querier) filterLabelNames( + ctx context.Context, + req *connect.Request[querierv1.SeriesRequest], +) ([]string, error) { + if capabilities, ok := featureflags.GetClientCapabilities(ctx); ok && capabilities.AllowUtf8LabelNames { + return req.Msg.LabelNames, nil + } + + // Filter out label names not passing legacy validation if utf8 label names not enabled + toFilter := make([]string, len(req.Msg.LabelNames)) + copy(toFilter, req.Msg.LabelNames) + filtered := make([]string, 0, len(toFilter)) + + if len(req.Msg.LabelNames) == 0 { + // Querying for all label names; must retrieve all label names to then filter out + response, err := q.LabelNames(ctx, connect.NewRequest(&typesv1.LabelNamesRequest{ + Matchers: req.Msg.Matchers, + Start: req.Msg.Start, + End: req.Msg.End, + })) + if err != nil { + return nil, err + } + toFilter = response.Msg.Names + } + + for _, name := range toFilter { + if _, _, ok := validation.SanitizeLegacyLabelName(name); !ok { + level.Debug(q.logger).Log("msg", "filtering out label", "label_name", name) + continue + } + filtered = append(filtered, name) + } + return filtered, nil +} + func (q *Querier) Series(ctx context.Context, req *connect.Request[querierv1.SeriesRequest]) (*connect.Response[querierv1.SeriesResponse], error) { sp, ctx := opentracing.StartSpanFromContext(ctx, "Series") defer sp.Finish() @@ -390,6 +452,14 @@ func (q *Querier) Series(ctx context.Context, req *connect.Request[querierv1.Ser otlog.Int64("start", req.Msg.Start), otlog.Int64("end", req.Msg.End), ) + + // Update LabelNames + filteredLabelNames, err := q.filterLabelNames(ctx, req) + if err != nil { + return nil, err + } + req.Msg.LabelNames = filteredLabelNames + // no store gateways configured so just query the ingesters if q.storeGatewayQuerier == nil || !hasTimeRange { responses, err := q.seriesFromIngesters(ctx, &ingestv1.SeriesRequest{ @@ -451,7 +521,7 @@ func (q *Querier) Series(ctx context.Context, req *connect.Request[querierv1.Ser }) } - err := group.Wait() + err = group.Wait() if err != nil { return nil, err } From cb7c9b23870209967b2dbd143aa80d3c54f37448 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Mon, 6 Oct 2025 15:38:12 -0400 Subject: [PATCH 09/15] Add tests --- .../queryfrontend/query_frontend_test.go | 253 ++++++++++++++ pkg/querier/querier_test.go | 322 ++++++++++++++++++ 2 files changed, 575 insertions(+) diff --git a/pkg/frontend/readpath/queryfrontend/query_frontend_test.go b/pkg/frontend/readpath/queryfrontend/query_frontend_test.go index b6ee685f90..c88f641b1b 100644 --- a/pkg/frontend/readpath/queryfrontend/query_frontend_test.go +++ b/pkg/frontend/readpath/queryfrontend/query_frontend_test.go @@ -4,7 +4,9 @@ import ( "context" "fmt" "testing" + "time" + "connectrpc.com/connect" "github.com/go-kit/log" "github.com/grafana/dskit/user" "github.com/stretchr/testify/assert" @@ -13,8 +15,11 @@ import ( profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" + querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" + typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" "github.com/grafana/pyroscope/pkg/block/metadata" + "github.com/grafana/pyroscope/pkg/featureflags" "github.com/grafana/pyroscope/pkg/tenant" "github.com/grafana/pyroscope/pkg/test/mocks/mockfrontend" "github.com/grafana/pyroscope/pkg/test/mocks/mockmetastorev1" @@ -224,3 +229,251 @@ func createProfile(t *testing.T) []byte { require.NoError(t, err) return bytes } + +func Test_QueryFrontend_LabelNames_WithFiltering(t *testing.T) { + tests := []struct { + name string + allowUtf8LabelNames bool + setCapabilities bool + backendLabelNames []string + expectedLabelNames []string + }{ + { + name: "UTF8 labels allowed when enabled", + allowUtf8LabelNames: true, + setCapabilities: true, + backendLabelNames: []string{"foo", "bar", "世界"}, + expectedLabelNames: []string{"foo", "bar", "世界"}, + }, + { + name: "UTF8 labels filtered when disabled", + allowUtf8LabelNames: false, + setCapabilities: true, + backendLabelNames: []string{"foo", "bar", "世界"}, + expectedLabelNames: []string{"foo", "bar"}, + }, + { + name: "invalid labels pass through when UTF8 enabled", + allowUtf8LabelNames: true, + setCapabilities: true, + backendLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + expectedLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + }, + { + name: "invalid labels filtered when UTF8 disabled", + allowUtf8LabelNames: false, + setCapabilities: true, + backendLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + expectedLabelNames: []string{"valid_name"}, + }, + { + name: "filtering enabled when no capabilities set", + setCapabilities: false, + backendLabelNames: []string{"valid_name", "123invalid", "世界"}, + expectedLabelNames: []string{"valid_name"}, + }, + { + name: "labels with dots pass through", + allowUtf8LabelNames: false, + setCapabilities: true, + backendLabelNames: []string{"service.name", "app.version"}, + expectedLabelNames: []string{"service.name", "app.version"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + mockQueryBackend := mockqueryfrontend.NewMockQueryBackend(t) + mockQueryBackend.On("Invoke", mock.Anything, mock.Anything).Return(&queryv1.InvokeResponse{ + Reports: []*queryv1.Report{ + { + ReportType: queryv1.ReportType_REPORT_LABEL_NAMES, + LabelNames: &queryv1.LabelNamesReport{ + LabelNames: tc.backendLabelNames, + }, + }, + }, + }, nil) + + mockLimits := mockfrontend.NewMockLimits(t) + mockLimits.On("MaxQueryLookback", "test-tenant").Return(time.Duration(0)) + mockLimits.On("MaxQueryLength", "test-tenant").Return(time.Duration(0)) + mockLimits.On("QuerySanitizeOnMerge", "test-tenant").Return(true) + mockMetadataClient := new(mockmetastorev1.MockMetadataQueryServiceClient) + mockMetadataClient.On("QueryMetadata", mock.Anything, mock.Anything).Return(&metastorev1.QueryMetadataResponse{ + Blocks: []*metastorev1.BlockMeta{{Id: "test-block"}}, + }, nil) + + qf := NewQueryFrontend( + log.NewNopLogger(), + mockLimits, + mockMetadataClient, + nil, + mockQueryBackend, + nil, + ) + + ctx := tenant.InjectTenantID(context.Background(), "test-tenant") + if tc.setCapabilities { + ctx = featureflags.WithClientCapabilities(ctx, featureflags.ClientCapabilities{ + AllowUtf8LabelNames: tc.allowUtf8LabelNames, + }) + } + + req := connect.NewRequest(&typesv1.LabelNamesRequest{ + Start: 1000, + End: 2000, + }) + + resp, err := qf.LabelNames(ctx, req) + require.NoError(t, err) + require.Equal(t, tc.expectedLabelNames, resp.Msg.Names) + }) + } +} + +func Test_QueryFrontend_Series_WithLabelNameFiltering(t *testing.T) { + tests := []struct { + name string + allowUtf8LabelNames bool + setCapabilities bool + requestLabelNames []string + backendLabelNames []string // For empty request case + expectedQueryRequest []string // What should be passed to backend + }{ + { + name: "all label names pass through when UTF8 enabled", + allowUtf8LabelNames: true, + setCapabilities: true, + requestLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + expectedQueryRequest: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + }, + { + name: "invalid label names filtered when UTF8 disabled", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + expectedQueryRequest: []string{"valid_name"}, + }, + { + name: "UTF8 labels filtered when UTF8 disabled", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"foo", "bar", "世界", "日本語"}, + expectedQueryRequest: []string{"foo", "bar"}, + }, + { + name: "filtering enabled when no capabilities set", + setCapabilities: false, + requestLabelNames: []string{"foo", "123invalid", "世界"}, + expectedQueryRequest: []string{"foo"}, + }, + { + name: "all valid labels pass through", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"foo", "bar", "service_name"}, + expectedQueryRequest: []string{"foo", "bar", "service_name"}, + }, + { + name: "labels with dots pass through", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"service.name", "app.version"}, + expectedQueryRequest: []string{"service.name", "app.version"}, + }, + { + name: "empty label names with UTF8 disabled queries and filters all labels", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{}, + backendLabelNames: []string{"foo", "bar", "世界"}, + expectedQueryRequest: []string{"foo", "bar"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + var capturedLabelNames []string + + mockQueryBackend := mockqueryfrontend.NewMockQueryBackend(t) + + // For empty label names case, we need to mock the LabelNames query first + if len(tc.requestLabelNames) == 0 { + mockQueryBackend.On("Invoke", mock.Anything, mock.MatchedBy(func(req *queryv1.InvokeRequest) bool { + return len(req.Query) > 0 && req.Query[0].QueryType == queryv1.QueryType_QUERY_LABEL_NAMES + })).Return(&queryv1.InvokeResponse{ + Reports: []*queryv1.Report{ + { + ReportType: queryv1.ReportType_REPORT_LABEL_NAMES, + LabelNames: &queryv1.LabelNamesReport{ + LabelNames: tc.backendLabelNames, + }, + }, + }, + }, nil).Once() + } + + // Mock the Series query specifically + mockQueryBackend.On("Invoke", mock.Anything, mock.MatchedBy(func(req *queryv1.InvokeRequest) bool { + return len(req.Query) > 0 && req.Query[0].QueryType == queryv1.QueryType_QUERY_SERIES_LABELS + })).Run(func(args mock.Arguments) { + invReq := args.Get(1).(*queryv1.InvokeRequest) + if len(invReq.Query) > 0 && invReq.Query[0].SeriesLabels != nil { + capturedLabelNames = invReq.Query[0].SeriesLabels.LabelNames + if capturedLabelNames == nil { + capturedLabelNames = []string{} + } + } + }).Return(&queryv1.InvokeResponse{ + Reports: []*queryv1.Report{ + { + ReportType: queryv1.ReportType_REPORT_SERIES_LABELS, + SeriesLabels: &queryv1.SeriesLabelsReport{ + SeriesLabels: []*typesv1.Labels{}, + }, + }, + }, + }, nil).Once() + + mockLimits := mockfrontend.NewMockLimits(t) + mockLimits.On("MaxQueryLookback", "test-tenant").Return(time.Duration(0)) + mockLimits.On("MaxQueryLength", "test-tenant").Return(time.Duration(0)) + mockLimits.On("QuerySanitizeOnMerge", "test-tenant").Return(true) + mockMetadataClient := new(mockmetastorev1.MockMetadataQueryServiceClient) + mockMetadataClient.On("QueryMetadata", mock.Anything, mock.Anything).Return(&metastorev1.QueryMetadataResponse{ + Blocks: []*metastorev1.BlockMeta{{Id: "test-block"}}, + }, nil) + + qf := NewQueryFrontend( + log.NewNopLogger(), + mockLimits, + mockMetadataClient, + nil, + mockQueryBackend, + nil, + ) + + ctx := tenant.InjectTenantID(context.Background(), "test-tenant") + if tc.setCapabilities { + ctx = featureflags.WithClientCapabilities(ctx, featureflags.ClientCapabilities{ + AllowUtf8LabelNames: tc.allowUtf8LabelNames, + }) + } + + req := connect.NewRequest(&querierv1.SeriesRequest{ + Matchers: []string{`{service_name="test"}`}, + LabelNames: tc.requestLabelNames, + Start: 1000, + End: 2000, + }) + + _, err := qf.Series(ctx, req) + require.NoError(t, err) + + // Verify that the label names were filtered correctly before being sent to backend + require.Equal(t, tc.expectedQueryRequest, capturedLabelNames, + "Expected label names sent to backend to be %v, but got %v", tc.expectedQueryRequest, capturedLabelNames) + }) + } +} diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index dd6c66ef27..5c4f98fa39 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -31,6 +31,7 @@ import ( querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" "github.com/grafana/pyroscope/pkg/clientpool" + "github.com/grafana/pyroscope/pkg/featureflags" phlaremodel "github.com/grafana/pyroscope/pkg/model" objstoreclient "github.com/grafana/pyroscope/pkg/objstore/client" "github.com/grafana/pyroscope/pkg/objstore/providers/filesystem" @@ -170,6 +171,206 @@ func Test_QueryLabelNames(t *testing.T) { require.Equal(t, []string{"bar", "buzz", "foo"}, out.Msg.Names) } +func Test_filterLabelNames(t *testing.T) { + tests := []struct { + name string + labelNames []string + want []string + }{ + { + name: "empty slice", + labelNames: []string{}, + want: []string{}, + }, + { + name: "nil slice", + labelNames: nil, + want: []string{}, + }, + { + name: "all valid label names", + labelNames: []string{"foo", "bar", "service_name", "ServiceName123"}, + want: []string{"foo", "bar", "service_name", "ServiceName123"}, + }, + { + name: "all invalid label names", + labelNames: []string{"123service", "service-name", "service name", "世界"}, + want: []string{}, + }, + { + name: "mix of valid and invalid label names", + labelNames: []string{"foo", "123invalid", "bar", "invalid-name", "buzz", "世界"}, + want: []string{"foo", "bar", "buzz"}, + }, + { + name: "label names with dots (valid but get sanitized)", + labelNames: []string{"service.name", "app.version", "valid_label"}, + want: []string{"service.name", "app.version", "valid_label"}, + }, + { + name: "single valid label", + labelNames: []string{"service"}, + want: []string{"service"}, + }, + { + name: "single invalid label", + labelNames: []string{"123invalid"}, + want: []string{}, + }, + { + name: "labels with underscores", + labelNames: []string{"_", "__a", "__a__", "_service_name_"}, + want: []string{"_", "__a", "__a__", "_service_name_"}, + }, + { + name: "mixed valid labels with dots and underscores", + labelNames: []string{"a.b.c", "a_b_c", "a.b_c.d"}, + want: []string{"a.b.c", "a_b_c", "a.b_c.d"}, + }, + { + name: "labels starting with dots (valid, dots are sanitized)", + labelNames: []string{".abc", "..def", ".g.h.i"}, + want: []string{".abc", "..def", ".g.h.i"}, + }, + { + name: "labels starting with invalid characters", + labelNames: []string{"-abc", "0abc", "123xyz"}, + want: []string{}, + }, + { + name: "empty string in slice (invalid)", + labelNames: []string{"foo", "", "bar"}, + want: []string{"foo", "bar"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := filterLabelNames(tt.labelNames) + require.Equal(t, tt.want, got) + }) + } +} + +func Test_QueryLabelNames_WithFiltering(t *testing.T) { + req := connect.NewRequest(&typesv1.LabelNamesRequest{}) + + tests := []struct { + name string + allowUtf8LabelNames bool + setCapabilities bool + ingesterLabelNames map[string][]string + expectedLabelNames []string + }{ + { + name: "UTF8 labels allowed when enabled", + allowUtf8LabelNames: true, + setCapabilities: true, + ingesterLabelNames: map[string][]string{ + "1": {"foo", "bar", "世界"}, + "2": {"foo", "bar", "世界"}, + "3": {"foo", "bar", "世界"}, + }, + // UTF8 labels pass through when UTF8 is enabled + expectedLabelNames: []string{"bar", "foo", "世界"}, + }, + { + name: "UTF8 labels filtered when disabled", + allowUtf8LabelNames: false, + setCapabilities: true, + ingesterLabelNames: map[string][]string{ + "1": {"foo", "bar", "世界"}, + "2": {"foo", "bar", "世界"}, + "3": {"foo", "bar", "世界"}, + }, + // Only legacy-valid labels pass through (UTF8 filtered out) + expectedLabelNames: []string{"bar", "foo"}, + }, + { + name: "all labels pass through when UTF8 enabled including invalid ones", + allowUtf8LabelNames: true, + setCapabilities: true, + ingesterLabelNames: map[string][]string{ + "1": {"foo", "123invalid"}, + "2": {"foo", "123invalid"}, + "3": {"foo", "123invalid"}, + }, + // When UTF8 is enabled, NO filtering happens - even invalid labels pass through + expectedLabelNames: []string{"123invalid", "foo"}, + }, + { + name: "filtering enabled when no capabilities set", + setCapabilities: false, + ingesterLabelNames: map[string][]string{ + "1": {"valid_name", "123invalid", "世界"}, + "2": {"valid_name", "123invalid", "世界"}, + "3": {"valid_name", "123invalid", "世界"}, + }, + // No capabilities means legacy-only mode - UTF8 and invalid labels filtered + expectedLabelNames: []string{"valid_name"}, + }, + { + name: "all valid legacy labels pass through when filtering enabled", + allowUtf8LabelNames: false, + setCapabilities: true, + ingesterLabelNames: map[string][]string{ + "1": {"foo", "bar"}, + "2": {"bar", "buzz"}, + "3": {"buzz", "foo"}, + }, + expectedLabelNames: []string{"bar", "buzz", "foo"}, + }, + { + name: "labels with dots pass through in both modes", + allowUtf8LabelNames: false, + setCapabilities: true, + ingesterLabelNames: map[string][]string{ + "1": {"service.name", "app.version"}, + "2": {"host.name", "service.name"}, + "3": {"app.version", "host.name"}, + }, + // Dots are valid legacy characters (get sanitized to underscores) + expectedLabelNames: []string{"app.version", "host.name", "service.name"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + querier, err := New(&NewQuerierParams{ + Cfg: Config{ + PoolConfig: clientpool.PoolConfig{ClientCleanupPeriod: 1 * time.Millisecond}, + }, + IngestersRing: testhelper.NewMockRing([]ring.InstanceDesc{ + {Addr: "1"}, + {Addr: "2"}, + {Addr: "3"}, + }, 3), + PoolFactory: &poolFactory{f: func(addr string) (client.PoolClient, error) { + q := newFakeQuerier() + if labelNames, ok := tc.ingesterLabelNames[addr]; ok { + q.On("LabelNames", mock.Anything, mock.Anything).Return( + connect.NewResponse(&typesv1.LabelNamesResponse{Names: labelNames}), nil) + } + return q, nil + }}, + Logger: log.NewLogfmtLogger(os.Stdout), + }) + require.NoError(t, err) + + ctx := context.Background() + if tc.setCapabilities { + ctx = featureflags.WithClientCapabilities(ctx, featureflags.ClientCapabilities{ + AllowUtf8LabelNames: tc.allowUtf8LabelNames, + }) + } + + out, err := querier.LabelNames(ctx, req) + require.NoError(t, err) + require.Equal(t, tc.expectedLabelNames, out.Msg.Names) + }) + } +} + func Test_Series(t *testing.T) { foobarlabels := phlaremodel.NewLabelsBuilder(nil).Set("foo", "bar") foobuzzlabels := phlaremodel.NewLabelsBuilder(nil).Set("foo", "buzz") @@ -191,10 +392,13 @@ func Test_Series(t *testing.T) { q := newFakeQuerier() switch addr { case "1": + q.On("LabelNames", mock.Anything, mock.Anything).Return(connect.NewResponse(&typesv1.LabelNamesResponse{Names: []string{"foo", "bar"}}), nil) q.On("Series", mock.Anything, mock.Anything).Return(ingesterResponse, nil) case "2": + q.On("LabelNames", mock.Anything, mock.Anything).Return(connect.NewResponse(&typesv1.LabelNamesResponse{Names: []string{"foo", "bar"}}), nil) q.On("Series", mock.Anything, mock.Anything).Return(ingesterResponse, nil) case "3": + q.On("LabelNames", mock.Anything, mock.Anything).Return(connect.NewResponse(&typesv1.LabelNamesResponse{Names: []string{"foo", "bar"}}), nil) q.On("Series", mock.Anything, mock.Anything).Return(ingesterResponse, nil) } return q, nil @@ -211,6 +415,124 @@ func Test_Series(t *testing.T) { }, out.Msg.LabelsSet) } +func Test_Series_WithLabelNameFiltering(t *testing.T) { + tests := []struct { + name string + allowUtf8LabelNames bool + setCapabilities bool + requestLabelNames []string + expectedLabelNames []string + }{ + { + name: "all label names pass through when UTF8 enabled", + allowUtf8LabelNames: true, + setCapabilities: true, + requestLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + expectedLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + }, + { + name: "invalid label names filtered when UTF8 disabled", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"valid_name", "123invalid", "invalid-hyphen", "世界"}, + expectedLabelNames: []string{"valid_name"}, + }, + { + name: "UTF8 labels filtered when UTF8 disabled", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"foo", "bar", "世界", "日本語"}, + expectedLabelNames: []string{"foo", "bar"}, + }, + { + name: "filtering enabled when no capabilities set", + setCapabilities: false, + requestLabelNames: []string{"foo", "123invalid", "世界"}, + expectedLabelNames: []string{"foo"}, + }, + { + name: "all valid labels pass through", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"foo", "bar", "service_name"}, + expectedLabelNames: []string{"foo", "bar", "service_name"}, + }, + { + name: "labels with dots pass through", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{"service.name", "app.version"}, + expectedLabelNames: []string{"service.name", "app.version"}, + }, + { + name: "empty label names with UTF8 disabled queries all labels and filters", + allowUtf8LabelNames: false, + setCapabilities: true, + requestLabelNames: []string{}, + expectedLabelNames: []string{"bar", "foo"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + foobarlabels := phlaremodel.NewLabelsBuilder(nil).Set("foo", "bar") + + var capturedLabelNames []string + + ingesterResponse := connect.NewResponse(&ingestv1.SeriesResponse{LabelsSet: []*typesv1.Labels{ + {Labels: foobarlabels.Labels()}, + }}) + + querier, err := New(&NewQuerierParams{ + Cfg: Config{ + PoolConfig: clientpool.PoolConfig{ClientCleanupPeriod: 1 * time.Millisecond}, + }, + IngestersRing: testhelper.NewMockRing([]ring.InstanceDesc{ + {Addr: "1"}, + {Addr: "2"}, + {Addr: "3"}, + }, 3), + PoolFactory: &poolFactory{f: func(addr string) (client.PoolClient, error) { + q := newFakeQuerier() + + q.On("Series", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { + req := args.Get(1).(*connect.Request[ingestv1.SeriesRequest]) + capturedLabelNames = req.Msg.LabelNames + }).Return(ingesterResponse, nil) + + if len(tc.requestLabelNames) == 0 { + q.On("LabelNames", mock.Anything, mock.Anything).Return( + connect.NewResponse(&typesv1.LabelNamesResponse{Names: []string{"foo", "bar", "世界"}}), nil) + } + + return q, nil + }}, + Logger: log.NewLogfmtLogger(os.Stdout), + }) + require.NoError(t, err) + + ctx := context.Background() + if tc.setCapabilities { + ctx = featureflags.WithClientCapabilities(ctx, featureflags.ClientCapabilities{ + AllowUtf8LabelNames: tc.allowUtf8LabelNames, + }) + } + + req := connect.NewRequest(&querierv1.SeriesRequest{ + Matchers: []string{`{foo="bar"}`}, + LabelNames: tc.requestLabelNames, + }) + + _, err = querier.Series(ctx, req) + require.NoError(t, err) + + // Verify that the label names were filtered correctly before being sent to ingester + require.Equal(t, tc.expectedLabelNames, capturedLabelNames, + "Expected label names sent to ingester to be %v, but got %v", tc.expectedLabelNames, capturedLabelNames) + }) + } +} + func newBlockMeta(ulids ...string) *connect.Response[ingestv1.BlockMetadataResponse] { resp := &ingestv1.BlockMetadataResponse{} From 705b47ef9ed310a7d8cf9c1d7e76810233d677aa Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:18:39 -0400 Subject: [PATCH 10/15] fmt/lint --- pkg/querier/querier.go | 3 ++- pkg/validation/validate.go | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 19a6d27d80..cebf26a5f7 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -17,7 +17,6 @@ import ( ring_client "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/grafana/dskit/tenant" - "github.com/grafana/pyroscope/pkg/featureflags" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" "github.com/pkg/errors" @@ -28,6 +27,8 @@ import ( "github.com/samber/lo" "golang.org/x/sync/errgroup" + "github.com/grafana/pyroscope/pkg/featureflags" + googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" ingestv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1" querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" diff --git a/pkg/validation/validate.go b/pkg/validation/validate.go index 0984ea76d1..b47f16032e 100644 --- a/pkg/validation/validate.go +++ b/pkg/validation/validate.go @@ -215,7 +215,6 @@ func handleSanitizedLabel(ls []*typesv1.LabelPair, origIdx int, origName, newNam // and returns the sanitized value. Legacy label names are non utf-8 and contain characters // [a-zA-Z0-9_-]. // -// // The only sanitization the function makes is replacing dots with underscores. func SanitizeLegacyLabelName(ln string) (old, sanitized string, ok bool) { if len(ln) == 0 { From 84bc1b2e657bd3b0365f90bfa63c72a14dcca4a5 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:28:23 -0400 Subject: [PATCH 11/15] Fix comment on legacy label name valid chars --- pkg/validation/validate.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/validation/validate.go b/pkg/validation/validate.go index b47f16032e..240c921088 100644 --- a/pkg/validation/validate.go +++ b/pkg/validation/validate.go @@ -213,7 +213,7 @@ func handleSanitizedLabel(ls []*typesv1.LabelPair, origIdx int, origName, newNam // SanitizeLegacyLabelName reports whether the label name is a valid legacy label name, // and returns the sanitized value. Legacy label names are non utf-8 and contain characters -// [a-zA-Z0-9_-]. +// [a-zA-Z0-9_.]. // // The only sanitization the function makes is replacing dots with underscores. func SanitizeLegacyLabelName(ln string) (old, sanitized string, ok bool) { From b9d1ec264fd3dc5d5c068701e5c166bca85a29e0 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:45:55 -0400 Subject: [PATCH 12/15] Fix race condition in test --- pkg/querier/querier_test.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index 5c4f98fa39..e43ea9e439 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -9,6 +9,7 @@ import ( "os" "path" "sort" + "sync" "testing" "time" @@ -478,6 +479,7 @@ func Test_Series_WithLabelNameFiltering(t *testing.T) { foobarlabels := phlaremodel.NewLabelsBuilder(nil).Set("foo", "bar") var capturedLabelNames []string + var capturedMutex sync.Mutex ingesterResponse := connect.NewResponse(&ingestv1.SeriesResponse{LabelsSet: []*typesv1.Labels{ {Labels: foobarlabels.Labels()}, @@ -497,6 +499,8 @@ func Test_Series_WithLabelNameFiltering(t *testing.T) { q.On("Series", mock.Anything, mock.Anything).Run(func(args mock.Arguments) { req := args.Get(1).(*connect.Request[ingestv1.SeriesRequest]) + capturedMutex.Lock() + defer capturedMutex.Unlock() capturedLabelNames = req.Msg.LabelNames }).Return(ingesterResponse, nil) @@ -527,8 +531,11 @@ func Test_Series_WithLabelNameFiltering(t *testing.T) { require.NoError(t, err) // Verify that the label names were filtered correctly before being sent to ingester - require.Equal(t, tc.expectedLabelNames, capturedLabelNames, - "Expected label names sent to ingester to be %v, but got %v", tc.expectedLabelNames, capturedLabelNames) + capturedMutex.Lock() + actualLabelNames := capturedLabelNames + capturedMutex.Unlock() + require.Equal(t, tc.expectedLabelNames, actualLabelNames, + "Expected label names sent to ingester to be %v, but got %v", tc.expectedLabelNames, actualLabelNames) }) } } From 9b0d790c94f953560d0e60ee47ae43f81143a2dc Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Wed, 8 Oct 2025 09:43:07 -0400 Subject: [PATCH 13/15] Use `LabelNames` instead of backdoor query --- .../queryfrontend/query_series_labels.go | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/pkg/frontend/readpath/queryfrontend/query_series_labels.go b/pkg/frontend/readpath/queryfrontend/query_series_labels.go index fa0d303f78..0a0dfdfd08 100644 --- a/pkg/frontend/readpath/queryfrontend/query_series_labels.go +++ b/pkg/frontend/readpath/queryfrontend/query_series_labels.go @@ -6,6 +6,7 @@ import ( "connectrpc.com/connect" "github.com/go-kit/log/level" "github.com/grafana/dskit/tenant" + typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1" @@ -17,7 +18,7 @@ func (q *QueryFrontend) filterLabelNames( ctx context.Context, c *connect.Request[querierv1.SeriesRequest], labelNames []string, - labelSelector string, + matchers []string, ) ([]string, error) { if capabilities, ok := featureflags.GetClientCapabilities(ctx); ok && capabilities.AllowUtf8LabelNames { return labelNames, nil @@ -29,20 +30,17 @@ func (q *QueryFrontend) filterLabelNames( // Filter out label names not passing legacy validation if utf8 label names not enabled if len(labelNames) == 0 { // Querying for all label names; must retrieve all label names to then filter out - report, err := q.querySingle(ctx, &queryv1.QueryRequest{ - StartTime: c.Msg.Start, - EndTime: c.Msg.End, - LabelSelector: labelSelector, - Query: []*queryv1.Query{{ - QueryType: queryv1.QueryType_QUERY_LABEL_NAMES, - LabelNames: &queryv1.LabelNamesQuery{}, - }}, - }) + response, err := q.LabelNames(ctx, connect.NewRequest(&typesv1.LabelNamesRequest{ + Start: c.Msg.Start, + End: c.Msg.End, + Matchers: matchers, + })) + if err != nil { return nil, err } - if report != nil { - toFilter = report.LabelNames.LabelNames + if response != nil { + toFilter = response.Msg.Names } } @@ -83,7 +81,7 @@ func (q *QueryFrontend) Series( return nil, connect.NewError(connect.CodeInvalidArgument, err) } - labelNames, err := q.filterLabelNames(ctx, c, c.Msg.LabelNames, labelSelector) + labelNames, err := q.filterLabelNames(ctx, c, c.Msg.LabelNames, c.Msg.Matchers) if err != nil { return nil, err } From f41512ffd4ea25519a79921084f22786a62b75a7 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Wed, 8 Oct 2025 11:02:21 -0400 Subject: [PATCH 14/15] Update querier label name filtering test --- pkg/querier/querier_test.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index e43ea9e439..a3c563193d 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -472,19 +472,21 @@ func Test_Series_WithLabelNameFiltering(t *testing.T) { requestLabelNames: []string{}, expectedLabelNames: []string{"bar", "foo"}, }, + { + name: "empty label names with UTF8 enabled returns ingester labels without filtering", + allowUtf8LabelNames: true, + setCapabilities: true, + requestLabelNames: []string{}, + expectedLabelNames: []string{"日本語", "bar"}, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - foobarlabels := phlaremodel.NewLabelsBuilder(nil).Set("foo", "bar") - + ingesterLabels := []string{"日本語", "bar"} var capturedLabelNames []string var capturedMutex sync.Mutex - ingesterResponse := connect.NewResponse(&ingestv1.SeriesResponse{LabelsSet: []*typesv1.Labels{ - {Labels: foobarlabels.Labels()}, - }}) - querier, err := New(&NewQuerierParams{ Cfg: Config{ PoolConfig: clientpool.PoolConfig{ClientCleanupPeriod: 1 * time.Millisecond}, @@ -501,8 +503,15 @@ func Test_Series_WithLabelNameFiltering(t *testing.T) { req := args.Get(1).(*connect.Request[ingestv1.SeriesRequest]) capturedMutex.Lock() defer capturedMutex.Unlock() - capturedLabelNames = req.Msg.LabelNames - }).Return(ingesterResponse, nil) + + // If no labels passed to ingester series request, + // ingester returns all ingester labels + if len(req.Msg.LabelNames) == 0 { + capturedLabelNames = ingesterLabels + } else { + capturedLabelNames = req.Msg.LabelNames + } + }).Return(connect.NewResponse(&ingestv1.SeriesResponse{}), nil) if len(tc.requestLabelNames) == 0 { q.On("LabelNames", mock.Anything, mock.Anything).Return( From 55810ebf14c5ded7f0f3fd05017d2088c6c3ebf5 Mon Sep 17 00:00:00 2001 From: Jake Kramer <899428+jake-kramer@users.noreply.github.com> Date: Wed, 8 Oct 2025 11:03:24 -0400 Subject: [PATCH 15/15] fmt --- pkg/frontend/readpath/queryfrontend/query_series_labels.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/frontend/readpath/queryfrontend/query_series_labels.go b/pkg/frontend/readpath/queryfrontend/query_series_labels.go index 0a0dfdfd08..763ec92fe3 100644 --- a/pkg/frontend/readpath/queryfrontend/query_series_labels.go +++ b/pkg/frontend/readpath/queryfrontend/query_series_labels.go @@ -6,6 +6,7 @@ import ( "connectrpc.com/connect" "github.com/go-kit/log/level" "github.com/grafana/dskit/tenant" + typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1"