From 59144c618defd95dd8bc65c875b1cebd5ef5b255 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Wed, 11 Jan 2023 12:10:32 -0500 Subject: [PATCH] Prep for v0.30.2 (#2723) * prometheus.relabel: clone labels before relabeling (#2701) This commit clones the label set before applying relabels. Not cloning does two things: 1. It forces the computed ID of the incoming series to change (as its labels changed) 2. It can cause obscure bugs with relabel rules being applied, such as a `keep` action which doesn't work after modifying the original slice. * component/common/loki: drop unqueued logs after 5 seconds on shutdown (#2721) Fix an issue where being unable to send logs to `loki.write` due to the client being permanently backlogged would deadlock the Flow controller. The `loki.write` client may be permanently backlogged when: * Limits are reached when sending logs to Loki, leading to endless request retries. * Loki has an extended outage. When an EntryHandler is stopped, it will wait for 5 seconds before forcibly stopping the goroutine which queues log entries. If this timeout is reached, any unqueued log entries are permanently lost, as the positions file will likely be updated past the point where the entry was read. While losing logs is not ideal, it's unacceptable for any Flow component to be able to block the controller. This is a short-term solution to allow the Flow controller to continue working properly. A long term solution would be to use a Write-Ahead Log (WAL) for log entries. See grafana/loki#7993. Fixes #2716. Related to grafana/loki#2361. * prepare for v0.30.2 release * address review feedback * operator: Use enableHttp2 field as boolean in libsonnet templates (#2724) Signed-off-by: Paschalis Tsilias Signed-off-by: Paschalis Tsilias Co-authored-by: Paschalis Tsilias --- CHANGELOG.md | 18 ++++++- component/common/loki/types.go | 54 +++++++++++++++++-- component/prometheus/relabel/relabel.go | 4 +- .../integrations/node-exporter-config.md | 4 +- .../integrations/process-exporter-config.md | 4 +- .../operator/custom-resource-quickstart.md | 2 +- docs/sources/operator/getting-started.md | 2 +- docs/sources/set-up/install-agent-docker.md | 2 +- .../component/metrics/pod_monitor.libsonnet | 2 +- .../metrics/service_monitor.libsonnet | 2 +- pkg/operator/defaults.go | 1 + production/grafanacloud-install.sh | 2 +- production/kubernetes/agent-bare.yaml | 2 +- production/kubernetes/agent-loki.yaml | 2 +- production/kubernetes/agent-traces.yaml | 2 +- .../kubernetes/build/lib/version.libsonnet | 2 +- .../build/templates/operator/main.jsonnet | 4 +- production/kubernetes/install-bare.sh | 2 +- .../operator/templates/agent-operator.yaml | 4 +- .../tanka/grafana-agent/v1/main.libsonnet | 4 +- .../grafana-agent/v2/internal/base.libsonnet | 4 +- .../v2/internal/syncer.libsonnet | 2 +- 22 files changed, 94 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a12e8f37dd7..94310e648120 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,22 @@ This document contains a historical list of changes between releases. Only changes that impact end-user behavior are listed; changes to documentation or internal API changes are not present. -Main (unreleased) ------------------ +v0.30.2 (2023-01-11) +-------------------- + +### Bugfixes + +- Flow: `prometheus.relabel` will no longer modify the labels of the original + metrics, which could lead to the incorrect application of relabel rules on + subsequent relabels. (@rfratto) + +- Flow: `loki.source.file` will no longer deadlock other components if log + lines cannot be sent to Loki. `loki.source.file` will wait for 5 seconds per + file to finish flushing read logs to the client, after which it will drop + them, resulting in lost logs. (@rfratto) + +- Operator: Fix the handling of the enableHttp2 field as a boolean in + `pod_monitor` and `service_monitor` templates. (@tpaschalis) v0.30.1 (2022-12-23) -------------------- diff --git a/component/common/loki/types.go b/component/common/loki/types.go index 5b287756a133..f78333e1eab2 100644 --- a/component/common/loki/types.go +++ b/component/common/loki/types.go @@ -5,7 +5,9 @@ package loki // to relabeling, stages and finally batched in a client to be written to Loki. import ( + "context" "sync" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -13,6 +15,14 @@ import ( "github.com/grafana/loki/pkg/logproto" ) +// finalEntryTimeout is how long NewEntryMutatorHandler will wait before giving +// up on sending the final log entry. If this timeout is reached, the final log +// entry is permanently lost. +// +// This timeout can only be reached if the loki.write client is backlogged due +// to an outage or erroring (such as limits being hit). +const finalEntryTimeout = 5 * time.Second + // LogsReceiver is an alias for chan Entry which will be used for component // communication type LogsReceiver chan Entry @@ -77,17 +87,53 @@ func NewEntryHandler(entries chan<- Entry, stop func()) EntryHandler { // NewEntryMutatorHandler creates a EntryHandler that mutates incoming entries from another EntryHandler. func NewEntryMutatorHandler(next EntryHandler, f EntryMutatorFunc) EntryHandler { - in, wg, once := make(chan Entry), sync.WaitGroup{}, sync.Once{} - nextChan := next.Chan() + var ( + ctx, cancel = context.WithCancel(context.Background()) + + in = make(chan Entry) + nextChan = next.Chan() + ) + + var wg sync.WaitGroup wg.Add(1) + go func() { defer wg.Done() + defer cancel() + for e := range in { - nextChan <- f(e) + select { + case <-ctx.Done(): + // This is a hard stop to the reading goroutine. Anything not forwarded + // to nextChan at this point will probably be permanently lost, since + // the positions file has likely already updated to a byte offset past + // the read entry. + // + // TODO(rfratto): revisit whether this logic is necessary after we have + // a WAL for logs. + return + case nextChan <- f(e): + // no-op; log entry has been queued for sending. + } } }() + + var closeOnce sync.Once return NewEntryHandler(in, func() { - once.Do(func() { close(in) }) + closeOnce.Do(func() { + close(in) + + select { + case <-ctx.Done(): + // The goroutine above exited on its own so we don't have to wait for + // the timeout. + case <-time.After(finalEntryTimeout): + // We reached the timeout for sending the final entry to nextChan; + // request a hard stop from the reading goroutine. + cancel() + } + }) + wg.Wait() }) } diff --git a/component/prometheus/relabel/relabel.go b/component/prometheus/relabel/relabel.go index 0ead7ee15bc0..2ea87c6bebd6 100644 --- a/component/prometheus/relabel/relabel.go +++ b/component/prometheus/relabel/relabel.go @@ -173,7 +173,9 @@ func (c *Component) relabel(val float64, lbls labels.Labels) labels.Labels { relabelled = newLbls.labels } } else { - relabelled = relabel.Process(lbls, c.mrc...) + // Relabel against a copy of the labels to prevent modifying the original + // slice. + relabelled = relabel.Process(lbls.Copy(), c.mrc...) c.cacheMisses.Inc() c.cacheSize.Inc() c.addToCache(globalRef, relabelled) diff --git a/docs/sources/configuration/integrations/node-exporter-config.md b/docs/sources/configuration/integrations/node-exporter-config.md index da340241cb1c..5edcecf73ae5 100644 --- a/docs/sources/configuration/integrations/node-exporter-config.md +++ b/docs/sources/configuration/integrations/node-exporter-config.md @@ -28,7 +28,7 @@ docker run \ -v "/proc:/host/proc:ro,rslave" \ -v /tmp/agent:/etc/agent \ -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ - grafana/agent:v0.30.1 \ + grafana/agent:v0.30.2 \ --config.file=/etc/agent-config/agent.yaml ``` @@ -67,7 +67,7 @@ metadata: name: agent spec: containers: - - image: grafana/agent:v0.30.0 + - image: grafana/agent:v0.30.2 name: agent args: - --config.file=/etc/agent-config/agent.yaml diff --git a/docs/sources/configuration/integrations/process-exporter-config.md b/docs/sources/configuration/integrations/process-exporter-config.md index 4b8e358cf304..386b25f937d2 100644 --- a/docs/sources/configuration/integrations/process-exporter-config.md +++ b/docs/sources/configuration/integrations/process-exporter-config.md @@ -20,7 +20,7 @@ docker run \ -v "/proc:/proc:ro" \ -v /tmp/agent:/etc/agent \ -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ - grafana/agent:v0.30.1 \ + grafana/agent:v0.30.2 \ --config.file=/etc/agent-config/agent.yaml ``` @@ -37,7 +37,7 @@ metadata: name: agent spec: containers: - - image: grafana/agent:v0.30.1 + - image: grafana/agent:v0.30.2 name: agent args: - --config.file=/etc/agent-config/agent.yaml diff --git a/docs/sources/operator/custom-resource-quickstart.md b/docs/sources/operator/custom-resource-quickstart.md index 559d3869d32c..4c09526e3115 100644 --- a/docs/sources/operator/custom-resource-quickstart.md +++ b/docs/sources/operator/custom-resource-quickstart.md @@ -45,7 +45,7 @@ metadata: labels: app: grafana-agent spec: - image: grafana/agent:v0.30.1 + image: grafana/agent:v0.30.2 logLevel: info serviceAccountName: grafana-agent metrics: diff --git a/docs/sources/operator/getting-started.md b/docs/sources/operator/getting-started.md index 8ed2c4d53f4d..0969e4880beb 100644 --- a/docs/sources/operator/getting-started.md +++ b/docs/sources/operator/getting-started.md @@ -74,7 +74,7 @@ spec: serviceAccountName: grafana-agent-operator containers: - name: operator - image: grafana/agent-operator:v0.30.1 + image: grafana/agent-operator:v0.30.2 args: - --kubelet-service=default/kubelet --- diff --git a/docs/sources/set-up/install-agent-docker.md b/docs/sources/set-up/install-agent-docker.md index c9af58b7d2a5..fe8f9153c5f5 100644 --- a/docs/sources/set-up/install-agent-docker.md +++ b/docs/sources/set-up/install-agent-docker.md @@ -21,7 +21,7 @@ Install Grafana Agent and get it up and running on Docker. docker run \ -v /tmp/agent:/etc/agent/data \ -v /path/to/config.yaml:/etc/agent/agent.yaml \ - grafana/agent:v0.30.1 + grafana/agent:v0.30.2 ``` 2. Replace `/tmp/agent` with the folder you want to store WAL data in. diff --git a/pkg/operator/config/templates/component/metrics/pod_monitor.libsonnet b/pkg/operator/config/templates/component/metrics/pod_monitor.libsonnet index 58cca2e7df3a..dbf3b30c859c 100644 --- a/pkg/operator/config/templates/component/metrics/pod_monitor.libsonnet +++ b/pkg/operator/config/templates/component/metrics/pod_monitor.libsonnet @@ -63,7 +63,7 @@ function( proxy_url: optionals.string(endpoint.ProxyURL), params: optionals.object(endpoint.Params), scheme: optionals.string(endpoint.Scheme), - enable_http2: optionals.string(endpoint.EnableHttp2), + enable_http2: optionals.bool(endpoint.EnableHttp2), // NOTE(rfratto): unlike ServiceMonitor, pod monitors explicitly use // SafeTLSConfig. diff --git a/pkg/operator/config/templates/component/metrics/service_monitor.libsonnet b/pkg/operator/config/templates/component/metrics/service_monitor.libsonnet index 98ed9e53e2f6..99ccedff3fed 100644 --- a/pkg/operator/config/templates/component/metrics/service_monitor.libsonnet +++ b/pkg/operator/config/templates/component/metrics/service_monitor.libsonnet @@ -63,7 +63,7 @@ function( proxy_url: optionals.string(endpoint.ProxyURL), params: optionals.object(endpoint.Params), scheme: optionals.string(endpoint.Scheme), - enable_http2: optionals.string(endpoint.EnableHttp2), + enable_http2: optionals.bool(endpoint.EnableHttp2), tls_config: if endpoint.TLSConfig != null then new_tls_config(meta.Namespace, endpoint.TLSConfig), diff --git a/pkg/operator/defaults.go b/pkg/operator/defaults.go index 639df5c73b0f..9bd66bbf1eb1 100644 --- a/pkg/operator/defaults.go +++ b/pkg/operator/defaults.go @@ -35,6 +35,7 @@ var ( "v0.29.0", "v0.30.0", "v0.30.1", + "v0.30.2", // NOTE(rfratto): when performing an upgrade, add the newest version above instead of changing the existing reference. } diff --git a/production/grafanacloud-install.sh b/production/grafanacloud-install.sh index a5e0f04a4833..951893cf12db 100755 --- a/production/grafanacloud-install.sh +++ b/production/grafanacloud-install.sh @@ -50,7 +50,7 @@ PACKAGE_SYSTEM=${PACKAGE_SYSTEM:=} # # Global constants. # -RELEASE_VERSION="v0.30.1" +RELEASE_VERSION="v0.30.2" # The DEB and RPM urls don't include the `v` version prefix in the file names, # so we trim it out using ${RELEASE_VERSION#v} below. diff --git a/production/kubernetes/agent-bare.yaml b/production/kubernetes/agent-bare.yaml index fc382932cb11..d9c900033e98 100644 --- a/production/kubernetes/agent-bare.yaml +++ b/production/kubernetes/agent-bare.yaml @@ -84,7 +84,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: grafana/agent:v0.30.1 + image: grafana/agent:v0.30.2 imagePullPolicy: IfNotPresent name: grafana-agent ports: diff --git a/production/kubernetes/agent-loki.yaml b/production/kubernetes/agent-loki.yaml index e0b59852ef27..584c817657f3 100644 --- a/production/kubernetes/agent-loki.yaml +++ b/production/kubernetes/agent-loki.yaml @@ -66,7 +66,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: grafana/agent:v0.30.1 + image: grafana/agent:v0.30.2 imagePullPolicy: IfNotPresent name: grafana-agent-logs ports: diff --git a/production/kubernetes/agent-traces.yaml b/production/kubernetes/agent-traces.yaml index 556b4c3ad6e2..71c6f4e7a0ac 100644 --- a/production/kubernetes/agent-traces.yaml +++ b/production/kubernetes/agent-traces.yaml @@ -115,7 +115,7 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - image: grafana/agent:v0.30.1 + image: grafana/agent:v0.30.2 imagePullPolicy: IfNotPresent name: grafana-agent-traces ports: diff --git a/production/kubernetes/build/lib/version.libsonnet b/production/kubernetes/build/lib/version.libsonnet index 00a5b8eea4b2..0e9bb7c0d238 100644 --- a/production/kubernetes/build/lib/version.libsonnet +++ b/production/kubernetes/build/lib/version.libsonnet @@ -1 +1 @@ -'grafana/agent:v0.30.1' +'grafana/agent:v0.30.2' diff --git a/production/kubernetes/build/templates/operator/main.jsonnet b/production/kubernetes/build/templates/operator/main.jsonnet index 71789d097794..820b175b8a06 100644 --- a/production/kubernetes/build/templates/operator/main.jsonnet +++ b/production/kubernetes/build/templates/operator/main.jsonnet @@ -23,8 +23,8 @@ local ksm = import 'kube-state-metrics/kube-state-metrics.libsonnet'; local this = self, _images:: { - agent: 'grafana/agent:v0.30.1', - agent_operator: 'grafana/agent-operator:v0.30.1', + agent: 'grafana/agent:v0.30.2', + agent_operator: 'grafana/agent-operator:v0.30.2', ksm: 'registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.5.0', }, diff --git a/production/kubernetes/install-bare.sh b/production/kubernetes/install-bare.sh index 283634202de1..a28cd7d97044 100644 --- a/production/kubernetes/install-bare.sh +++ b/production/kubernetes/install-bare.sh @@ -25,7 +25,7 @@ check_installed() { check_installed curl check_installed envsubst -MANIFEST_BRANCH=v0.30.1 +MANIFEST_BRANCH=v0.30.2 MANIFEST_URL=${MANIFEST_URL:-https://raw.githubusercontent.com/grafana/agent/${MANIFEST_BRANCH}/production/kubernetes/agent-bare.yaml} NAMESPACE=${NAMESPACE:-default} diff --git a/production/operator/templates/agent-operator.yaml b/production/operator/templates/agent-operator.yaml index 21cf2f671ba2..5ef5de2e9ffd 100644 --- a/production/operator/templates/agent-operator.yaml +++ b/production/operator/templates/agent-operator.yaml @@ -372,7 +372,7 @@ spec: containers: - args: - --kubelet-service=default/kubelet - image: grafana/agent-operator:v0.30.1 + image: grafana/agent-operator:v0.30.2 imagePullPolicy: IfNotPresent name: grafana-agent-operator serviceAccount: grafana-agent-operator @@ -436,7 +436,7 @@ metadata: name: grafana-agent namespace: ${NAMESPACE} spec: - image: grafana/agent:v0.30.1 + image: grafana/agent:v0.30.2 integrations: selector: matchLabels: diff --git a/production/tanka/grafana-agent/v1/main.libsonnet b/production/tanka/grafana-agent/v1/main.libsonnet index 6bb31c3b533f..daab6d300bbe 100644 --- a/production/tanka/grafana-agent/v1/main.libsonnet +++ b/production/tanka/grafana-agent/v1/main.libsonnet @@ -15,8 +15,8 @@ local service = k.core.v1.service; (import './lib/traces.libsonnet') + { _images:: { - agent: 'grafana/agent:v0.30.1', - agentctl: 'grafana/agentctl:v0.30.1', + agent: 'grafana/agent:v0.30.2', + agentctl: 'grafana/agentctl:v0.30.2', }, // new creates a new DaemonSet deployment of the grafana-agent. By default, diff --git a/production/tanka/grafana-agent/v2/internal/base.libsonnet b/production/tanka/grafana-agent/v2/internal/base.libsonnet index bbddb2a5b67e..83906308f871 100644 --- a/production/tanka/grafana-agent/v2/internal/base.libsonnet +++ b/production/tanka/grafana-agent/v2/internal/base.libsonnet @@ -11,8 +11,8 @@ function(name='grafana-agent', namespace='') { local this = self, _images:: { - agent: 'grafana/agent:v0.30.1', - agentctl: 'grafana/agentctl:v0.30.1', + agent: 'grafana/agent:v0.30.2', + agentctl: 'grafana/agentctl:v0.30.2', }, _config:: { name: name, diff --git a/production/tanka/grafana-agent/v2/internal/syncer.libsonnet b/production/tanka/grafana-agent/v2/internal/syncer.libsonnet index 324833acb98d..532e30f7b092 100644 --- a/production/tanka/grafana-agent/v2/internal/syncer.libsonnet +++ b/production/tanka/grafana-agent/v2/internal/syncer.libsonnet @@ -14,7 +14,7 @@ function( ) { local _config = { api: error 'api must be set', - image: 'grafana/agentctl:v0.30.1', + image: 'grafana/agentctl:v0.30.2', schedule: '*/5 * * * *', configs: [], } + config,