
Commit 4ee67d6

Merge branch 'release-v0.35' of github.com:grafana/agent into release-v0.35

mattdurham committed Jul 17, 2023
2 parents 2bee826 + 5d7e511
Showing 27 changed files with 645 additions and 47 deletions.
16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,20 @@ This document contains a historical list of changes between releases. Only
changes that impact end-user behavior are listed; changes to documentation or
internal API changes are not present.

v0.35.0-rc.1 (2023-07-17)
-------------------------

### Features

- Add support for converting Prometheus `file_sd_config` to `discovery.file`. (@erikbaranowski)


### Bugfixes

- Fix issue where `remote.http` incorrectly had a status of "Unknown" until the
period specified by the polling frequency elapsed. (@rfratto)


v0.35.0-rc.0 (2023-07-13)
-------------------------

@@ -201,6 +215,8 @@ v0.35.0-rc.0 (2023-07-13)
- Mongodb integration has been re-enabled. (@jcreixell, @marctc)
- Build with go 1.20.6 (@captncraig)

- Clustering for Grafana Agent in flow mode has graduated from experimental to beta.

v0.34.3 (2023-06-27)
--------------------

2 changes: 0 additions & 2 deletions component/module/http/http.go
@@ -139,8 +139,6 @@ func (c *Component) Update(args component.Arguments) error {

// CurrentHealth implements component.HealthComponent.
func (c *Component) CurrentHealth() component.Health {
// Note that it takes until the first successful poll for c.managedRemoteHTTP to
// become healthy.
leastHealthy := component.LeastHealthy(
c.managedRemoteHTTP.CurrentHealth(),
c.mod.CurrentHealth(),
11 changes: 6 additions & 5 deletions component/remote/http/http.go
@@ -160,25 +160,25 @@ func (c *Component) nextPoll() time.Duration {
// not be held when calling. After polling, the component's health is updated
// with the success or failure status.
func (c *Component) poll() {
startTime := time.Now()
err := c.pollError()
c.updatePollHealth(err)
}

// NOTE(rfratto): to prevent the health from being inaccessible for longer
// than is needed, only update the health after the poll finished.
func (c *Component) updatePollHealth(err error) {
c.healthMut.Lock()
defer c.healthMut.Unlock()

if err == nil {
c.health = component.Health{
Health: component.HealthTypeHealthy,
Message: "polled endpoint",
UpdateTime: startTime,
UpdateTime: time.Now(),
}
} else {
c.health = component.Health{
Health: component.HealthTypeUnhealthy,
Message: fmt.Sprintf("polling failed: %s", err),
UpdateTime: startTime,
UpdateTime: time.Now(),
}
}
}
@@ -252,6 +252,7 @@ func (c *Component) Update(args component.Arguments) (err error) {
return
}
err = c.pollError()
c.updatePollHealth(err)
}()

c.mut.Lock()
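
The change above factors health reporting into `updatePollHealth` so that `Update` can record a status immediately after its first poll, rather than leaving the component "Unknown" until the polling frequency elapses. Below is a condensed Go sketch of that pattern, with simplified stand-in types rather than the agent's actual component interface:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

type health struct {
	status, message string
	updated         time.Time
}

type component struct {
	mut    sync.Mutex
	health health
}

// updatePollHealth records the outcome of a poll. Factoring it out of the
// poll loop lets Update call it directly after the first poll.
func (c *component) updatePollHealth(err error) {
	c.mut.Lock()
	defer c.mut.Unlock()
	if err == nil {
		c.health = health{"healthy", "polled endpoint", time.Now()}
	} else {
		c.health = health{"unhealthy", fmt.Sprintf("polling failed: %s", err), time.Now()}
	}
}

// poll is a stand-in for the real HTTP poll.
func (c *component) poll() error { return nil }

// Update polls once immediately and records health, instead of reporting
// "Unknown" until the next scheduled poll.
func (c *component) Update() {
	c.updatePollHealth(c.poll())
}

func main() {
	var c component
	c.Update()
	fmt.Println(c.health.status, "-", c.health.message)
}
```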
34 changes: 34 additions & 0 deletions converter/internal/prometheusconvert/file.go
@@ -0,0 +1,34 @@
package prometheusconvert

import (
"time"

"github.com/grafana/agent/component/discovery"
"github.com/grafana/agent/component/discovery/file"
"github.com/grafana/agent/converter/diag"
"github.com/grafana/agent/converter/internal/common"
prom_file "github.com/prometheus/prometheus/discovery/file"
)

func appendDiscoveryFile(pb *prometheusBlocks, label string, sdConfig *prom_file.SDConfig) discovery.Exports {
discoveryFileArgs := toDiscoveryFile(sdConfig)
name := []string{"discovery", "file"}
block := common.NewBlockWithOverride(name, label, discoveryFileArgs)
pb.discoveryBlocks = append(pb.discoveryBlocks, newPrometheusBlock(block, name, label, "", ""))
return newDiscoverExports("discovery.file." + label + ".targets")
}

func validateDiscoveryFile(sdConfig *prom_file.SDConfig) diag.Diagnostics {
return make(diag.Diagnostics, 0)
}

func toDiscoveryFile(sdConfig *prom_file.SDConfig) *file.Arguments {
if sdConfig == nil {
return nil
}

return &file.Arguments{
Files: sdConfig.Files,
RefreshInterval: time.Duration(sdConfig.RefreshInterval),
}
}
4 changes: 4 additions & 0 deletions converter/internal/prometheusconvert/prometheusconvert.go
@@ -16,6 +16,7 @@ import (
prom_consul "github.com/prometheus/prometheus/discovery/consul"
prom_digitalocean "github.com/prometheus/prometheus/discovery/digitalocean"
prom_dns "github.com/prometheus/prometheus/discovery/dns"
prom_file "github.com/prometheus/prometheus/discovery/file"
prom_gce "github.com/prometheus/prometheus/discovery/gce"
prom_kubernetes "github.com/prometheus/prometheus/discovery/kubernetes"
prom_docker "github.com/prometheus/prometheus/discovery/moby"
@@ -121,6 +122,9 @@ func appendServiceDiscoveryConfigs(pb *prometheusBlocks, serviceDiscoveryConfig
case *prom_aws.EC2SDConfig:
labelCounts["ec2"]++
exports = appendDiscoveryEC2(pb, common.GetUniqueLabel(label, labelCounts["ec2"]), sdc)
case *prom_file.SDConfig:
labelCounts["file"]++
exports = appendDiscoveryFile(pb, common.GetUniqueLabel(label, labelCounts["file"]), sdc)
case *prom_gce.SDConfig:
labelCounts["gce"]++
exports = appendDiscoveryGCE(pb, common.GetUniqueLabel(label, labelCounts["gce"]), sdc)
43 changes: 43 additions & 0 deletions converter/internal/prometheusconvert/testdata/file.river
@@ -0,0 +1,43 @@
discovery.file "prometheus1" {
files = ["/tmp/example_*.yaml", "/tmp/example2_*.yaml"]
}

discovery.file "prometheus2" {
files = ["/tmp/example_*.yaml", "/tmp/example2_*.yaml"]
refresh_interval = "1m0s"
}

prometheus.scrape "prometheus1" {
targets = concat(
discovery.file.prometheus1.targets,
[{
__address__ = "localhost:9090",
}],
)
forward_to = [prometheus.remote_write.default.receiver]
job_name = "prometheus1"
}

prometheus.scrape "prometheus2" {
targets = discovery.file.prometheus2.targets
forward_to = [prometheus.remote_write.default.receiver]
job_name = "prometheus2"
}

prometheus.remote_write "default" {
endpoint {
name = "remote1"
url = "http://remote-write-url1"
send_exemplars = false

queue_config {
capacity = 2500
max_shards = 200
max_samples_per_send = 500
}

metadata_config {
max_samples_per_send = 500
}
}
}
19 changes: 19 additions & 0 deletions converter/internal/prometheusconvert/testdata/file.yaml
@@ -0,0 +1,19 @@
scrape_configs:
- job_name: "prometheus1"
static_configs:
- targets: ["localhost:9090"]
file_sd_configs:
- refresh_interval: 5m
files:
- "/tmp/example_*.yaml"
- "/tmp/example2_*.yaml"
- job_name: "prometheus2"
file_sd_configs:
- refresh_interval: 1m
files:
- "/tmp/example_*.yaml"
- "/tmp/example2_*.yaml"

remote_write:
- name: "remote1"
url: "http://remote-write-url1"
3 changes: 3 additions & 0 deletions converter/internal/prometheusconvert/validate.go
@@ -13,6 +13,7 @@ import (
prom_consul "github.com/prometheus/prometheus/discovery/consul"
prom_digitalocean "github.com/prometheus/prometheus/discovery/digitalocean"
prom_dns "github.com/prometheus/prometheus/discovery/dns"
prom_file "github.com/prometheus/prometheus/discovery/file"
prom_gce "github.com/prometheus/prometheus/discovery/gce"
_ "github.com/prometheus/prometheus/discovery/install" // Register Prometheus SDs
prom_kubernetes "github.com/prometheus/prometheus/discovery/kubernetes"
@@ -101,6 +102,8 @@ func validateScrapeConfigs(scrapeConfigs []*prom_config.ScrapeConfig) diag.Diagn
newDiags = validateDiscoveryDocker(sdc)
case *prom_aws.EC2SDConfig:
newDiags = validateDiscoveryEC2(sdc)
case *prom_file.SDConfig:
newDiags = validateDiscoveryFile(sdc)
case *prom_gce.SDConfig:
newDiags = validateDiscoveryGce(sdc)
case *prom_kubernetes.SDConfig:
2 changes: 1 addition & 1 deletion docs/sources/_index.md
@@ -1,6 +1,6 @@
---
title: Grafana Agent
weight: 1
weight: 550
---

# Grafana Agent
Binary file added docs/sources/assets/ui_clustering_page.png
80 changes: 80 additions & 0 deletions docs/sources/flow/concepts/clustering.md
@@ -0,0 +1,80 @@
---
title: Grafana Agent clustering concepts
menuTitle: Clustering
weight: 500
labels:
stage: beta
---

# Clustering (beta)

Clustering enables a fleet of agents to work together for workload distribution
and high availability. It helps create horizontally scalable deployments with
minimal resource and operational overhead.

To achieve this, Grafana Agent makes use of an eventually consistent model that
assumes all participating Agents are interchangeable and converge on using the
same configuration file.

The behavior of a standalone, non-clustered agent is the same as if it were a
single-node cluster.

You configure clustering by passing `cluster` command-line flags to the [run][]
command.

[run]: {{< relref "../reference/cli/run.md#clustering-beta" >}}
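
For example, a minimal sketch of a clustered invocation, assuming the
`--cluster.enabled` and `--cluster.join-addresses` flags from the [run][]
reference and a placeholder peer address:

```bash
# Enable clustering and join an existing cluster member.
# agent-0.example.internal:12345 is a placeholder; clustering traffic
# is served over the agent's HTTP server port.
grafana-agent run config.river \
  --cluster.enabled \
  --cluster.join-addresses=agent-0.example.internal:12345
```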

## Use cases

### Target auto-distribution

Target auto-distribution is the most basic use case of clustering; it allows
scraping components running on all peers to distribute the scrape load among
themselves. For target auto-distribution to work correctly, all agents in the
same cluster must be able to reach the same service discovery APIs and must be
able to scrape the same targets.

You must explicitly enable target auto-distribution on components by defining a
`clustering` block, such as:

```river
prometheus.scrape "default" {
clustering {
enabled = true
}
...
}
```

A cluster state change is detected when a new node joins or an existing node goes away. All participating components locally
recalculate target ownership and rebalance the number of targets they’re
scraping without explicitly communicating ownership over the network.

Target auto-distribution allows you to dynamically scale the number of agents to distribute workload during peaks.
It also provides resiliency: if a node goes away, its targets are automatically picked up by one of the remaining peers.

The agent uses a fully-local consistent hashing algorithm to distribute
targets, meaning that, on average, only ~1/N of the targets are redistributed
when cluster membership changes.
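
To make the ownership rule concrete, the following Go sketch shows how each
node can decide locally which peer owns a target. It uses rendezvous hashing,
a fully-local scheme with the same ~1/N redistribution property; this is an
illustration of the technique, not the agent's actual implementation:

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// owner returns the peer that owns the given target. Every node runs the
// same computation over the same peer list, so all nodes agree on ownership
// without exchanging messages.
func owner(target string, peers []string) string {
	sort.Strings(peers) // deterministic tie-breaking across nodes

	var best string
	var bestScore uint64
	for _, p := range peers {
		h := fnv.New64a()
		h.Write([]byte(p + "/" + target))
		if s := h.Sum64(); s >= bestScore {
			bestScore, best = s, p
		}
	}
	return best
}

func main() {
	peers := []string{"agent-0", "agent-1", "agent-2"}
	for _, t := range []string{"host-a:9100", "host-b:9100", "host-c:9100"} {
		// Each node scrapes a target only if owner(t, peers) is itself.
		fmt.Printf("%s -> %s\n", t, owner(t, peers))
	}
}
```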

Refer to each component's reference documentation to discover whether it
supports clustering. Components that support clustering include:

- [prometheus.scrape][]
- [pyroscope.scrape][]
- [prometheus.operator.podmonitors][]
- [prometheus.operator.servicemonitors][]

[prometheus.scrape]: {{< relref "../reference/components/prometheus.scrape.md#clustering-beta" >}}
[pyroscope.scrape]: {{< relref "../reference/components/pyroscope.scrape.md#clustering-beta" >}}
[prometheus.operator.podmonitors]: {{< relref "../reference/components/prometheus.operator.podmonitors.md#clustering-beta" >}}
[prometheus.operator.servicemonitors]: {{< relref "../reference/components/prometheus.operator.servicemonitors.md#clustering-beta" >}}

## Cluster monitoring and troubleshooting

To monitor your cluster's status, check the Flow UI [clustering page][].
The [debugging][] topic contains tips to help you pin down clustering issues.

[clustering page]: {{< relref "../monitoring/debugging.md#clustering-page" >}}
[debugging]: {{< relref "../monitoring/debugging.md#debugging-clustering-issues" >}}
64 changes: 64 additions & 0 deletions docs/sources/flow/getting-started/configure-agent-clustering.md
@@ -0,0 +1,64 @@
---
title: Configure Grafana Agent clustering in an existing installation
menuTitle: Configure Grafana Agent clustering
weight: 400
---

# Configure Grafana Agent clustering

You can configure Grafana Agent to run with [clustering][] so that
individual agents can work together for workload distribution and high
availability.

{{% admonition type="note" %}}
Clustering is a [beta][] feature. Beta features are subject to breaking
changes and may be replaced with equivalent functionality that covers the same
use case.
{{%/admonition %}}

This topic describes how to add clustering to an existing installation.

[clustering]: {{< relref "../concepts/clustering.md" >}}
[beta]: {{< relref "../../stability.md#beta" >}}

## Configure Grafana Agent clustering with Helm Chart

This section guides you through enabling clustering when Grafana Agent is
installed on Kubernetes using the [Grafana Agent Helm chart][install-helm].

[install-helm]: {{< relref "../setup/install/kubernetes.md" >}}

### Before you begin

- Ensure that your `values.yaml` file has `controller.type` set to
  `statefulset`, as in the sketch below.
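
For example, the relevant section of your `values.yaml` file would look like
the following sketch:

```yaml
controller:
  type: statefulset
```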

### Steps

To configure clustering:

1. Amend your existing `values.yaml` file to add `clustering.enabled=true` inside the `agent` block:

```yaml
agent:
clustering:
enabled: true
```
1. Upgrade your installation to use the new `values.yaml` file:
```bash
helm upgrade RELEASE_NAME grafana/grafana-agent -f values.yaml
```

Replace `RELEASE_NAME` with the name of the installation you chose when you
installed the Helm chart.

1. Use the [UI][] to verify the cluster status:

1. Click **Clustering** in the navigation bar.

2. Ensure that all expected nodes appear in the resulting table.

[UI]: {{< relref "../monitoring/debugging.md#clustering-page" >}}
