
Commit 4ee67d6

Merge branch 'release-v0.35' of github.com:grafana/agent into release-v0.35

mattdurham committed Jul 17, 2023
2 parents 2bee826 + 5d7e511
Showing 27 changed files with 645 additions and 47 deletions.
16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,20 @@ This document contains a historical list of changes between releases. Only
changes that impact end-user behavior are listed; changes to documentation or
internal API changes are not present.

v0.35.0-rc.1 (2023-07-17)
-------------------------

### Features

- Add support for converting Prometheus `file_sd_config` to `discovery.file`. (@erikbaranowski)


### Bugfixes

- Fix issue where `remote.http` incorrectly had a status of "Unknown" until the
period specified by the polling frequency elapsed. (@rfratto)


v0.35.0-rc.0 (2023-07-13)
-------------------------

@@ -201,6 +215,8 @@ v0.35.0-rc.0 (2023-07-13)
- Mongodb integration has been re-enabled. (@jcreixell, @marctc)
- Build with go 1.20.6 (@captncraig)

- Clustering for Grafana Agent in flow mode has graduated from experimental to beta.

v0.34.3 (2023-06-27)
--------------------

2 changes: 0 additions & 2 deletions component/module/http/http.go
@@ -139,8 +139,6 @@ func (c *Component) Update(args component.Arguments) error {

// CurrentHealth implements component.HealthComponent.
func (c *Component) CurrentHealth() component.Health {
// Note that it takes until the first successful poll for c.managedRemoteHTTP to
// become healthy.
leastHealthy := component.LeastHealthy(
c.managedRemoteHTTP.CurrentHealth(),
c.mod.CurrentHealth(),
11 changes: 6 additions & 5 deletions component/remote/http/http.go
@@ -160,25 +160,25 @@ func (c *Component) nextPoll() time.Duration {
// not be held when calling. After polling, the component's health is updated
// with the success or failure status.
func (c *Component) poll() {
startTime := time.Now()
err := c.pollError()
c.updatePollHealth(err)
}

// NOTE(rfratto): to prevent the health from being inaccessible for longer
// than is needed, only update the health after the poll finished.
func (c *Component) updatePollHealth(err error) {
c.healthMut.Lock()
defer c.healthMut.Unlock()

if err == nil {
c.health = component.Health{
Health: component.HealthTypeHealthy,
Message: "polled endpoint",
UpdateTime: startTime,
UpdateTime: time.Now(),
}
} else {
c.health = component.Health{
Health: component.HealthTypeUnhealthy,
Message: fmt.Sprintf("polling failed: %s", err),
UpdateTime: startTime,
UpdateTime: time.Now(),
}
}
}
@@ -252,6 +252,7 @@ func (c *Component) Update(args component.Arguments) (err error) {
return
}
err = c.pollError()
c.updatePollHealth(err)
}()

c.mut.Lock()
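
The change above factors health reporting into `updatePollHealth` so that `Update` can record a status immediately after its first poll, rather than leaving the component "Unknown" until the polling frequency elapses. Below is a condensed Go sketch of that pattern, with simplified stand-in types rather than the agent's actual component interface:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

type health struct {
	status, message string
	updated         time.Time
}

type component struct {
	mut    sync.Mutex
	health health
}

// updatePollHealth records the outcome of a poll. Factoring it out of the
// poll loop lets Update call it directly after the first poll.
func (c *component) updatePollHealth(err error) {
	c.mut.Lock()
	defer c.mut.Unlock()
	if err == nil {
		c.health = health{"healthy", "polled endpoint", time.Now()}
	} else {
		c.health = health{"unhealthy", fmt.Sprintf("polling failed: %s", err), time.Now()}
	}
}

// poll is a stand-in for the real HTTP poll.
func (c *component) poll() error { return nil }

// Update polls once immediately and records health, instead of reporting
// "Unknown" until the next scheduled poll.
func (c *component) Update() {
	c.updatePollHealth(c.poll())
}

func main() {
	var c component
	c.Update()
	fmt.Println(c.health.status, "-", c.health.message)
}
```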
34 changes: 34 additions & 0 deletions converter/internal/prometheusconvert/file.go
@@ -0,0 +1,34 @@
package prometheusconvert

import (
"time"

"github.com/grafana/agent/component/discovery"
"github.com/grafana/agent/component/discovery/file"
"github.com/grafana/agent/converter/diag"
"github.com/grafana/agent/converter/internal/common"
prom_file "github.com/prometheus/prometheus/discovery/file"
)

func appendDiscoveryFile(pb *prometheusBlocks, label string, sdConfig *prom_file.SDConfig) discovery.Exports {
discoveryFileArgs := toDiscoveryFile(sdConfig)
name := []string{"discovery", "file"}
block := common.NewBlockWithOverride(name, label, discoveryFileArgs)
pb.discoveryBlocks = append(pb.discoveryBlocks, newPrometheusBlock(block, name, label, "", ""))
return newDiscoverExports("discovery.file." + label + ".targets")
}

func validateDiscoveryFile(sdConfig *prom_file.SDConfig) diag.Diagnostics {
return make(diag.Diagnostics, 0)
}

func toDiscoveryFile(sdConfig *prom_file.SDConfig) *file.Arguments {
if sdConfig == nil {
return nil
}

return &file.Arguments{
Files: sdConfig.Files,
RefreshInterval: time.Duration(sdConfig.RefreshInterval),
}
}
4 changes: 4 additions & 0 deletions converter/internal/prometheusconvert/prometheusconvert.go
@@ -16,6 +16,7 @@ import (
prom_consul "github.com/prometheus/prometheus/discovery/consul"
prom_digitalocean "github.com/prometheus/prometheus/discovery/digitalocean"
prom_dns "github.com/prometheus/prometheus/discovery/dns"
prom_file "github.com/prometheus/prometheus/discovery/file"
prom_gce "github.com/prometheus/prometheus/discovery/gce"
prom_kubernetes "github.com/prometheus/prometheus/discovery/kubernetes"
prom_docker "github.com/prometheus/prometheus/discovery/moby"
@@ -121,6 +122,9 @@ func appendServiceDiscoveryConfigs(pb *prometheusBlocks, serviceDiscoveryConfig
case *prom_aws.EC2SDConfig:
labelCounts["ec2"]++
exports = appendDiscoveryEC2(pb, common.GetUniqueLabel(label, labelCounts["ec2"]), sdc)
case *prom_file.SDConfig:
labelCounts["file"]++
exports = appendDiscoveryFile(pb, common.GetUniqueLabel(label, labelCounts["file"]), sdc)
case *prom_gce.SDConfig:
labelCounts["gce"]++
exports = appendDiscoveryGCE(pb, common.GetUniqueLabel(label, labelCounts["gce"]), sdc)
43 changes: 43 additions & 0 deletions converter/internal/prometheusconvert/testdata/file.river
@@ -0,0 +1,43 @@
discovery.file "prometheus1" {
files = ["/tmp/example_*.yaml", "/tmp/example2_*.yaml"]
}

discovery.file "prometheus2" {
files = ["/tmp/example_*.yaml", "/tmp/example2_*.yaml"]
refresh_interval = "1m0s"
}

prometheus.scrape "prometheus1" {
targets = concat(
discovery.file.prometheus1.targets,
[{
__address__ = "localhost:9090",
}],
)
forward_to = [prometheus.remote_write.default.receiver]
job_name = "prometheus1"
}

prometheus.scrape "prometheus2" {
targets = discovery.file.prometheus2.targets
forward_to = [prometheus.remote_write.default.receiver]
job_name = "prometheus2"
}

prometheus.remote_write "default" {
endpoint {
name = "remote1"
url = "http://remote-write-url1"
send_exemplars = false

queue_config {
capacity = 2500
max_shards = 200
max_samples_per_send = 500
}

metadata_config {
max_samples_per_send = 500
}
}
}
19 changes: 19 additions & 0 deletions converter/internal/prometheusconvert/testdata/file.yaml
@@ -0,0 +1,19 @@
scrape_configs:
- job_name: "prometheus1"
static_configs:
- targets: ["localhost:9090"]
file_sd_configs:
- refresh_interval: 5m
files:
- "/tmp/example_*.yaml"
- "/tmp/example2_*.yaml"
- job_name: "prometheus2"
file_sd_configs:
- refresh_interval: 1m
files:
- "/tmp/example_*.yaml"
- "/tmp/example2_*.yaml"

remote_write:
- name: "remote1"
url: "http://remote-write-url1"
3 changes: 3 additions & 0 deletions converter/internal/prometheusconvert/validate.go
@@ -13,6 +13,7 @@ import (
prom_consul "github.com/prometheus/prometheus/discovery/consul"
prom_digitalocean "github.com/prometheus/prometheus/discovery/digitalocean"
prom_dns "github.com/prometheus/prometheus/discovery/dns"
prom_file "github.com/prometheus/prometheus/discovery/file"
prom_gce "github.com/prometheus/prometheus/discovery/gce"
_ "github.com/prometheus/prometheus/discovery/install" // Register Prometheus SDs
prom_kubernetes "github.com/prometheus/prometheus/discovery/kubernetes"
@@ -101,6 +102,8 @@ func validateScrapeConfigs(scrapeConfigs []*prom_config.ScrapeConfig) diag.Diagn
newDiags = validateDiscoveryDocker(sdc)
case *prom_aws.EC2SDConfig:
newDiags = validateDiscoveryEC2(sdc)
case *prom_file.SDConfig:
newDiags = validateDiscoveryFile(sdc)
case *prom_gce.SDConfig:
newDiags = validateDiscoveryGce(sdc)
case *prom_kubernetes.SDConfig:
2 changes: 1 addition & 1 deletion docs/sources/_index.md
@@ -1,6 +1,6 @@
---
title: Grafana Agent
weight: 1
weight: 550
---

# Grafana Agent
Binary file added docs/sources/assets/ui_clustering_page.png
80 changes: 80 additions & 0 deletions docs/sources/flow/concepts/clustering.md
@@ -0,0 +1,80 @@
---
title: Grafana Agent clustering concepts
menuTitle: Clustering
weight: 500
labels:
stage: beta
---

# Clustering (beta)

Clustering enables a fleet of agents to work together for workload distribution
and high availability. It helps create horizontally scalable deployments with
minimal resource and operational overhead.

To achieve this, Grafana Agent makes use of an eventually consistent model that
assumes all participating Agents are interchangeable and converge on using the
same configuration file.

The behavior of a standalone, non-clustered agent is the same as if it were a
single-node cluster.

You configure clustering by passing `cluster` command-line flags to the [run][]
command.

[run]: {{< relref "../reference/cli/run.md#clustering-beta" >}}
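
For example, a minimal sketch of a clustered invocation, assuming the
`--cluster.enabled` and `--cluster.join-addresses` flags from the [run][]
reference and a placeholder peer address:

```bash
# Enable clustering and join an existing cluster member.
# agent-0.example.internal:12345 is a placeholder; clustering traffic
# is served over the agent's HTTP server port.
grafana-agent run config.river \
  --cluster.enabled \
  --cluster.join-addresses=agent-0.example.internal:12345
```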

## Use cases

### Target auto-distribution

Target auto-distribution is the most basic use case of clustering; it allows
scraping components running on all peers to distribute the scrape load among
themselves. For target auto-distribution to work correctly, all agents in the
same cluster must be able to reach the same service discovery APIs and must be
able to scrape the same targets.

You must explicitly enable target auto-distribution on components by defining a
`clustering` block, such as:

```river
prometheus.scrape "default" {
clustering {
enabled = true
}
...
}
```

A cluster state change is detected when a new node joins or an existing node goes away. All participating components locally
recalculate target ownership and rebalance the number of targets they’re
scraping without explicitly communicating ownership over the network.

Target auto-distribution allows you to dynamically scale the number of agents to distribute workload during peaks.
It also provides resiliency: if a node goes away, its targets are automatically picked up by one of the remaining peers.

The agent uses a fully-local consistent hashing algorithm to distribute
targets, meaning that, on average, only ~1/N of the targets are redistributed
when cluster membership changes.
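
To make the ownership rule concrete, the following Go sketch shows how each
node can decide locally which peer owns a target. It uses rendezvous hashing,
a fully-local scheme with the same ~1/N redistribution property; this is an
illustration of the technique, not the agent's actual implementation:

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// owner returns the peer that owns the given target. Every node runs the
// same computation over the same peer list, so all nodes agree on ownership
// without exchanging messages.
func owner(target string, peers []string) string {
	sort.Strings(peers) // deterministic tie-breaking across nodes

	var best string
	var bestScore uint64
	for _, p := range peers {
		h := fnv.New64a()
		h.Write([]byte(p + "/" + target))
		if s := h.Sum64(); s >= bestScore {
			bestScore, best = s, p
		}
	}
	return best
}

func main() {
	peers := []string{"agent-0", "agent-1", "agent-2"}
	for _, t := range []string{"host-a:9100", "host-b:9100", "host-c:9100"} {
		// Each node scrapes a target only if owner(t, peers) is itself.
		fmt.Printf("%s -> %s\n", t, owner(t, peers))
	}
}
```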

Refer to each component's reference documentation to discover whether it
supports clustering. Components that support clustering include:

- [prometheus.scrape][]
- [pyroscope.scrape][]
- [prometheus.operator.podmonitors][]
- [prometheus.operator.servicemonitors][]

[prometheus.scrape]: {{< relref "../reference/components/prometheus.scrape.md#clustering-beta" >}}
[pyroscope.scrape]: {{< relref "../reference/components/pyroscope.scrape.md#clustering-beta" >}}
[prometheus.operator.podmonitors]: {{< relref "../reference/components/prometheus.operator.podmonitors.md#clustering-beta" >}}
[prometheus.operator.servicemonitors]: {{< relref "../reference/components/prometheus.operator.servicemonitors.md#clustering-beta" >}}

## Cluster monitoring and troubleshooting

To monitor your cluster's status, check the Flow UI [clustering page][].
The [debugging][] topic contains tips to help you pin down clustering issues.

[clustering page]: {{< relref "../monitoring/debugging.md#clustering-page" >}}
[debugging]: {{< relref "../monitoring/debugging.md#debugging-clustering-issues" >}}
64 changes: 64 additions & 0 deletions docs/sources/flow/getting-started/configure-agent-clustering.md
@@ -0,0 +1,64 @@
---
title: Configure Grafana Agent clustering in an existing installation
menuTitle: Configure Grafana Agent clustering
weight: 400
---

# Configure Grafana Agent clustering

You can configure Grafana Agent to run with [clustering][] so that
individual agents can work together for workload distribution and high
availability.

{{% admonition type="note" %}}
Clustering is a [beta][] feature. Beta features are subject to breaking
changes and may be replaced with equivalent functionality that covers the same
use case.
{{%/admonition %}}

This topic describes how to add clustering to an existing installation.

[clustering]: {{< relref "../concepts/clustering.md" >}}
[beta]: {{< relref "../../stability.md#beta" >}}

## Configure Grafana Agent clustering with Helm Chart

This section guides you through enabling clustering when Grafana Agent is
installed on Kubernetes using the [Grafana Agent Helm chart][install-helm].

[install-helm]: {{< relref "../setup/install/kubernetes.md" >}}

### Before you begin

- Ensure that your `values.yaml` file has `controller.type` set to
  `statefulset`, as in the sketch below.
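
For example, the relevant section of your `values.yaml` file would look like
the following sketch:

```yaml
controller:
  type: statefulset
```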

### Steps

To configure clustering:

1. Amend your existing `values.yaml` file to add `clustering.enabled=true` inside the `agent` block:

```yaml
agent:
clustering:
enabled: true
```
1. Upgrade your installation to use the new `values.yaml` file:
```bash
helm upgrade RELEASE_NAME grafana/grafana-agent -f values.yaml
```

Replace `RELEASE_NAME` with the name of the installation you chose when you
installed the Helm chart.

1. Use the [UI][] to verify the cluster status:

1. Click **Clustering** in the navigation bar.

2. Ensure that all expected nodes appear in the resulting table.

[UI]: {{< relref "../monitoring/debugging.md#clustering-page" >}}
