Skip to content

Commit

Permalink
fix: filter out snapshot-clone-src-*|replica-vol-* (#10)
Browse files Browse the repository at this point in the history
* fix: filter out `snapshot-clone-src-*|replica-vol-*`

* chore: ignore .txt files

* unfeat: remove 'node_(read|write)_ops_total'

* fix: remove volume info from `solidfire_node_iscsi_sessions`

* remove: volume_last_sample_read_bytes|volume_read_latency_seconds|volume_last_sample_read_ops|volume_last_sample_write_bytes|volume_write_latency_seconds|volume_write_ops_last_sample
  • Loading branch information
MCBrandenburg authored May 12, 2023
1 parent e860dfe commit 6fd81ac
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 173 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ tmp
./solidfire-exporter
archive.tar.gz
bin/*
*.txt
8 changes: 0 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,20 +121,15 @@ docker run --rm -p 9987:9987 ghcr.io/mjavier2k/solidfire-exporter:v0.6.6
| solidfire_node_interface_utilization_percentage | gauge | Network interface utilization (in percent) of network interface. |
| solidfire_node_load | histogram | System load histogram |
| solidfire_node_read_latency_seconds_total | counter | The total time spent performing read operations since the creation of the cluster. |
| solidfire_node_read_ops_total | counter | Total read operations to a node. |
| solidfire_node_samples | gauge | Node stat sample count |
| solidfire_node_total_memory_bytes | gauge | Total node memory in bytes. |
| solidfire_node_used_memory_bytes | gauge | Total node memory used in bytes. |
| solidfire_node_write_latency_seconds_total | counter | The total time spent performing write operations since the creation of the cluster. |
| solidfire_node_write_ops_total | counter | Total write operations to a node. |
| solidfire_up | gauge | Whether the last scrape against the SolidFire API was successful |
| solidfire_volume_actual_iops | gauge | The current actual IOPS to the volume in the last 500 milliseconds |
| solidfire_volume_average_iop_size_bytes | gauge | The average size in bytes of recent I/O to the volume in the last 500 milliseconds |
| solidfire_volume_burst_iops_credit | gauge | The total number of IOP credits available to the user. When volumes are not using up to the configured maxIOPS, credits are accrued. |
| solidfire_volume_client_queue_depth | gauge | The number of outstanding read and write operations to the volume. |
| solidfire_volume_last_sample_read_bytes | gauge | The total number of bytes read from the volume during the last sample period. |
| solidfire_volume_last_sample_read_ops | gauge | The total number of read operations during the last sample period |
| solidfire_volume_last_sample_write_bytes | gauge | The total number of bytes written to the volume during the last sample period. |
| solidfire_volume_latency_seconds | gauge | The average time, in seconds, to complete operations to the volume in the last 500 milliseconds. A '0' (zero) value means there is no I/O to the volume. |
| solidfire_volume_non_zero_blocks | gauge | The total number of 4KiB blocks that contain data after the last garbage collection operation has completed. |
| solidfire_volume_qos_below_min_iops_percentage | histogram | Volume QoS below minimum IOPS percentage |
Expand All @@ -144,7 +139,6 @@ docker run --rm -p 9987:9987 ghcr.io/mjavier2k/solidfire-exporter:v0.6.6
| solidfire_volume_qos_throttle_percentage | histogram | Volume QoS throttle percentage |
| solidfire_volume_qos_write_block_sizes_bytes_bucket | histogram | Volume QoS write block sizes |
| solidfire_volume_read_bytes_total | counter | The total cumulative bytes read from the volume since the creation of the volume. |
| solidfire_volume_read_latency_seconds | gauge | The average time, in seconds, to complete read operations to the volume in the last 500 milliseconds. |
| solidfire_volume_read_latency_seconds_total | counter | The total time spent performing read operations from the volume |
| solidfire_volume_read_ops_total | counter | The total read operations to the volume since the creation of the volume. |
| solidfire_volume_size_bytes | gauge | Total provisioned capacity in bytes. |
Expand All @@ -153,9 +147,7 @@ docker run --rm -p 9987:9987 ghcr.io/mjavier2k/solidfire-exporter:v0.6.6
| solidfire_volume_unaligned_writes_total | counter | The total cumulative unaligned write operations to a volume since the creation of the volume. |
| solidfire_volume_utilization | gauge | A floating value that describes how much the client is using the volume. Value 0: The client is not using the volume. Value 1: The client is using their maximum. Value 1+: The client is using their burst. |
| solidfire_volume_write_bytes_total | counter | The total cumulative bytes written to the volume since the creation of the volume. |
| solidfire_volume_write_latency_seconds | gauge | The average time, in seconds, to complete write operations to a volume in the last 500 milliseconds. |
| solidfire_volume_write_latency_seconds_total | counter | The total time spent performing write operations to the volume |
| solidfire_volume_write_ops_last_sample | gauge | The total number of write operations during the last sample period. |
| solidfire_volume_write_ops_total | counter | The total cumulative write operations to the volume since the creation of the volume. |
| solidfire_volume_zero_blocks | gauge | The total number of empty 4KiB blocks without data after the last round of garbage collection operation has completed. |

Expand Down
99 changes: 17 additions & 82 deletions pkg/prom/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"math"
"regexp"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -58,22 +59,16 @@ func (c *SolidfireCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- MetricDescriptions.VolumeLatencySeconds
ch <- MetricDescriptions.VolumeNonZeroBlocks
ch <- MetricDescriptions.VolumeReadBytesTotal
ch <- MetricDescriptions.VolumeLastSampleReadBytes
ch <- MetricDescriptions.VolumeReadLatencySeconds
ch <- MetricDescriptions.VolumeReadLatencySecondsTotal
ch <- MetricDescriptions.VolumeReadOpsTotal
ch <- MetricDescriptions.VolumeLastSampleReadOps
ch <- MetricDescriptions.VolumeThrottle
ch <- MetricDescriptions.VolumeUnalignedReadsTotal
ch <- MetricDescriptions.VolumeUnalignedWritesTotal
ch <- MetricDescriptions.VolumeSizeBytes
ch <- MetricDescriptions.VolumeUtilization
ch <- MetricDescriptions.VolumeWriteBytesTotal
ch <- MetricDescriptions.VolumeLastSampleWriteBytes
ch <- MetricDescriptions.VolumeWriteLatencySeconds
ch <- MetricDescriptions.VolumeWriteLatencyTotal
ch <- MetricDescriptions.VolumeWriteOpsTotal
ch <- MetricDescriptions.VolumeWriteOpsLastSample
ch <- MetricDescriptions.VolumeStatsZeroBlocks

ch <- MetricDescriptions.ClusterActiveBlockSpaceBytes
Expand Down Expand Up @@ -112,10 +107,8 @@ func (c *SolidfireCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- MetricDescriptions.NodeInterfaceOutBytesTotal
ch <- MetricDescriptions.NodeInterfaceUtilizationPercentage
ch <- MetricDescriptions.NodeReadLatencyTotal
ch <- MetricDescriptions.NodeReadOpsTotal
ch <- MetricDescriptions.NodeUsedMemoryBytes
ch <- MetricDescriptions.NodeWriteLatencyTotal
ch <- MetricDescriptions.NodeWriteOpsTotal
ch <- MetricDescriptions.NodeLoadHistogram

ch <- MetricDescriptions.NodeInfo
Expand Down Expand Up @@ -249,6 +242,9 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
mu.Lock()
defer mu.Unlock()
for _, vol := range volumeStats.Result.VolumeStats {
if ok, _ := regexp.MatchString(`snapshot-clone-src-*|replica-vol-*`, c.volumeNamesByID[vol.VolumeID]); ok {
continue
}
ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeActualIOPS,
prometheus.GaugeValue,
Expand Down Expand Up @@ -298,20 +294,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeLastSampleReadBytes,
prometheus.GaugeValue,
vol.ReadBytesLastSample,
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeReadLatencySeconds,
prometheus.GaugeValue,
MicrosecondsToSeconds(vol.ReadLatencyUSec),
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeReadLatencySecondsTotal,
prometheus.CounterValue,
Expand All @@ -326,13 +308,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeLastSampleReadOps,
prometheus.GaugeValue,
vol.ReadOpsLastSample,
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeThrottle,
prometheus.GaugeValue,
Expand Down Expand Up @@ -375,20 +350,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeLastSampleWriteBytes,
prometheus.GaugeValue,
vol.WriteBytesLastSample,
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeWriteLatencySeconds,
prometheus.GaugeValue,
MicrosecondsToSeconds(vol.WriteLatencyUSec),
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeWriteLatencyTotal,
prometheus.CounterValue,
Expand All @@ -403,13 +364,6 @@ func (c *SolidfireCollector) collectVolumeStats(ctx context.Context, ch chan<- p
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeWriteOpsLastSample,
prometheus.GaugeValue,
vol.WriteOpsLastSample,
strconv.Itoa(vol.VolumeID),
c.volumeNamesByID[vol.VolumeID])

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.VolumeStatsZeroBlocks,
prometheus.GaugeValue,
Expand Down Expand Up @@ -712,14 +666,6 @@ func (c *SolidfireCollector) collectClusterNodeStats(ctx context.Context, ch cha
c.nodesNamesByID[stats.NodeID],
)

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.NodeReadOpsTotal,
prometheus.CounterValue,
stats.ReadOps,
strconv.Itoa(stats.NodeID),
c.nodesNamesByID[stats.NodeID],
)

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.NodeInterfaceInBytesTotal,
prometheus.CounterValue,
Expand Down Expand Up @@ -754,13 +700,6 @@ func (c *SolidfireCollector) collectClusterNodeStats(ctx context.Context, ch cha
c.nodesNamesByID[stats.NodeID],
)

ch <- prometheus.MustNewConstMetric(
MetricDescriptions.NodeWriteOpsTotal,
prometheus.CounterValue,
stats.WriteOps,
strconv.Itoa(stats.NodeID),
c.nodesNamesByID[stats.NodeID],
)
}
return nil
}
Expand All @@ -773,6 +712,9 @@ func (c *SolidfireCollector) collectVolumeQosHistograms(ctx context.Context, ch
mu.Lock()
defer mu.Unlock()
for _, h := range VolumeQoSHistograms.Result.QosHistograms {
if ok, _ := regexp.MatchString(`snapshot-clone-src-*|replica-vol-*`, c.volumeNamesByID[h.VolumeID]); ok {
continue
}
// Below Min IOPS Percentage
BelowMinIopsPercentages := map[float64]uint64{
19: h.Histograms.BelowMinIopsPercentages.Bucket1To19,
Expand Down Expand Up @@ -1256,27 +1198,20 @@ func (c *SolidfireCollector) collectISCSISessions(ctx context.Context, ch chan<-
}
mu.Lock()
defer mu.Unlock()
sessions := make(map[int]map[int]float64)
sessions := make(map[int]float64)

for _, session := range ListISCSISessions.Result.Sessions {
if sessions[session.NodeID] == nil {
sessions[session.NodeID] = make(map[int]float64)
}
sessions[session.NodeID][session.VolumeID]++
sessions[session.NodeID]++
}

for node, v := range sessions {
for vol, val := range v {
ch <- prometheus.MustNewConstMetric(
MetricDescriptions.NodeISCSISessions,
prometheus.GaugeValue,
val,
strconv.Itoa(node),
c.nodesNamesByID[node],
strconv.Itoa(vol),
c.volumeNamesByID[vol],
)
}
for node, val := range sessions {
ch <- prometheus.MustNewConstMetric(
MetricDescriptions.NodeISCSISessions,
prometheus.GaugeValue,
val,
strconv.Itoa(node),
c.nodesNamesByID[node],
)
}
return nil
}
Expand Down
66 changes: 1 addition & 65 deletions pkg/prom/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@ type Descriptions struct {
VolumeAverageIOPSizeBytes *prometheus.Desc
VolumeBurstIOPSCredit *prometheus.Desc
VolumeClientQueueDepth *prometheus.Desc
VolumeLastSampleReadBytes *prometheus.Desc
VolumeLastSampleReadOps *prometheus.Desc
VolumeLastSampleWriteBytes *prometheus.Desc
VolumeLatencySeconds *prometheus.Desc
VolumeNonZeroBlocks *prometheus.Desc
VolumeReadBytesTotal *prometheus.Desc
VolumeReadLatencySeconds *prometheus.Desc
VolumeReadLatencySecondsTotal *prometheus.Desc
VolumeReadOpsTotal *prometheus.Desc
VolumeThrottle *prometheus.Desc
Expand All @@ -28,9 +24,7 @@ type Descriptions struct {
VolumeSizeBytes *prometheus.Desc
VolumeUtilization *prometheus.Desc
VolumeWriteBytesTotal *prometheus.Desc
VolumeWriteLatencySeconds *prometheus.Desc
VolumeWriteLatencyTotal *prometheus.Desc
VolumeWriteOpsLastSample *prometheus.Desc
VolumeWriteOpsTotal *prometheus.Desc
VolumeStatsZeroBlocks *prometheus.Desc

Expand Down Expand Up @@ -82,12 +76,10 @@ type Descriptions struct {
NodeInterfaceUtilizationPercentage *prometheus.Desc
NodeLoadHistogram *prometheus.Desc
NodeReadLatencyTotal *prometheus.Desc
NodeReadOpsTotal *prometheus.Desc
NodeSamples *prometheus.Desc
NodeTotalMemoryBytes *prometheus.Desc
NodeUsedMemoryBytes *prometheus.Desc
NodeWriteLatencyTotal *prometheus.Desc
NodeWriteOpsTotal *prometheus.Desc

// ListAllNodes
NodeInfo *prometheus.Desc
Expand Down Expand Up @@ -211,20 +203,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
nil,
)

d.VolumeLastSampleReadBytes = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_last_sample_read_bytes"),
"The total number of bytes read from the volume during the last sample period.",
[]string{"volume_id", "volume_name"},
nil,
)

d.VolumeReadLatencySeconds = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_read_latency_seconds"),
"The average time, in seconds, to complete read operations to the volume in the last 500 milliseconds.",
[]string{"volume_id", "volume_name"},
nil,
)

d.VolumeReadLatencySecondsTotal = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_read_latency_seconds_total"),
"The total time spent performing read operations from the volume",
Expand All @@ -239,13 +217,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
nil,
)

d.VolumeLastSampleReadOps = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_last_sample_read_ops"),
"The total number of read operations during the last sample period",
[]string{"volume_id", "volume_name"},
nil,
)

d.VolumeThrottle = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_throttle"),
"A floating value between 0 and 1 that represents how much the system is throttling clients below their maxIOPS because of rereplication of data, transient errors, and snapshots taken.",
Expand Down Expand Up @@ -288,20 +259,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
nil,
)

d.VolumeLastSampleWriteBytes = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_last_sample_write_bytes"),
"The total number of bytes written to the volume during the last sample period.",
[]string{"volume_id", "volume_name"},
nil,
)

d.VolumeWriteLatencySeconds = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_write_latency_seconds"),
"The average time, in seconds, to complete write operations to a volume in the last 500 milliseconds.",
[]string{"volume_id", "volume_name"},
nil,
)

d.VolumeWriteLatencyTotal = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_write_latency_seconds_total"),
"The total time spent performing write operations to the volume",
Expand All @@ -316,13 +273,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
nil,
)

d.VolumeWriteOpsLastSample = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_write_ops_last_sample"),
"The total number of write operations during the last sample period.",
[]string{"volume_id", "volume_name"},
nil,
)

d.VolumeStatsZeroBlocks = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "volume_zero_blocks"),
"The total number of empty 4KiB blocks without data after the last round of garbage collection operation has completed.",
Expand Down Expand Up @@ -544,20 +494,6 @@ func NewMetricDescriptions(namespace string) *Descriptions {
nil,
)

d.NodeReadOpsTotal = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "node_read_ops_total"),
"Total read operations to a node.", // undocumented metric
[]string{"node_id", "node_name"},
nil,
)

d.NodeWriteOpsTotal = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "node_write_ops_total"),
"Total write operations to a node", // undocumented metric
[]string{"node_id", "node_name"},
nil,
)

d.NodeTotalMemoryBytes = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "node_total_memory_bytes"),
"Total node memory in bytes.",
Expand Down Expand Up @@ -932,7 +868,7 @@ func NewMetricDescriptions(namespace string) *Descriptions {
d.NodeISCSISessions = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "node_iscsi_sessions"),
"The total number of iscsi sessions per node and volume",
[]string{"node_id", "node_name", "volume_id", "volume_name"},
[]string{"node_id", "node_name"},
nil,
)

Expand Down
Loading

0 comments on commit 6fd81ac

Please sign in to comment.