@@ -21,56 +21,76 @@ receivers:
2121 hostmetrics :
2222 root_path : /hostfs
2323 collection_interval : 30s
24+
25+ # all scrapers are added explicitly, so we can be clear
26+ # about which metrics we've chosen to include/exclude.
27+ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/hostmetricsreceiver/README.md
2428 scrapers :
2529 cpu :
2630 metrics :
27- system.cpu.time :
28- enabled : false
29- system.cpu.utilization :
31+ system.cpu.time : # counter of seconds spent
32+ enabled : true
33+ system.cpu.frequency :
3034 enabled : false
3135 system.cpu.logical.count :
3236 enabled : true
3337 system.cpu.physical.count :
3438 enabled : true
39+ system.cpu.utilization : # percentage utilized in the moment
40+ enabled : false
41+
42+ # entirely disabled, so we omit the module
43+ # disk:
44+ # metrics:
45+ # system.disk.io:
46+ # enabled: false
47+ # system.disk.io_time:
48+ # enabled: false
49+ # system.disk.merged:
50+ # enabled: false
51+ # system.disk.operation_time:
52+ # enabled: false
53+ # system.disk.operations:
54+ # enabled: false
55+ # system.disk.pending_operations:
56+ # enabled: false
57+ # system.disk.weighted_io_time:
58+ # enabled: false
3559
3660 load :
3761 metrics :
62+ system.cpu.load_average.15m :
63+ enabled : true
3864 system.cpu.load_average.1m :
3965 enabled : true
4066 system.cpu.load_average.5m :
4167 enabled : true
42- system.cpu.load_average.15m :
43- enabled : false
4468
45- # Scraping traffic going in/out of network interfaces
46- network :
69+ # Filter out labels that are not needed (mode, type)
70+ filesystem :
4771 metrics :
48- system.network.connections :
49- enabled : false
50- system.network.dropped :
51- enabled : false
52- system.network.errors :
53- enabled : false
54- system.network.io :
55- enabled : true
56- system.network.packets :
57- enabled : false
72+ system.filesystem.inodes.usage :
73+ enabled : true
74+ system.filesystem.usage :
75+ enabled : true
76+ system.filesystem.utilization :
77+ enabled : true
5878
5979 # Memory metrics: limit (total), available, dirty, usage and utilization are enabled; page_size is disabled
6080 memory :
6181 metrics :
62- system.linux. memory.dirty :
63- enabled : false
82+ system.memory.usage :
83+ enabled : true
6484 system.linux.memory.available : # free memory
6585 enabled : true
86+ system.linux.memory.dirty : # data waiting to be written to disk
87+ enabled : true
6688 system.memory.limit : # total memory
6789 enabled : true
6890 system.memory.page_size :
6991 enabled : false
70- system.memory.usage :
71- enabled : false
7292 system.memory.utilization :
73- enabled : false
93+ enabled : true
7494
7595 # Huge pages metrics added as part of a temporary patch
7696 # https://github.com/e2b-dev/opentelemetry-collector-contrib/pull/1
@@ -85,14 +105,80 @@ receivers:
85105 system.linux.memory.huge_pages.total :
86106 enabled : true
87107
88- # filter not needed stuff (mode, type)
89- # filter /dev/loop*, maybe check fo other that are polluting
90- filesystem :
108+ # Scraping traffic going in/out of network interfaces
109+ network :
91110 metrics :
92- system.filesystem.inodes.usage :
111+ system.network.connections :
112+ enabled : true
113+ system.network.dropped :
114+ enabled : true
115+ system.network.errors :
116+ enabled : true
117+ system.network.io :
118+ enabled : true
119+ system.network.packets :
93120 enabled : false
94- system.filesystem.usage :
121+ system.network.conntrack.count :
122+ enabled : true
123+ system.network.conntrack.max :
124+ enabled : true
125+
126+ # entirely disabled, so we omit the module
127+ # paging:
128+ # metrics:
129+ # system.paging.faults:
130+ # enabled: false
131+ # system.paging.operations:
132+ # enabled: false
133+ # system.paging.usage:
134+ # enabled: false
135+ # system.paging.utilization:
136+ # enabled: false
137+
138+ processes :
139+ metrics :
140+ system.processes.count :
95141 enabled : true
142+ system.processes.created :
143+ enabled : true
144+
145+ # entirely disabled, so we omit the module
146+ # process:
147+ # metrics:
148+ # process.cpu.time:
149+ # enabled: false
150+ # process.disk.io:
151+ # enabled: false
152+ # process.memory.usage:
153+ # enabled: false
154+ # process.memory.virtual:
155+ # enabled: false
156+ # process.context_switches:
157+ # enabled: false
158+ # process.cpu.utilization:
159+ # enabled: false
160+ # process.disk.operations:
161+ # enabled: false
162+ # process.handles:
163+ # enabled: false
164+ # process.memory.utilization:
165+ # enabled: false
166+ # process.open_file_descriptors:
167+ # enabled: false
168+ # process.paging.faults:
169+ # enabled: false
170+ # process.signals_pending:
171+ # enabled: false
172+ # process.threads:
173+ # enabled: false
174+ # process.uptime:
175+ # enabled: false
176+
177+ # entirely disabled, so we omit the module
178+ # system:
179+ # metrics:
180+ # system.uptime:
181+ # enabled: false
96182
97183processors :
98184 batch :
@@ -107,14 +193,15 @@ processors:
107193 metrics :
108194 datapoint :
109195 # Drop all system.network.* datapoints for veth*, docker* and loopback (lo) interfaces
110- - ' metric.name == "system.network.io" and IsMatch(attributes["device"], "^(veth- .*|docker.*|lo)$")'
111- # Drop system.filesystem.usage for loop devices
112- - ' metric.name == "system.filesystem.usage" and IsMatch(attributes["device"], "^/dev/loop.*$")'
196+ - ' IsMatch( metric.name, "system.network..*") and IsMatch(attributes["device"], "^(veth.*|docker.*|lo)$")'
197+ # Drop anything related to loop devices
198+ - ' IsMatch(attributes["device"], "^/dev/loop.*$")'
113199
114200 attributes/strip_fs_labels :
115201 include :
116202 match_type : strict
117- metric_names : [system.filesystem.usage]
203+ metric_names :
204+ - system.filesystem.usage
118205 actions :
119206 - action : delete
120207 key : mode
@@ -143,7 +230,6 @@ processors:
143230 - " vault.*"
144231 - " client_proxy.*"
145232 - " Click*"
146- - " otelcol.*"
147233 - " pgxpool.*"
148234
149235 filter/prometheus :
@@ -180,6 +266,18 @@ processors:
180266 aggregation_type : sum
181267 label_set : [instance, node_id, node_status, node_pool]
182268
269+ metricstransform/single_cpu :
270+ transforms :
271+ - include : " system.cpu.time"
272+ match_type : strict
273+ action : update
274+ operations :
275+ - action : aggregate_labels
276+ label_set :
277+ - node.id
278+ - state
279+ aggregation_type : sum
280+
183281 resourcedetection :
184282 # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor
185283 detectors : [gcp]
@@ -280,7 +378,12 @@ service:
280378 metrics/prometheus :
281379 receivers :
282380 - prometheus
283- processors : [filter/prometheus, metricstransform, resourcedetection, transform/set-name, batch]
381+ processors :
382+ - filter/prometheus
383+ - metricstransform
384+ - resourcedetection
385+ - transform/set-name
386+ - batch
284387 exporters :
285388 - otlphttp/grafana_cloud
286389 metrics/rpc_only :
@@ -292,7 +395,12 @@ service:
292395 metrics/host :
293396 receivers :
294397 - hostmetrics
295- processors : [filter/drop_by_device, attributes/strip_fs_labels, attributes/host_metrics_node, batch]
398+ processors :
399+ - filter/drop_by_device
400+ - attributes/strip_fs_labels
401+ - attributes/host_metrics_node
402+ - metricstransform/single_cpu
403+ - batch
296404 exporters :
297405 - otlphttp/grafana_cloud
298406 metrics/external :
0 commit comments