Skip to content

Commit 8347302

Browse files
j8-redisslorello89
authored and committed
added missing shard metrics, still lacking shard replication descriptions
1 parent b3e299c commit 8347302

File tree

4 files changed

+139
-71
lines changed

4 files changed

+139
-71
lines changed

redis_enterprise/assets/dashboards/redis_enterprise_proxy-threads.json

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"title": "Redis Enterprise - Proxy Threads",
2+
"title": "Redis Enterprise - Proxy Threads v2",
33
"description": "Redis Enterprise Proxy Threads are the individual threads of the proxy process",
44
"widgets": [
55
{
@@ -57,13 +57,12 @@
5757
"title_size": "16",
5858
"title_align": "left",
5959
"show_legend": true,
60-
"legend_layout": "auto",
60+
"legend_layout": "vertical",
6161
"legend_columns": [
6262
"avg",
6363
"min",
6464
"max",
65-
"value",
66-
"sum"
65+
"value"
6766
],
6867
"time": {},
6968
"type": "timeseries",
@@ -164,7 +163,7 @@
164163
{
165164
"data_source": "metrics",
166165
"name": "query2",
167-
"query": "avg:rdse.redis_process_main_thread_cpu_system_seconds_total{$cluster,threadname:listener.*,mode:user} by {node}"
166+
"query": "sum:rdse2.namedprocess_namegroup_thread_cpu_seconds.count{$cluster,mode:user, threadname:listener}.as_rate()"
168167
}
169168
],
170169
"response_format": "timeseries",
@@ -218,8 +217,8 @@
218217
{
219218
"data_source": "metrics",
220219
"name": "query1",
221-
"query": "avg:rdse.redis_process_open_fds{$cluster}",
222-
"aggregator": "avg"
220+
"query": "avg:rdse2.namedprocess_namegroup_open_filedesc{$cluster}",
221+
"aggregator": "last"
223222
}
224223
],
225224
"formulas": [
@@ -230,7 +229,7 @@
230229
}
231230
],
232231
"autoscale": true,
233-
"precision": 2,
232+
"precision": 0,
234233
"timeseries_background": {
235234
"type": "area"
236235
}
@@ -256,8 +255,8 @@
256255
{
257256
"data_source": "metrics",
258257
"name": "query1",
259-
"query": "avg:rdse.redis_process_max_fds{$cluster}",
260-
"aggregator": "avg"
258+
"query": "avg:rdse2.namedprocess_namegroup_open_filedesc{$cluster}",
259+
"aggregator": "max"
261260
}
262261
],
263262
"formulas": [
@@ -268,7 +267,7 @@
268267
}
269268
],
270269
"autoscale": true,
271-
"precision": 2,
270+
"precision": 0,
272271
"timeseries_background": {
273272
"type": "area"
274273
}
@@ -294,7 +293,7 @@
294293
{
295294
"data_source": "metrics",
296295
"name": "query1",
297-
"query": "avg:rdse.redis_process_resident_memory_bytes{$cluster}",
296+
"query": "avg:rdse2.redis_server_allocator_resident{$cluster}",
298297
"aggregator": "avg"
299298
}
300299
],
@@ -350,7 +349,7 @@
350349
"title_size": "16",
351350
"title_align": "left",
352351
"show_legend": true,
353-
"legend_layout": "horizontal",
352+
"legend_layout": "auto",
354353
"legend_columns": [
355354
"avg",
356355
"min",

redis_enterprise/datadog_checks/redis_enterprise/metrics.py

Lines changed: 39 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,44 @@
178178
'database_syncer_total_responses': 'database_syncer_total_responses',
179179
}
180180

181+
REDIS_SHARD_REPLICATION = {
182+
'redis_crdt_backlog_histlen': 'redis_crdt_backlog_histlen',
183+
'redis_crdt_backlog_idx': 'redis_crdt_backlog_idx',
184+
'redis_crdt_backlog_master_offset': 'redis_crdt_backlog_master_offset',
185+
'redis_crdt_backlog_offset': 'redis_crdt_backlog_offset',
186+
'redis_crdt_backlog_refs': 'redis_crdt_backlog_refs',
187+
'redis_crdt_backlog_size': 'redis_crdt_backlog_size',
188+
'redis_crdt_clock': 'redis_crdt_clock',
189+
'redis_crdt_effect_reqs': 'redis_crdt_effect_reqs',
190+
'redis_crdt_gc_attempted': 'redis_crdt_gc_attempted',
191+
'redis_crdt_gc_collected': 'redis_crdt_gc_collected',
192+
'redis_crdt_gc_elements_attempted': 'redis_crdt_gc_elements_attempted',
193+
'redis_crdt_gc_elements_collected': 'redis_crdt_gc_elements_collected',
194+
'redis_crdt_gc_pending': 'redis_crdt_gc_pending',
195+
'redis_crdt_gc_skipped': 'redis_crdt_gc_skipped',
196+
'redis_crdt_key_headers': 'redis_crdt_key_headers',
197+
'redis_crdt_list_trimmed_vertices': 'redis_crdt_list_trimmed_vertices',
198+
'redis_crdt_merge_reqs': 'redis_crdt_merge_reqs',
199+
'redis_crdt_oom_latch': 'redis_crdt_oom_latch',
200+
'redis_crdt_ovc_filtered_effect_reqs': 'redis_crdt_ovc_filtered_effect_reqs',
201+
'redis_crdt_peer_dst_id': 'redis_crdt_peer_dst_id',
202+
'redis_crdt_peer_id': 'redis_crdt_peer_id',
203+
'redis_crdt_peer_lag': 'redis_crdt_peer_lag',
204+
'redis_crdt_peer_offset': 'redis_crdt_peer_offset',
205+
'redis_crdt_peer_peer_state': 'redis_crdt_peer_peer_state',
206+
'redis_crdt_pending_list_trimmed_vertices': 'redis_crdt_pending_list_trimmed_vertices',
207+
'redis_crdt_raw_dbsize': 'redis_crdt_raw_dbsize',
208+
'redis_crdt_replica_config_version': 'redis_crdt_replica_config_version',
209+
'redis_crdt_replica_max_ops_lag': 'redis_crdt_replica_max_ops_lag',
210+
'redis_crdt_replica_min_ops_lag': 'redis_crdt_replica_min_ops_lag',
211+
'redis_crdt_replica_shards': 'redis_crdt_replica_shards',
212+
'redis_crdt_replica_slot_coverage_by_any_ovc': 'redis_crdt_replica_slot_coverage_by_any_ovc',
213+
'redis_crdt_replica_slot_coverage_by_only_ovc': 'redis_crdt_replica_slot_coverage_by_only_ovc',
214+
'redis_crdt_replica_slots': 'redis_crdt_replica_slots',
215+
'redis_crdt_stale_replica': 'redis_crdt_stale_replica',
216+
'redis_crdt_ts_key_headers': 'redis_crdt_ts_key_headers',
217+
}
218+
181219
REDIS_LDAP = {
182220
'directory_cache_hits': 'directory_cache_hits',
183221
'directory_cache_miss_then_hits': 'directory_cache_miss_then_hits',
@@ -358,64 +396,6 @@
358396
'node_pressure_memory_waiting_seconds_total': 'node_pressure_memory_waiting_seconds_total',
359397
}
360398

361-
REDIS_PROXY = {
362-
'listener_acc_latency': 'listener_acc_latency',
363-
'listener_acc_latency_max': 'listener_acc_latency_max',
364-
'listener_acc_other_latency': 'listener_acc_other_latency',
365-
'listener_acc_other_latency_max': 'listener_acc_other_latency_max',
366-
'listener_acc_read_latency': 'listener_acc_read_latency',
367-
'listener_acc_read_latency_max': 'listener_acc_read_latency_max',
368-
'listener_acc_write_latency': 'listener_acc_write_latency',
369-
'listener_acc_write_latency_max': 'listener_acc_write_latency_max',
370-
'listener_auth_cmds': 'listener_auth_cmds',
371-
'listener_auth_cmds_max': 'listener_auth_cmds_max',
372-
'listener_auth_errors': 'listener_auth_errors',
373-
'listener_auth_errors_max': 'listener_auth_errors_max',
374-
'listener_cmd_flush': 'listener_cmd_flush',
375-
'listener_cmd_flush_max': 'listener_cmd_flush_max',
376-
'listener_cmd_get': 'listener_cmd_get',
377-
'listener_cmd_get_max': 'listener_cmd_get_max',
378-
'listener_cmd_set': 'listener_cmd_set',
379-
'listener_cmd_set_max': 'listener_cmd_set_max',
380-
'listener_cmd_touch': 'listener_cmd_touch',
381-
'listener_cmd_touch_max': 'listener_cmd_touch_max',
382-
'listener_conns': 'listener_conns',
383-
'listener_egress_bytes': 'listener_egress_bytes',
384-
'listener_egress_bytes_max': 'listener_egress_bytes_max',
385-
'listener_ingress_bytes': 'listener_ingress_bytes',
386-
'listener_ingress_bytes_max': 'listener_ingress_bytes_max',
387-
'listener_last_req_time': 'listener_last_req_time',
388-
'listener_last_res_time': 'listener_last_res_time',
389-
'listener_max_connections_exceeded': 'listener_max_connections_exceeded',
390-
'listener_max_connections_exceeded_max': 'listener_max_connections_exceeded_max',
391-
'listener_monitor_sessions_count': 'listener_monitor_sessions_count',
392-
'listener_other_req': 'listener_other_req',
393-
'listener_other_req_max': 'listener_other_req_max',
394-
'listener_other_res': 'listener_other_res',
395-
'listener_other_res_max': 'listener_other_res_max',
396-
'listener_other_started_res': 'listener_other_started_res',
397-
'listener_other_started_res_max': 'listener_other_started_res_max',
398-
'listener_read_req': 'listener_read_req',
399-
'listener_read_req_max': 'listener_read_req_max',
400-
'listener_read_res': 'listener_read_res',
401-
'listener_read_res_max': 'listener_read_res_max',
402-
'listener_read_started_res': 'listener_read_started_res',
403-
'listener_read_started_res_max': 'listener_read_started_res_max',
404-
'listener_total_connections_received': 'listener_total_connections_received',
405-
'listener_total_connections_received_max': 'listener_total_connections_received_max',
406-
'listener_total_req': 'listener_total_req',
407-
'listener_total_req_max': 'listener_total_req_max',
408-
'listener_total_res': 'listener_total_res',
409-
'listener_total_res_max': 'listener_total_res_max',
410-
'listener_total_started_res': 'listener_total_started_res',
411-
'listener_total_started_res_max': 'listener_total_started_res_max',
412-
'listener_write_req': 'listener_write_req',
413-
'listener_write_req_max': 'listener_write_req_max',
414-
'listener_write_res': 'listener_write_res',
415-
'listener_write_res_max': 'listener_write_res_max',
416-
'listener_write_started_res': 'listener_write_started_res',
417-
'listener_write_started_res_max': 'listener_write_started_res_max',
418-
}
419399

420400
DEFAULT_METRICS = [
421401
REDIS_CLUSTER,
@@ -427,6 +407,7 @@
427407

428408
ADDITIONAL_METRICS = {
429409
'REDIS2.REPLICATION': REDIS_REPLICATION,
410+
'REDIS2.SHARDREPL': REDIS_SHARD_REPLICATION,
430411
'REDIS2.LDAP': REDIS_LDAP,
431412
'REDIS2.NETWORK': REDIS_NETWORK,
432413
'REDIS2.MEMORY': REDIS_MEMORY,

redis_enterprise/metadata.csv

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,56 @@ rdse2.promhttp_metric_handler_requests_in_flight,gauge,30,request,,Current numbe
285285
rdse2.promhttp_metric_handler_requests_total,gauge,30,request,,Total number of scrapes by HTTP status code.,1,redis_enterprise_veetwo,promhttp_metric_handler_requests_total,
286286
rdse2.proxy_accepted_connections,connection,30,connection,,Number of incoming accepted client connections,1,redis_enterprise_veetwo,proxy_accepted_connections,
287287
rdse2.proxy_dispatch_failures,gauge,30,second,,Number of client closed due to failure to be dispatched to workers,1,redis_enterprise_veetwo,proxy_dispatch_failures,
288+
rdse2.redis_server_active_defrag_running,gauge,30,status,,Automatic memory defragmentation current aggressiveness (% cpu),1,redis_enterprise_veetwo,redis_server_active_defrag_running,
289+
rdse2.redis_server_allocator_active,gauge,30,byte,,"Total used memory, including external fragmentation",1,redis_enterprise_veetwo,redis_server_allocator_active,
290+
rdse2.redis_server_allocator_allocated,gauge,30,byte,,Total allocated memory,1,redis_enterprise_veetwo,redis_server_allocator_allocated,
291+
rdse2.redis_server_allocator_resident,gauge,30,byte,,Total resident memory (RSS),1,redis_enterprise_veetwo,redis_server_allocator_resident,
292+
rdse2.redis_server_aof_last_cow_size,gauge,30,byte,,"Last AOFR, CopyOnWrite memory",1,redis_enterprise_veetwo,redis_server_aof_last_cow_size,
293+
rdse2.redis_server_aof_rewrite_in_progress,gauge,30,status,,The number of simultaneous AOF rewrites that are in progress,1,redis_enterprise_veetwo,redis_server_aof_rewrite_in_progress,
294+
rdse2.redis_server_aof_rewrites,gauge,30,event,,Number of AOF rewrites this process executed,1,redis_enterprise_veetwo,redis_server_aof_rewrites,
295+
rdse2.redis_server_aof_delayed_fsync,gauge,30,event,,Number of times an AOF fsync caused delays in the main Redis thread (inducing latency); this can indicate that the disk is slow or overloaded,1,redis_enterprise_veetwo,redis_server_aof_delayed_fsync,
296+
rdse2.redis_server_blocked_clients,gauge,30,connection,,Count the clients waiting on a blocking call,1,redis_enterprise_veetwo,redis_server_blocked_clients,
297+
rdse2.redis_server_connected_clients,gauge,30,connection,,Number of client connections to the specific shard,1,redis_enterprise_veetwo,redis_server_connected_clients,
298+
rdse2.redis_server_connected_slaves,gauge,30,connection,,Number of connected replicas,1,redis_enterprise_veetwo,redis_server_connected_slaves,
299+
rdse2.redis_server_db0_avg_ttl,gauge,30,time,,Average TTL of all volatile keys,1,redis_enterprise_veetwo,redis_server_db0_avg_ttl,
300+
rdse2.redis_server_db0_expires,gauge,30,key,,Total count of volatile keys,1,redis_enterprise_veetwo,redis_server_db0_expires,
301+
rdse2.redis_server_db0_keys,gauge,30,key,,Total key count,1,redis_enterprise_veetwo,redis_server_db0_keys,
302+
rdse2.redis_server_evicted_keys,gauge,30,key,,Keys evicted so far (since restart),1,redis_enterprise_veetwo,redis_server_evicted_keys,
303+
rdse2.redis_server_expire_cycle_cpu_milliseconds,gauge,30,millisecond,,The cumulative amount of time spent on active expiry cycles,1,redis_enterprise_veetwo,redis_server_expire_cycle_cpu_milliseconds,
304+
rdse2.redis_server_expired_keys,gauge,30,key,,Keys expired so far (since restart),1,redis_enterprise_veetwo,redis_server_expired_keys,
305+
rdse2.redis_server_forwarding_state,gauge,30,status,,Shard forwarding state (on or off),1,redis_enterprise_veetwo,redis_server_forwarding_state,
306+
rdse2.redis_server_keys_trimmed,gauge,30,key,,The number of keys that were trimmed in the current or last resharding process,1,redis_enterprise_veetwo,redis_server_keys_trimmed,
307+
rdse2.redis_server_keyspace_read_hits,gauge,30,hit,,Number of read operations accessing an existing keyspace,1,redis_enterprise_veetwo,redis_server_keyspace_read_hits,
308+
rdse2.redis_server_keyspace_read_misses,gauge,30,miss,,Number of read operations accessing a non-existing keyspace,1,redis_enterprise_veetwo,redis_server_keyspace_read_misses,
309+
rdse2.redis_server_keyspace_write_hits,gauge,30,hit,,Number of write operations accessing an existing keyspace,1,redis_enterprise_veetwo,redis_server_keyspace_write_hits,
310+
rdse2.redis_server_keyspace_write_misses,gauge,30,miss,,Number of write operations accessing a non-existing keyspace,1,redis_enterprise_veetwo,redis_server_keyspace_write_misses,
311+
rdse2.redis_server_master_link_status,gauge,30,status,,Indicates if the replica is connected to its master,1,redis_enterprise_veetwo,redis_server_master_link_status,
312+
rdse2.redis_server_master_repl_offset,gauge,30,byte,,Number of bytes sent to replicas by the shard; calculate the throughput for a time period by comparing the value at different times,1,redis_enterprise_veetwo,redis_server_master_repl_offset,
313+
rdse2.redis_server_master_sync_in_progress,gauge,30,status,,"The master shard is synchronizing (1 true, 0 false)",1,redis_enterprise_veetwo,redis_server_master_sync_in_progress,
314+
rdse2.redis_server_max_process_mem,gauge,30,byte,,Current memory limit configured by redis_mgr according to node free memory,1,redis_enterprise_veetwo,redis_server_max_process_mem,
315+
rdse2.redis_server_maxmemory,gauge,30,byte,,Current memory limit configured by redis_mgr according to database memory limits,1,redis_enterprise_veetwo,redis_server_maxmemory,
316+
rdse2.redis_server_mem_aof_buffer,gauge,30,byte,,Current size of AOF buffer,1,redis_enterprise_veetwo,redis_server_mem_aof_buffer,
317+
rdse2.redis_server_mem_clients_normal,gauge,30,byte,,Current memory used for input and output buffers of non-replica clients,1,redis_enterprise_veetwo,redis_server_mem_clients_normal,
318+
rdse2.redis_server_mem_clients_slaves,gauge,30,byte,,Current memory used for input and output buffers of replica clients,1,redis_enterprise_veetwo,redis_server_mem_clients_slaves,
319+
rdse2.redis_server_mem_fragmentation_ratio,gauge,30,percent,,Memory fragmentation ratio (1.3 means 30% overhead),1,redis_enterprise_veetwo,redis_server_mem_fragmentation_ratio,
320+
rdse2.redis_server_mem_not_counted_for_evict,gauge,30,byte,,Portion of used_memory (in bytes) that's not counted for eviction and OOM error,1,redis_enterprise_veetwo,redis_server_mem_not_counted_for_evict,
321+
rdse2.redis_server_mem_replication_backlog,gauge,30,byte,,Size of replication backlog,1,redis_enterprise_veetwo,redis_server_mem_replication_backlog,
322+
rdse2.redis_server_module_fork_in_progress,gauge,30,status,,A binary value that indicates if there is an active fork spawned by a module (1) or not (0),1,redis_enterprise_veetwo,redis_server_module_fork_in_progress,
323+
rdse2.namedprocess_namegroup_cpu_seconds_total,count,30,second,,Shard process CPU usage percentage,1,redis_enterprise_veetwo,namedprocess_namegroup_cpu_seconds_total,
324+
rdse2.namedprocess_namegroup_thread_cpu_seconds_total,count,30,second,,Shard main thread CPU time spent in seconds,1,redis_enterprise_veetwo,namedprocess_namegroup_thread_cpu_seconds_total,
325+
rdse2.namedprocess_namegroup_open_filedesc,gauge,30,inode,,Shard number of open file descriptors,1,redis_enterprise_veetwo,namedprocess_namegroup_open_filedesc,
326+
rdse2.namedprocess_namegroup_memory_bytes,gauge,30,byte,,Shard memory size in bytes,1,redis_enterprise_veetwo,namedprocess_namegroup_memory_bytes,
327+
rdse2.namedprocess_namegroup_oldest_start_time_seconds,gauge,30,second,,Shard start time of the process since unix epoch in seconds,1,redis_enterprise_veetwo,namedprocess_namegroup_oldest_start_time_seconds,
328+
rdse2.redis_server_rdb_bgsave_in_progress,gauge,30,status,,Indication if bgsave is currently in progress,1,redis_enterprise_veetwo,redis_server_rdb_bgsave_in_progress,
329+
rdse2.redis_server_rdb_last_cow_size,gauge,30,byte,,Last bgsave (or SYNC fork) used CopyOnWrite memory,1,redis_enterprise_veetwo,redis_server_rdb_last_cow_size,
330+
rdse2.redis_server_rdb_saves,gauge,30,event,,Total count of bgsaves since the process was restarted (including replica fullsync and persistence),1,redis_enterprise_veetwo,redis_server_rdb_saves,
331+
rdse2.redis_server_repl_touch_bytes,gauge,30,byte,,Number of bytes sent to replicas as TOUCH commands by the shard as a result of a READ command that was processed; calculate the throughput for a time period by comparing the value at different times,1,redis_enterprise_veetwo,redis_server_repl_touch_bytes,
332+
rdse2.redis_server_total_commands_processed,gauge,30,command,,Number of commands processed by the shard; calculate the number of commands for a time period by comparing the value at different times,1,redis_enterprise_veetwo,redis_server_total_commands_processed,
333+
rdse2.redis_server_total_connections_received,gauge,30,connection,,Number of connections received by the shard; calculate the number of connections for a time period by comparing the value at different times,1,redis_enterprise_veetwo,redis_server_total_connections_received,
334+
rdse2.redis_server_total_net_input_bytes,gauge,30,byte,,Number of bytes received by the shard; calculate the throughput for a time period by comparing the value at different times,1,redis_enterprise_veetwo,redis_server_total_net_input_bytes,
335+
rdse2.redis_server_total_net_output_bytes,gauge,30,byte,,Number of bytes sent by the shard; calculate the throughput for a time period by comparing the value at different times,1,redis_enterprise_veetwo,redis_server_total_net_output_bytes,
336+
rdse2.redis_server_up,gauge,30,status,,Shard is up and running,1,redis_enterprise_veetwo,redis_server_up,
337+
rdse2.redis_server_used_memory,gauge,30,byte,,Memory used by shard (in BigRedis this includes flash) (bytes),1,redis_enterprise_veetwo,redis_server_used_memory,
288338
rdse2.statsd_exporter_build_info,gauge,30,second,,"A metric with a constant '1' value labeled by version, revision, branch, and goversion from which statsd_exporter was built.",1,redis_enterprise_veetwo,statsd_exporter_build_info,
289339
rdse2.statsd_exporter_event_queue_flushed_total,gauge,30,event,,Number of times events were flushed to exporter,1,redis_enterprise_veetwo,statsd_exporter_event_queue_flushed_total,
290340
rdse2.statsd_exporter_events_total,gauge,30,event,,The total number of StatsD events seen.,1,redis_enterprise_veetwo,statsd_exporter_events_total,

0 commit comments

Comments
 (0)