diff --git a/src/current/_data/v25.2/metrics/metrics.yml b/src/current/_data/v25.2/metrics/metrics-cloud.yml similarity index 100% rename from src/current/_data/v25.2/metrics/metrics.yml rename to src/current/_data/v25.2/metrics/metrics-cloud.yml diff --git a/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.csv b/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.csv deleted file mode 100644 index 1cd86aace0a..00000000000 --- a/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.csv +++ /dev/null @@ -1,19 +0,0 @@ -metric_id,description,y-axis label,type,unit -"security.certificate.expiration.ca","Expiration for the CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.client-ca","Expiration for the client CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.client","Minimum expiration for client certificates, labeled by SQL user. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.ui-ca","Expiration for the UI CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.node","Expiration for the node certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.node-client","Expiration for the node's client certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.ui","Expiration for the UI certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.ca-client-tenant","Expiration for the Tenant Client CA certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.expiration.client-tenant","Expiration for the Tenant Client certificate. 0 means no certificate or error.","Certificate Expiration",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ca","Seconds till expiration for the CA certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.client-ca","Seconds till expiration for the client CA certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.client","Seconds till expiration for the client certificates, labeled by SQL user. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ui-ca","Seconds till expiration for the UI CA certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.node","Seconds till expiration for the node certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.node-client","Seconds till expiration for the node's client certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ui","Seconds till expiration for the UI certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.ca-client-tenant","Seconds till expiration for the Tenant Client CA certificate. 
0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC -"security.certificate.ttl.client-tenant","Seconds till expiration for the Tenant Client certificate. 0 means expired, no certificate or error.","Certificate TTL",GAUGE,TIMESTAMP_SEC \ No newline at end of file diff --git a/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.yaml b/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.yaml new file mode 100644 index 00000000000..2467c98e316 --- /dev/null +++ b/src/current/_data/v25.3/metrics/available-metrics-not-in-metrics-list.yaml @@ -0,0 +1,133 @@ +- metric_id: seconds.until.enterprise.license.expiry + description: Seconds until enterprise license expiry (0 if no license present or running without enterprise features) + y-axis label: License Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ca + labeled_name: 'security.certificate.expiration{certificate_type=ca}' + description: Expiration for the CA certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.client-ca + labeled_name: 'security.certificate.expiration{certificate_type=client-ca}' + description: Expiration for the client CA certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.client + labeled_name: 'security.certificate.expiration{certificate_type=client}' + description: Minimum expiration for client certificates, labeled by SQL user. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ui-ca + labeled_name: 'security.certificate.expiration{certificate_type=ui-ca}' + description: Expiration for the UI CA certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.node + labeled_name: 'security.certificate.expiration{certificate_type=node}' + description: Expiration for the node certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.node-client + labeled_name: 'security.certificate.expiration{certificate_type=node-client}' + description: Expiration for the node's client certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ui + labeled_name: 'security.certificate.expiration{certificate_type=ui}' + description: Expiration for the UI certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.ca-client-tenant + labeled_name: 'security.certificate.expiration{certificate_type=ca-client-tenant}' + description: Expiration for the Tenant Client CA certificate. 0 means no certificate or error. 
+ y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.expiration.client-tenant + labeled_name: 'security.certificate.expiration{certificate_type=client-tenant}' + description: Expiration for the Tenant Client certificate. 0 means no certificate or error. + y-axis label: Certificate Expiration + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. + essential: true +- metric_id: security.certificate.ttl.ca + description: Seconds till expiration for the CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.client-ca + description: Seconds till expiration for the client CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.client + description: Seconds till expiration for the client certificates, labeled by SQL user. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.ui-ca + description: Seconds till expiration for the UI CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.node + description: Seconds till expiration for the node certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.node-client + description: Seconds till expiration for the node's client certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.ui + description: Seconds till expiration for the UI certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.ca-client-tenant + description: Seconds till expiration for the Tenant Client CA certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. +- metric_id: security.certificate.ttl.client-tenant + description: Seconds till expiration for the Tenant Client certificate. 0 means expired, no certificate or error. + y-axis label: Certificate TTL + type: GAUGE + unit: TIMESTAMP_SEC + how_to_use: See Description. diff --git a/src/current/_data/v25.3/metrics/datadog-cockroachdb.csv b/src/current/_data/v25.3/metrics/datadog-cockroachdb.csv new file mode 100644 index 00000000000..fcca1607102 --- /dev/null +++ b/src/current/_data/v25.3/metrics/datadog-cockroachdb.csv @@ -0,0 +1,2779 @@ +prefix,datadog_id,type,description +cockroachdb,abortspanbytes,gauge,"Number of bytes in the abort span +Shown as byte" +cockroachdb,addsstable.applications,count,"[OpenMetrics v1] Number of SSTable ingestions applied (i.e. applied by Replicas) +Shown as operation" +cockroachdb,addsstable.applications.count,count,"[OpenMetrics v2] Number of SSTable ingestions applied (i.e. 
applied by Replicas) +Shown as operation" +cockroachdb,addsstable.copies,count,"[OpenMetrics v1] number of SSTable ingestions that required copying files during application +Shown as operation" +cockroachdb,addsstable.copies.count,count,"[OpenMetrics v2] number of SSTable ingestions that required copying files during application +Shown as operation" +cockroachdb,addsstable.delay.count,count,"Amount by which evaluation of AddSSTable requests was delayed +Shown as nanosecond" +cockroachdb,addsstable.delay.enginebackpressure.count,count,"Amount by which evaluation of AddSSTable requests was delayed by storage-engine backpressure +Shown as nanosecond" +cockroachdb,addsstable.proposals,count,"[OpenMetrics v1] Number of SSTable ingestions proposed (i.e. sent to Raft by lease holders) +Shown as operation" +cockroachdb,addsstable.proposals.count,count,"[OpenMetrics v2] Number of SSTable ingestions proposed (i.e. sent to Raft by lease holders) +Shown as operation" +cockroachdb,admission.admitted.elastic_cpu.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.elastic_cpu.count,count,Number of requests admitted +cockroachdb,admission.admitted.elastic_cpu.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv,count,"[OpenMetrics v1] Number of KV requests admitted +Shown as request" +cockroachdb,admission.admitted.kv.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv.count,count,"[OpenMetrics v2] Number of KV requests admitted +Shown as request" +cockroachdb,admission.admitted.kv.high_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores,count,"[OpenMetrics v1] Number of KV stores requests admitted +Shown as request" +cockroachdb,admission.admitted.kv_stores.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.count,count,"[OpenMetrics v2] Number of KV stores requests admitted +Shown as request" +cockroachdb,admission.admitted.kv_stores.high_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.kv_stores.ttl_low_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql.leaf.start,count,"[OpenMetrics v1] Number of SQL leaf start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql.leaf.start.count,count,"[OpenMetrics v2] Number of SQL leaf start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql.root.start,count,"[OpenMetrics v1] Number of SQL root start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql.root.start.count,count,"[OpenMetrics v2] Number of SQL root start requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_kv.response,count,"[OpenMetrics v1] Number of SQL KV response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_kv.response.count,count,"[OpenMetrics v2] Number of SQL KV response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_kv_response.locking_normal_pri.count,count,Number of requests admitted 
+cockroachdb,admission.admitted.sql_kv_response.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_leaf_start.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_leaf_start.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_leaf_start.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_root_start.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_root_start.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_root_start.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_sql.response,count,"[OpenMetrics v1] Number of Distributed SQL response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_sql.response.count,count,"[OpenMetrics v2] Number of Distributed SQL response requests admitted +Shown as request" +cockroachdb,admission.admitted.sql_sql_response.locking_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.admitted.sql_sql_response.normal_pri.count,count,Number of requests admitted +cockroachdb,admission.elastic_cpu.acquired_nanos.count,count,"Total CPU nanoseconds acquired by elastic work +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.available_nanos,gauge,"Instantaneous available CPU nanoseconds per second ignoring utilization limit +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.max_available_nanos.count,count,"Maximum available CPU nanoseconds per second ignoring utilization limit +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.nanos_exhausted_duration,gauge,"Total duration when elastic CPU nanoseconds were exhausted, in micros" +cockroachdb,admission.elastic_cpu.over_limit_durations.bucket,count,"Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don’t run over) +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.over_limit_durations.count,count,"Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don’t run over) +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.over_limit_durations.sum,count,"Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don’t run over) +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.pre_work_nanos.count,count,"Total CPU nanoseconds spent doing pre-work, before doing elastic work +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.returned_nanos.count,count,"Total CPU nanoseconds returned by elastic work +Shown as nanosecond" +cockroachdb,admission.elastic_cpu.utilization,gauge,"CPU utilization by elastic work +Shown as percent" +cockroachdb,admission.elastic_cpu.utilization_limit,gauge,"Utilization limit set for the elastic CPU work +Shown as percent" +cockroachdb,admission.errored.elastic_cpu.bulk_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.elastic_cpu.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.elastic_cpu.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.errored.kv.count,count,"[OpenMetrics v1] Number of KV requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.kv.countt,count,"[OpenMetrics v2] Number of KV requests not admitted 
due to error +Shown as request" +cockroachdb,admission.errored.kv.high_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.bulk_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.count,count,"[OpenMetrics v1] Number of KV stores requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.kv_stores.countt,count,"[OpenMetrics v2] Number of KV stores requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.kv_stores.high_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.kv_stores.ttl_low_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql.leaf.start,count,"[OpenMetrics v1] Number of SQL leaf start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql.leaf.start.count,count,"[OpenMetrics v2] Number of SQL leaf start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql.root.start,count,"[OpenMetrics v1] Number of SQL root start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql.root.start.count,count,"[OpenMetrics v2] Number of SQL root start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_kv.response,count,"[OpenMetrics v1] Number of SQL KV requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_kv.response.count,count,"[OpenMetrics v2] Number of SQL KV requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_kv_response.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_kv_response.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_leaf_start.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_leaf_start.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_leaf_start.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_root_start.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_root_start.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_root_start.normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_sql.response,count,"[OpenMetrics v1] Number of Distributed SQL requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_sql.response.count,count,"[OpenMetrics v2] Number of Distributed SQL start requests not admitted due to error +Shown as request" +cockroachdb,admission.errored.sql_sql_response.locking_normal_pri.count,count,Number of requests not admitted due to error +cockroachdb,admission.errored.sql_sql_response.normal_pri.count,count,Number of requests not admitted due 
to error +cockroachdb,admission.granter.cpu_load_long_period_duration.kv.count,count,"Total duration when CPULoad was being called with a long period, in micros" +cockroachdb,admission.granter.cpu_load_short_period_duration.kv.count,count,"Total duration when CPULoad was being called with a short period, in micros" +cockroachdb,admission.granter.elastic_io_tokens_available.kv,gauge,Number of tokens available +cockroachdb,admission.granter.io.tokens.exhausted.duration.kv,count,"[OpenMetrics v1] Total duration when IO tokens were exhausted, in micros +Shown as microsecond" +cockroachdb,admission.granter.io.tokens.exhausted.duration.kv.count,count,"[OpenMetrics v2] Total duration when IO tokens were exhausted, in micros +Shown as microsecond" +cockroachdb,admission.granter.io_tokens_available.kv,gauge,Number of tokens available +cockroachdb,admission.granter.io_tokens_bypassed.kv.count,count,"Total number of tokens taken by work bypassing admission control (for example, follower writes without flow control)" +cockroachdb,admission.granter.io_tokens_exhausted_duration.kv.count,count,"Total duration when IO tokens were exhausted, in micros" +cockroachdb,admission.granter.io_tokens_returned.kv.count,count,Total number of tokens returned +cockroachdb,admission.granter.io_tokens_taken.kv.count,count,Total number of tokens taken +cockroachdb,admission.granter.slot_adjuster_decrements.kv.count,count,Number of decrements of the total KV slots +cockroachdb,admission.granter.slot_adjuster_increments.kv.count,count,Number of increments of the total KV slots +cockroachdb,admission.granter.slots_exhausted_duration.kv.count,count,"Total duration when KV slots were exhausted, in micros" +cockroachdb,admission.granter.total.slots.kv,gauge,[OpenMetrics v1 & v2] Total slots for KV work +cockroachdb,admission.granter.total_slots.kv,gauge,Total slots for kv work +cockroachdb,admission.granter.used.slots.kv,gauge,[OpenMetrics v1 & v2] Used slots for KV work +cockroachdb,admission.granter.used.slots.sql.leaf.start,gauge,[OpenMetrics v1 & v2] Used slots for SQL leaf start work +cockroachdb,admission.granter.used.slots.sql.root.start,gauge,[OpenMetrics v1 & v2] Used slots for SQL root start work +cockroachdb,admission.granter.used_slots.kv,gauge,Used slots +cockroachdb,admission.granter.used_slots.sql_leaf_start,gauge,Used slots +cockroachdb,admission.granter.used_slots.sql_root_start,gauge,Used slots +cockroachdb,admission.io.overload,gauge,1-normalized float indicating whether IO admission control considers the store as overloaded with respect to compaction out of L0 (considers sub-level and file counts). +cockroachdb,admission.l0_compacted_bytes.kv.count,count,Total bytes compacted out of L0 (used to generate IO tokens) +cockroachdb,admission.l0_tokens_produced.kv.count,count,Total number of generated tokens of L0 +cockroachdb,admission.raft.paused_replicas,gauge,"Number of followers (i.e. Replicas) to which replication is currently paused to help them recover from I/O overload. Such Replicas will be ignored for the purposes of proposal quota, and will not receive replication traffic. They are essentially treated as offline for the purpose of replication. This serves as a crude form of admission control. The count is emitted by the leaseholder of each range." +cockroachdb,admission.raft.paused_replicas_dropped_msgs.count,count,Number of messages dropped instead of being sent to paused replicas. The messages are dropped to help these replicas to recover from I/O overload. 
+cockroachdb,admission.requested.elastic_cpu.bulk_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.elastic_cpu.count,count,Number of requests +cockroachdb,admission.requested.elastic_cpu.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv,count,"[OpenMetrics v1] Number of KV admission requests +Shown as request" +cockroachdb,admission.requested.kv.bulk_normal_pri.count,count,Number of requests admitted +cockroachdb,admission.requested.kv.count,count,"[OpenMetrics v2] Number of KV admission requests +Shown as request" +cockroachdb,admission.requested.kv.high_pri.count,count,Number of requests +cockroachdb,admission.requested.kv.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.bulk_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.count,count,"[OpenMetrics v2] Number of KV stores admission requests +Shown as request" +cockroachdb,admission.requested.kv_stores.high_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.kv_stores.ttl_low_pri.count,count,Number of requests +cockroachdb,admission.requested.sql.leaf.start,count,"[OpenMetrics v1] Number of SQL leaf start admission requests +Shown as request" +cockroachdb,admission.requested.sql.leaf.start.count,count,"[OpenMetrics v2] Number of SQL leaf start admission requests +Shown as request" +cockroachdb,admission.requested.sql_kv.response,count,"[OpenMetrics v1] Number of SQL KV admission requests +Shown as request" +cockroachdb,admission.requested.sql_kv.response.count,count,"[OpenMetrics v2] Number of SQL KV admission requests +Shown as request" +cockroachdb,admission.requested.sql_kv_response.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_kv_response.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_leaf_start.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_leaf_start.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_root_start.count,count,Number of requests +cockroachdb,admission.requested.sql_root_start.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_root_start.normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_sql.response,count,"[OpenMetrics v1] Number of Distributed SQL admission requests +Shown as request" +cockroachdb,admission.requested.sql_sql.response.count,count,"[OpenMetrics v2] Number of Distributed SQL admission requests +Shown as request" +cockroachdb,admission.requested.sql_sql_response.locking_normal_pri.count,count,Number of requests +cockroachdb,admission.requested.sql_sql_response.normal_pri.count,count,Number of requests +cockroachdb,admission.scheduler_latency_listener.p99_nanos,gauge,"The scheduling latency at p99 as observed by the scheduler latency listener +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv,gauge,"[OpenMetrics v1] Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv.bucket,count,"[OpenMetrics v2] Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv.count,count,"[OpenMetrics v2] 
Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv.sum,count,"[OpenMetrics v2] Wait time durations for KV requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores,gauge,"[OpenMetrics v1] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores.bucket,count,"[OpenMetrics v2] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores.count,count,"[OpenMetrics v2] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.kv_stores.sum,count,"[OpenMetrics v2] Wait time durations for KV stores requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start,gauge,"[OpenMetrics v1] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start.bucket,count,"[OpenMetrics v2] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start.count,count,"[OpenMetrics v2] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql.leaf.start.sum,count,"[OpenMetrics v2] Wait time durations for SQL leaf start requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response,gauge,"[OpenMetrics v1] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response.bucket,count,"[OpenMetrics v2] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response.count,count,"[OpenMetrics v2] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_kv.response.sum,count,"[OpenMetrics v2] Wait time durations for SQL KV response requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response,gauge,"[OpenMetrics v1] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response.bucket,count,"[OpenMetrics v2] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response.count,count,"[OpenMetrics v2] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.durations.sql_sql.response.sum,count,"[OpenMetrics v2] Wait time durations for Distributed SQL requests that waited +Shown as nanosecond" +cockroachdb,admission.wait.queue.length.kv,gauge,[OpenMetrics v1 & v2] Length of KV wait queue +cockroachdb,admission.wait.queue.length.kv_stores,gauge,[OpenMetrics v1 & v2] Length of KV stores wait queue +cockroachdb,admission.wait.queue.length.sql.leaf.start,gauge,[OpenMetrics v1 & v2] Length of SQL leaf start wait queue +cockroachdb,admission.wait.queue.length.sql_kv.response,gauge,[OpenMetrics v1 & v2] Length of SQL KV wait queue +cockroachdb,admission.wait.queue.length.sql_sql.response,gauge,[OpenMetrics v1 & v2] Length of Distributed SQL wait queue +cockroachdb,admission.wait.queue.lengths.sql.root.start,gauge,[OpenMetrics v1 & v2] Length of SQL root start wait queue +cockroachdb,admission.wait.sum.kv,count,"[OpenMetrics v1] Total KV 
wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.kv.count,count,"[OpenMetrics v2] Total KV wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.kv_stores,count,"[OpenMetrics v1] Total KV stores wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.kv_stores.count,count,"[OpenMetrics v2] Total KV stores wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql.root.start,count,"[OpenMetrics v1] Total SQL root start wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql.root.start.count,count,"[OpenMetrics v2] Total SQL root start wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_kv.response,count,"[OpenMetrics v1] Total SQL KV wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_kv.response.count,count,"[OpenMetrics v2] Total SQL KV wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_sql.response,count,"[OpenMetrics v1] Total Distributed SQL wait time in micros +Shown as microsecond" +cockroachdb,admission.wait.sum.sql_sql.response.count,count,"[OpenMetrics v2] Total Distributed SQL wait time in micros +Shown as microsecond" +cockroachdb,admission.wait_durations.elastic_cpu.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.bulk_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.bulk_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.bulk_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.elastic_cpu.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.bulk_normal_pri,count,Number of requests admitted +cockroachdb,admission.wait_durations.kv.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.high_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.high_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.high_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" 
+cockroachdb,admission.wait_durations.kv.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bulk_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bulk_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.bulk_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.high_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.high_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.high_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.ttl_low_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.ttl_low_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.kv_stores.ttl_low_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.locking_normal_pri.bucket,count,"Wait time durations for requests that waited 
+Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_kv_response.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_leaf_start.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_root_start.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.bucket,count,"Wait time durations for 
requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.locking_normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.locking_normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.locking_normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.normal_pri.bucket,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.normal_pri.count,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.normal_pri.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_durations.sql_sql_response.sum,count,"Wait time durations for requests that waited +Shown as nanosecond" +cockroachdb,admission.wait_queue_length.elastic_cpu,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.elastic_cpu.bulk_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.elastic_cpu.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv.bulk_normal_pri,count,Number of requests admitted +cockroachdb,admission.wait_queue_length.kv.high_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.bulk_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.high_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.kv_stores.ttl_low_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_kv_response,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_kv_response.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_kv_response.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_leaf_start,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_leaf_start.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_leaf_start.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_root_start,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_root_start.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_root_start.normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_sql_response,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_sql_response.locking_normal_pri,gauge,Length of wait queue +cockroachdb,admission.wait_queue_length.sql_sql_response.normal_pri,gauge,Length of wait queue 
+cockroachdb,backup.last_failed_time.kms_inaccessible,gauge,The unix timestamp of the most recent failure of backup due to errKMSInaccessible by a backup specified as maintaining this metric +cockroachdb,batch_requests.bytes.count,count,"Total byte count of batch requests processed +Shown as byte" +cockroachdb,batch_requests.cross_region.bytes.count,count,"Total byte count of batch requests processed cross region when region tiers are configured +Shown as byte" +cockroachdb,batch_requests.cross_zone.bytes.count,count,"Total byte count of batch requests processed cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data sent between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. +Shown as byte" +cockroachdb,batch_responses.bytes.count,count,"Total byte count of batch responses received +Shown as byte" +cockroachdb,batch_responses.cross_region.bytes.count,count,"Total byte count of batch responses received cross region when region tiers are configured +Shown as byte" +cockroachdb,batch_responses.cross_zone.bytes.count,count,"Total byte count of batch responses received cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data received between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. +Shown as byte" +cockroachdb,build.timestamp,gauge,"[OpenMetrics v1 & v2] Build information +Shown as time" +cockroachdb,capacity.available,gauge,"[OpenMetrics v1 & v2] Available storage capacity +Shown as byte" +cockroachdb,capacity.reserved,gauge,"[OpenMetrics v1 & v2] Capacity reserved for snapshots +Shown as byte" +cockroachdb,capacity.total,gauge,"[OpenMetrics v1 & v2] Total storage capacity +Shown as byte" +cockroachdb,capacity.used,gauge,"[OpenMetrics v1 & v2] Used storage capacity +Shown as byte" +cockroachdb,changefeed.admit.latency,gauge,"[OpenMetrics v1] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.admit.latency.bucket,count,"[OpenMetrics v2] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.admit.latency.count,count,"[OpenMetrics v2] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.admit.latency.sum,count,"[OpenMetrics v2] Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline +Shown as nanosecond" +cockroachdb,changefeed.aggregator_progress,gauge,The earliest timestamp up to which any aggregator is guaranteed to have emitted all values for +cockroachdb,changefeed.backfill,gauge,[OpenMetrics v1 & v2] Number of changefeeds currently executing backfill +cockroachdb,changefeed.backfill.pending.ranges,gauge,[OpenMetrics v1 & v2] Number of ranges in an ongoing backfill that are yet to be fully emitted +cockroachdb,changefeed.backfill_count,gauge,Number of changefeeds currently executing backfill +cockroachdb,changefeed.batch_reduction_count,gauge,Number of times a changefeed aggregator node 
attempted to reduce the size of message batches it emitted to the sink +cockroachdb,changefeed.buffer_entries.allocated_mem,gauge,"Current quota pool memory allocation +Shown as byte" +cockroachdb,changefeed.buffer_entries.flush.count,count,Number of flush elements added to the buffer +cockroachdb,changefeed.buffer_entries.in.count,count,Total entries entering the buffer between raft and changefeed sinks +cockroachdb,changefeed.buffer_entries.kv.count,count,Number of kv elements added to the buffer +cockroachdb,changefeed.buffer_entries.out.count,count,Total entries leaving the buffer between raft and changefeed sinks +cockroachdb,changefeed.buffer_entries.released.count,count,"Total entries processed, emitted and acknowledged by the sinks" +cockroachdb,changefeed.buffer_entries.resolved.count,count,Number of resolved elements added to the buffer +cockroachdb,changefeed.buffer_entries_mem.acquired.count,count,Total amount of memory acquired for entries as they enter the system +cockroachdb,changefeed.buffer_entries_mem.released.count,count,Total amount of memory released by the entries after they have been emitted +cockroachdb,changefeed.buffer_pushback.count,count,"Total time spent waiting while the buffer was full +Shown as nanosecond" +cockroachdb,changefeed.bytes.messages_pushback.count,count,"Total time spent throttled for bytes quota +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_hist_nanos.bucket,count,"Time spent checkpointing changefeed progress +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_hist_nanos.count,count,"Time spent checkpointing changefeed progress +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_hist_nanos.sum,count,"Time spent checkpointing changefeed progress +Shown as nanosecond" +cockroachdb,changefeed.checkpoint_progress,gauge,The earliest timestamp of any changefeed’s persisted checkpoint (values prior to this timestamp will never need to be re-emitted) +cockroachdb,changefeed.cloudstorage_buffered_bytes,gauge,The number of bytes buffered in cloudstorage sink files which have not been emitted yet +cockroachdb,changefeed.commit.latency,gauge,"[OpenMetrics v1] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.commit.latency.bucket,count,"[OpenMetrics v2] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.commit.latency.count,count,"[OpenMetrics v2] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.commit.latency.sum,count,"[OpenMetrics v2] Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink +Shown as nanosecond" +cockroachdb,changefeed.emitted.messages,count,[OpenMetrics v1] Messages emitted by all feeds +cockroachdb,changefeed.emitted.messages.count,count,[OpenMetrics v2] Messages emitted by all feeds +cockroachdb,changefeed.emitted_bytes,count,"Bytes emitted by all feeds +Shown as byte" +cockroachdb,changefeed.emitted_bytes.count,count,Bytes emitted by all feeds +cockroachdb,changefeed.emitted_messages,count,Messages emitted by all feeds +cockroachdb,changefeed.error.retries,count,[OpenMetrics v1] Total retryable errors encountered by all changefeeds +cockroachdb,changefeed.error.retries.count,count,[OpenMetrics v2] Total 
retryable errors encountered by all changefeeds +cockroachdb,changefeed.error_retries,count,Total retryable errors encountered by all changefeeds +cockroachdb,changefeed.failures,count,[OpenMetrics v1] Total number of changefeed jobs which have failed +cockroachdb,changefeed.failures.count,count,[OpenMetrics v2] Total number of changefeed jobs which have failed +cockroachdb,changefeed.filtered_messages.count,count,Messages filtered out by all feeds. This count does not include the number of messages that may be filtered due to the range constraints. +cockroachdb,changefeed.flush.messages_pushback.count,count,"Total time spent throttled for flush quota +Shown as nanosecond" +cockroachdb,changefeed.flush_hist_nanos.bucket,count,"Time spent flushing messages across all changefeeds +Shown as nanosecond" +cockroachdb,changefeed.flush_hist_nanos.count,count,"Time spent flushing messages across all changefeeds +Shown as nanosecond" +cockroachdb,changefeed.flush_hist_nanos.sum,count,"Time spent flushing messages across all changefeeds +Shown as nanosecond" +cockroachdb,changefeed.flushed_bytes.count,count,"Bytes emitted by all feeds; maybe different from changefeed.emitted_bytes when compression is enabled +Shown as byte" +cockroachdb,changefeed.flushes.count,count,Total flushes across all feeds +cockroachdb,changefeed.forwarded_resolved_messages.count,count,Resolved timestamps forwarded from the change aggregator to the change frontier +cockroachdb,changefeed.frontier_updates.count,count,Number of change frontier updates across all feeds +cockroachdb,changefeed.internal_retry_message,gauge,Number of messages for which an attempt to retry them within an aggregator node was made +cockroachdb,changefeed.lagging_ranges,gauge,The number of ranges considered to be lagging behind +cockroachdb,changefeed.max.behind.nanos,gauge,[OpenMetrics v1 & v2] Largest commit-to-emit duration of any running feed +cockroachdb,changefeed.max_behind_nanos,gauge,"(Deprecated in favor of checkpoint_progress) The most any changefeed’s persisted checkpoint is behind the present +Shown as nanosecond" +cockroachdb,changefeed.message.size.hist,gauge,[OpenMetrics v1] Message size histogram +cockroachdb,changefeed.message.size.hist.bucket,count,[OpenMetrics v2] Message size histogram +cockroachdb,changefeed.message.size.hist.count,count,[OpenMetrics v2] Message size histogram +cockroachdb,changefeed.message.size.hist.sum,count,[OpenMetrics v2] Message size histogram +cockroachdb,changefeed.message_size_hist.bucket,count,"Message size histogram +Shown as byte" +cockroachdb,changefeed.message_size_hist.count,count,"Message size histogram +Shown as byte" +cockroachdb,changefeed.message_size_hist.sum,count,"Message size histogram +Shown as byte" +cockroachdb,changefeed.messages.messages_pushback.count,count,"Total time spent throttled for messages quota +Shown as nanosecond" +cockroachdb,changefeed.nprocs_consume_event_nanos.bucket,count,"Total time spent waiting to add an event to the parallel consumer +Shown as nanosecond" +cockroachdb,changefeed.nprocs_consume_event_nanos.count,count,"Total time spent waiting to add an event to the parallel consumer +Shown as nanosecond" +cockroachdb,changefeed.nprocs_consume_event_nanos.sum,count,"Total time spent waiting to add an event to the parallel consumer +Shown as nanosecond" +cockroachdb,changefeed.nprocs_flush_nanos.bucket,count,"Total time spent idle waiting for the parallel consumer to flush +Shown as nanosecond" +cockroachdb,changefeed.nprocs_flush_nanos.count,count,"Total time 
spent idle waiting for the parallel consumer to flush +Shown as nanosecond" +cockroachdb,changefeed.nprocs_flush_nanos.sum,count,"Total time spent idle waiting for the parallel consumer to flush +Shown as nanosecond" +cockroachdb,changefeed.nprocs_in_flight,gauge,Number of buffered events in the parallel consumer +cockroachdb,changefeed.parallel_io_queue_nanos.bucket,count,"Time spent with outgoing requests to the sink waiting in queue due to inflight requests with conflicting keys +Shown as nanosecond" +cockroachdb,changefeed.parallel_io_queue_nanos.count,count,"Time spent with outgoing requests to the sink waiting in queue due to inflight requests with conflicting keys +Shown as nanosecond" +cockroachdb,changefeed.parallel_io_queue_nanos.sum,count,"Time spent with outgoing requests to the sink waiting in queue due to inflight requests with conflicting keys +Shown as nanosecond" +cockroachdb,changefeed.queue_time.count,count,"Time KV event spent waiting to be processed +Shown as nanosecond" +cockroachdb,changefeed.running,gauge,"[OpenMetrics v1 & v2] Number of currently running changefeeds, including sinkless" +cockroachdb,changefeed.schema_registry.registrations.count,count,Number of registration attempts with the schema registry +cockroachdb,changefeed.schema_registry.retry.count,count,Number of retries encountered when sending requests to the schema registry +cockroachdb,changefeed.schemafeed.table_history_scans.count,count,The number of table history scans during polling +cockroachdb,changefeed.schemafeed.table_metadata.count,count,"Time blocked while verifying table metadata histories +Shown as nanosecond" +cockroachdb,changefeed.sink_batch_hist_nanos.bucket,count,"Time spent batched in the sink buffer before being flushed and acknowledged +Shown as nanosecond" +cockroachdb,changefeed.sink_batch_hist_nanos.count,count,"Time spent batched in the sink buffer before being flushed and acknowledged +Shown as nanosecond" +cockroachdb,changefeed.sink_batch_hist_nanos.sum,count,"Time spent batched in the sink buffer before being flushed and acknowledged +Shown as nanosecond" +cockroachdb,changefeed.sink_io_inflight,gauge,The number of keys currently inflight as IO requests being sent to the sink +cockroachdb,changefeed.size_based_flushes.count,count,Total size based flushes across all feeds +cockroachdb,clock.offset.meannanos,gauge,"[OpenMetrics v1 & v2] Mean clock offset with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,clock.offset.stddevnanos,gauge,"[OpenMetrics v1 & v2] Stddev clock offset with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,cloud.read_bytes.count,count,"Number of bytes read +Shown as byte" +cockroachdb,cloud.write_bytes.count,count,"Number of bytes written +Shown as byte" +cockroachdb,cluster.preserve_downgrade_option.last_updated,gauge,Timestamp of the last time the preserve_downgrade_option was updated +cockroachdb,compactor.compactingnanos,count,"[OpenMetrics v1] Number of nanoseconds spent compacting ranges +Shown as nanosecond" +cockroachdb,compactor.compactingnanos.count,count,"[OpenMetrics v2] Number of nanoseconds spent compacting ranges +Shown as nanosecond" +cockroachdb,compactor.compactions.failure,count,"[OpenMetrics v1] Number of failed compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.compactions.failure.count,count,"[OpenMetrics v2] Number of failed compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.compactions.success,count,"[OpenMetrics v1] Number 
of successful compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.compactions.success.count,count,"[OpenMetrics v2] Number of successful compaction requests sent to the storage engine +Shown as request" +cockroachdb,compactor.suggestionbytes.compacted,count,"[OpenMetrics v1] Number of logical bytes compacted from suggested compactions +Shown as byte" +cockroachdb,compactor.suggestionbytes.compacted.count,count,"[OpenMetrics v2] Number of logical bytes compacted from suggested compactions +Shown as byte" +cockroachdb,compactor.suggestionbytes.queued,gauge,"[OpenMetrics v1 & v2] Number of logical bytes in suggested compactions in the queue +Shown as byte" +cockroachdb,compactor.suggestionbytes.skipped,count,"[OpenMetrics v1] Number of logical bytes in suggested compactions which were not compacted +Shown as byte" +cockroachdb,compactor.suggestionbytes.skipped.count,count,"[OpenMetrics v2] Number of logical bytes in suggested compactions which were not compacted +Shown as byte" +cockroachdb,distsender.batch_requests.cross_region.bytes.count,count,"Total byte count of replica-addressed batch requests processed cross region when region tiers are configured +Shown as byte" +cockroachdb,distsender.batch_requests.cross_zone.bytes.count,count,"Total byte count of replica-addressed batch requests processed cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data sent between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. +Shown as byte" +cockroachdb,distsender.batch_requests.replica_addressed.bytes.count,count,"Total byte count of replica-addressed batch requests processed +Shown as byte" +cockroachdb,distsender.batch_responses.cross_region.bytes.count,count,"Total byte count of replica-addressed batch responses received cross region when region tiers are configured +Shown as byte" +cockroachdb,distsender.batch_responses.cross_zone.bytes.count,count,"Total byte count of replica-addressed batch responses received cross zone within the same region when region and zone tiers are configured. However, if the region tiers are not configured, this count may also include batch data received between different regions. Ensuring consistent configuration of region and zone tiers across nodes helps to accurately monitor the data transmitted. 
+Shown as byte" +cockroachdb,distsender.batch_responses.replica_addressed.bytes.count,count,"Total byte count of replica-addressed batch responses received +Shown as byte" +cockroachdb,distsender.batches.async.sent.count,count,Number of partial batches sent asynchronously +cockroachdb,distsender.batches.async.throttled.count,count,Number of partial batches not sent asynchronously due to throttling +cockroachdb,distsender.batches.count,count,Number of batches processed +cockroachdb,distsender.batches.partial,count,[OpenMetrics v1] Number of partial batches processed +cockroachdb,distsender.batches.partial.count,count,[OpenMetrics v2] Number of partial batches processed +cockroachdb,distsender.batches.total,count,[OpenMetrics v1] Number of batches processed +cockroachdb,distsender.batches.total.count,count,[OpenMetrics v2] Number of batches processed +cockroachdb,distsender.errors.inleasetransferbackoffs.count,count,Number of times backed off due to NotLeaseHolderErrors during lease transfer +cockroachdb,distsender.errors.notleaseholder,count,"[OpenMetrics v1] Number of NotLeaseHolderErrors encountered +Shown as error" +cockroachdb,distsender.errors.notleaseholder.count,count,"[OpenMetrics v2] Number of NotLeaseHolderErrors encountered +Shown as error" +cockroachdb,distsender.rangefeed.catchup_ranges,gauge,Number of ranges in catchup modeThis counts the number of ranges with an active rangefeed that are performing catchup scan. +cockroachdb,distsender.rangefeed.error_catchup_ranges.count,count,Number of ranges in catchup mode which experienced an error +cockroachdb,distsender.rangefeed.restart_ranges.count,count,Number of ranges that were restarted due to transient errors +cockroachdb,distsender.rangefeed.retry.logical_ops_missing.count,count,Number of ranges that encountered retryable LOGICAL_OPS_MISSING error +cockroachdb,distsender.rangefeed.retry.no_leaseholder.count,count,Number of ranges that encountered retryable NO_LEASEHOLDER error +cockroachdb,distsender.rangefeed.retry.node_not_found.count,count,Number of ranges that encountered retryable node not found error +cockroachdb,distsender.rangefeed.retry.raft_snapshot.count,count,Number of ranges that encountered retryable RAFT_SNAPSHOT error +cockroachdb,distsender.rangefeed.retry.range_key_mismatch.count,count,Number of ranges that encountered retryable range key mismatch error +cockroachdb,distsender.rangefeed.retry.range_merged.count,count,Number of ranges that encountered retryable RANGE_MERGED error +cockroachdb,distsender.rangefeed.retry.range_not_found.count,count,Number of ranges that encountered retryable range not found error +cockroachdb,distsender.rangefeed.retry.range_split.count,count,Number of ranges that encountered retryable RANGE_SPLIT error +cockroachdb,distsender.rangefeed.retry.rangefeed_closed.count,count,Number of ranges that encountered retryable RANGEFEED_CLOSED error +cockroachdb,distsender.rangefeed.retry.replica_removed.count,count,Number of ranges that encountered retryable REPLICA_REMOVED error +cockroachdb,distsender.rangefeed.retry.send.count,count,Number of ranges that encountered retryable send error +cockroachdb,distsender.rangefeed.retry.slow_consumer.count,count,Number of ranges that encountered retryable SLOW_CONSUMER error +cockroachdb,distsender.rangefeed.retry.store_not_found.count,count,Number of ranges that encountered retryable store not found error +cockroachdb,distsender.rangefeed.retry.stuck.count,count,Number of ranges that encountered retryable stuck error 
+cockroachdb,distsender.rangefeed.total_ranges,gauge,Number of ranges executing rangefeed. This counts the number of ranges with an active rangefeed. +cockroachdb,distsender.rangelookups.count,count,Number of range lookups +cockroachdb,distsender.rpc.addsstable.sent.count,count,"Number of AddSSTable requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminchangereplicas.sent.count,count,"Number of AdminChangeReplicas requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminmerge.sent.count,count,"Number of AdminMerge requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminrelocaterange.sent.count,count,"Number of AdminRelocateRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminscatter.sent.count,count,"Number of AdminScatter requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminsplit.sent.count,count,"Number of AdminSplit requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.admintransferlease.sent.count,count,"Number of AdminTransferLease requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminunsplit.sent.count,count,"Number of AdminUnsplit requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.adminverifyprotectedtimestamp.sent.count,count,"Number of AdminVerifyProtectedTimestamp requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.barrier.sent.count,count,"Number of Barrier requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.checkconsistency.sent.count,count,"Number of CheckConsistency requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.clearrange.sent.count,count,"Number of ClearRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.computechecksum.sent.count,count,"Number of ComputeChecksum requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.conditionalput.sent.count,count,"Number of ConditionalPut requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.delete.sent.count,count,"Number of Delete requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." 
+cockroachdb,distsender.rpc.deleterange.sent.count,count,"Number of DeleteRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.endtxn.sent.count,count,"Number of EndTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.err.ambiguousresulterrtype.count,count,Number of AmbiguousResultErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.batchtimestampbeforegcerrtype.count,count,Number of BatchTimestampBeforeGCErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.communicationerrtype.count,count,Number of CommunicationErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.conditionfailederrtype.count,count,Number of ConditionFailedErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.errordetailtype.count,count,Number of ErrorDetailType (tagged by their number) errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.indeterminatecommiterrtype.count,count,Number of IndeterminateCommitErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.integeroverflowerrtype.count,count,Number of IntegerOverflowErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. 
+cockroachdb,distsender.rpc.err.intentmissingerrtype.count,count,Number of IntentMissingErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.internalerrtype.count,count,Number of InternalErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.invalidleaseerrtype.count,count,Number of InvalidLeaseErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.leaserejectederrtype.count,count,Number of LeaseRejectedErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.lockconflicterrtype.count,count,Number of LockConflictErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.mergeinprogresserrtype.count,count,Number of MergeInProgressErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.mintimestampboundunsatisfiableerrtype.count,count,Number of MinTimestampBoundUnsatisfiableErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.mvcchistorymutationerrtype.count,count,Number of MVCCHistoryMutationErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. 
+cockroachdb,distsender.rpc.err.nodeunavailableerrtype.count,count,Number of NodeUnavailableErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.notleaseholdererrtype.count,count,Number of NotLeaseHolderErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.oprequirestxnerrtype.count,count,Number of OpRequiresTxnErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.optimisticevalconflictserrtype.count,count,Number of OptimisticEvalConflictsErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.raftgroupdeletederrtype.count,count,Number of RaftGroupDeletedErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.rangefeedretryerrtype.count,count,Number of RangeFeedRetryErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.rangekeymismatcherrtype.count,count,Number of RangeKeyMismatchErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.rangenotfounderrtype.count,count,Number of RangeNotFoundErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. 
+cockroachdb,distsender.rpc.err.readwithinuncertaintyintervalerrtype.count,count,Number of ReadWithinUncertaintyIntervalErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.refreshfailederrtype.count,count,Number of RefreshFailedErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.replicacorruptionerrtype.count,count,Number of ReplicaCorruptionErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.replicatooolderrtype.count,count,Number of ReplicaTooOldErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.storenotfounderrtype.count,count,Number of StoreNotFoundErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionabortederrtype.count,count,Number of TransactionAbortedErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionpusherrtype.count,count,Number of TransactionPushErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionretryerrtype.count,count,Number of TransactionRetryErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. 
+cockroachdb,distsender.rpc.err.transactionretrywithprotorefresherrtype.count,count,Number of TransactionRetryWithProtoRefreshErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.transactionstatuserrtype.count,count,Number of TransactionStatusErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.txnalreadyencounterederrtype.count,count,Number of TxnAlreadyEncounteredErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.unsupportedrequesterrtype.count,count,Number of UnsupportedRequestErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.writeintenterrtype.count,count,Number of WriteIntentErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.err.writetooolderrtype.count,count,Number of WriteTooOldErrType errors received replica-bound RPCs. This counts how often error of the specified type was received back from replicas as part of executing possibly range-spanning requests. Failures to reach the target replica will be accounted for as ‘roachpb.CommunicationErrType’ and unclassified errors as ‘roachpb.InternalErrType’. +cockroachdb,distsender.rpc.export.sent.count,count,"Number of Export requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.gc.sent.count,count,"Number of GC requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.get.sent.count,count,"Number of Get requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.heartbeattxn.sent.count,count,"Number of HeartbeatTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.increment.sent.count,count,"Number of Increment requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." 
+cockroachdb,distsender.rpc.initput.sent.count,count,"Number of InitPut requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.isspanempty.sent.count,count,"Number of IsSpanEmpty requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.leaseinfo.sent.count,count,"Number of LeaseInfo requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.merge.sent.count,count,"Number of Merge requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.migrate.sent.count,count,"Number of Migrate requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.probe.sent.count,count,"Number of Probe requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.pushtxn.sent.count,count,"Number of PushTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.put.sent.count,count,"Number of Put requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.queryintent.sent.count,count,"Number of QueryIntent requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.querylocks.sent.count,count,"Number of QueryLocks requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.queryresolvedtimestamp.sent.count,count,"Number of QueryResolvedTimestamp requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.querytxn.sent.count,count,"Number of QueryTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.rangestats.sent.count,count,"Number of RangeStats requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.recomputestats.sent.count,count,"Number of RecomputeStats requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.recovertxn.sent.count,count,"Number of RecoverTxn requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.refresh.sent.count,count,"Number of Refresh requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.refreshrange.sent.count,count,"Number of RefreshRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." 
+cockroachdb,distsender.rpc.requestlease.sent.count,count,"Number of RequestLease requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.resolveintent.sent.count,count,"Number of ResolveIntent requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.resolveintentrange.sent.count,count,"Number of ResolveIntentRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.reversescan.sent.count,count,"Number of ReverseScan requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.revertrange.sent.count,count,"Number of RevertRange requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.scan.sent.count,count,"Number of Scan requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.sent.count,count,Number of replica-addressed RPCs sent +cockroachdb,distsender.rpc.sent.local,count,[OpenMetrics v1] Number of local RPCs sent +cockroachdb,distsender.rpc.sent.local.count,count,[OpenMetrics v2] Number of local RPCs sent +cockroachdb,distsender.rpc.sent.nextreplicaerror,count,"[OpenMetrics v1] Number of RPCs sent due to per-replica errors +Shown as error" +cockroachdb,distsender.rpc.sent.nextreplicaerror.count,count,"[OpenMetrics v2] Number of RPCs sent due to per-replica errors +Shown as error" +cockroachdb,distsender.rpc.sent.total,count,[OpenMetrics v1] Number of RPCs sent +cockroachdb,distsender.rpc.sent.total.count,count,[OpenMetrics v2] Number of replica-addressed RPCs sent +cockroachdb,distsender.rpc.subsume.sent.count,count,"Number of Subsume requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.transferlease.sent.count,count,"Number of TransferLease requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.truncatelog.sent.count,count,"Number of TruncateLog requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,distsender.rpc.writebatch.sent.count,count,"Number of WriteBatch requests processed. This counts the requests in batches handed to DistSender, not the RPCs sent to individual Ranges as a result." +cockroachdb,exec.error,count,"[OpenMetrics v1] Number of batch KV requests that failed to execute on this node. These are warnings denoting cleanup rather than errors, and can be disregarded as part of operation. +Shown as request" +cockroachdb,exec.error.count,count,"[OpenMetrics v2] Number of batch KV requests that failed to execute on this node. These are warnings denoting cleanup rather than errors, and can be disregarded as part of operation. 
+Shown as request" +cockroachdb,exec.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of batch KV requests executed on this node +Shown as nanosecond" +cockroachdb,exec.success,count,"[OpenMetrics v1] Number of batch KV requests executed successfully on this node +Shown as request" +cockroachdb,exec.success.count,count,"[OpenMetrics v2] Number of batch KV requests executed successfully on this node +Shown as request" +cockroachdb,exportrequest.delay.count,count,Number of Export requests delayed due to concurrent requests. +cockroachdb,follower_reads.success_count.count,count,Number of successful follower reads +cockroachdb,gcbytesage,gauge,"[OpenMetrics v1 & v2] Cumulative age of non-live data in seconds +Shown as second" +cockroachdb,gossip.bytes.received,count,"[OpenMetrics v1] Number of received gossip bytes +Shown as byte" +cockroachdb,gossip.bytes.received.count,count,"[OpenMetrics v2] Number of received gossip bytes +Shown as byte" +cockroachdb,gossip.bytes.sent,count,"[OpenMetrics v1] Number of sent gossip bytes +Shown as byte" +cockroachdb,gossip.bytes.sent.count,count,"[OpenMetrics v2] Number of sent gossip bytes +Shown as byte" +cockroachdb,gossip.connections.incoming,gauge,"[OpenMetrics v1 & v2] Number of active incoming gossip connections +Shown as connection" +cockroachdb,gossip.connections.outgoing,gauge,"[OpenMetrics v1 & v2] Number of active outgoing gossip connections +Shown as connection" +cockroachdb,gossip.connections.refused,count,"[OpenMetrics v1] Number of refused incoming gossip connections +Shown as connection" +cockroachdb,gossip.connections.refused.count,count,"[OpenMetrics v2] Number of refused incoming gossip connections +Shown as connection" +cockroachdb,gossip.infos.received,count,[OpenMetrics v1] Number of received gossip Info objects +cockroachdb,gossip.infos.received.count,count,[OpenMetrics v2] Number of received gossip Info objects +cockroachdb,gossip.infos.sent,count,[OpenMetrics v1] Number of sent gossip Info objects +cockroachdb,gossip.infos.sent.count,count,[OpenMetrics v2] Number of sent gossip Info objects +cockroachdb,intentage,gauge,"[OpenMetrics v1 & v2] Cumulative age of intents in seconds +Shown as second" +cockroachdb,intentbytes,gauge,"[OpenMetrics v1 & v2] Number of bytes in intent KV pairs +Shown as byte" +cockroachdb,intentcount,gauge,"[OpenMetrics v1 & v2] Count of intent keys +Shown as key" +cockroachdb,intentresolver.async.throttled,count,Number of intent resolution attempts not run asynchronously due to throttling +cockroachdb,intentresolver.async.throttled.count,count,Number of intent resolution attempts not run asynchronously due to throttling +cockroachdb,intentresolver.finalized_txns.failed,count,Number of finalized transaction cleanup failures. Transaction cleanup refers to the process of resolving all of a transactions intents and then garbage collecting its transaction record. +cockroachdb,intentresolver.finalized_txns.failed.count,count,Number of finalized transaction cleanup failures. 
Transaction cleanup refers to the process of resolving all of a transactions intents and then garbage collecting its transaction record. +cockroachdb,intentresolver.intents.failed,count,"Number of intent resolution failures. The unit of measurement is a single intent, so if a batch of intent resolution requests fails, the metric will be incremented for each request in the batch." +cockroachdb,intentresolver.intents.failed.count,count,"Number of intent resolution failures. The unit of measurement is a single intent, so if a batch of intent resolution requests fails, the metric will be incremented for each request in the batch." +cockroachdb,intents.abort_attempts,count,Count of (point or range) non-poisoning intent abort evaluation attempts +cockroachdb,intents.abort_attempts.count,count,Count of (point or range) non-poisoning intent abort evaluation attempts +cockroachdb,intents.poison_attempts,count,Count of (point or range) poisoning intent abort evaluation attempts +cockroachdb,intents.poison_attempts.count,count,Count of (point or range) poisoning intent abort evaluation attempts +cockroachdb,intents.resolve_attempts,count,Count of (point or range) intent commit evaluation attempts +cockroachdb,intents.resolve_attempts.count,count,Count of (point or range) intent commit evaluation attempts +cockroachdb,jobs.adopt_iterations.count,count,number of job-adopt iterations performed by the registry +cockroachdb,jobs.auto.create.stats.currently_paused,gauge,Number of auto_create_stats jobs currently considered Paused +cockroachdb,jobs.auto.create.stats.currently_running,gauge,Number of auto_create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto.create.stats.resume_failed.count,count,Number of auto_create_stats jobs which failed with a non-retriable error +cockroachdb,jobs.auto_config_env_runner.currently_idle,gauge,Number of auto_config_env_runner jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_config_env_runner.currently_paused,gauge,Number of auto_config_env_runner jobs currently considered Paused +cockroachdb,jobs.auto_config_env_runner.currently_running,gauge,Number of auto_config_env_runner jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_config_env_runner.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_config_env_runner jobs +cockroachdb,jobs.auto_config_env_runner.fail_or_cancel_completed.count,count,Number of auto_config_env_runner jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_config_env_runner.fail_or_cancel_failed.count,count,Number of auto_config_env_runner jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_env_runner.fail_or_cancel_retry_error.count,count,Number of auto_config_env_runner jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_env_runner.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_config_env_runner jobs +Shown as second" +cockroachdb,jobs.auto_config_env_runner.protected_record_count,gauge,Number of protected timestamp records held by auto_config_env_runner jobs +cockroachdb,jobs.auto_config_env_runner.resume_completed.count,count,Number of auto_config_env_runner jobs which successfully resumed to completion +cockroachdb,jobs.auto_config_env_runner.resume_failed.count,count,Number of auto_config_env_runner jobs which failed 
with a non-retriable error +cockroachdb,jobs.auto_config_env_runner.resume_retry_error.count,count,Number of auto_config_env_runner jobs which failed with a retriable error +cockroachdb,jobs.auto_config_runner.currently_idle,gauge,Number of auto_config_runner jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_config_runner.currently_paused,gauge,Number of auto_config_runner jobs currently considered Paused +cockroachdb,jobs.auto_config_runner.currently_running,gauge,Number of auto_config_runner jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_config_runner.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_config_runner jobs +cockroachdb,jobs.auto_config_runner.fail_or_cancel_completed.count,count,Number of auto_config_runner jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_config_runner.fail_or_cancel_failed.count,count,Number of auto_config_runner jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_runner.fail_or_cancel_retry_error.count,count,Number of auto_config_runner jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_runner.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_config_runner jobs +Shown as second" +cockroachdb,jobs.auto_config_runner.protected_record_count,gauge,Number of protected timestamp records held by auto_config_runner jobs +cockroachdb,jobs.auto_config_runner.resume_completed.count,count,Number of auto_config_runner jobs which successfully resumed to completion +cockroachdb,jobs.auto_config_runner.resume_failed.count,count,Number of auto_config_runner jobs which failed with a non-retriable error +cockroachdb,jobs.auto_config_runner.resume_retry_error.count,count,Number of auto_config_runner jobs which failed with a retriable error +cockroachdb,jobs.auto_config_task.currently_idle,gauge,Number of auto_config_task jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_config_task.currently_paused,gauge,Number of auto_config_task jobs currently considered Paused +cockroachdb,jobs.auto_config_task.currently_running,gauge,Number of auto_config_task jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_config_task.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_config_task jobs +cockroachdb,jobs.auto_config_task.fail_or_cancel_completed.count,count,Number of auto_config_task jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_config_task.fail_or_cancel_failed.count,count,Number of auto_config_task jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_task.fail_or_cancel_retry_error.count,count,Number of auto_config_task jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_config_task.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_config_task jobs +Shown as second" +cockroachdb,jobs.auto_config_task.protected_record_count,gauge,Number of protected timestamp records held by auto_config_task jobs +cockroachdb,jobs.auto_config_task.resume_completed.count,count,Number of auto_config_task jobs which successfully resumed to completion +cockroachdb,jobs.auto_config_task.resume_failed.count,count,Number of 
auto_config_task jobs which failed with a non-retriable error +cockroachdb,jobs.auto_config_task.resume_retry_error.count,count,Number of auto_config_task jobs which failed with a retriable error +cockroachdb,jobs.auto_create_stats.currently_idle,gauge,Number of auto_create_stats jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_create_stats.currently_paused,gauge,Number of auto_create_stats jobs currently considered Paused +cockroachdb,jobs.auto_create_stats.currently_running,gauge,Number of auto_create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_create_stats.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_create_stats jobs +cockroachdb,jobs.auto_create_stats.fail_or_cancel_completed.count,count,Number of auto_create_stats jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_create_stats.fail_or_cancel_failed.count,count,Number of auto_create_stats jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_create_stats.fail_or_cancel_retry_error.count,count,Number of auto_create_stats jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_create_stats.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_create_stats jobs +Shown as second" +cockroachdb,jobs.auto_create_stats.protected_record_count,gauge,Number of protected timestamp records held by auto_create_stats jobs +cockroachdb,jobs.auto_create_stats.resume_completed.count,count,Number of auto_create_stats jobs which successfully resumed to completion +cockroachdb,jobs.auto_create_stats.resume_failed.count,count,Number of auto_create_stats jobs which failed with a non-retriable error +cockroachdb,jobs.auto_create_stats.resume_retry_error.count,count,Number of auto_create_stats jobs which failed with a retriable error +cockroachdb,jobs.auto_schema_telemetry.currently_idle,gauge,Number of auto_schema_telemetry jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_schema_telemetry.currently_paused,gauge,Number of auto_schema_telemetry jobs currently considered Paused +cockroachdb,jobs.auto_schema_telemetry.currently_running,gauge,Number of auto_schema_telemetry jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_schema_telemetry.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_schema_telemetry jobs +cockroachdb,jobs.auto_schema_telemetry.fail_or_cancel_completed.count,count,Number of auto_schema_telemetry jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_schema_telemetry.fail_or_cancel_failed.count,count,Number of auto_schema_telemetry jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_schema_telemetry.fail_or_cancel_retry_error.count,count,Number of auto_schema_telemetry jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_schema_telemetry.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_schema_telemetry jobs +Shown as second" +cockroachdb,jobs.auto_schema_telemetry.protected_record_count,gauge,Number of protected timestamp records held by auto_schema_telemetry jobs +cockroachdb,jobs.auto_schema_telemetry.resume_completed.count,count,Number of auto_schema_telemetry jobs which successfully 
resumed to completion +cockroachdb,jobs.auto_schema_telemetry.resume_failed.count,count,Number of auto_schema_telemetry jobs which failed with a non-retriable error +cockroachdb,jobs.auto_schema_telemetry.resume_retry_error.count,count,Number of auto_schema_telemetry jobs which failed with a retriable error +cockroachdb,jobs.auto_span_config_reconciliation.currently_idle,gauge,Number of auto_span_config_reconciliation jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_span_config_reconciliation.currently_paused,gauge,Number of auto_span_config_reconciliation jobs currently considered Paused +cockroachdb,jobs.auto_span_config_reconciliation.currently_running,gauge,Number of auto_span_config_reconciliation jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_span_config_reconciliation.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_span_config_reconciliation jobs +cockroachdb,jobs.auto_span_config_reconciliation.fail_or_cancel_completed.count,count,Number of auto_span_config_reconciliation jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_span_config_reconciliation.fail_or_cancel_failed.count,count,Number of auto_span_config_reconciliation jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_span_config_reconciliation.fail_or_cancel_retry_error.count,count,Number of auto_span_config_reconciliation jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_span_config_reconciliation.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_span_config_reconciliation jobs +Shown as second" +cockroachdb,jobs.auto_span_config_reconciliation.protected_record_count,gauge,Number of protected timestamp records held by auto_span_config_reconciliation jobs +cockroachdb,jobs.auto_span_config_reconciliation.resume_completed.count,count,Number of auto_span_config_reconciliation jobs which successfully resumed to completion +cockroachdb,jobs.auto_span_config_reconciliation.resume_failed.count,count,Number of auto_span_config_reconciliation jobs which failed with a non-retriable error +cockroachdb,jobs.auto_span_config_reconciliation.resume_retry_error.count,count,Number of auto_span_config_reconciliation jobs which failed with a retriable error +cockroachdb,jobs.auto_sql_stats_compaction.currently_idle,gauge,Number of auto_sql_stats_compaction jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_sql_stats_compaction.currently_paused,gauge,Number of auto_sql_stats_compaction jobs currently considered Paused +cockroachdb,jobs.auto_sql_stats_compaction.currently_running,gauge,Number of auto_sql_stats_compaction jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_sql_stats_compaction.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_sql_stats_compaction jobs +cockroachdb,jobs.auto_sql_stats_compaction.fail_or_cancel_completed.count,count,Number of auto_sql_stats_compaction jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_sql_stats_compaction.fail_or_cancel_failed.count,count,Number of auto_sql_stats_compaction jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_sql_stats_compaction.fail_or_cancel_retry_error.count,count,Number of 
auto_sql_stats_compaction jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_sql_stats_compaction.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_sql_stats_compaction jobs +Shown as second" +cockroachdb,jobs.auto_sql_stats_compaction.protected_record_count,gauge,Number of protected timestamp records held by auto_sql_stats_compaction jobs +cockroachdb,jobs.auto_sql_stats_compaction.resume_completed.count,count,Number of auto_sql_stats_compaction jobs which successfully resumed to completion +cockroachdb,jobs.auto_sql_stats_compaction.resume_failed.count,count,Number of auto_sql_stats_compaction jobs which failed with a non-retriable error +cockroachdb,jobs.auto_sql_stats_compaction.resume_retry_error.count,count,Number of auto_sql_stats_compaction jobs which failed with a retriable error +cockroachdb,jobs.auto_update_sql_activity.currently_idle,gauge,Number of auto_update_sql_activity jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.auto_update_sql_activity.currently_paused,gauge,Number of auto_update_sql_activity jobs currently considered Paused +cockroachdb,jobs.auto_update_sql_activity.currently_running,gauge,Number of auto_update_sql_activity jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.auto_update_sql_activity.expired_pts_records.count,count,Number of expired protected timestamp records owned by auto_update_sql_activity jobs +cockroachdb,jobs.auto_update_sql_activity.fail_or_cancel_completed.count,count,Number of auto_update_sql_activity jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.auto_update_sql_activity.fail_or_cancel_failed.count,count,Number of auto_update_sql_activity jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.auto_update_sql_activity.fail_or_cancel_retry_error.count,count,Number of auto_update_sql_activity jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.auto_update_sql_activity.protected_age_sec,gauge,"The age of the oldest PTS record protected by auto_update_sql_activity jobs +Shown as second" +cockroachdb,jobs.auto_update_sql_activity.protected_record_count,gauge,Number of protected timestamp records held by auto_update_sql_activity jobs +cockroachdb,jobs.auto_update_sql_activity.resume_completed.count,count,Number of auto_update_sql_activity jobs which successfully resumed to completion +cockroachdb,jobs.auto_update_sql_activity.resume_failed.count,count,Number of auto_update_sql_activity jobs which failed with a non-retriable error +cockroachdb,jobs.auto_update_sql_activity.resume_retry_error.count,count,Number of auto_update_sql_activity jobs which failed with a retriable error +cockroachdb,jobs.backup.currently_idle,gauge,"[OpenMetrics v1 & v2] Number of backup jobs currently considered Idle and can be freely shut down +Shown as job" +cockroachdb,jobs.backup.currently_paused,gauge,Number of backup jobs currently considered Paused +cockroachdb,jobs.backup.currently_running,gauge,"[OpenMetrics v1 & v2] Number of backup jobs currently running in Resume or OnFailOrCancel state +Shown as job" +cockroachdb,jobs.backup.expired_pts_records.count,count,Number of expired protected timestamp records owned by backup jobs +cockroachdb,jobs.backup.fail_or_cancel_completed.count,count,Number of backup jobs which successfully completed their failure or cancelation process 
+cockroachdb,jobs.backup.fail_or_cancel_failed,count,"[OpenMetrics v1] Number of backup jobs which failed with a non-retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.fail_or_cancel_failed.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a non-retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.fail_or_cancel_retry_error,count,"[OpenMetrics v1] Number of backup jobs which failed with a retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.fail_or_cancel_retry_error.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a retriable error on their failure or cancelation process +Shown as job" +cockroachdb,jobs.backup.protected_age_sec,gauge,"The age of the oldest PTS record protected by backup jobs +Shown as second" +cockroachdb,jobs.backup.protected_record_count,gauge,Number of protected timestamp records held by backup jobs +cockroachdb,jobs.backup.resume_completed.count,count,Number of backup jobs which successfully resumed to completion +cockroachdb,jobs.backup.resume_failed,count,"[OpenMetrics v1] Number of backup jobs which failed with a non-retriable error +Shown as job" +cockroachdb,jobs.backup.resume_failed.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a non-retriable error +Shown as job" +cockroachdb,jobs.backup.resume_retry_error,count,"[OpenMetrics v1] Number of backup jobs which failed with a retriable error +Shown as job" +cockroachdb,jobs.backup.resume_retry_error.count,count,"[OpenMetrics v2] Number of backup jobs which failed with a retriable error +Shown as job" +cockroachdb,jobs.changefeed.currently_idle,gauge,Number of changefeed jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.changefeed.currently_paused,gauge,Number of changefeed jobs currently considered Paused +cockroachdb,jobs.changefeed.currently_running,gauge,Number of changefeed jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.changefeed.expired_pts_records.count,count,Number of expired protected timestamp records owned by changefeed jobs +cockroachdb,jobs.changefeed.fail_or_cancel_completed.count,count,Number of changefeed jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.changefeed.fail_or_cancel_failed.count,count,Number of changefeed jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.changefeed.fail_or_cancel_retry_error.count,count,Number of changefeed jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.changefeed.protected_age_sec,gauge,"The age of the oldest PTS record protected by changefeed jobs +Shown as second" +cockroachdb,jobs.changefeed.protected_record_count,gauge,Number of protected timestamp records held by changefeed jobs +cockroachdb,jobs.changefeed.resume.retry.error,count,[OpenMetrics v1] Number of changefeed jobs which failed with a retriable error +cockroachdb,jobs.changefeed.resume.retry.error.count,count,[OpenMetrics v2] Number of changefeed jobs which failed with a retriable error +cockroachdb,jobs.changefeed.resume_completed.count,count,Number of changefeed jobs which successfully resumed to completion +cockroachdb,jobs.changefeed.resume_failed.count,count,Number of changefeed jobs which failed with a non-retriable error +cockroachdb,jobs.changefeed.resume_retry_error.count,count,Number of changefeed jobs which 
failed with a retriable error +cockroachdb,jobs.claimed_jobs.count,count,number of jobs claimed in job-adopt iterations +cockroachdb,jobs.create.stats.currently_running,gauge,Number of create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.create_stats.currently_idle,gauge,Number of create_stats jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.create_stats.currently_paused,gauge,Number of create_stats jobs currently considered Paused +cockroachdb,jobs.create_stats.currently_running,gauge,Number of create_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.create_stats.expired_pts_records.count,count,Number of expired protected timestamp records owned by create_stats jobs +cockroachdb,jobs.create_stats.fail_or_cancel_completed.count,count,Number of create_stats jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.create_stats.fail_or_cancel_failed.count,count,Number of create_stats jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.create_stats.fail_or_cancel_retry_error.count,count,Number of create_stats jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.create_stats.protected_age_sec,gauge,"The age of the oldest PTS record protected by create_stats jobs +Shown as second" +cockroachdb,jobs.create_stats.protected_record_count,gauge,Number of protected timestamp records held by create_stats jobs +cockroachdb,jobs.create_stats.resume_completed.count,count,Number of create_stats jobs which successfully resumed to completion +cockroachdb,jobs.create_stats.resume_failed.count,count,Number of create_stats jobs which failed with a non-retriable error +cockroachdb,jobs.create_stats.resume_retry_error.count,count,Number of create_stats jobs which failed with a retriable error +cockroachdb,jobs.import.currently_idle,gauge,Number of import jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.import.currently_paused,gauge,Number of import jobs currently considered Paused +cockroachdb,jobs.import.currently_running,gauge,Number of import jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.import.expired_pts_records.count,count,Number of expired protected timestamp records owned by import jobs +cockroachdb,jobs.import.fail_or_cancel_completed.count,count,Number of import jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.import.fail_or_cancel_failed.count,count,Number of import jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.import.fail_or_cancel_retry_error.count,count,Number of import jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.import.protected_age_sec,gauge,"The age of the oldest PTS record protected by import jobs +Shown as second" +cockroachdb,jobs.import.protected_record_count,gauge,Number of protected timestamp records held by import jobs +cockroachdb,jobs.import.resume_completed.count,count,Number of import jobs which successfully resumed to completion +cockroachdb,jobs.import.resume_failed.count,count,Number of import jobs which failed with a non-retriable error +cockroachdb,jobs.import.resume_retry_error.count,count,Number of import jobs which failed with a retriable error +cockroachdb,jobs.key_visualizer.currently_idle,gauge,Number of key_visualizer jobs currently considered Idle 
and can be freely shut down +cockroachdb,jobs.key_visualizer.currently_paused,gauge,Number of key_visualizer jobs currently considered Paused +cockroachdb,jobs.key_visualizer.currently_running,gauge,Number of key_visualizer jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.key_visualizer.expired_pts_records.count,count,Number of expired protected timestamp records owned by key_visualizer jobs +cockroachdb,jobs.key_visualizer.fail_or_cancel_completed.count,count,Number of key_visualizer jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.key_visualizer.fail_or_cancel_failed.count,count,Number of key_visualizer jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.key_visualizer.fail_or_cancel_retry_error.count,count,Number of key_visualizer jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.key_visualizer.protected_age_sec,gauge,"The age of the oldest PTS record protected by key_visualizer jobs +Shown as second" +cockroachdb,jobs.key_visualizer.protected_record_count,gauge,Number of protected timestamp records held by key_visualizer jobs +cockroachdb,jobs.key_visualizer.resume_completed.count,count,Number of key_visualizer jobs which successfully resumed to completion +cockroachdb,jobs.key_visualizer.resume_failed.count,count,Number of key_visualizer jobs which failed with a non-retriable error +cockroachdb,jobs.key_visualizer.resume_retry_error.count,count,Number of key_visualizer jobs which failed with a retriable error +cockroachdb,jobs.metrics.task_failed.count,count,Number of metrics sql activity updater tasks that failed +cockroachdb,jobs.migration.currently_idle,gauge,Number of migration jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.migration.currently_paused,gauge,Number of migration jobs currently considered Paused +cockroachdb,jobs.migration.currently_running,gauge,Number of migration jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.migration.expired_pts_records.count,count,Number of expired protected timestamp records owned by migration jobs +cockroachdb,jobs.migration.fail_or_cancel_completed.count,count,Number of migration jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.migration.fail_or_cancel_failed.count,count,Number of migration jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.migration.fail_or_cancel_retry_error.count,count,Number of migration jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.migration.protected_age_sec,gauge,"The age of the oldest PTS record protected by migration jobs +Shown as second" +cockroachdb,jobs.migration.protected_record_count,gauge,Number of protected timestamp records held by migration jobs +cockroachdb,jobs.migration.resume_completed.count,count,Number of migration jobs which successfully resumed to completion +cockroachdb,jobs.migration.resume_failed.count,count,Number of migration jobs which failed with a non-retriable error +cockroachdb,jobs.migration.resume_retry_error.count,count,Number of migration jobs which failed with a retriable error +cockroachdb,jobs.mvcc_statistics_update.currently_idle,gauge,Number of mvcc_statistics_update jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.mvcc_statistics_update.currently_paused,gauge,Number of 
mvcc_statistics_update jobs currently considered Paused +cockroachdb,jobs.mvcc_statistics_update.currently_running,gauge,Number of mvcc_statistics_update jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.mvcc_statistics_update.expired_pts_records.count,count,Number of expired protected timestamp records owned by mvcc_statistics_update jobs +cockroachdb,jobs.mvcc_statistics_update.fail_or_cancel_completed.count,count,Number of mvcc_statistics_update jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.mvcc_statistics_update.fail_or_cancel_failed.count,count,Number of mvcc_statistics_update jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.mvcc_statistics_update.fail_or_cancel_retry_error.count,count,Number of mvcc_statistics_update jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.mvcc_statistics_update.protected_age_sec,gauge,"The age of the oldest PTS record protected by mvcc_statistics_update jobs +Shown as second" +cockroachdb,jobs.mvcc_statistics_update.protected_record_count,gauge,Number of protected timestamp records held by mvcc_statistics_update jobs +cockroachdb,jobs.mvcc_statistics_update.resume_completed.count,count,Number of mvcc_statistics_update jobs which successfully resumed to completion +cockroachdb,jobs.mvcc_statistics_update.resume_failed.count,count,Number of mvcc_statistics_update jobs which failed with a non-retriable error +cockroachdb,jobs.mvcc_statistics_update.resume_retry_error.count,count,Number of mvcc_statistics_update jobs which failed with a retriable error +cockroachdb,jobs.new_schema_change.currently_idle,gauge,Number of new_schema_change jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.new_schema_change.currently_paused,gauge,Number of new_schema_change jobs currently considered Paused +cockroachdb,jobs.new_schema_change.currently_running,gauge,Number of new_schema_change jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.new_schema_change.expired_pts_records.count,count,Number of expired protected timestamp records owned by new_schema_change jobs +cockroachdb,jobs.new_schema_change.fail_or_cancel_completed.count,count,Number of new_schema_change jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.new_schema_change.fail_or_cancel_failed.count,count,Number of new_schema_change jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.new_schema_change.fail_or_cancel_retry_error.count,count,Number of new_schema_change jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.new_schema_change.protected_age_sec,gauge,"The age of the oldest PTS record protected by new_schema_change jobs +Shown as second" +cockroachdb,jobs.new_schema_change.protected_record_count,gauge,Number of protected timestamp records held by new_schema_change jobs +cockroachdb,jobs.new_schema_change.resume_completed.count,count,Number of new_schema_change jobs which successfully resumed to completion +cockroachdb,jobs.new_schema_change.resume_failed.count,count,Number of new_schema_change jobs which failed with a non-retriable error +cockroachdb,jobs.new_schema_change.resume_retry_error.count,count,Number of new_schema_change jobs which failed with a retriable error +cockroachdb,jobs.poll_jobs_stats.currently_idle,gauge,Number of poll_jobs_stats jobs 
currently considered Idle and can be freely shut down +cockroachdb,jobs.poll_jobs_stats.currently_paused,gauge,Number of poll_jobs_stats jobs currently considered Paused +cockroachdb,jobs.poll_jobs_stats.currently_running,gauge,Number of poll_jobs_stats jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.poll_jobs_stats.expired_pts_records.count,count,Number of expired protected timestamp records owned by poll_jobs_stats jobs +cockroachdb,jobs.poll_jobs_stats.fail_or_cancel_completed.count,count,Number of poll_jobs_stats jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.poll_jobs_stats.fail_or_cancel_failed.count,count,Number of poll_jobs_stats jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.poll_jobs_stats.fail_or_cancel_retry_error.count,count,Number of poll_jobs_stats jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.poll_jobs_stats.protected_age_sec,gauge,"The age of the oldest PTS record protected by poll_jobs_stats jobs +Shown as second" +cockroachdb,jobs.poll_jobs_stats.protected_record_count,gauge,Number of protected timestamp records held by poll_jobs_stats jobs +cockroachdb,jobs.poll_jobs_stats.resume_completed.count,count,Number of poll_jobs_stats jobs which successfully resumed to completion +cockroachdb,jobs.poll_jobs_stats.resume_failed.count,count,Number of poll_jobs_stats jobs which failed with a non-retriable error +cockroachdb,jobs.poll_jobs_stats.resume_retry_error.count,count,Number of poll_jobs_stats jobs which failed with a retriable error +cockroachdb,jobs.replication_stream_ingestion.currently_idle,gauge,Number of replication_stream_ingestion jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.replication_stream_ingestion.currently_paused,gauge,Number of replication_stream_ingestion jobs currently considered Paused +cockroachdb,jobs.replication_stream_ingestion.currently_running,gauge,Number of replication_stream_ingestion jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.replication_stream_ingestion.expired_pts_records.count,count,Number of expired protected timestamp records owned by replication_stream_ingestion jobs +cockroachdb,jobs.replication_stream_ingestion.fail_or_cancel_completed.count,count,Number of replication_stream_ingestion jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.replication_stream_ingestion.fail_or_cancel_failed.count,count,Number of replication_stream_ingestion jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_ingestion.fail_or_cancel_retry_error.count,count,Number of replication_stream_ingestion jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_ingestion.protected_age_sec,gauge,"The age of the oldest PTS record protected by replication_stream_ingestion jobs +Shown as second" +cockroachdb,jobs.replication_stream_ingestion.protected_record_count,gauge,Number of protected timestamp records held by replication_stream_ingestion jobs +cockroachdb,jobs.replication_stream_ingestion.resume_completed.count,count,Number of replication_stream_ingestion jobs which successfully resumed to completion +cockroachdb,jobs.replication_stream_ingestion.resume_failed.count,count,Number of replication_stream_ingestion jobs which failed with a non-retriable error 
+cockroachdb,jobs.replication_stream_ingestion.resume_retry_error.count,count,Number of replication_stream_ingestion jobs which failed with a retriable error +cockroachdb,jobs.replication_stream_producer.currently_idle,gauge,Number of replication_stream_producer jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.replication_stream_producer.currently_paused,gauge,Number of replication_stream_producer jobs currently considered Paused +cockroachdb,jobs.replication_stream_producer.currently_running,gauge,Number of replication_stream_producer jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.replication_stream_producer.expired_pts_records.count,count,Number of expired protected timestamp records owned by replication_stream_producer jobs +cockroachdb,jobs.replication_stream_producer.fail_or_cancel_completed.count,count,Number of replication_stream_producer jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.replication_stream_producer.fail_or_cancel_failed.count,count,Number of replication_stream_producer jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_producer.fail_or_cancel_retry_error.count,count,Number of replication_stream_producer jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.replication_stream_producer.protected_age_sec,gauge,"The age of the oldest PTS record protected by replication_stream_producer jobs +Shown as second" +cockroachdb,jobs.replication_stream_producer.protected_record_count,gauge,Number of protected timestamp records held by replication_stream_producer jobs +cockroachdb,jobs.replication_stream_producer.resume_completed.count,count,Number of replication_stream_producer jobs which successfully resumed to completion +cockroachdb,jobs.replication_stream_producer.resume_failed.count,count,Number of replication_stream_producer jobs which failed with a non-retriable error +cockroachdb,jobs.replication_stream_producer.resume_retry_error.count,count,Number of replication_stream_producer jobs which failed with a retriable error +cockroachdb,jobs.restore.currently_idle,gauge,Number of restore jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.restore.currently_paused,gauge,Number of restore jobs currently considered Paused +cockroachdb,jobs.restore.currently_running,gauge,Number of restore jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.restore.expired_pts_records.count,count,Number of expired protected timestamp records owned by restore jobs +cockroachdb,jobs.restore.fail_or_cancel_completed.count,count,Number of restore jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.restore.fail_or_cancel_failed.count,count,Number of restore jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.restore.fail_or_cancel_retry_error.count,count,Number of restore jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.restore.protected_age_sec,gauge,"The age of the oldest PTS record protected by restore jobs +Shown as second" +cockroachdb,jobs.restore.protected_record_count,gauge,Number of protected timestamp records held by restore jobs +cockroachdb,jobs.restore.resume_completed.count,count,Number of restore jobs which successfully resumed to completion 
+cockroachdb,jobs.restore.resume_failed.count,count,Number of restore jobs which failed with a non-retriable error +cockroachdb,jobs.restore.resume_retry_error.count,count,Number of restore jobs which failed with a retriable error +cockroachdb,jobs.resumed_claimed_jobs.count,count,number of claimed-jobs resumed in job-adopt iterations +cockroachdb,jobs.row.level.ttl.currently_paused,gauge,Number of row_level_ttl jobs currently considered Paused +cockroachdb,jobs.row.level.ttl.currently_running,gauge,Number of row_level_ttl jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.row.level.ttl.resume_completed.count,count,Number of row_level_ttl jobs which successfully resumed to completion +cockroachdb,jobs.row.level.ttl.resume_failed.count,count,Number of row_level_ttl jobs which failed with a non-retriable error +cockroachdb,jobs.row.level.ttl.rows_deleted.count,count,Number of rows deleted by the row level TTL job. +cockroachdb,jobs.row.level.ttl.rows_selected.count,count,Number of rows selected for deletion by the row level TTL job. +cockroachdb,jobs.row_level_ttl.currently_idle,gauge,Number of row_level_ttl jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.row_level_ttl.currently_paused,gauge,Number of row_level_ttl jobs currently considered Paused +cockroachdb,jobs.row_level_ttl.currently_running,gauge,Number of row_level_ttl jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.row_level_ttl.delete_duration.bucket,count,"Duration for delete requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.delete_duration.count,count,"Duration for delete requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.delete_duration.sum,count,"Duration for delete requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.expired_pts_records.count,count,Number of expired protected timestamp records owned by row_level_ttl jobs +cockroachdb,jobs.row_level_ttl.fail_or_cancel_completed.count,count,Number of row_level_ttl jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.row_level_ttl.fail_or_cancel_failed.count,count,Number of row_level_ttl jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.row_level_ttl.fail_or_cancel_retry_error.count,count,Number of row_level_ttl jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.row_level_ttl.num_active_spans,gauge,Number of active spans the TTL job is deleting from. +cockroachdb,jobs.row_level_ttl.protected_age_sec,gauge,"The age of the oldest PTS record protected by row_level_ttl jobs +Shown as second" +cockroachdb,jobs.row_level_ttl.protected_record_count,gauge,Number of protected timestamp records held by row_level_ttl jobs +cockroachdb,jobs.row_level_ttl.resume_completed.count,count,Number of row_level_ttl jobs which successfully resumed to completion +cockroachdb,jobs.row_level_ttl.resume_failed.count,count,Number of row_level_ttl jobs which failed with a non-retriable error +cockroachdb,jobs.row_level_ttl.resume_retry_error.count,count,Number of row_level_ttl jobs which failed with a retriable error +cockroachdb,jobs.row_level_ttl.rows_deleted.count,count,Number of rows deleted by the row level TTL job. +cockroachdb,jobs.row_level_ttl.rows_selected.count,count,Number of rows selected for deletion by the row level TTL job. 
+cockroachdb,jobs.row_level_ttl.select_duration.bucket,count,"Duration for select requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.select_duration.count,count,"Duration for select requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.select_duration.sum,count,"Duration for select requests during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.span_total_duration.bucket,count,"Duration for processing a span during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.span_total_duration.count,count,"Duration for processing a span during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.span_total_duration.sum,count,"Duration for processing a span during row level TTL. +Shown as nanosecond" +cockroachdb,jobs.row_level_ttl.total_expired_rows,gauge,Approximate number of rows that have expired the TTL on the TTL table. +cockroachdb,jobs.row_level_ttl.total_rows,gauge,Approximate number of rows on the TTL table. +cockroachdb,jobs.running_non_idle,gauge,number of running jobs that are not idle +cockroachdb,jobs.schema_change.currently_idle,gauge,Number of schema_change jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.schema_change.currently_paused,gauge,Number of schema_change jobs currently considered Paused +cockroachdb,jobs.schema_change.currently_running,gauge,Number of schema_change jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.schema_change.expired_pts_records.count,count,Number of expired protected timestamp records owned by schema_change jobs +cockroachdb,jobs.schema_change.fail_or_cancel_completed.count,count,Number of schema_change jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.schema_change.fail_or_cancel_failed.count,count,Number of schema_change jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.schema_change.fail_or_cancel_retry_error.count,count,Number of schema_change jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.schema_change.protected_age_sec,gauge,"The age of the oldest PTS record protected by schema_change jobs +Shown as second" +cockroachdb,jobs.schema_change.protected_record_count,gauge,Number of protected timestamp records held by schema_change jobs +cockroachdb,jobs.schema_change.resume_completed.count,count,Number of schema_change jobs which successfully resumed to completion +cockroachdb,jobs.schema_change.resume_failed.count,count,Number of schema_change jobs which failed with a non-retriable error +cockroachdb,jobs.schema_change.resume_retry_error.count,count,Number of schema_change jobs which failed with a retriable error +cockroachdb,jobs.schema_change_gc.currently_idle,gauge,Number of schema_change_gc jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.schema_change_gc.currently_paused,gauge,Number of schema_change_gc jobs currently considered Paused +cockroachdb,jobs.schema_change_gc.currently_running,gauge,Number of schema_change_gc jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.schema_change_gc.expired_pts_records.count,count,Number of expired protected timestamp records owned by schema_change_gc jobs +cockroachdb,jobs.schema_change_gc.fail_or_cancel_completed.count,count,Number of schema_change_gc jobs which successfully completed their failure or cancelation process 
+cockroachdb,jobs.schema_change_gc.fail_or_cancel_failed.count,count,Number of schema_change_gc jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.schema_change_gc.fail_or_cancel_retry_error.count,count,Number of schema_change_gc jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.schema_change_gc.protected_age_sec,gauge,"The age of the oldest PTS record protected by schema_change_gc jobs +Shown as second" +cockroachdb,jobs.schema_change_gc.protected_record_count,gauge,Number of protected timestamp records held by schema_change_gc jobs +cockroachdb,jobs.schema_change_gc.resume_completed.count,count,Number of schema_change_gc jobs which successfully resumed to completion +cockroachdb,jobs.schema_change_gc.resume_failed.count,count,Number of schema_change_gc jobs which failed with a non-retriable error +cockroachdb,jobs.schema_change_gc.resume_retry_error.count,count,Number of schema_change_gc jobs which failed with a retriable error +cockroachdb,jobs.typedesc_schema_change.currently_idle,gauge,Number of typedesc_schema_change jobs currently considered Idle and can be freely shut down +cockroachdb,jobs.typedesc_schema_change.currently_paused,gauge,Number of typedesc_schema_change jobs currently considered Paused +cockroachdb,jobs.typedesc_schema_change.currently_running,gauge,Number of typedesc_schema_change jobs currently running in Resume or OnFailOrCancel state +cockroachdb,jobs.typedesc_schema_change.expired_pts_records.count,count,Number of expired protected timestamp records owned by typedesc_schema_change jobs +cockroachdb,jobs.typedesc_schema_change.fail_or_cancel_completed.count,count,Number of typedesc_schema_change jobs which successfully completed their failure or cancelation process +cockroachdb,jobs.typedesc_schema_change.fail_or_cancel_failed.count,count,Number of typedesc_schema_change jobs which failed with a non-retriable error on their failure or cancelation process +cockroachdb,jobs.typedesc_schema_change.fail_or_cancel_retry_error.count,count,Number of typedesc_schema_change jobs which failed with a retriable error on their failure or cancelation process +cockroachdb,jobs.typedesc_schema_change.protected_age_sec,gauge,"The age of the oldest PTS record protected by typedesc_schema_change jobs +Shown as second" +cockroachdb,jobs.typedesc_schema_change.protected_record_count,gauge,Number of protected timestamp records held by typedesc_schema_change jobs +cockroachdb,jobs.typedesc_schema_change.resume_completed.count,count,Number of typedesc_schema_change jobs which successfully resumed to completion +cockroachdb,jobs.typedesc_schema_change.resume_failed.count,count,Number of typedesc_schema_change jobs which failed with a non-retriable error +cockroachdb,jobs.typedesc_schema_change.resume_retry_error.count,count,Number of typedesc_schema_change jobs which failed with a retriable error +cockroachdb,keybytes,gauge,"[OpenMetrics v1 & v2] Number of bytes taken up by keys +Shown as byte" +cockroachdb,keycount,gauge,"[OpenMetrics v1 & v2] Count of all keys +Shown as key" +cockroachdb,kv.allocator.load_based_lease_transfers.cannot_find_better_candidate.count,count,The number times the allocator determined that the lease was on the best possible replica +cockroachdb,kv.allocator.load_based_lease_transfers.delta_not_significant.count,count,The number times the allocator determined that the delta between the existing store and the best candidate was not significant 
+cockroachdb,kv.allocator.load_based_lease_transfers.existing_not_overfull.count,count,The number times the allocator determined that the lease was not on an overfull store +cockroachdb,kv.allocator.load_based_lease_transfers.follow_the_workload.count,count,The number times the allocator determined that the lease should be transferred to another replica for locality. +cockroachdb,kv.allocator.load_based_lease_transfers.missing_stats_for_existing_stores.count,count,The number times the allocator was missing qps stats for the leaseholder +cockroachdb,kv.allocator.load_based_lease_transfers.should_transfer.count,count,The number times the allocator determined that the lease should be transferred to another replica for better load distribution +cockroachdb,kv.allocator.load_based_replica_rebalancing.cannot_find_better_candidate.count,count,The number times the allocator determined that the range was on the best possible stores +cockroachdb,kv.allocator.load_based_replica_rebalancing.delta_not_significant.count,count,The number times the allocator determined that the delta between an existing store and the best replacement candidate was not high enough +cockroachdb,kv.allocator.load_based_replica_rebalancing.existing_not_overfull.count,count,The number times the allocator determined that none of the range’s replicas were on overfull stores +cockroachdb,kv.allocator.load_based_replica_rebalancing.missing_stats_for_existing_store.count,count,The number times the allocator was missing the qps stats for the existing store +cockroachdb,kv.allocator.load_based_replica_rebalancing.should_transfer.count,count,The number times the allocator determined that the replica should be rebalanced to another store for better load distribution +cockroachdb,kv.closed_timestamp.max_behind_nanos,gauge,"Largest latency between realtime and replica max closed timestamp +Shown as nanosecond" +cockroachdb,kv.concurrency.avg_lock_hold_duration_nanos,gauge,"Average lock hold duration across locks currently held in lock tables. Does not include replicated locks (intents) that are not held in memory +Shown as nanosecond" +cockroachdb,kv.concurrency.avg_lock_wait_duration_nanos,gauge,"Average lock wait duration across requests currently waiting in lock wait-queues +Shown as nanosecond" +cockroachdb,kv.concurrency.lock_wait_queue_waiters,gauge,Number of requests actively waiting in a lock wait-queue +cockroachdb,kv.concurrency.locks,gauge,Number of active locks held in lock tables. Does not include replicated locks (intents) that are not held in memory +cockroachdb,kv.concurrency.locks_with_wait_queues,gauge,Number of active locks held in lock tables with active wait-queues +cockroachdb,kv.concurrency.max_lock_hold_duration_nanos,gauge,"Maximum length of time any lock in a lock table is held. Does not include replicated locks (intents) that are not held in memory +Shown as nanosecond" +cockroachdb,kv.concurrency.max_lock_wait_duration_nanos,gauge,"Maximum lock wait duration across requests currently waiting in lock wait-queues +Shown as nanosecond" +cockroachdb,kv.concurrency.max_lock_wait_queue_waiters_for_lock,gauge,Maximum number of requests actively waiting in any single lock wait-queue +cockroachdb,kv.loadsplitter.nosplitkey.count,count,Load-based splitter could not find a split key. +cockroachdb,kv.loadsplitter.popularkey.count,count,Load-based splitter could not find a split key and the most popular sampled split key occurs in >= 25% of the samples. 
+cockroachdb,kv.prober.planning_attempts.count,count,Number of attempts at planning out probes made; in order to probe KV we need to plan out which ranges to probe; +cockroachdb,kv.prober.planning_failures.count,count,"Number of attempts at planning out probes that failed; in order to probe KV we need to plan out which ranges to probe; if planning fails, then kvprober is not able to send probes to all ranges; consider alerting on this metric as a result" +cockroachdb,kv.prober.read.attempts.count,count,"Number of attempts made to read probe KV, regardless of outcome" +cockroachdb,kv.prober.read.failures.count,count,"Number of attempts made to read probe KV that failed, whether due to error or timeout" +cockroachdb,kv.prober.read.latency.bucket,count,"Latency of successful KV read probes +Shown as nanosecond" +cockroachdb,kv.prober.read.latency.count,count,"Latency of successful KV read probes +Shown as nanosecond" +cockroachdb,kv.prober.read.latency.sum,count,"Latency of successful KV read probes +Shown as nanosecond" +cockroachdb,kv.prober.write.attempts.count,count,"Number of attempts made to write probe KV, regardless of outcome" +cockroachdb,kv.prober.write.failures.count,count,"Number of attempts made to write probe KV that failed, whether due to error or timeout" +cockroachdb,kv.prober.write.latency.bucket,count,"Latency of successful KV write probes +Shown as nanosecond" +cockroachdb,kv.prober.write.latency.count,count,"Latency of successful KV write probes +Shown as nanosecond" +cockroachdb,kv.prober.write.latency.sum,count,"Latency of successful KV write probes +Shown as nanosecond" +cockroachdb,kv.prober.write.quarantine.oldest_duration,gauge,"The duration that the oldest range in the write quarantine pool has remained +Shown as second" +cockroachdb,kv.protectedts.reconciliation.errors.count,count,number of errors encountered during reconciliation runs on this node +cockroachdb,kv.protectedts.reconciliation.num_runs.count,count,number of successful reconciliation runs on this node +cockroachdb,kv.protectedts.reconciliation.records_processed.count,count,number of records processed without error during reconciliation on this node +cockroachdb,kv.protectedts.reconciliation.records_removed.count,count,number of records removed during reconciliation runs on this node +cockroachdb,kv.rangefeed.budget_allocation_blocked.count,count,Number of times RangeFeed waited for budget availability +cockroachdb,kv.rangefeed.budget_allocation_failed.count,count,Number of times RangeFeed failed because memory budget was exceeded +cockroachdb,kv.rangefeed.catchup_scan_nanos.count,count,"Time spent in RangeFeed catchup scan +Shown as nanosecond" +cockroachdb,kv.rangefeed.mem_shared,gauge,"Memory usage by rangefeeds +Shown as byte" +cockroachdb,kv.rangefeed.mem_system,gauge,"Memory usage by rangefeeds on system ranges +Shown as byte" +cockroachdb,kv.rangefeed.processors_goroutine,gauge,Number of active RangeFeed processors using goroutines +cockroachdb,kv.rangefeed.processors_scheduler,gauge,Number of active RangeFeed processors using scheduler +cockroachdb,kv.rangefeed.registrations,gauge,Number of active RangeFeed registrations +cockroachdb,kv.rangefeed.scheduler.normal.latency.bucket,count,"KV RangeFeed normal scheduler latency +Shown as nanosecond" +cockroachdb,kv.rangefeed.scheduler.normal.latency.count,count,"KV RangeFeed normal scheduler latency +Shown as nanosecond" +cockroachdb,kv.rangefeed.scheduler.normal.latency.sum,count,"KV RangeFeed normal scheduler latency +Shown as nanosecond" 
+cockroachdb,kv.rangefeed.scheduler.normal.queue_size,gauge,Number of entries in the KV RangeFeed normal scheduler queue +cockroachdb,kv.rangefeed.scheduler.system.latency.bucket,count,"KV RangeFeed system scheduler latency +Shown as nanosecond" +cockroachdb,kv.rangefeed.scheduler.system.latency.count,count,"KV RangeFeed system scheduler latency +Shown as nanosecond" +cockroachdb,kv.rangefeed.scheduler.system.latency.sum,count,"KV RangeFeed system scheduler latency +Shown as nanosecond" +cockroachdb,kv.rangefeed.scheduler.system.queue_size,gauge,Number of entries in the KV RangeFeed system scheduler queue +cockroachdb,kv.replica_circuit_breaker.num_tripped_events.count,count,Number of times the per-Replica circuit breakers tripped since process start. +cockroachdb,kv.replica_circuit_breaker.num_tripped_replicas,gauge,"Number of Replicas for which the per-Replica circuit breaker is currently tripped. A nonzero value indicates range or replica unavailability, and should be investigated. Replicas in this state will fail-fast all inbound requests." +cockroachdb,kv.replica_read_batch_evaluate.dropped_latches_before_eval.count,count,Number of times read-only batches dropped latches before evaluation. +cockroachdb,kv.replica_read_batch_evaluate.latency.bucket,count,"Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. +Shown as nanosecond" +cockroachdb,kv.replica_read_batch_evaluate.latency.count,count,"Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. +Shown as nanosecond" +cockroachdb,kv.replica_read_batch_evaluate.latency.sum,count,"Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. +Shown as nanosecond" +cockroachdb,kv.replica_read_batch_evaluate.without_interleaving_iter.count,count,Number of read-only batches evaluated without an intent interleaving iter. +cockroachdb,kv.replica_write_batch_evaluate.latency.bucket,count,"Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. Note that the measurement does not include the duration for replicating the evaluated command. +Shown as nanosecond" +cockroachdb,kv.replica_write_batch_evaluate.latency.count,count,"Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. Note that the measurement does not include the duration for replicating the evaluated command. +Shown as nanosecond" +cockroachdb,kv.replica_write_batch_evaluate.latency.sum,count,"Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. A measurement is recorded regardless of outcome (i.e. also in case of an error). 
If internal retries occur, each instance is recorded separately. Note that the measurement does not include the duration for replicating the evaluated command. +Shown as nanosecond" +cockroachdb,kv.tenant_rate_limit.current_blocked,gauge,Number of requests currently blocked by the rate limiter +cockroachdb,kv.tenant_rate_limit.num_tenants,gauge,Number of tenants currently being tracked +cockroachdb,kv.tenant_rate_limit.read_batches_admitted.count,count,Number of read batches admitted by the rate limiter +cockroachdb,kv.tenant_rate_limit.read_bytes_admitted.count,count,"Number of read bytes admitted by the rate limiter +Shown as byte" +cockroachdb,kv.tenant_rate_limit.read_requests_admitted.count,count,Number of read requests admitted by the rate limiter +cockroachdb,kv.tenant_rate_limit.write_batches_admitted.count,count,Number of write batches admitted by the rate limiter +cockroachdb,kv.tenant_rate_limit.write_bytes_admitted.count,count,"Number of write bytes admitted by the rate limiter +Shown as byte" +cockroachdb,kv.tenant_rate_limit.write_requests_admitted.count,count,Number of write requests admitted by the rate limiter +cockroachdb,kvadmission.flow_controller.elastic_blocked_stream_count,gauge,Number of replication streams with no flow tokens available for elastic requests +cockroachdb,kvadmission.flow_controller.elastic_requests_admitted.count,count,Number of elastic requests admitted by the flow controller +cockroachdb,kvadmission.flow_controller.elastic_requests_bypassed.count,count,Number of elastic waiting requests that bypassed the flow controller due to disconnecting streams +cockroachdb,kvadmission.flow_controller.elastic_requests_errored.count,count,Number of elastic requests that errored out while waiting for flow tokens +cockroachdb,kvadmission.flow_controller.elastic_requests_waiting,gauge,Number of elastic requests waiting for flow tokens +cockroachdb,kvadmission.flow_controller.elastic_stream_count,gauge,Total number of replication streams for elastic requests +cockroachdb,kvadmission.flow_controller.elastic_tokens_available,gauge,"Flow tokens available for elastic requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.elastic_tokens_deducted.count,count,"Flow tokens deducted by elastic requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.elastic_tokens_returned.count,count,"Flow tokens returned by elastic requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.elastic_tokens_unaccounted.count,count,"Flow tokens returned by elastic requests that were unaccounted for, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.elastic_wait_duration.bucket,count,"Latency histogram for time elastic requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_controller.elastic_wait_duration.count,count,"Latency histogram for time elastic requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_controller.elastic_wait_duration.sum,count,"Latency histogram for time elastic requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_controller.regular_blocked_stream_count,gauge,Number of replication streams with no flow tokens available for regular requests +cockroachdb,kvadmission.flow_controller.regular_requests_admitted.count,count,Number of regular requests admitted by the flow controller
+cockroachdb,kvadmission.flow_controller.regular_requests_bypassed.count,count,Number of regular waiting requests that bypassed the flow controller due to disconnecting streams +cockroachdb,kvadmission.flow_controller.regular_requests_errored.count,count,Number of regular requests that errored out while waiting for flow tokens +cockroachdb,kvadmission.flow_controller.regular_requests_waiting,gauge,Number of regular requests waiting for flow tokens +cockroachdb,kvadmission.flow_controller.regular_stream_count,gauge,Total number of replication streams for regular requests +cockroachdb,kvadmission.flow_controller.regular_tokens_available,gauge,"Flow tokens available for regular requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_tokens_deducted.count,count,"Flow tokens deducted by regular requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_tokens_returned.count,count,"Flow tokens returned by regular requests, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_tokens_unaccounted.count,count,"Flow tokens returned by regular requests that were unaccounted for, across all replication streams +Shown as byte" +cockroachdb,kvadmission.flow_controller.regular_wait_duration.bucket,count,"Latency histogram for time regular requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_controller.regular_wait_duration.count,count,"Latency histogram for time regular requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_controller.regular_wait_duration.sum,count,"Latency histogram for time regular requests spent waiting for flow tokens +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.elastic_requests_admitted.count,count,Number of elastic requests admitted by the flow handle +cockroachdb,kvadmission.flow_handle.elastic_requests_errored.count,count,"Number of elastic requests that errored out while waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.elastic_requests_waiting,gauge,"Number of elastic requests waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.elastic_wait_duration.bucket,count,"Latency histogram for time elastic requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.elastic_wait_duration.count,count,"Latency histogram for time elastic requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.elastic_wait_duration.sum,count,"Latency histogram for time elastic requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.regular_requests_admitted.count,count,Number of regular requests admitted by the flow handle +cockroachdb,kvadmission.flow_handle.regular_requests_errored.count,count,"Number of regular requests that errored out while waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.regular_requests_waiting,gauge,"Number of regular requests waiting for flow tokens, at the handle level" +cockroachdb,kvadmission.flow_handle.regular_wait_duration.bucket,count,"Latency histogram for time regular requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.regular_wait_duration.count,count,"Latency histogram for time regular requests spent 
waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.regular_wait_duration.sum,count,"Latency histogram for time regular requests spent waiting for flow tokens, at the handle level +Shown as nanosecond" +cockroachdb,kvadmission.flow_handle.streams_connected.count,count,"Number of times we’ve connected to a stream, at the handle level" +cockroachdb,kvadmission.flow_handle.streams_disconnected.count,count,"Number of times we’ve disconnected from a stream, at the handle level" +cockroachdb,kvadmission.flow_token_dispatch.coalesced_elastic.count,count,Number of coalesced elastic flow token dispatches (where we’re informing the sender of a higher log entry being admitted) +cockroachdb,kvadmission.flow_token_dispatch.coalesced_regular.count,count,Number of coalesced regular flow token dispatches (where we’re informing the sender of a higher log entry being admitted) +cockroachdb,kvadmission.flow_token_dispatch.local_elastic.count,count,Number of local elastic flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.local_regular.count,count,Number of local regular flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.pending_elastic,gauge,Number of pending elastic flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.pending_nodes,gauge,Number of nodes pending flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.pending_regular,gauge,Number of pending regular flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.remote_elastic.count,count,Number of remote elastic flow token dispatches +cockroachdb,kvadmission.flow_token_dispatch.remote_regular.count,count,Number of remote regular flow token dispatches +cockroachdb,lastupdatenanos,gauge,"[OpenMetrics v1 & v2] Time in nanoseconds since Unix epoch at which bytes/keys/intents metrics were last updated +Shown as nanosecond" +cockroachdb,leases.epoch,gauge,[OpenMetrics v1 & v2] Number of replica leaseholders using epoch-based leases +cockroachdb,leases.error,count,"[OpenMetrics v1] Number of failed lease requests +Shown as request" +cockroachdb,leases.error.count,count,"[OpenMetrics v2] Number of failed lease requests +Shown as request" +cockroachdb,leases.expiration,gauge,[OpenMetrics v1 & v2] Number of replica leaseholders using expiration-based leases +cockroachdb,leases.liveness,gauge,Number of replica leaseholders for the liveness range(s) +cockroachdb,leases.preferences.less_preferred,gauge,Number of replica leaseholders which satisfy a lease preference which is not the most preferred +cockroachdb,leases.preferences.violating,gauge,Number of replica leaseholders which violate lease preferences +cockroachdb,leases.requests.latency.bucket,count,"Lease request latency (all types and outcomes, coalesced) +Shown as nanosecond" +cockroachdb,leases.requests.latency.count,count,"Lease request latency (all types and outcomes, coalesced) +Shown as nanosecond" +cockroachdb,leases.requests.latency.sum,count,"Lease request latency (all types and outcomes, coalesced) +Shown as nanosecond" +cockroachdb,leases.success,count,"[OpenMetrics v1] Number of successful lease requests +Shown as request" +cockroachdb,leases.success.count,count,"[OpenMetrics v2] Number of successful lease requests +Shown as request" +cockroachdb,leases.transfers.error,count,[OpenMetrics v1] Number of failed lease transfers +cockroachdb,leases.transfers.error.count,count,[OpenMetrics v2] Number of failed lease transfers +cockroachdb,leases.transfers.success,count,[OpenMetrics 
v1] Number of successful lease transfers +cockroachdb,leases.transfers.success.count,count,[OpenMetrics v2] Number of successful lease transfers +cockroachdb,livebytes,gauge,"[OpenMetrics v1 & v2] Number of bytes of live data (keys plus values) +Shown as byte" +cockroachdb,livecount,gauge,"[OpenMetrics v1 & v2] Count of live keys +Shown as key" +cockroachdb,liveness.epochincrements,count,[OpenMetrics v1] Number of times this node has incremented its liveness epoch +cockroachdb,liveness.epochincrements.count,count,[OpenMetrics v2] Number of times this node has incremented its liveness epoch +cockroachdb,liveness.heartbeatfailures,count,[OpenMetrics v1] Number of failed node liveness heartbeats from this node +cockroachdb,liveness.heartbeatfailures.count,count,[OpenMetrics v2] Number of failed node liveness heartbeats from this node +cockroachdb,liveness.heartbeatlatency,gauge,"[OpenMetrics v1] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatlatency.bucket,count,"[OpenMetrics v2] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatlatency.count,count,"[OpenMetrics v2] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatlatency.sum,count,"[OpenMetrics v2] Node liveness heartbeat latency in nanoseconds +Shown as nanosecond" +cockroachdb,liveness.heartbeatsinflight,gauge,Number of in-flight liveness heartbeats from this node +cockroachdb,liveness.heartbeatsuccesses,count,[OpenMetrics v1] Number of successful node liveness heartbeats from this node +cockroachdb,liveness.heartbeatsuccesses.count,count,[OpenMetrics v2] Number of successful node liveness heartbeats from this node +cockroachdb,liveness.livenodes,gauge,[OpenMetrics v1 & v2] Number of live nodes in the cluster (will be 0 if this node is not itself live) +cockroachdb,lockbytes,gauge,"Number of bytes taken up by replicated lock key-values (shared and exclusive strength, not intent strength) +Shown as byte" +cockroachdb,lockcount,gauge,"Count of replicated locks (shared, exclusive, and intent strength)" +cockroachdb,log.buffered.messages.dropped.count,count,"Count of log messages that are dropped by buffered log sinks. When CRDB attempts to buffer a log message in a buffered log sink whose buffer is already full, it drops the oldest buffered messages to make space for the new message" +cockroachdb,log.fluent.sink.conn.errors.count,count,Number of connection errors experienced by fluent-server logging sinks +cockroachdb,log.messages.count,count,Count of messages logged on the node since startup. Note that this does not measure the fan-out of single log messages to the various configured logging sinks. 
+cockroachdb,node_id,gauge,[OpenMetrics v1 & v2] node ID with labels for advertised RPC and HTTP addresses +cockroachdb,physical_replication.admit_latency.bucket,count,"Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor +Shown as nanosecond" +cockroachdb,physical_replication.admit_latency.count,count,"Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor +Shown as nanosecond" +cockroachdb,physical_replication.admit_latency.sum,count,"Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor +Shown as nanosecond" +cockroachdb,physical_replication.commit_latency.bucket,count,"Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded +Shown as nanosecond" +cockroachdb,physical_replication.commit_latency.count,count,"Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded +Shown as nanosecond" +cockroachdb,physical_replication.commit_latency.sum,count,"Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded +Shown as nanosecond" +cockroachdb,physical_replication.cutover_progress,gauge,The number of ranges left to revert in order to complete an inflight cutover +cockroachdb,physical_replication.distsql_replan_count.count,count,Total number of dist sql replanning events +cockroachdb,physical_replication.earliest_data_checkpoint_span,gauge,The earliest timestamp of the last checkpoint forwarded by an ingestion data processor +cockroachdb,physical_replication.events_ingested.count,count,Events ingested by all replication jobs +cockroachdb,physical_replication.flush_hist_nanos.bucket,count,"Time spent flushing messages across all replication streams +Shown as nanosecond" +cockroachdb,physical_replication.flush_hist_nanos.count,count,"Time spent flushing messages across all replication streams +Shown as nanosecond" +cockroachdb,physical_replication.flush_hist_nanos.sum,count,"Time spent flushing messages across all replication streams +Shown as nanosecond" +cockroachdb,physical_replication.flushes.count,count,Total flushes across all replication jobs +cockroachdb,physical_replication.job_progress_updates.count,count,Total number of updates to the ingestion job progress +cockroachdb,physical_replication.latest_data_checkpoint_span,gauge,The latest timestamp of the last checkpoint forwarded by an ingestion data processor +cockroachdb,physical_replication.logical_bytes.count,count,"Logical bytes (sum of keys + values) ingested by all replication jobs +Shown as byte" +cockroachdb,physical_replication.replicated_time_seconds,gauge,"The replicated time of the physical replication stream in seconds since the unix epoch. 
+Shown as second" +cockroachdb,physical_replication.resolved_events_ingested.count,count,Resolved events ingested by all replication jobs +cockroachdb,physical_replication.running,gauge,Number of currently running replication streams +cockroachdb,physical_replication.sst_bytes.count,count,"SST bytes (compressed) sent to KV by all replication jobs +Shown as byte" +cockroachdb,queue.consistency.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the consistency checker queue +cockroachdb,queue.consistency.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the consistency checker queue +cockroachdb,queue.consistency.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the consistency checker queue +cockroachdb,queue.consistency.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the consistency checker queue +cockroachdb,queue.consistency.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the consistency checker queue +cockroachdb,queue.consistency.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the consistency checker queue +Shown as nanosecond" +cockroachdb,queue.consistency.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the consistency checker queue +Shown as nanosecond" +cockroachdb,queue.gc.info.abortspanconsidered,count,[OpenMetrics v1] Number of AbortSpan entries old enough to be considered for removal +cockroachdb,queue.gc.info.abortspanconsidered.count,count,[OpenMetrics v2] Number of AbortSpan entries old enough to be considered for removal +cockroachdb,queue.gc.info.abortspangcnum,count,[OpenMetrics v1] Number of AbortSpan entries fit for removal +cockroachdb,queue.gc.info.abortspangcnum.count,count,[OpenMetrics v2] Number of AbortSpan entries fit for removal +cockroachdb,queue.gc.info.abortspanscanned,count,"[OpenMetrics v1] Number of transactions present in the AbortSpan scanned from the engine +Shown as transaction" +cockroachdb,queue.gc.info.abortspanscanned.count,count,"[OpenMetrics v2] Number of transactions present in the AbortSpan scanned from the engine +Shown as transaction" +cockroachdb,queue.gc.info.clearrangefailed.count,count,Number of failed ClearRange operations during GC +cockroachdb,queue.gc.info.clearrangesuccess.count,count,Number of successful ClearRange operations during GC +cockroachdb,queue.gc.info.enqueuehighpriority.count,count,Number of replicas enqueued for GC with high priority +cockroachdb,queue.gc.info.intentsconsidered,count,[OpenMetrics v1] Number of ‘old’ intents +cockroachdb,queue.gc.info.intentsconsidered.count,count,[OpenMetrics v2] Number of ‘old’ intents +cockroachdb,queue.gc.info.intenttxns,count,"[OpenMetrics v1] Number of associated distinct transactions +Shown as transaction" +cockroachdb,queue.gc.info.intenttxns.count,count,"[OpenMetrics v2] Number of associated distinct transactions +Shown as transaction" +cockroachdb,queue.gc.info.numkeysaffected,count,"[OpenMetrics v1] Number of keys with GC’able data +Shown as key" +cockroachdb,queue.gc.info.numkeysaffected.count,count,"[OpenMetrics v2] Number of keys with GC’able data +Shown as key" +cockroachdb,queue.gc.info.numrangekeysaffected.count,count,Number of range keys GC’able +cockroachdb,queue.gc.info.pushtxn,count,[OpenMetrics v1] Number of attempted pushes +cockroachdb,queue.gc.info.pushtxn.count,count,[OpenMetrics v2] Number of attempted pushes 
+cockroachdb,queue.gc.info.resolvefailed.count,count,Number of cleanup intent failures during GC +cockroachdb,queue.gc.info.resolvesuccess,count,[OpenMetrics v1] Number of successful intent resolutions +cockroachdb,queue.gc.info.resolvesuccess.count,count,[OpenMetrics v2] Number of successful intent resolutions +cockroachdb,queue.gc.info.resolvetotal,count,[OpenMetrics v1] Number of attempted intent resolutions +cockroachdb,queue.gc.info.resolvetotal.count,count,[OpenMetrics v2] Number of attempted intent resolutions +cockroachdb,queue.gc.info.transactionresolvefailed.count,count,Number of intent cleanup failures for local transactions during GC +cockroachdb,queue.gc.info.transactionspangcaborted,count,[OpenMetrics v1] Number of GC’able entries corresponding to aborted txns +cockroachdb,queue.gc.info.transactionspangcaborted.count,count,[OpenMetrics v2] Number of GC’able entries corresponding to aborted txns +cockroachdb,queue.gc.info.transactionspangccommitted,count,[OpenMetrics v1] Number of GC’able entries corresponding to committed txns +cockroachdb,queue.gc.info.transactionspangccommitted.count,count,[OpenMetrics v2] Number of GC’able entries corresponding to committed txns +cockroachdb,queue.gc.info.transactionspangcpending,count,[OpenMetrics v1] Number of GC’able entries corresponding to pending txns +cockroachdb,queue.gc.info.transactionspangcpending.count,count,[OpenMetrics v2] Number of GC’able entries corresponding to pending txns +cockroachdb,queue.gc.info.transactionspangcstaging.count,count,Number of GC’able entries corresponding to staging txns +cockroachdb,queue.gc.info.transactionspanscanned,count,[OpenMetrics v1] Number of entries in transaction spans scanned from the engine +cockroachdb,queue.gc.info.transactionspanscanned.count,count,[OpenMetrics v2] Number of entries in transaction spans scanned from the engine +cockroachdb,queue.gc.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the GC queue +cockroachdb,queue.gc.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the GC queue +cockroachdb,queue.gc.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the GC queue +cockroachdb,queue.gc.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the GC queue +cockroachdb,queue.gc.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the GC queue +cockroachdb,queue.gc.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the GC queue +Shown as nanosecond" +cockroachdb,queue.gc.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the GC queue +Shown as nanosecond" +cockroachdb,queue.merge.pending,gauge,Number of pending replicas in the merge queue +cockroachdb,queue.merge.process.failure.count,count,Number of replicas which failed processing in the merge queue +cockroachdb,queue.merge.process.success.count,count,Number of replicas successfully processed by the merge queue +cockroachdb,queue.merge.processingnanos.count,count,"Nanoseconds spent processing replicas in the merge queue +Shown as nanosecond" +cockroachdb,queue.merge.purgatory,gauge,"Number of replicas in the merge queue’s purgatory, waiting to become mergeable" +cockroachdb,queue.raftlog.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the Raft log queue +cockroachdb,queue.raftlog.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the Raft log queue 
+cockroachdb,queue.raftlog.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the Raft log queue +cockroachdb,queue.raftlog.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the Raft log queue +cockroachdb,queue.raftlog.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the Raft log queue +cockroachdb,queue.raftlog.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the Raft log queue +Shown as nanosecond" +cockroachdb,queue.raftlog.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the Raft log queue +Shown as nanosecond" +cockroachdb,queue.raftsnapshot.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the Raft repair queue +cockroachdb,queue.raftsnapshot.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the Raft repair queue +cockroachdb,queue.raftsnapshot.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the Raft repair queue +cockroachdb,queue.raftsnapshot.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the Raft repair queue +cockroachdb,queue.raftsnapshot.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the Raft repair queue +cockroachdb,queue.raftsnapshot.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the Raft repair queue +Shown as nanosecond" +cockroachdb,queue.raftsnapshot.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the Raft repair queue +Shown as nanosecond" +cockroachdb,queue.replicagc.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the replica GC queue +cockroachdb,queue.replicagc.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the replica GC queue +cockroachdb,queue.replicagc.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the replica GC queue +cockroachdb,queue.replicagc.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the replica GC queue +cockroachdb,queue.replicagc.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the replica GC queue +cockroachdb,queue.replicagc.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the replica GC queue +Shown as nanosecond" +cockroachdb,queue.replicagc.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the replica GC queue +Shown as nanosecond" +cockroachdb,queue.replicagc.removereplica,count,[OpenMetrics v1] Number of replica removals attempted by the replica gc queue +cockroachdb,queue.replicagc.removereplica.count,count,[OpenMetrics v2] Number of replica removals attempted by the replica gc queue +cockroachdb,queue.replicate.addnonvoterreplica.count,count,Number of non-voter replica additions attempted by the replicate queue +cockroachdb,queue.replicate.addreplica,count,[OpenMetrics v1] Number of replica additions attempted by the replicate queue +cockroachdb,queue.replicate.addreplica.count,count,[OpenMetrics v2] Number of replica additions attempted by the replicate queue +cockroachdb,queue.replicate.addreplica.error.count,count,Number of failed replica additions processed by the replicate queue +cockroachdb,queue.replicate.addreplica.success.count,count,Number of successful replica 
additions processed by the replicate queue +cockroachdb,queue.replicate.addvoterreplica.count,count,Number of voter replica additions attempted by the replicate queue +cockroachdb,queue.replicate.nonvoterpromotions.count,count,Number of non-voters promoted to voters by the replicate queue +cockroachdb,queue.replicate.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the replicate queue +cockroachdb,queue.replicate.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the replicate queue +cockroachdb,queue.replicate.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the replicate queue +cockroachdb,queue.replicate.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the replicate queue +cockroachdb,queue.replicate.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the replicate queue +cockroachdb,queue.replicate.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the replicate queue +Shown as nanosecond" +cockroachdb,queue.replicate.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the replicate queue +Shown as nanosecond" +cockroachdb,queue.replicate.purgatory,gauge,"[OpenMetrics v1 & v2] Number of replicas in the replicate queue’s purgatory, awaiting allocation options" +cockroachdb,queue.replicate.rebalancenonvoterreplica.count,count,Number of non-voter replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.rebalancereplica,count,[OpenMetrics v1] Number of replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.rebalancereplica.count,count,[OpenMetrics v2] Number of replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.rebalancevoterreplica.count,count,Number of voter replica rebalancer-initiated additions attempted by the replicate queue +cockroachdb,queue.replicate.removedeadnonvoterreplica.count,count,Number of dead non-voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedeadreplica,count,[OpenMetrics v1] Number of dead replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedeadreplica.count,count,[OpenMetrics v2] Number of dead replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedeadreplica.error.count,count,Number of failed dead replica removals processed by the replicate queue +cockroachdb,queue.replicate.removedeadreplica.success.count,count,Number of successful dead replica removals processed by the replicate queue +cockroachdb,queue.replicate.removedeadvoterreplica.count,count,Number of dead voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedecommissioningnonvoterreplica.count,count,Number of decommissioning non-voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedecommissioningreplica.count,count,Number of decommissioning replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removedecommissioningreplica.error.count,count,Number of failed decommissioning replica removals processed 
by the replicate queue +cockroachdb,queue.replicate.removedecommissioningreplica.success.count,count,Number of successful decommissioning replica removals processed by the replicate queue +cockroachdb,queue.replicate.removedecommissioningvoterreplica.count,count,Number of decommissioning voter replica removals attempted by the replicate queue (typically in response to a node outage) +cockroachdb,queue.replicate.removelearnerreplica.count,count,Number of learner replica removals attempted by the replicate queue (typically due to internal race conditions) +cockroachdb,queue.replicate.removenonvoterreplica.count,count,Number of non-voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.removereplica,count,[OpenMetrics v1] Number of replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.removereplica.count,count,[OpenMetrics v2] Number of replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.removereplica.error.count,count,Number of failed replica removals processed by the replicate queue +cockroachdb,queue.replicate.removereplica.success.count,count,Number of successful replica removals processed by the replicate queue +cockroachdb,queue.replicate.removevoterreplica.count,count,Number of voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) +cockroachdb,queue.replicate.replacedeadreplica.error.count,count,Number of failed dead replica replacements processed by the replicate queue +cockroachdb,queue.replicate.replacedeadreplica.success.count,count,Number of successful dead replica replacements processed by the replicate queue +cockroachdb,queue.replicate.replacedecommissioningreplica.error.count,count,Number of failed decommissioning replica replacements processed by the replicate queue +cockroachdb,queue.replicate.replacedecommissioningreplica.success.count,count,Number of successful decommissioning replica replacements processed by the replicate queue +cockroachdb,queue.replicate.transferlease,count,[OpenMetrics v1] Number of range lease transfers attempted by the replicate queue +cockroachdb,queue.replicate.transferlease.count,count,[OpenMetrics v2] Number of range lease transfers attempted by the replicate queue +cockroachdb,queue.replicate.voterdemotions.count,count,Number of voters demoted to non-voters by the replicate queue +cockroachdb,queue.split.load_based.count,count,Number of range splits due to a range being greater than the configured max range load +cockroachdb,queue.split.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the split queue +cockroachdb,queue.split.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the split queue +cockroachdb,queue.split.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the split queue +cockroachdb,queue.split.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the split queue +cockroachdb,queue.split.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the split queue +cockroachdb,queue.split.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the split queue +Shown as nanosecond"
+cockroachdb,queue.split.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the split queue +Shown as nanosecond" +cockroachdb,queue.split.purgatory,gauge,"Number of replicas in the split queue’s purgatory, waiting to become splittable" +cockroachdb,queue.split.size_based.count,count,Number of range splits due to a range being greater than the configured max range size +cockroachdb,queue.split.span_config_based.count,count,Number of range splits due to span configuration +cockroachdb,queue.tsmaintenance.pending,gauge,[OpenMetrics v1 & v2] Number of pending replicas in the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.failure,count,[OpenMetrics v1] Number of replicas which failed processing in the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.failure.count,count,[OpenMetrics v2] Number of replicas which failed processing in the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.success,count,[OpenMetrics v1] Number of replicas successfully processed by the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.process.success.count,count,[OpenMetrics v2] Number of replicas successfully processed by the timeseries maintenance queue +cockroachdb,queue.tsmaintenance.processingnanos,count,"[OpenMetrics v1] Nanoseconds spent processing replicas in the timeseries maintenance queue +Shown as nanosecond" +cockroachdb,queue.tsmaintenance.processingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent processing replicas in the timeseries maintenance queue +Shown as nanosecond" +cockroachdb,raft.commands.proposed.count,count,Number of Raft commands proposed. The number of proposals and all kinds of reproposals made by leaseholders. This metric approximates the number of commands submitted through Raft. +cockroachdb,raft.commands.reproposed.new_lai.count,count,"Number of Raft commands re-proposed with a newer LAI. The number of Raft commands that leaseholders re-proposed with a modified LAI. Such re-proposals happen for commands that are committed to Raft out of intended order, and hence can not be applied as is." +cockroachdb,raft.commands.reproposed.unchanged.count,count,"Number of Raft commands re-proposed without modification. The number of Raft commands that leaseholders re-proposed without modification. Such re-proposals happen for commands that are not committed/applied within a timeout, and have a high chance of being dropped."
+cockroachdb,raft.commandsapplied,count,"[OpenMetrics v1] Count of Raft commands applied +Shown as command" +cockroachdb,raft.commandsapplied.count,count,"[OpenMetrics v2] Count of Raft commands applied +Shown as command" +cockroachdb,raft.dropped.count,count,"Number of Raft proposals dropped (this counts individual raftpb.Entry, not raftpb.MsgProp)" +cockroachdb,raft.dropped_leader.count,count,"Number of Raft proposals dropped by a Replica that believes itself to be the leader; each update also increments raft.dropped (this counts individual raftpb.Entry, not raftpb.MsgProp)" +cockroachdb,raft.enqueued.pending,gauge,[OpenMetrics v1 & v2] Number of pending outgoing messages in the Raft Transport queue +cockroachdb,raft.entrycache.accesses.count,count,Number of cache lookups in the Raft entry cache +cockroachdb,raft.entrycache.bytes,gauge,"Aggregate size of all Raft entries in the Raft entry cache +Shown as byte" +cockroachdb,raft.entrycache.hits.count,count,Number of successful cache lookups in the Raft entry cache +cockroachdb,raft.entrycache.read_bytes.count,count,"Counter of bytes in entries returned from the Raft entry cache +Shown as byte" +cockroachdb,raft.entrycache.size,gauge,Number of Raft entries in the Raft entry cache +cockroachdb,raft.heartbeats.pending,gauge,[OpenMetrics v1 & v2] Number of pending heartbeats and responses waiting to be coalesced +cockroachdb,raft.process.applycommitted.latency.bucket,count,"Latency histogram for applying all committed Raft commands in a Raft ready. This measures the end-to-end latency of applying all commands in a Raft ready. Note that this closes over possibly multiple measurements of the ‘raft.process.commandcommit.latency’ metric, which receives datapoints for each sub-batch processed in the process. +Shown as nanosecond" +cockroachdb,raft.process.applycommitted.latency.count,count,"Latency histogram for applying all committed Raft commands in a Raft ready. This measures the end-to-end latency of applying all commands in a Raft ready. Note that this closes over possibly multiple measurements of the ‘raft.process.commandcommit.latency’ metric, which receives datapoints for each sub-batch processed in the process. +Shown as nanosecond" +cockroachdb,raft.process.applycommitted.latency.sum,count,"Latency histogram for applying all committed Raft commands in a Raft ready. This measures the end-to-end latency of applying all commands in a Raft ready. Note that this closes over possibly multiple measurements of the ‘raft.process.commandcommit.latency’ metric, which receives datapoints for each sub-batch processed in the process. +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency,gauge,"[OpenMetrics v1] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency.bucket,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency.count,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.commandcommit.latency.sum,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft commands +Shown as nanosecond" +cockroachdb,raft.process.handleready.latency.bucket,count,"Latency histogram for handling a Raft ready. This measures the end-to-end latency of the Raft state advancement loop, including: snapshot application; SST ingestion; durably appending to the Raft log (i.e. 
includes fsync); and entry application (incl. replicated side effects, notably log truncation). +Shown as nanosecond" +cockroachdb,raft.process.handleready.latency.count,count,"Latency histogram for handling a Raft ready. This measures the end-to-end latency of the Raft state advancement loop, including: snapshot application; SST ingestion; durably appending to the Raft log (i.e. includes fsync); and entry application (incl. replicated side effects, notably log truncation). +Shown as nanosecond" +cockroachdb,raft.process.handleready.latency.sum,count,"Latency histogram for handling a Raft ready. This measures the end-to-end latency of the Raft state advancement loop, including: snapshot application; SST ingestion; durably appending to the Raft log (i.e. includes fsync); and entry application (incl. replicated side effects, notably log truncation). +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency,gauge,"[OpenMetrics v1] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency.bucket,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency.count,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.logcommit.latency.sum,count,"[OpenMetrics v2] Latency histogram in nanoseconds for committing Raft log entries +Shown as nanosecond" +cockroachdb,raft.process.tickingnanos,count,"[OpenMetrics v1] Nanoseconds spent in store.processRaft() processing replica.Tick() +Shown as nanosecond" +cockroachdb,raft.process.tickingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent in store.processRaft() processing replica.Tick() +Shown as nanosecond" +cockroachdb,raft.process.workingnanos,count,"[OpenMetrics v1] Nanoseconds spent in store.processRaft() working +Shown as nanosecond" +cockroachdb,raft.process.workingnanos.count,count,"[OpenMetrics v2] Nanoseconds spent in store.processRaft() working +Shown as nanosecond" +cockroachdb,raft.quota_pool.percent_used.bucket,count,Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval +cockroachdb,raft.quota_pool.percent_used.count,count,Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval +cockroachdb,raft.quota_pool.percent_used.sum,count,Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval +cockroachdb,raft.rcvd.app,count,[OpenMetrics v1] Number of MsgApp messages received by this store +cockroachdb,raft.rcvd.app.count,count,[OpenMetrics v2] Number of MsgApp messages received by this store +cockroachdb,raft.rcvd.appresp,count,[OpenMetrics v1] Number of MsgAppResp messages received by this store +cockroachdb,raft.rcvd.appresp.count,count,[OpenMetrics v2] Number of MsgAppResp messages received by this store +cockroachdb,raft.rcvd.bytes.count,count,"Number of bytes in Raft messages received by this store. Note that this does not include raft snapshot received. +Shown as byte" +cockroachdb,raft.rcvd.cross_region.bytes.count,count,"Number of bytes received by this store for cross region Raft messages (when region tiers are configured). Note that this does not include raft snapshot received. +Shown as byte" +cockroachdb,raft.rcvd.cross_zone.bytes.count,count,"Number of bytes received by this store for cross zone, same region Raft messages (when region and zone tiers are configured). 
If region tiers are not configured, this count may include data sent between different regions. +Shown as byte" +cockroachdb,raft.rcvd.dropped,count,[OpenMetrics v1] Number of dropped incoming Raft messages +cockroachdb,raft.rcvd.dropped.count,count,[OpenMetrics v2] Number of dropped incoming Raft messages +cockroachdb,raft.rcvd.dropped_bytes.count,count,"Bytes of dropped incoming Raft messages +Shown as byte" +cockroachdb,raft.rcvd.heartbeat,count,"[OpenMetrics v1] Number of (coalesced, if enabled) MsgHeartbeat messages received by this store" +cockroachdb,raft.rcvd.heartbeat.count,count,"[OpenMetrics v2] Number of (coalesced, if enabled) MsgHeartbeat messages received by this store" +cockroachdb,raft.rcvd.heartbeatresp,count,"[OpenMetrics v1] Number of (coalesced, if enabled) MsgHeartbeatResp messages received by this store" +cockroachdb,raft.rcvd.heartbeatresp.count,count,"[OpenMetrics v2] Number of (coalesced, if enabled) MsgHeartbeatResp messages received by this store" +cockroachdb,raft.rcvd.prevote,count,[OpenMetrics v1] Number of MsgPreVote messages received by this store +cockroachdb,raft.rcvd.prevote.count,count,[OpenMetrics v2] Number of MsgPreVote messages received by this store +cockroachdb,raft.rcvd.prevoteresp,count,[OpenMetrics v1] Number of MsgPreVoteResp messages received by this store +cockroachdb,raft.rcvd.prevoteresp.count,count,[OpenMetrics v2] Number of MsgPreVoteResp messages received by this store +cockroachdb,raft.rcvd.prop,count,[OpenMetrics v1] Number of MsgProp messages received by this store +cockroachdb,raft.rcvd.prop.count,count,[OpenMetrics v2] Number of MsgProp messages received by this store +cockroachdb,raft.rcvd.queued_bytes,gauge,"Number of bytes in messages currently waiting for raft processing +Shown as byte" +cockroachdb,raft.rcvd.snap,count,[OpenMetrics v1] Number of MsgSnap messages received by this store +cockroachdb,raft.rcvd.snap.count,count,[OpenMetrics v2] Number of MsgSnap messages received by this store +cockroachdb,raft.rcvd.stepped_bytes.count,count,"Number of bytes in messages processed by Raft. Messages reflected here have been handed to Raft (via RawNode.Step). This does not imply that the messages are no longer held in memory or that IO has been performed. +Shown as byte" +cockroachdb,raft.rcvd.timeoutnow,count,[OpenMetrics v1] Number of MsgTimeoutNow messages received by this store +cockroachdb,raft.rcvd.timeoutnow.count,count,[OpenMetrics v2] Number of MsgTimeoutNow messages received by this store +cockroachdb,raft.rcvd.transferleader,count,[OpenMetrics v1] Number of MsgTransferLeader messages received by this store +cockroachdb,raft.rcvd.transferleader.count,count,[OpenMetrics v2] Number of MsgTransferLeader messages received by this store +cockroachdb,raft.rcvd.vote,count,[OpenMetrics v1] Number of MsgVote messages received by this store +cockroachdb,raft.rcvd.vote.count,count,[OpenMetrics v2] Number of MsgVote messages received by this store +cockroachdb,raft.rcvd.voteresp,count,[OpenMetrics v1] Number of MsgVoteResp messages received by this store +cockroachdb,raft.rcvd.voteresp.count,count,[OpenMetrics v2] Number of MsgVoteResp messages received by this store +cockroachdb,raft.replication.latency.bucket,count,"The duration elapsed between having evaluated a BatchRequest and it being reflected in the proposer’s state machine (i.e. having applied fully). This encompasses time spent in the quota pool, in replication (including reproposals), and application, but notably not sequencing latency (i.e. contention and latch acquisition)." 
+cockroachdb,raft.replication.latency.count,count,"The duration elapsed between having evaluated a BatchRequest and it being reflected in the proposer’s state machine (i.e. having applied fully). This encompasses time spent in the quota pool, in replication (including reproposals), and application, but notably not sequencing latency (i.e. contention and latch acquisition)." +cockroachdb,raft.replication.latency.sum,count,"The duration elapsed between having evaluated a BatchRequest and it being reflected in the proposer’s state machine (i.e. having applied fully). This encompasses time spent in the quota pool, in replication (including reproposals), and application, but notably not sequencing latency (i.e. contention and latch acquisition)." +cockroachdb,raft.scheduler.latency,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.scheduler.latency.bucket,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.scheduler.latency.count,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.scheduler.latency.sum,count,"Queueing durations for ranges waiting to be processed by the Raft scheduler. This histogram measures the delay from when a range is registered with the scheduler for processing to when it is actually processed. This does not include the duration of processing. +Shown as nanosecond" +cockroachdb,raft.sent.bytes.count,count,"Number of bytes in Raft messages sent by this store. Note that this does not include raft snapshot sent. +Shown as byte" +cockroachdb,raft.sent.cross_region.bytes.count,count,"Number of bytes sent by this store for cross region Raft messages (when region tiers are configured). Note that this does not include raft snapshot sent. +Shown as byte" +cockroachdb,raft.sent.cross_zone.bytes.count,count,"Number of bytes sent by this store for cross zone, same region Raft messages (when region and zone tiers are configured). If region tiers are not configured, this count may include data sent between different regions. To ensure accurate monitoring of transmitted data, it is important to set up a consistent locality configuration across nodes. Note that this does not include raft snapshot sent. +Shown as byte" +cockroachdb,raft.storage.read_bytes.count,count,"Counter of raftpb.Entry.Size() read from pebble for raft log entries. These are the bytes returned from the (raft.Storage).Entries method that were not returned via the raft entry cache. This metric plus the raft.entrycache.read_bytes metric represent the total bytes returned from the Entries method. 
+Shown as byte" +cockroachdb,raft.ticks,count,[OpenMetrics v1] Number of Raft ticks queued +cockroachdb,raft.ticks.count,count,[OpenMetrics v2] Number of Raft ticks queued +cockroachdb,raft.timeoutcampaign.count,count,Number of Raft replicas campaigning after missed heartbeats from leader +cockroachdb,raft.transport.flow_token_dispatches_dropped.count,count,Number of flow token dispatches dropped by the Raft Transport +cockroachdb,raft.transport.rcvd.count,count,Number of Raft messages received by the Raft Transport +cockroachdb,raft.transport.reverse_rcvd.count,count,"Messages received from the reverse direction of a stream. These messages should be rare. They are mostly informational, and are not actual responses to Raft messages. Responses are received over another stream." +cockroachdb,raft.transport.reverse_sent.count,count,"Messages sent in the reverse direction of a stream. These messages should be rare. They are mostly informational, and are not actual responses to Raft messages. Responses are sent over another stream." +cockroachdb,raft.transport.send_queue_bytes,gauge,"The total byte size of pending outgoing messages in the queue. The queue is composed of multiple bounded channels associated with different peers. A size higher than the average baseline could indicate issues streaming messages to at least one peer. Use this metric together with send-queue-size, to have a fuller picture. +Shown as byte" +cockroachdb,raft.transport.send_queue_size,gauge,Number of pending outgoing messages in the Raft Transport queue. The queue is composed of multiple bounded channels associated with different peers. An overall size in the tens of thousands could indicate issues streaming messages to at least one peer. Use this metric in conjunction with send-queue-bytes. +cockroachdb,raft.transport.sends_dropped.count,count,Number of Raft message sends dropped by the Raft Transport +cockroachdb,raft.transport.sent.count,count,Number of Raft messages sent by the Raft Transport +cockroachdb,raftlog.behind,gauge,"[OpenMetrics v1 & v2] Number of Raft log entries followers on other stores are behind +Shown as entry" +cockroachdb,raftlog.truncated,count,"[OpenMetrics v1] Number of Raft log entries truncated +Shown as entry" +cockroachdb,raftlog.truncated.count,count,"[OpenMetrics v2] Number of Raft log entries truncated +Shown as entry" +cockroachdb,range.adds,count,[OpenMetrics v1] Number of range additions +cockroachdb,range.adds.count,count,[OpenMetrics v2] Number of range additions +cockroachdb,range.merges.count,count,Number of range merges +cockroachdb,range.raftleaderremovals.count,count,Number of times the current Raft leader was removed from a range +cockroachdb,range.raftleadertransfers,count,[OpenMetrics v1] Number of raft leader transfers +cockroachdb,range.raftleadertransfers.count,count,[OpenMetrics v2] Number of raft leader transfers +cockroachdb,range.recoveries.count,count,Count of offline loss of quorum recovery operations performed on ranges. This count increments for every range recovered in an offline loss of quorum recovery operation. The metric is updated when the node on which the survivor replica is located starts following the recovery. 
+cockroachdb,range.removes,count,[OpenMetrics v1] Number of range removals +cockroachdb,range.removes.count,count,[OpenMetrics v2] Number of range removals +cockroachdb,range.snapshots.applied_initial.count,count,Number of snapshots applied for initial upreplication +cockroachdb,range.snapshots.applied_non_voter.count,count,Number of snapshots applied by non-voter replicas +cockroachdb,range.snapshots.applied_voter.count,count,Number of snapshots applied by voter replicas +cockroachdb,range.snapshots.cross_region.rcvd_bytes.count,count,"Number of snapshot bytes received cross region +Shown as byte" +cockroachdb,range.snapshots.cross_region.sent_bytes.count,count,"Number of snapshot bytes sent cross region +Shown as byte" +cockroachdb,range.snapshots.cross_zone.rcvd_bytes.count,count,"Number of snapshot bytes received cross zone within same region or if region tiers are not configured. This count increases for each snapshot received between different zones within the same region. However, if the region tiers are not configured, this count may also include snapshot data received between different regions. +Shown as byte" +cockroachdb,range.snapshots.cross_zone.sent_bytes.count,count,"Number of snapshot bytes sent cross zone within same region or if region tiers are not configured. This count increases for each snapshot sent between different zones within the same region. However, if the region tiers are not configured, this count may also include snapshot data sent between different regions. +Shown as byte" +cockroachdb,range.snapshots.delegate.failures.count,count,"Number of snapshots that were delegated to a different node and resulted in failure on that delegate. There are numerous reasons a failure can occur on a delegate such as timeout, the delegate Raft log being too far behind or the delegate being too busy to send." +cockroachdb,range.snapshots.delegate.in_progress,gauge,Number of delegated snapshots that are currently in-flight. +cockroachdb,range.snapshots.delegate.sent_bytes.count,count,"Bytes sent using a delegate. The number of bytes sent as a result of a delegate snapshot request that originated from a different node. This metric is useful in evaluating the network savings of not sending cross region traffic. +Shown as byte" +cockroachdb,range.snapshots.delegate.successes.count,count,Number of snapshots that were delegated to a different node and resulted in success on that delegate. This does not count self-delegated snapshots. 
+cockroachdb,range.snapshots.generated,count,[OpenMetrics v1] Number of generated snapshots +cockroachdb,range.snapshots.generated.count,count,[OpenMetrics v2] Number of generated snapshots +cockroachdb,range.snapshots.normal_applied,count,[OpenMetrics v1] Number of applied snapshots +cockroachdb,range.snapshots.normal_applied.count,count,[OpenMetrics v2] Number of applied snapshots +cockroachdb,range.snapshots.preemptive_applied,count,[OpenMetrics v1] Number of applied pre-emptive snapshots +cockroachdb,range.snapshots.preemptive_applied.count,count,[OpenMetrics v2] Number of applied pre-emptive snapshots +cockroachdb,range.snapshots.rcvd_bytes.count,count,"Number of snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.rebalancing.rcvd_bytes.count,count,"Number of rebalancing snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.rebalancing.sent_bytes.count,count,"Number of rebalancing snapshot bytes sent +Shown as byte" +cockroachdb,range.snapshots.recovery.rcvd_bytes.count,count,"Number of recovery snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.recovery.sent_bytes.count,count,"Number of recovery snapshot bytes sent +Shown as byte" +cockroachdb,range.snapshots.recv_failed.count,count,"Number of range snapshot initialization messages that errored out on the recipient, typically before any data is transferred" +cockroachdb,range.snapshots.recv_in_progress,gauge,Number of non-empty snapshots being received +cockroachdb,range.snapshots.recv_queue,gauge,Number of snapshots queued to receive +cockroachdb,range.snapshots.recv_queue_bytes,gauge,"Total size of all snapshots in the snapshot receive queue +Shown as byte" +cockroachdb,range.snapshots.recv_total_in_progress,gauge,Number of total snapshots being received +cockroachdb,range.snapshots.recv_unusable.count,count,Number of range snapshot that were fully transmitted but determined to be unnecessary or unusable +cockroachdb,range.snapshots.send_in_progress,gauge,Number of non-empty snapshots being sent +cockroachdb,range.snapshots.send_queue,gauge,Number of snapshots queued to send +cockroachdb,range.snapshots.send_queue_bytes,gauge,"Total size of all snapshots in the snapshot send queue +Shown as byte" +cockroachdb,range.snapshots.send_total_in_progress,gauge,Number of total snapshots being sent +cockroachdb,range.snapshots.sent_bytes.count,count,"Number of snapshot bytes sent +Shown as byte" +cockroachdb,range.snapshots.unknown.rcvd_bytes.count,count,"Number of unknown snapshot bytes received +Shown as byte" +cockroachdb,range.snapshots.unknown.sent_bytes.count,count,"Number of unknown snapshot bytes sent +Shown as byte" +cockroachdb,range.splits.count,count,Number of range splits +cockroachdb,range.splits.total,count,[OpenMetrics v1] Number of range splits +cockroachdb,range.splits.total.count,count,[OpenMetrics v2] Number of range splits +cockroachdb,rangekeybytes,gauge,"Number of bytes taken up by range keys (e.g. MVCC range tombstones) +Shown as byte" +cockroachdb,rangekeycount,gauge,Count of all range keys (e.g. 
MVCC range tombstones) +cockroachdb,ranges,gauge,[OpenMetrics v1 & v2] Number of ranges +cockroachdb,ranges.overreplicated,gauge,[OpenMetrics v1 & v2] Number of ranges with more live replicas than the replication target +cockroachdb,ranges.unavailable,gauge,[OpenMetrics v1 & v2] Number of ranges with fewer live replicas than needed for quorum +cockroachdb,ranges.underreplicated,gauge,[OpenMetrics v1 & v2] Number of ranges with fewer live replicas than the replication target +cockroachdb,rangevalbytes,gauge,"Number of bytes taken up by range key values (e.g. MVCC range tombstones) +Shown as byte" +cockroachdb,rangevalcount,gauge,Count of all range key values (e.g. MVCC range tombstones) +cockroachdb,rebalancing.cpunanospersecond,gauge,"Average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.lease.transfers.count,count,Number of lease transfers motivated by store-level load imbalances +cockroachdb,rebalancing.queriespersecond,gauge,"Number of kv-level requests received per second by the store, averaged over a large time period as used in rebalancing decisions" +cockroachdb,rebalancing.range.rebalances.count,count,Number of range rebalance operations motivated by store-level load imbalances +cockroachdb,rebalancing.readbytespersecond,gauge,"Number of bytes read recently per second, considering the last 30 minutes. +Shown as byte" +cockroachdb,rebalancing.readspersecond,gauge,"Number of keys read recently per second, considering the last 30 minutes." +cockroachdb,rebalancing.replicas.cpunanospersecond.bucket,count,"Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.replicas.cpunanospersecond.count,count,"Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.replicas.cpunanospersecond.sum,count,"Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. +Shown as nanosecond" +cockroachdb,rebalancing.replicas.queriespersecond.bucket,count,Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. +cockroachdb,rebalancing.replicas.queriespersecond.count,count,Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. +cockroachdb,rebalancing.replicas.queriespersecond.sum,count,Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. +cockroachdb,rebalancing.requestspersecond,gauge,"Number of requests received recently per second, considering the last 30 minutes." +cockroachdb,rebalancing.state.imbalanced_overfull_options_exhausted.count,count,Number of occurrences where this store was overfull but failed to shed load after exhausting available rebalance options +cockroachdb,rebalancing.writebytespersecond,gauge,"Number of bytes written recently per second, considering the last 30 minutes. +Shown as byte" +cockroachdb,rebalancing.writespersecond,gauge,"[OpenMetrics v1 & v2] Number of keys written (i.e. 
applied by raft) per second to the store, averaged over a large time period as used in rebalancing decisions +Shown as key" +cockroachdb,replicas,gauge,Number of replicas +cockroachdb,replicas.commandqueue.combinedqueuesize,gauge,"[OpenMetrics v1 & v2] Number of commands in all CommandQueues combined +Shown as command" +cockroachdb,replicas.commandqueue.combinedreadcount,gauge,"[OpenMetrics v1 & v2] Number of read-only commands in all CommandQueues combined +Shown as command" +cockroachdb,replicas.commandqueue.combinedwritecount,gauge,"[OpenMetrics v1 & v2] Number of read-write commands in all CommandQueues combined +Shown as command" +cockroachdb,replicas.commandqueue.maxoverlaps,gauge,"[OpenMetrics v1 & v2] Largest number of overlapping commands seen when adding to any CommandQueue +Shown as command" +cockroachdb,replicas.commandqueue.maxreadcount,gauge,"[OpenMetrics v1 & v2] Largest number of read-only commands in any CommandQueue +Shown as command" +cockroachdb,replicas.commandqueue.maxsize,gauge,"[OpenMetrics v1 & v2] Largest number of commands in any CommandQueue +Shown as command" +cockroachdb,replicas.commandqueue.maxtreesize,gauge,[OpenMetrics v1 & v2] Largest number of intervals in any CommandQueue’s interval tree +cockroachdb,replicas.commandqueue.maxwritecount,gauge,"[OpenMetrics v1 & v2] Largest number of read-write commands in any CommandQueue +Shown as command" +cockroachdb,replicas.leaders,gauge,[OpenMetrics v1 & v2] Number of raft leaders +cockroachdb,replicas.leaders.not_leaseholders,gauge,[OpenMetrics v1 & v2] Number of replicas that are Raft leaders whose range lease is held by another store +cockroachdb,replicas.leaders_invalid_lease,gauge,Number of replicas that are Raft leaders whose lease is invalid +cockroachdb,replicas.leaders_not_leaseholders,gauge,Number of replicas that are Raft leaders whose range lease is held by another store +cockroachdb,replicas.leaseholders,gauge,[OpenMetrics v1 & v2] Number of lease holders +cockroachdb,replicas.quiescent,gauge,[OpenMetrics v1 & v2] Number of quiesced replicas +cockroachdb,replicas.reserved,gauge,[OpenMetrics v1 & v2] Number of replicas reserved for snapshots +cockroachdb,replicas.total,gauge,[OpenMetrics v1 & v2] Number of replicas +cockroachdb,replicas.uninitialized,gauge,"Number of uninitialized replicas, this does not include uninitialized replicas that can lie dormant in a persistent state." +cockroachdb,requests.backpressure.split,gauge,[OpenMetrics v1 & v2] Number of backpressured writes waiting on a Range split +cockroachdb,requests.slow.commandqueue,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time in the command queue +Shown as request" +cockroachdb,requests.slow.distsender,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time in the dist sender +Shown as request" +cockroachdb,requests.slow.latch,gauge,Number of requests that have been stuck for a long time acquiring latches. Latches moderate access to the KV keyspace for the purpose of evaluating and replicating commands. A slow latch acquisition attempt is often caused by another request holding and not releasing its latches in a timely manner. 
+cockroachdb,requests.slow.lease,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time acquiring a lease +Shown as request" +cockroachdb,requests.slow.raft,gauge,"[OpenMetrics v1 & v2] Number of requests that have been stuck for a long time in raft +Shown as request" +cockroachdb,rocksdb.block.cache.hits,gauge,[OpenMetrics v1 & v2] Count of block cache hits +cockroachdb,rocksdb.block.cache.misses,gauge,[OpenMetrics v1 & v2] Count of block cache misses +cockroachdb,rocksdb.block.cache.pinned.usage,gauge,"[OpenMetrics v1 & v2] Bytes pinned by the block cache +Shown as byte" +cockroachdb,rocksdb.block.cache.usage,gauge,"[OpenMetrics v1 & v2] Bytes used by the block cache +Shown as byte" +cockroachdb,rocksdb.bloom.filter.prefix.checked,gauge,Number of times the bloom filter was checked +cockroachdb,rocksdb.bloom.filter.prefix.useful,gauge,Number of times the bloom filter helped avoid iterator creation +cockroachdb,rocksdb.bloom_filter.prefix.checked,gauge,[OpenMetrics v1 & v2] Number of times the bloom filter was checked +cockroachdb,rocksdb.bloom_filter.prefix.useful,gauge,[OpenMetrics v1 & v2] Number of times the bloom filter helped avoid iterator creation +cockroachdb,rocksdb.compacted_bytes_read,gauge,"Bytes read during compaction +Shown as byte" +cockroachdb,rocksdb.compacted_bytes_written,gauge,"Bytes written during compaction +Shown as byte" +cockroachdb,rocksdb.compactions,gauge,Number of table compactions +cockroachdb,rocksdb.compactions.total,gauge,[OpenMetrics v1 & v2] Number of table compactions +cockroachdb,rocksdb.encryption.algorithm,gauge,"Algorithm in use for encryption-at-rest, see ccl/storageccl/engineccl/enginepbccl/key_registry.proto" +cockroachdb,rocksdb.estimated_pending_compaction,gauge,"Estimated pending compaction bytes +Shown as byte" +cockroachdb,rocksdb.flushed_bytes,gauge,"Bytes written during flush +Shown as byte" +cockroachdb,rocksdb.flushes,gauge,Number of table flushes +cockroachdb,rocksdb.flushes.total,gauge,[OpenMetrics v1 & v2] Number of table flushes +cockroachdb,rocksdb.ingested_bytes,gauge,"Bytes ingested +Shown as byte" +cockroachdb,rocksdb.memtable.total.size,gauge,"[OpenMetrics v1 & v2] Current size of memtable in bytes +Shown as byte" +cockroachdb,rocksdb.num_sstables,gauge,"[OpenMetrics v1 & v2] Number of rocksdb SSTables +Shown as table" +cockroachdb,rocksdb.read.amplification,gauge,"[OpenMetrics v1 & v2] Number of disk reads per query +Shown as read" +cockroachdb,rocksdb.table.readers.mem.estimate,gauge,[OpenMetrics v1 & v2] Memory used by index and filter blocks +cockroachdb,round_trip.latency,gauge,"[OpenMetrics v1] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip.latency.bucket,count,"[OpenMetrics v2] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip.latency.count,count,"[OpenMetrics v2] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip.latency.sum,count,"[OpenMetrics v2] Distribution of round-trip latencies with other nodes in nanoseconds +Shown as nanosecond" +cockroachdb,round_trip_latency.bucket,count,"Distribution of round-trip latencies with other nodes. This only reflects successful heartbeats and measures gRPC overhead as well as possible head-of-line blocking. Elevated values in this metric may hint at network issues and/or saturation, but they are no proof of them. 
CPU overload can similarly elevate this metric +Shown as nanosecond" +cockroachdb,round_trip_latency.count,count,"Distribution of round-trip latencies with other nodes. This only reflects successful heartbeats and measures gRPC overhead as well as possible head-of-line blocking. Elevated values in this metric may hint at network issues and/or saturation, but they are no proof of them. CPU overload can similarly elevate this metric. +Shown as nanosecond" +cockroachdb,round_trip_latency.sum,count,"Distribution of round-trip latencies with other nodes. This only reflects successful heartbeats and measures gRPC overhead as well as possible head-of-line blocking. Elevated values in this metric may hint at network issues and/or saturation, but they are no proof of them. CPU overload can similarly elevate this metric. +Shown as nanosecond" +cockroachdb,rpc.batches.recv.count,count,Number of batches processed +cockroachdb,rpc.connection.avg_round_trip_latency,gauge,"Sum of exponentially weighted moving average of round-trip latencies, as measured through a gRPC RPC. Dividing this Gauge by rpc.connection.healthy gives an approximation of average latency, but the top-level round-trip-latency histogram is more useful. Instead, users should consult the label families of this metric if they are available. +Shown as nanosecond" +cockroachdb,rpc.connection.failures.count,count,Counter of failed connections. This includes both the event in which a healthy connection terminates as well as unsuccessful reconnection attempts. Connections that are terminated as part of local node shutdown are excluded. Decommissioned peers are excluded. +cockroachdb,rpc.connection.healthy,gauge,Gauge of current connections in a healthy state (i.e. bidirectionally connected and heartbeating) +cockroachdb,rpc.connection.healthy_nanos,gauge,"Gauge of nanoseconds of healthy connection time. On the prometheus endpoint scraped with the cluster setting ‘server.child_metrics.enabled’ set, the constituent parts of this metric are available on a per-peer basis and one can read off for how long a given peer has been connected +Shown as nanosecond" +cockroachdb,rpc.connection.heartbeats.count,count,Counter of successful heartbeats. 
+cockroachdb,rpc.connection.inactive,gauge,"Gauge of current connections in an inactive state and pending deletion; these are not healthy but are not tracked as unhealthy either because there is reason to believe that the connection is no longer relevant, for example if the node has since been seen under a new address" +cockroachdb,rpc.connection.unhealthy,gauge,Gauge of current connections in an unhealthy state (not bidirectionally connected or heartbeating) +cockroachdb,rpc.connection.unhealthy_nanos,gauge,"Gauge of nanoseconds of unhealthy connection time. On the prometheus endpoint scraped with the cluster setting ‘server.child_metrics.enabled’ set, the constituent parts of this metric are available on a per-peer basis and one can read off for how long a given peer has been unreachable +Shown as nanosecond" +cockroachdb,rpc.method.addsstable.recv.count,count,Number of AddSSTable requests processed +cockroachdb,rpc.method.adminchangereplicas.recv.count,count,Number of AdminChangeReplicas requests processed +cockroachdb,rpc.method.adminmerge.recv.count,count,Number of AdminMerge requests processed +cockroachdb,rpc.method.adminrelocaterange.recv.count,count,Number of AdminRelocateRange requests processed +cockroachdb,rpc.method.adminscatter.recv.count,count,Number of AdminScatter requests processed +cockroachdb,rpc.method.adminsplit.recv.count,count,Number of AdminSplit requests processed +cockroachdb,rpc.method.admintransferlease.recv.count,count,Number of AdminTransferLease requests processed +cockroachdb,rpc.method.adminunsplit.recv.count,count,Number of AdminUnsplit requests processed +cockroachdb,rpc.method.adminverifyprotectedtimestamp.recv.count,count,Number of AdminVerifyProtectedTimestamp requests processed +cockroachdb,rpc.method.barrier.recv.count,count,Number of Barrier requests processed +cockroachdb,rpc.method.checkconsistency.recv.count,count,Number of CheckConsistency requests processed +cockroachdb,rpc.method.clearrange.recv.count,count,Number of ClearRange requests processed +cockroachdb,rpc.method.computechecksum.recv.count,count,Number of ComputeChecksum requests processed +cockroachdb,rpc.method.conditionalput.recv.count,count,Number of ConditionalPut requests processed +cockroachdb,rpc.method.delete.recv.count,count,Number of Delete requests processed +cockroachdb,rpc.method.deleterange.recv.count,count,Number of DeleteRange requests processed +cockroachdb,rpc.method.endtxn.recv.count,count,Number of EndTxn requests processed +cockroachdb,rpc.method.export.recv.count,count,Number of Export requests processed +cockroachdb,rpc.method.gc.recv.count,count,Number of GC requests processed +cockroachdb,rpc.method.get.recv.count,count,Number of Get requests processed +cockroachdb,rpc.method.heartbeattxn.recv.count,count,Number of HeartbeatTxn requests processed +cockroachdb,rpc.method.increment.recv.count,count,Number of Increment requests processed +cockroachdb,rpc.method.initput.recv.count,count,Number of InitPut requests processed +cockroachdb,rpc.method.isspanempty.recv.count,count,Number of IsSpanEmpty requests processed +cockroachdb,rpc.method.leaseinfo.recv.count,count,Number of LeaseInfo requests processed +cockroachdb,rpc.method.merge.recv.count,count,Number of Merge requests processed +cockroachdb,rpc.method.migrate.recv.count,count,Number of Migrate requests processed +cockroachdb,rpc.method.probe.recv.count,count,Number of Probe requests processed +cockroachdb,rpc.method.pushtxn.recv.count,count,Number of PushTxn requests processed 
+cockroachdb,rpc.method.put.recv.count,count,Number of Put requests processed +cockroachdb,rpc.method.queryintent.recv.count,count,Number of QueryIntent requests processed +cockroachdb,rpc.method.querylocks.recv.count,count,Number of QueryLocks requests processed +cockroachdb,rpc.method.queryresolvedtimestamp.recv.count,count,Number of QueryResolvedTimestamp requests processed +cockroachdb,rpc.method.querytxn.recv.count,count,Number of QueryTxn requests processed +cockroachdb,rpc.method.rangestats.recv.count,count,Number of RangeStats requests processed +cockroachdb,rpc.method.recomputestats.recv.count,count,Number of RecomputeStats requests processed +cockroachdb,rpc.method.recovertxn.recv.count,count,Number of RecoverTxn requests processed +cockroachdb,rpc.method.refresh.recv.count,count,Number of Refresh requests processed +cockroachdb,rpc.method.refreshrange.recv.count,count,Number of RefreshRange requests processed +cockroachdb,rpc.method.requestlease.recv.count,count,Number of RequestLease requests processed +cockroachdb,rpc.method.resolveintent.recv.count,count,Number of ResolveIntent requests processed +cockroachdb,rpc.method.resolveintentrange.recv.count,count,Number of ResolveIntentRange requests processed +cockroachdb,rpc.method.reversescan.recv.count,count,Number of ReverseScan requests processed +cockroachdb,rpc.method.revertrange.recv.count,count,Number of RevertRange requests processed +cockroachdb,rpc.method.scan.recv.count,count,Number of Scan requests processed +cockroachdb,rpc.method.subsume.recv.count,count,Number of Subsume requests processed +cockroachdb,rpc.method.transferlease.recv.count,count,Number of TransferLease requests processed +cockroachdb,rpc.method.truncatelog.recv.count,count,Number of TruncateLog requests processed +cockroachdb,rpc.method.writebatch.recv.count,count,Number of WriteBatch requests processed +cockroachdb,rpc.streams.mux_rangefeed.active,gauge,Number of currently running MuxRangeFeed streams +cockroachdb,rpc.streams.mux_rangefeed.recv.count,count,Total number of MuxRangeFeed streams +cockroachdb,rpc.streams.rangefeed.active,gauge,Number of currently running RangeFeed streams +cockroachdb,rpc.streams.rangefeed.recv.count,count,Total number of RangeFeed streams +cockroachdb,schedules.BACKUP.failed.count,count,Number of BACKUP jobs failed +cockroachdb,schedules.BACKUP.last_completed_time,gauge,The unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric +cockroachdb,schedules.BACKUP.protected_age_sec,gauge,"The age of the oldest PTS record protected by BACKUP schedules +Shown as second" +cockroachdb,schedules.BACKUP.protected_record_count,gauge,Number of PTS records held by BACKUP schedules +cockroachdb,schedules.BACKUP.started.count,count,Number of BACKUP jobs started +cockroachdb,schedules.BACKUP.succeeded.count,count,Number of BACKUP jobs succeeded +cockroachdb,schedules.CHANGEFEED.failed.count,count,Number of CHANGEFEED jobs failed +cockroachdb,schedules.CHANGEFEED.started.count,count,Number of CHANGEFEED jobs started +cockroachdb,schedules.CHANGEFEED.succeeded.count,count,Number of CHANGEFEED jobs succeeded +cockroachdb,schedules.backup.failed,count,[OpenMetrics v1] Number of scheduled backup jobs failed +cockroachdb,schedules.backup.failed.count,count,[OpenMetrics v2] Number of scheduled backup jobs failed +cockroachdb,schedules.backup.last_completed_time,gauge,"[OpenMetrics v1 & v2] The unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric 
+Shown as second" +cockroachdb,schedules.backup.started,count,[OpenMetrics v1] Number of scheduled backup jobs started +cockroachdb,schedules.backup.started.count,count,[OpenMetrics v2] Number of scheduled backup jobs started +cockroachdb,schedules.backup.succeeded,count,[OpenMetrics v1] Number of scheduled backup jobs succeeded +cockroachdb,schedules.backup.succeeded.count,count,[OpenMetrics v2] Number of scheduled backup jobs succeeded +cockroachdb,schedules.error,gauge,Number of schedules which did not execute successfully +cockroachdb,schedules.malformed,gauge,Number of malformed schedules +cockroachdb,schedules.round.jobs_started,gauge,The number of jobs started +cockroachdb,schedules.round.reschedule_skip,gauge,The number of schedules rescheduled due to SKIP policy +cockroachdb,schedules.round.reschedule_wait,gauge,The number of schedules rescheduled due to WAIT policy +cockroachdb,schedules.scheduled.row.level.ttl.executor_failed.count,count,Number of scheduled-row-level-ttl-executor jobs failed +cockroachdb,schedules.scheduled_row_level_ttl_executor.failed.count,count,Number of scheduled-row-level-ttl-executor jobs failed +cockroachdb,schedules.scheduled_row_level_ttl_executor.started.count,count,Number of scheduled-row-level-ttl-executor jobs started +cockroachdb,schedules.scheduled_row_level_ttl_executor.succeeded.count,count,Number of scheduled-row-level-ttl-executor jobs succeeded +cockroachdb,schedules.scheduled_schema_telemetry_executor.failed.count,count,Number of scheduled-schema-telemetry-executor jobs failed +cockroachdb,schedules.scheduled_schema_telemetry_executor.started.count,count,Number of scheduled-schema-telemetry-executor jobs started +cockroachdb,schedules.scheduled_schema_telemetry_executor.succeeded.count,count,Number of scheduled-schema-telemetry-executor jobs succeeded +cockroachdb,schedules.scheduled_sql_stats_compaction_executor.failed.count,count,Number of scheduled-sql-stats-compaction-executor jobs failed +cockroachdb,schedules.scheduled_sql_stats_compaction_executor.started.count,count,Number of scheduled-sql-stats-compaction-executor jobs started +cockroachdb,schedules.scheduled_sql_stats_compaction_executor.succeeded.count,count,Number of scheduled-sql-stats-compaction-executor jobs succeeded +cockroachdb,seconds.until.enterprise.license.expiry,gauge,Seconds until enterprise license expiry (0 if no license present or running without enterprise features) +cockroachdb,seconds_until_enterprise_license_expiry,gauge,"Seconds until enterprise license expiry (0 if no license present or running without enterprise features) +Shown as second" +cockroachdb,security.certificate.expiration.ca,gauge,Expiration for the CA certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.ca_client_tenant,gauge,Expiration for the Tenant Client CA certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.client,gauge,"Minimum expiration for client certificates, labeled by SQL user. 0 means no certificate or error." +cockroachdb,security.certificate.expiration.client_ca,gauge,Expiration for the client CA certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.client_tenant,gauge,Expiration for the Tenant Client certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.node,gauge,Expiration for the node certificate. 0 means no certificate or error. 
+cockroachdb,security.certificate.expiration.node_client,gauge,Expiration for the node’s client certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.ui,gauge,Expiration for the UI certificate. 0 means no certificate or error. +cockroachdb,security.certificate.expiration.ui_ca,gauge,Expiration for the UI CA certificate. 0 means no certificate or error. +cockroachdb,security.certificate_expiration.ca,gauge,Expiration for the CA certificate. 0 means no certificate or error +cockroachdb,security.certificate_expiration.client_ca,gauge,Expiration for the client CA certificate. 0 means no certificate or error +cockroachdb,security.certificate_expiration.node,gauge,Expiration for the node certificate. 0 means no certificate or error +cockroachdb,security.certificate_expiration.node_client,gauge,Expiration for the node’s client certificate. 0 means no certificate or error +cockroachdb,security.certificate_expiration.ui,gauge,Expiration for the UI certificate. 0 means no certificate or error +cockroachdb,security.certificate_expiration.ui_ca,gauge,Expiration for the UI CA certificate. 0 means no certificate or error +cockroachdb,spanconfig.kvsubscriber.oldest_protected_record_nanos,gauge,"Difference between the current time and the oldest protected timestamp (sudden drops indicate a record being released; an ever increasing number indicates that the oldest record is around and preventing GC if > configured GC TTL) +Shown as nanosecond" +cockroachdb,spanconfig.kvsubscriber.protected_record_count,gauge,"Number of protected timestamp records, as seen by KV" +cockroachdb,spanconfig.kvsubscriber.update_behind_nanos,gauge,"Difference between the current time and when the KVSubscriber received its last update (an ever increasing number indicates that we’re no longer receiving updates) +Shown as nanosecond" +cockroachdb,sql.bytesin,count,"[OpenMetrics v1] Number of sql bytes received +Shown as byte" +cockroachdb,sql.bytesin.count,count,"[OpenMetrics v2] Number of sql bytes received +Shown as byte" +cockroachdb,sql.bytesout,count,"[OpenMetrics v1] Number of sql bytes sent +Shown as byte" +cockroachdb,sql.bytesout.count,count,"[OpenMetrics v2] Number of sql bytes sent +Shown as byte" +cockroachdb,sql.conn.failures.count,count,Number of SQL connection failures +cockroachdb,sql.conn.latency,gauge,"[OpenMetrics v1] Latency to establish and authenticate a SQL connection +Shown as nanosecond" +cockroachdb,sql.conn.latency.bucket,count,"[OpenMetrics v2] Latency to establish and authenticate a SQL connection +Shown as nanosecond" +cockroachdb,sql.conn.latency.count,count,"[OpenMetrics v2] Latency to establish and authenticate a SQL connection +Shown as nanosecond" +cockroachdb,sql.conn.latency.sum,count,"[OpenMetrics v2] Latency to establish and authenticate a SQL connection +Shown as nanosecond" +cockroachdb,sql.conns,gauge,"[OpenMetrics v1 & v2] Number of active sql connections +Shown as connection" +cockroachdb,sql.conns_waiting_to_hash,gauge,Number of SQL connection attempts that are being throttled in order to limit password hashing concurrency +cockroachdb,sql.contention.resolver.failed_resolutions.count,count,Number of failed transaction ID resolution attempts +cockroachdb,sql.contention.resolver.queue_size,gauge,Length of queued unresolved contention events +cockroachdb,sql.contention.resolver.retries.count,count,Number of times transaction id resolution has been retried +cockroachdb,sql.contention.txn_id_cache.miss.count,count,Number of cache misses 
+cockroachdb,sql.contention.txn_id_cache.read.count,count,Number of cache reads +cockroachdb,sql.copy.count,count,Number of COPY SQL statements successfully executed +cockroachdb,sql.copy.internal.count,count,Number of COPY SQL statements successfully executed (internal queries) +cockroachdb,sql.copy.nonatomic.count,count,Number of non-atomic COPY SQL statements successfully executed +cockroachdb,sql.copy.nonatomic.internal.count,count,Number of non-atomic COPY SQL statements successfully executed (internal queries) +cockroachdb,sql.copy.nonatomic.started.count,count,Number of non-atomic COPY SQL statements started +cockroachdb,sql.copy.nonatomic.started.internal.count,count,Number of non-atomic COPY SQL statements started (internal queries) +cockroachdb,sql.copy.started.count,count,Number of COPY SQL statements started +cockroachdb,sql.copy.started.internal.count,count,Number of COPY SQL statements started (internal queries) +cockroachdb,sql.ddl.count,count,[OpenMetrics v1 & v2] Number of SQL DDL statements +cockroachdb,sql.ddl.internal.count,count,Number of SQL DDL statements successfully executed (internal queries) +cockroachdb,sql.ddl.started.count,count,Number of SQL DDL statements started +cockroachdb,sql.ddl.started.internal.count,count,Number of SQL DDL statements started (internal queries) +cockroachdb,sql.delete.count,count,[OpenMetrics v1 & v2] Number of SQL DELETE statements +cockroachdb,sql.delete.internal.count,count,Number of SQL DELETE statements successfully executed (internal queries) +cockroachdb,sql.delete.started.count,count,Number of SQL DELETE statements started +cockroachdb,sql.delete.started.internal.count,count,Number of SQL DELETE statements started (internal queries) +cockroachdb,sql.disk.distsql.current,gauge,"Current sql statement disk usage for distsql +Shown as byte" +cockroachdb,sql.disk.distsql.max.bucket,count,"Disk usage per sql statement for distsql +Shown as byte" +cockroachdb,sql.disk.distsql.max.count,count,"Disk usage per sql statement for distsql +Shown as byte" +cockroachdb,sql.disk.distsql.max.sum,count,"Disk usage per sql statement for distsql +Shown as byte" +cockroachdb,sql.disk.distsql.spilled.bytes.read.count,count,"Number of bytes read from temporary disk storage as a result of spilling +Shown as byte" +cockroachdb,sql.disk.distsql.spilled.bytes.written.count,count,"Number of bytes written to temporary disk storage as a result of spilling +Shown as byte" +cockroachdb,sql.distsql.contended.queries,count,[OpenMetrics v1] Number of SQL queries that experienced contention +cockroachdb,sql.distsql.contended.queries.count,count,[OpenMetrics v2] Number of SQL queries that experienced contention +cockroachdb,sql.distsql.contended_queries.count,count,Number of SQL queries that experienced contention +cockroachdb,sql.distsql.dist_query_rerun_locally.count,count,Total number of cases when distributed query error resulted in a local rerun +cockroachdb,sql.distsql.dist_query_rerun_locally.failure_count.count,count,Total number of cases when the local rerun of a distributed query resulted in an error +cockroachdb,sql.distsql.exec.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of DistSQL statement execution +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.bucket,count,"Latency of DistSQL statement execution +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL statement execution +Shown as nanosecond"
+cockroachdb,sql.distsql.exec.latency.internal.bucket,count,"Latency of DistSQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.internal.count,count,"Latency of DistSQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.internal.sum,count,"Latency of DistSQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.exec.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL statement execution +Shown as nanosecond" +cockroachdb,sql.distsql.flows.active,gauge,[OpenMetrics v1 & v2] Number of distributed SQL flows currently active +cockroachdb,sql.distsql.flows.count,count,[OpenMetrics v2] Number of distributed SQL flows executed +cockroachdb,sql.distsql.flows.total,count,[OpenMetrics v1] Number of distributed SQL flows executed +cockroachdb,sql.distsql.flows.total.count,count,Number of distributed SQL flows executed +cockroachdb,sql.distsql.queries.active,gauge,[OpenMetrics v1 & v2] Number of distributed SQL queries currently active +cockroachdb,sql.distsql.queries.count,count,[OpenMetrics v2] Number of distributed SQL queries executed +cockroachdb,sql.distsql.queries.spilled.count,count,Number of queries that have spilled to disk +cockroachdb,sql.distsql.queries.total,count,[OpenMetrics v1] Number of distributed SQL queries executed +cockroachdb,sql.distsql.select.count,count,[OpenMetrics v1 & v2] Number of DistSQL SELECT statements +cockroachdb,sql.distsql.select.internal.count,count,Number of DistSQL SELECT statements (internal queries) +cockroachdb,sql.distsql.service.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal.bucket,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal.count,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.internal.sum,count,"Latency of DistSQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.distsql.service.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of DistSQL request execution +Shown as nanosecond" +cockroachdb,sql.distsql.vec.openfds,gauge,Current number of open file descriptors used by vectorized external storage +cockroachdb,sql.exec.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.exec.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.exec.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal,count,"Latency of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal.bucket,count,"Latency of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal.count,count,"Latency 
of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.internal.sum,count,"Latency of SQL statement execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.exec.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of SQL statement execution +Shown as nanosecond" +cockroachdb,sql.failure,count,[OpenMetrics v1] Number of statements resulting in a planning or runtime error +cockroachdb,sql.failure.count,count,[OpenMetrics v2] Number of statements resulting in a planning or runtime error +cockroachdb,sql.failure.internal.count,count,Number of statements resulting in a planning or runtime error (internal queries) +cockroachdb,sql.feature_flag_denial.count,count,Counter of the number of statements denied by a feature flag +cockroachdb,sql.full.scan,count,[OpenMetrics v1] Number of full table or index scans +cockroachdb,sql.full.scan.count,count,[OpenMetrics v2] Number of full table or index scans +cockroachdb,sql.full.scan.internal.count,count,Number of full table or index scans (internal queries) +cockroachdb,sql.guardrails.full_scan_rejected.count,count,Number of full table or index scans that have been rejected because of disallow_full_table_scans guardrail +cockroachdb,sql.guardrails.full_scan_rejected.internal.count,count,Number of full table or index scans that have been rejected because of disallow_full_table_scans guardrail (internal queries) +cockroachdb,sql.guardrails.max_row_size_err.count,count,Number of rows observed violating sql.guardrails.max_row_size_err +cockroachdb,sql.guardrails.max_row_size_err.internal.count,count,Number of rows observed violating sql.guardrails.max_row_size_err (internal queries) +cockroachdb,sql.guardrails.max_row_size_log.count,count,Number of rows observed violating sql.guardrails.max_row_size_log +cockroachdb,sql.guardrails.max_row_size_log.internal.count,count,Number of rows observed violating sql.guardrails.max_row_size_log (internal queries) +cockroachdb,sql.guardrails.transaction_rows_read_err.count,count,Number of transactions errored because of transaction_rows_read_err guardrail +cockroachdb,sql.guardrails.transaction_rows_read_err.internal.count,count,Number of transactions errored because of transaction_rows_read_err guardrail (internal queries) +cockroachdb,sql.guardrails.transaction_rows_read_log.count,count,Number of transactions logged because of transaction_rows_read_log guardrail +cockroachdb,sql.guardrails.transaction_rows_read_log.internal.count,count,Number of transactions logged because of transaction_rows_read_log guardrail (internal queries) +cockroachdb,sql.guardrails.transaction_rows_written_err.count,count,Number of transactions errored because of transaction_rows_written_err guardrail +cockroachdb,sql.guardrails.transaction_rows_written_err.internal.count,count,Number of transactions errored because of transaction_rows_written_err guardrail (internal queries) +cockroachdb,sql.guardrails.transaction_rows_written_log.count,count,Number of transactions logged because of transaction_rows_written_log guardrail +cockroachdb,sql.guardrails.transaction_rows_written_log.internal.count,count,Number of transactions logged because of transaction_rows_written_log guardrail (internal queries) +cockroachdb,sql.hydrated_schema_cache.hits.count,count,counter on the number of cache hits +cockroachdb,sql.hydrated_schema_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.hydrated_table_cache.hits.count,count,counter on the number of cache hits 
+cockroachdb,sql.hydrated_table_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.hydrated_type_cache.hits.count,count,counter on the number of cache hits +cockroachdb,sql.hydrated_type_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.hydrated_udf_cache.hits.count,count,counter on the number of cache hits +cockroachdb,sql.hydrated_udf_cache.misses.count,count,counter on the number of cache misses +cockroachdb,sql.insert.count,count,[OpenMetrics v1 & v2] Number of SQL INSERT statements +cockroachdb,sql.insert.internal.count,count,Number of SQL INSERT statements successfully executed (internal queries) +cockroachdb,sql.insert.started.count,count,Number of SQL INSERT statements started +cockroachdb,sql.insert.started.internal.count,count,Number of SQL INSERT statements started (internal queries) +cockroachdb,sql.insights.anomaly_detection.evictions.count,count,Evictions of fingerprint latency summaries due to memory pressure +cockroachdb,sql.insights.anomaly_detection.fingerprints,gauge,Current number of statement fingerprints being monitored for anomaly detection +cockroachdb,sql.insights.anomaly_detection.memory,gauge,"Current memory used to support anomaly detection +Shown as byte" +cockroachdb,sql.leases.active,gauge,The number of outstanding SQL schema leases. +cockroachdb,sql.mem.admin.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for admin +cockroachdb,sql.mem.admin.max,gauge,[OpenMetrics v1] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.max.count,count,[OpenMetrics v2] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for admin +cockroachdb,sql.mem.admin.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for admin +cockroachdb,sql.mem.admin.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for admin +cockroachdb,sql.mem.admin.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for admin +cockroachdb,sql.mem.admin.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for admin +cockroachdb,sql.mem.admin.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for admin +cockroachdb,sql.mem.admin.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for admin +cockroachdb,sql.mem.admin.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for admin +cockroachdb,sql.mem.admin.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for admin +cockroachdb,sql.mem.admin.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for admin +cockroachdb,sql.mem.bulk.current,gauge,"Current sql statement memory usage for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max.bucket,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max.count,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.bulk.max.sum,count,"Memory usage per sql statement for bulk operations +Shown as byte" +cockroachdb,sql.mem.client.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for client +cockroachdb,sql.mem.client.max,gauge,[OpenMetrics v1] Memory usage per sql 
statement for client +cockroachdb,sql.mem.client.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for client +cockroachdb,sql.mem.client.max.count,count,[OpenMetrics v2] Memory usage per sql statement for client +cockroachdb,sql.mem.client.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for client +cockroachdb,sql.mem.client.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for client +cockroachdb,sql.mem.client.session.max,gauge,[OpenMetrics v1] Memory usage per sql session for client +cockroachdb,sql.mem.client.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for client +cockroachdb,sql.mem.client.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for client +cockroachdb,sql.mem.client.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for client +cockroachdb,sql.mem.client.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for client +cockroachdb,sql.mem.client.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for client +cockroachdb,sql.mem.client.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for client +cockroachdb,sql.mem.client.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for client +cockroachdb,sql.mem.client.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for client +cockroachdb,sql.mem.conns.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for conns +cockroachdb,sql.mem.conns.max,gauge,[OpenMetrics v1] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.max.count,count,[OpenMetrics v2] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for conns +cockroachdb,sql.mem.conns.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for conns +cockroachdb,sql.mem.conns.session.max,gauge,[OpenMetrics v1] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for conns +cockroachdb,sql.mem.conns.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for conns +cockroachdb,sql.mem.conns.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for conns +cockroachdb,sql.mem.conns.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for conns +cockroachdb,sql.mem.conns.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for conns +cockroachdb,sql.mem.conns.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for conns +cockroachdb,sql.mem.distsql.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for distsql +cockroachdb,sql.mem.distsql.max,gauge,[OpenMetrics v1] Memory usage per sql statement for distsql +cockroachdb,sql.mem.distsql.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for distsql +cockroachdb,sql.mem.distsql.max.count,count,[OpenMetrics v2] Memory usage per sql statement for distsql +cockroachdb,sql.mem.distsql.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for distsql 
+cockroachdb,sql.mem.internal.current,gauge,[OpenMetrics v1 & v2] Current sql statement memory usage for internal +cockroachdb,sql.mem.internal.max,gauge,[OpenMetrics v1] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.max.bucket,count,[OpenMetrics v2] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.max.count,count,[OpenMetrics v2] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.max.sum,count,[OpenMetrics v2] Memory usage per sql statement for internal +cockroachdb,sql.mem.internal.session.current,gauge,[OpenMetrics v1 & v2] Current sql session memory usage for internal +cockroachdb,sql.mem.internal.session.max,gauge,[OpenMetrics v1] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.max.bucket,count,[OpenMetrics v2] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.max.count,count,[OpenMetrics v2] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.max.sum,count,[OpenMetrics v2] Memory usage per sql session for internal +cockroachdb,sql.mem.internal.session.prepared.current,gauge,"Current sql session memory usage by prepared statements for internal +Shown as byte" +cockroachdb,sql.mem.internal.session.prepared.max.bucket,count,"Memory usage by prepared statements per sql session for internal +Shown as byte" +cockroachdb,sql.mem.internal.session.prepared.max.count,count,"Memory usage by prepared statements per sql session for internal +Shown as byte" +cockroachdb,sql.mem.internal.session.prepared.max.sum,count,"Memory usage by prepared statements per sql session for internal +Shown as byte" +cockroachdb,sql.mem.internal.txn.current,gauge,[OpenMetrics v1 & v2] Current sql transaction memory usage for internal +cockroachdb,sql.mem.internal.txn.max,gauge,[OpenMetrics v1] Memory usage per sql transaction for internal +cockroachdb,sql.mem.internal.txn.max.bucket,count,[OpenMetrics v2] Memory usage per sql transaction for internal +cockroachdb,sql.mem.internal.txn.max.count,count,[OpenMetrics v2] Memory usage per sql transaction for internal +cockroachdb,sql.mem.internal.txn.max.sum,count,[OpenMetrics v2] Memory usage per sql transaction for internal +cockroachdb,sql.mem.root.current,gauge,Current sql statement memory usage for root +cockroachdb,sql.mem.root.max.bucket,count,"Memory usage per sql statement for root +Shown as byte" +cockroachdb,sql.mem.root.max.count,count,"Memory usage per sql statement for root +Shown as byte" +cockroachdb,sql.mem.root.max.sum,count,"Memory usage per sql statement for root +Shown as byte" +cockroachdb,sql.mem.sql.current,gauge,"Current sql statement memory usage for sql +Shown as byte" +cockroachdb,sql.mem.sql.max,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.max.bucket,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.max.count,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.max.sum,count,"Memory usage per sql statement for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.current,gauge,"Current sql session memory usage for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.max,count,"Memory usage per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.max.bucket,count,"Memory usage per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.max.count,count,"Memory usage per sql session for sql +Shown as byte" 
+cockroachdb,sql.mem.sql.session.max.sum,count,"Memory usage per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.current,gauge,"Current sql session memory usage by prepared statements for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max.bucket,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max.count,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.session.prepared.max.sum,count,"Memory usage by prepared statements per sql session for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.current,gauge,"Current sql transaction memory usage for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max.bucket,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max.count,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.mem.sql.txn.max.sum,count,"Memory usage per sql transaction for sql +Shown as byte" +cockroachdb,sql.misc.count,count,[OpenMetrics v1 & v2] Number of other SQL statements +cockroachdb,sql.misc.internal.count,count,Number of other SQL statements successfully executed (internal queries) +cockroachdb,sql.misc.started.count,count,Number of other SQL statements started +cockroachdb,sql.misc.started.internal.count,count,Number of other SQL statements started (internal queries) +cockroachdb,sql.new_conns.count,count,Counter of the number of sql connections created +cockroachdb,sql.optimizer.fallback.count,count,Number of statements which the cost-based optimizer was unable to plan +cockroachdb,sql.optimizer.fallback.internal.count,count,Number of statements which the cost-based optimizer was unable to plan (internal queries) +cockroachdb,sql.optimizer.plan_cache.hits.count,count,Number of non-prepared statements for which a cached plan was used +cockroachdb,sql.optimizer.plan_cache.hits.internal.count,count,Number of non-prepared statements for which a cached plan was used (internal queries) +cockroachdb,sql.optimizer.plan_cache.misses.count,count,Number of non-prepared statements for which a cached plan was not used +cockroachdb,sql.optimizer.plan_cache.misses.internal.count,count,Number of non-prepared statements for which a cached plan was not used (internal queries) +cockroachdb,sql.pgwire_cancel.count,count,Number of pgwire query cancel requests +cockroachdb,sql.pgwire_cancel.ignored.count,count,Number of pgwire query cancel requests that were ignored due to rate limiting +cockroachdb,sql.pgwire_cancel.successful.count,count,Number of pgwire query cancel requests that were successful +cockroachdb,sql.pre_serve.bytesin.count,count,"Number of SQL bytes received prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.bytesout.count,count,"Number of SQL bytes sent prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.conn.failures.count,count,Number of SQL connection failures prior to routing the connection to the target SQL server +cockroachdb,sql.pre_serve.mem.cur,gauge,"Current memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max,count,"Memory 
usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max.bucket,count,"Memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max.count,count,"Memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.mem.max.sum,count,"Memory usage for SQL connections prior to routing the connection to the target SQL server +Shown as byte" +cockroachdb,sql.pre_serve.new_conns.count,count,Number of SQL connections created prior to routing the connection to the target SQL server +cockroachdb,sql.query.count,count,[OpenMetrics v1 & v2] Number of SQL queries +cockroachdb,sql.query.internal.count,count,Number of SQL queries executed (internal queries) +cockroachdb,sql.query.started.count,count,Number of SQL queries started +cockroachdb,sql.query.started.internal.count,count,Number of SQL queries started (internal queries) +cockroachdb,sql.restart_savepoint.count,count,Number of SAVEPOINT cockroach_restart statements successfully executed +cockroachdb,sql.restart_savepoint.internal.count,count,Number of SAVEPOINT cockroach_restart statements successfully executed (internal queries) +cockroachdb,sql.restart_savepoint.release.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements successfully executed +cockroachdb,sql.restart_savepoint.release.internal.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements successfully executed (internal queries) +cockroachdb,sql.restart_savepoint.release.started.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements started +cockroachdb,sql.restart_savepoint.release.started.internal.count,count,Number of RELEASE SAVEPOINT cockroach_restart statements started (internal queries) +cockroachdb,sql.restart_savepoint.rollback.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements successfully executed +cockroachdb,sql.restart_savepoint.rollback.internal.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements successfully executed (internal queries) +cockroachdb,sql.restart_savepoint.rollback.started.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements started +cockroachdb,sql.restart_savepoint.rollback.started.internal.count,count,Number of ROLLBACK TO SAVEPOINT cockroach_restart statements started (internal queries) +cockroachdb,sql.restart_savepoint.started.count,count,Number of SAVEPOINT cockroach_restart statements started +cockroachdb,sql.restart_savepoint.started.internal.count,count,Number of SAVEPOINT cockroach_restart statements started (internal queries) +cockroachdb,sql.savepoint.count,count,Number of SQL SAVEPOINT statements successfully executed +cockroachdb,sql.savepoint.internal.count,count,Number of SQL SAVEPOINT statements successfully executed (internal queries) +cockroachdb,sql.savepoint.release.count,count,Number of RELEASE SAVEPOINT statements successfully executed +cockroachdb,sql.savepoint.release.internal.count,count,Number of RELEASE SAVEPOINT statements successfully executed (internal queries) +cockroachdb,sql.savepoint.release.started.count,count,Number of RELEASE SAVEPOINT statements started +cockroachdb,sql.savepoint.release.started.internal.count,count,Number of RELEASE SAVEPOINT statements started (internal queries) +cockroachdb,sql.savepoint.rollback.count,count,Number of ROLLBACK TO SAVEPOINT statements successfully executed 
+cockroachdb,sql.savepoint.rollback.internal.count,count,Number of ROLLBACK TO SAVEPOINT statements successfully executed (internal queries) +cockroachdb,sql.savepoint.rollback.started.count,count,Number of ROLLBACK TO SAVEPOINT statements started +cockroachdb,sql.savepoint.rollback.started.internal.count,count,Number of ROLLBACK TO SAVEPOINT statements started (internal queries) +cockroachdb,sql.savepoint.started.count,count,Number of SQL SAVEPOINT statements started +cockroachdb,sql.savepoint.started.internal.count,count,Number of SQL SAVEPOINT statements started (internal queries) +cockroachdb,sql.schema.invalid_objects,gauge,Gauge of detected invalid objects within the system.descriptor table (measured by querying crdb_internal.invalid_objects) +cockroachdb,sql.schema_changer.permanent_errors.count,count,Counter of the number of permanent errors experienced by the schema changer +cockroachdb,sql.schema_changer.retry_errors.count,count,Counter of the number of retriable errors experienced by the schema changer +cockroachdb,sql.schema_changer.running,gauge,Gauge of currently running schema changes +cockroachdb,sql.schema_changer.successes.count,count,Counter of the number of schema changer resumes which succeed +cockroachdb,sql.select.count,count,[OpenMetrics v1 & v2] Number of SQL SELECT statements +cockroachdb,sql.select.internal.count,count,Number of SQL SELECT statements successfully executed (internal queries) +cockroachdb,sql.select.started.count,count,Number of SQL SELECT statements started +cockroachdb,sql.select.started.internal.count,count,Number of SQL SELECT statements started (internal queries) +cockroachdb,sql.service.latency,gauge,"[OpenMetrics v1] Latency in nanoseconds of SQL request execution +Shown as nanosecond" +cockroachdb,sql.service.latency.bucket,count,"[OpenMetrics v2] Latency in nanoseconds of SQL request execution +Shown as nanosecond" +cockroachdb,sql.service.latency.count,count,"[OpenMetrics v2] Latency in nanoseconds of SQL request execution +Shown as nanosecond" +cockroachdb,sql.service.latency.internal,count,"Latency of SQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.service.latency.internal.bucket,count,"Latency of SQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.service.latency.internal.count,count,"Latency of SQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.service.latency.internal.sum,count,"Latency of SQL request execution (internal queries) +Shown as nanosecond" +cockroachdb,sql.service.latency.sum,count,"[OpenMetrics v2] Latency in nanoseconds of SQL request execution +Shown as nanosecond" +cockroachdb,sql.statements.active,gauge,[OpenMetrics v1 & v2] Number of currently active user SQL statements +cockroachdb,sql.statements.active.internal,gauge,Number of currently active user SQL statements (internal queries) +cockroachdb,sql.stats.cleanup.rows_removed.count,count,Number of stale statistics rows that are removed +cockroachdb,sql.stats.discarded.current.count,count,Number of fingerprint statistics being discarded +cockroachdb,sql.stats.flush.count,count,Number of times SQL Stats are flushed to persistent storage +cockroachdb,sql.stats.flush.duration,count,"Time took to in nanoseconds to complete SQL Stats flush +Shown as nanosecond" +cockroachdb,sql.stats.flush.duration.bucket,count,"Time took to in nanoseconds to complete SQL Stats flush +Shown as nanosecond" +cockroachdb,sql.stats.flush.duration.count,count,"Time took to in nanoseconds to complete SQL 
Stats flush +Shown as nanosecond" +cockroachdb,sql.stats.flush.duration.sum,count,"Time took to in nanoseconds to complete SQL Stats flush +Shown as nanosecond" +cockroachdb,sql.stats.flush.error.count,count,Number of errors encountered when flushing SQL Stats +cockroachdb,sql.stats.mem.current,gauge,"Current memory usage for fingerprint storage +Shown as byte" +cockroachdb,sql.stats.mem.max,count,"Memory usage for fingerprint storage +Shown as byte" +cockroachdb,sql.stats.mem.max.bucket,count,"Memory usage for fingerprint storage +Shown as byte" +cockroachdb,sql.stats.mem.max.count,count,"Memory usage for fingerprint storage +Shown as byte" +cockroachdb,sql.stats.mem.max.sum,count,"Memory usage for fingerprint storage +Shown as byte" +cockroachdb,sql.stats.reported.mem.current,gauge,"Current memory usage for reported fingerprint storage +Shown as byte" +cockroachdb,sql.stats.reported.mem.max,count,"Memory usage for reported fingerprint storage +Shown as byte" +cockroachdb,sql.stats.reported.mem.max.bucket,count,"Memory usage for reported fingerprint storage +Shown as byte" +cockroachdb,sql.stats.reported.mem.max.count,count,"Memory usage for reported fingerprint storage +Shown as byte" +cockroachdb,sql.stats.reported.mem.max.sum,count,"Memory usage for reported fingerprint storage +Shown as byte" +cockroachdb,sql.stats.txn_stats_collection.duration,count,"Time took in nanoseconds to collect transaction stats +Shown as nanosecond" +cockroachdb,sql.stats.txn_stats_collection.duration.bucket,count,"Time took in nanoseconds to collect transaction stats +Shown as nanosecond" +cockroachdb,sql.stats.txn_stats_collection.duration.count,count,"Time took in nanoseconds to collect transaction stats +Shown as nanosecond" +cockroachdb,sql.stats.txn_stats_collection.duration.sum,count,"Time took in nanoseconds to collect transaction stats +Shown as nanosecond" +cockroachdb,sql.temp_object_cleaner.active_cleaners,gauge,number of cleaner tasks currently running on this node +cockroachdb,sql.temp_object_cleaner.schemas_deletion_error.count,count,number of errored schema deletions by the temp object cleaner on this node +cockroachdb,sql.temp_object_cleaner.schemas_deletion_success.count,count,number of successful schema deletions by the temp object cleaner on this node +cockroachdb,sql.temp_object_cleaner.schemas_to_delete.count,count,number of schemas to be deleted by the temp object cleaner on this node +cockroachdb,sql.txn.abort.count,count,[OpenMetrics v1 & v2] Number of SQL transaction ABORT statements +cockroachdb,sql.txn.abort.internal.count,count,Number of SQL transaction abort errors (internal queries) +cockroachdb,sql.txn.begin.count,count,[OpenMetrics v1 & v2] Number of SQL transaction BEGIN statements +cockroachdb,sql.txn.begin.internal.count,count,Number of SQL transaction BEGIN statements successfully executed (internal queries) +cockroachdb,sql.txn.begin.started.count,count,Number of SQL transaction BEGIN statements started +cockroachdb,sql.txn.begin.started.internal.count,count,Number of SQL transaction BEGIN statements started (internal queries) +cockroachdb,sql.txn.commit.count,count,[OpenMetrics v1 & v2] Number of SQL transaction COMMIT statements +cockroachdb,sql.txn.commit.internal.count,count,Number of SQL transaction COMMIT statements successfully executed (internal queries) +cockroachdb,sql.txn.commit.started.count,count,Number of SQL transaction COMMIT statements started +cockroachdb,sql.txn.commit.started.internal.count,count,Number of SQL transaction COMMIT statements started 
(internal queries) +cockroachdb,sql.txn.contended.count,count,Number of SQL transactions experienced contention +cockroachdb,sql.txn.contended.internal.count,count,Number of SQL transactions experienced contention (internal queries) +cockroachdb,sql.txn.latency,gauge,"[OpenMetrics v1] Latency of SQL transactions +Shown as transaction" +cockroachdb,sql.txn.latency.bucket,count,"[OpenMetrics v2] Latency of SQL transactions +Shown as transaction" +cockroachdb,sql.txn.latency.count,count,"[OpenMetrics v2] Latency of SQL transactions +Shown as transaction" +cockroachdb,sql.txn.latency.internal.bucket,count,"Latency of SQL transactions (internal queries) +Shown as nanosecond" +cockroachdb,sql.txn.latency.internal.count,count,"Latency of SQL transactions (internal queries) +Shown as nanosecond" +cockroachdb,sql.txn.latency.internal.sum,count,"Latency of SQL transactions (internal queries) +Shown as nanosecond" +cockroachdb,sql.txn.latency.sum,count,"[OpenMetrics v2] Latency of SQL transactions +Shown as transaction" +cockroachdb,sql.txn.rollback.count,count,[OpenMetrics v1 & v2] Number of SQL transaction ROLLBACK statements +cockroachdb,sql.txn.rollback.internal.count,count,Number of SQL transaction ROLLBACK statements successfully executed (internal queries) +cockroachdb,sql.txn.rollback.started.count,count,Number of SQL transaction ROLLBACK statements started +cockroachdb,sql.txn.rollback.started.internal.count,count,Number of SQL transaction ROLLBACK statements started (internal queries) +cockroachdb,sql.txns.open,gauge,"[OpenMetrics v1 & v2] Number of currently open user SQL transactions +Shown as transaction" +cockroachdb,sql.txns.open.internal,gauge,Number of currently open user SQL transactions (internal queries) +cockroachdb,sql.update.count,count,[OpenMetrics v1 & v2] Number of SQL UPDATE statements +cockroachdb,sql.update.internal.count,count,Number of SQL UPDATE statements successfully executed (internal queries) +cockroachdb,sql.update.started.count,count,Number of SQL UPDATE statements started +cockroachdb,sql.update.started.internal.count,count,Number of SQL UPDATE statements started (internal queries) +cockroachdb,sqlliveness.is_alive.cache_hits.count,count,Number of calls to IsAlive that return from the cache +cockroachdb,sqlliveness.is_alive.cache_misses.count,count,Number of calls to IsAlive that do not return from the cache +cockroachdb,sqlliveness.sessions_deleted.count,count,Number of expired sessions which have been deleted +cockroachdb,sqlliveness.sessions_deletion_runs.count,count,Number of calls to delete sessions which have been performed +cockroachdb,sqlliveness.write_failures.count,count,Number of update or insert calls which have failed +cockroachdb,sqlliveness.write_successes.count,count,Number of update or insert calls successfully performed +cockroachdb,storage.batch_commit,gauge,Count of batch commits. See storage.AggregatedBatchCommitStats for details. +cockroachdb,storage.batch_commit.commit_wait.duration,gauge,"Cumulative time spent waiting for WAL sync, for batch commit. See storage.AggregatedBatchCommitStats for details. +Shown as nanosecond" +cockroachdb,storage.batch_commit.duration,gauge,"Cumulative time spent in batch commit. See storage.AggregatedBatchCommitStats for details. +Shown as nanosecond" +cockroachdb,storage.batch_commit.l0_stall.duration,gauge,"Cumulative time spent in a write stall due to high read amplification in L0, for batch commit. See storage.AggregatedBatchCommitStats for details. 
+Shown as nanosecond" +cockroachdb,storage.batch_commit.mem_stall.duration,gauge,"Cumulative time spent in a write stall due to too many memtables, for batch commit. See storage.AggregatedBatchCommitStats for details. +Shown as nanosecond" +cockroachdb,storage.batch_commit.sem_wait.duration,gauge,"Cumulative time spent in semaphore wait, for batch commit. See storage.AggregatedBatchCommitStats for details. +Shown as nanosecond" +cockroachdb,storage.batch_commit.wal_queue_wait.duration,gauge,"Cumulative time spent waiting for memory blocks in the WAL queue, for batch commit. See storage.AggregatedBatchCommitStats for details. +Shown as nanosecond" +cockroachdb,storage.batch_commit.wal_rotation.duration,gauge,"Cumulative time spent waiting for WAL rotation, for batch commit. See storage.AggregatedBatchCommitStats for details. +Shown as nanosecond" +cockroachdb,storage.checkpoints,gauge,"The number of checkpoint directories found in storage.This is the number of directories found in the auxiliary/checkpoints directory.Each represents an immutable point-in-time storage engine checkpoint. They arecheap (consisting mostly of hard links), but over time they effectively become afull copy of the old state, which increases their relative cost." +cockroachdb,storage.compactions.duration,gauge,"Cumulative sum of all compaction durations.The rate of this value provides the effective compaction concurrency of a store,which can be useful to determine whether the maximum compaction concurrency isfully utilized. +Shown as nanosecond" +cockroachdb,storage.compactions.keys.pinned,gauge,Cumulative count of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots.Various subsystems of CockroachDB take LSM snapshots to maintain a consistent viewof the database over an extended duration. +cockroachdb,storage.compactions.keys.pinned.bytes,gauge,"Cumulative size of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots.Various subsystems of CockroachDB take LSM snapshots to maintain a consistent viewof the database over an extended duration. +Shown as byte" +cockroachdb,storage.disk_slow,gauge,Number of instances of disk operations taking longer than 10s +cockroachdb,storage.disk_stalled,gauge,Number of instances of disk operations taking longer than 20s +cockroachdb,storage.flush.ingest,gauge,Flushes performing an ingest (flushable ingestions) +cockroachdb,storage.flush.ingest.table,gauge,Tables ingested via flushes (flushable ingestions) +cockroachdb,storage.flush.ingest.table.bytes,gauge,"Bytes ingested via flushes (flushable ingestions) +Shown as byte" +cockroachdb,storage.flush.utilization,gauge,"The percentage of time the storage engine is actively flushing memtables to disk. +Shown as percent" +cockroachdb,storage.ingest,gauge,Number of successful ingestions performed +cockroachdb,storage.iterator.block_load.bytes,gauge,"Bytes loaded by storage engine iterators (possibly cached). See storage.AggregatedIteratorStats for details. +Shown as byte" +cockroachdb,storage.iterator.block_load.cached_bytes,gauge,"Bytes loaded by storage engine iterators from the block cache. See storage.AggregatedIteratorStats for details. +Shown as byte" +cockroachdb,storage.iterator.block_load.read_duration,gauge,"Cumulative time storage engine iterators spent loading blocks from durable storage. See storage.AggregatedIteratorStats for details. 
+Shown as nanosecond" +cockroachdb,storage.iterator.external.seeks,gauge,Cumulative count of seeks performed on storage engine iterators. See storage.AggregatedIteratorStats for details. +cockroachdb,storage.iterator.external.steps,gauge,Cumulative count of steps performed on storage engine iterators. See storage.AggregatedIteratorStats for details. +cockroachdb,storage.iterator.internal.seeks,gauge,Cumulative count of seeks performed internally within storage engine iterators.A value high relative to ‘storage.iterator.external.seeks’is a good indication that there’s an accumulation of garbageinternally within the storage engine.See storage.AggregatedIteratorStats for details. +cockroachdb,storage.iterator.internal.steps,gauge,Cumulative count of steps performed internally within storage engine iterators.A value high relative to ‘storage.iterator.external.steps’is a good indication that there’s an accumulation of garbageinternally within the storage engine.See storage.AggregatedIteratorStats for more details. +cockroachdb,storage.keys.range_key_set,gauge,Approximate count of RangeKeySet internal keys across the storage engine. +cockroachdb,storage.keys.tombstone,gauge,"Approximate count of DEL, SINGLEDEL and RANGEDEL internal keys across the storage engine." +cockroachdb,storage.l0_bytes_flushed,gauge,"Number of bytes flushed (from memtables) into Level 0 +Shown as byte" +cockroachdb,storage.l0_bytes_ingested,gauge,"Number of bytes ingested directly into Level 0 +Shown as byte" +cockroachdb,storage.l0_level_score,gauge,Compaction score of level 0 +cockroachdb,storage.l0_level_size,gauge,"Size of the SSTables in level 0 +Shown as byte" +cockroachdb,storage.l0_num_files,gauge,Number of SSTables in Level 0 +cockroachdb,storage.l0_sublevels,gauge,Number of Level 0 sublevels +cockroachdb,storage.l1_bytes_ingested,gauge,"Number of bytes ingested directly into Level 1 +Shown as byte" +cockroachdb,storage.l1_level_score,gauge,Compaction score of level 1 +cockroachdb,storage.l1_level_size,gauge,"Size of the SSTables in level 1 +Shown as byte" +cockroachdb,storage.l2_bytes_ingested,gauge,"Number of bytes ingested directly into Level 2 +Shown as byte" +cockroachdb,storage.l2_level_score,gauge,Compaction score of level 2 +cockroachdb,storage.l2_level_size,gauge,"Size of the SSTables in level 2 +Shown as byte" +cockroachdb,storage.l3_bytes_ingested,gauge,"Number of bytes ingested directly into Level 3 +Shown as byte" +cockroachdb,storage.l3_level_score,gauge,Compaction score of level 3 +cockroachdb,storage.l3_level_size,gauge,"Size of the SSTables in level 3 +Shown as byte" +cockroachdb,storage.l4_bytes_ingested,gauge,"Number of bytes ingested directly into Level 4 +Shown as byte" +cockroachdb,storage.l4_level_score,gauge,Compaction score of level 4 +cockroachdb,storage.l4_level_size,gauge,"Size of the SSTables in level 4 +Shown as byte" +cockroachdb,storage.l5_bytes_ingested,gauge,"Number of bytes ingested directly into Level 5 +Shown as byte" +cockroachdb,storage.l5_level_score,gauge,Compaction score of level 5 +cockroachdb,storage.l5_level_size,gauge,"Size of the SSTables in level 5 +Shown as byte" +cockroachdb,storage.l6_bytes_ingested,gauge,"Number of bytes ingested directly into Level 6 +Shown as byte" +cockroachdb,storage.l6_level_score,gauge,Compaction score of level 6 +cockroachdb,storage.l6_level_size,gauge,"Size of the SSTables in level 6 +Shown as byte" +cockroachdb,storage.marked_for_compaction_files,gauge,Count of SSTables marked for compaction 
+cockroachdb,storage.queue.store_failures.count,count,Number of replicas which failed processing in replica queues due to retryable store errors +cockroachdb,storage.secondary_cache,gauge,The count of cache blocks in the secondary cache (not sstable blocks) +cockroachdb,storage.secondary_cache.evictions,gauge,The number of times a cache block was evicted from the secondary cache +cockroachdb,storage.secondary_cache.reads_full_hit,gauge,The number of reads where all data returned was read from the secondary cache +cockroachdb,storage.secondary_cache.reads_multi_block,gauge,The number of secondary cache reads that require reading data from 2+ cache blocks +cockroachdb,storage.secondary_cache.reads_multi_shard,gauge,The number of secondary cache reads that require reading data from 2+ shards +cockroachdb,storage.secondary_cache.reads_no_hit,gauge,The number of reads where no data returned was read from the secondary cache +cockroachdb,storage.secondary_cache.reads_partial_hit,gauge,The number of reads where some data returned was read from the secondary cache +cockroachdb,storage.secondary_cache.reads_total,gauge,The number of reads from the secondary cache +cockroachdb,storage.secondary_cache.size,gauge,"The number of sstable bytes stored in the secondary cache +Shown as byte" +cockroachdb,storage.secondary_cache.write_back_failures,gauge,The number of times writing a cache block to the secondary cache failed +cockroachdb,storage.shared_storage.read,gauge,"Bytes read from shared storage +Shown as byte" +cockroachdb,storage.shared_storage.write,gauge,"Bytes written to external storage +Shown as byte" +cockroachdb,storage.single_delete.ineffectual,gauge,Number of SingleDeletes that were ineffectual +cockroachdb,storage.single_delete.invariant_violation,gauge,Number of SingleDelete invariant violations +cockroachdb,storage.wal.bytes_in,gauge,The number of logical bytes the storage engine has written to the WAL +cockroachdb,storage.wal.bytes_written,gauge,The number of bytes the storage engine has written to the WAL +cockroachdb,storage.wal.fsync.latency.bucket,count,"The write ahead log fsync latency +Shown as nanosecond" +cockroachdb,storage.wal.fsync.latency.count,count,"The write ahead log fsync latency +Shown as nanosecond" +cockroachdb,storage.wal.fsync.latency.sum,count,"The write ahead log fsync latency +Shown as nanosecond" +cockroachdb,storage.write.stalls,gauge,Number of instances of intentional write stalls to backpressure incoming writes +cockroachdb,storage.write_stall_nanos,gauge,"Total write stall duration in nanos +Shown as nanosecond" +cockroachdb,storage.write_stalls,gauge,Number of instances of intentional write stalls to backpressure incoming writes +cockroachdb,sys.cgo.allocbytes,gauge,"[OpenMetrics v1 & v2] Current bytes of memory allocated by cgo +Shown as byte" +cockroachdb,sys.cgo.totalbytes,gauge,"[OpenMetrics v1 & v2] Total bytes of memory allocated by cgo, but not released +Shown as byte" +cockroachdb,sys.cgocalls,gauge,[OpenMetrics v1 & v2] Total number of cgo calls +cockroachdb,sys.cpu.combined.percent.normalized,gauge,"[OpenMetrics v1 & v2] Current user+system cpu percentage, normalized 0-1 by number of cores +Shown as fraction" +cockroachdb,sys.cpu.host.combined.percent_normalized,gauge,"Current user+system cpu percentage across the whole machine, normalized 0-1 by number of cores +Shown as percent" +cockroachdb,sys.cpu.now.ns,gauge,"The time when CPU measurements were taken, as nanoseconds since epoch +Shown as nanosecond" 
+cockroachdb,sys.cpu.sys.ns,gauge,"[OpenMetrics v1 & v2] Total system cpu time in nanoseconds +Shown as nanosecond" +cockroachdb,sys.cpu.sys.percent,gauge,"[OpenMetrics v1 & v2] Current system cpu percentage +Shown as core" +cockroachdb,sys.cpu.user.ns,gauge,"[OpenMetrics v1 & v2] Total user cpu time in nanoseconds +Shown as nanosecond" +cockroachdb,sys.cpu.user.percent,gauge,"[OpenMetrics v1 & v2] Current user cpu percentage +Shown as core" +cockroachdb,sys.fd.open,gauge,[OpenMetrics v1 & v2] Process open file descriptors +cockroachdb,sys.fd.softlimit,gauge,[OpenMetrics v1 & v2] Process open FD soft limit +cockroachdb,sys.gc,gauge,[OpenMetrics v2] Total number of GC runs +cockroachdb,sys.gc.count,gauge,[OpenMetrics v1] Total number of GC runs +cockroachdb,sys.gc.pause.ns,gauge,"[OpenMetrics v1 & v2] Total GC pause in nanoseconds +Shown as nanosecond" +cockroachdb,sys.gc.pause.percent,gauge,"[OpenMetrics v1 & v2] Current GC pause percentage +Shown as fraction" +cockroachdb,sys.go.allocbytes,gauge,"[OpenMetrics v1 & v2] Current bytes of memory allocated by go +Shown as byte" +cockroachdb,sys.go.totalbytes,gauge,"[OpenMetrics v1 & v2] Total bytes of memory allocated by go, but not released +Shown as byte" +cockroachdb,sys.goroutines,gauge,[OpenMetrics v1 & v2] Current number of goroutines +cockroachdb,sys.host.disk.io.time,gauge,"Time spent reading from or writing to all disks since this process started +Shown as nanosecond" +cockroachdb,sys.host.disk.iopsinprogress,gauge,IO operations currently in progress on this host +cockroachdb,sys.host.disk.read,gauge,Disk read operations across all disks since this process started +cockroachdb,sys.host.disk.read.bytes,gauge,"[OpenMetrics v1 & v2] Bytes read from all disks since this process started +Shown as byte" +cockroachdb,sys.host.disk.read.count,gauge,Disk read operations across all disks since this process started +cockroachdb,sys.host.disk.read.time,gauge,"Time spent reading from all disks since this process started +Shown as nanosecond" +cockroachdb,sys.host.disk.weightedio.time,gauge,"Weighted time spent reading from or writing to all disks since this process started +Shown as nanosecond" +cockroachdb,sys.host.disk.write,gauge,Disk write operations across all disks since this process started +cockroachdb,sys.host.disk.write.bytes,gauge,"[OpenMetrics v1 & v2] Bytes written to all disks since this process started +Shown as byte" +cockroachdb,sys.host.disk.write.count,gauge,Disk write operations across all disks since this process started +cockroachdb,sys.host.disk.write.time,gauge,"Time spent writing to all disks since this process started +Shown as nanosecond" +cockroachdb,sys.host.net.recv.bytes,gauge,"[OpenMetrics v1 & v2] Bytes received on all network interfaces since this process started +Shown as byte" +cockroachdb,sys.host.net.recv.packets,gauge,Packets received on all network interfaces since this process started +cockroachdb,sys.host.net.send.bytes,gauge,"[OpenMetrics v1 & v2] Bytes sent on all network interfaces since this process started +Shown as byte" +cockroachdb,sys.host.net.send.packets,gauge,Packets sent on all network interfaces since this process started +cockroachdb,sys.rss,gauge,[OpenMetrics v1 & v2] Current process RSS +cockroachdb,sys.runnable.goroutines.per.cpu,gauge,"Average number of goroutines that are waiting to run, normalized by number of cores" +cockroachdb,sys.runnable.goroutines.per_cpu,gauge,"Average number of goroutines that are waiting to run, normalized by number of cores"
+cockroachdb,sys.totalmem,gauge,"Total memory (both free and used) +Shown as byte" +cockroachdb,sys.uptime,gauge,"[OpenMetrics v1 & v2] Process uptime in seconds +Shown as second" +cockroachdb,sysbytes,gauge,"[OpenMetrics v1 & v2] Number of bytes in system KV pairs +Shown as byte" +cockroachdb,syscount,gauge,[OpenMetrics v1 & v2] Count of system KV pairs +cockroachdb,tenant.consumption.cross_region_network_ru.count,count,Total number of RUs charged for cross-region network traffic +cockroachdb,tenant.consumption.external_io_egress_bytes,gauge,Total number of bytes written to external services such as cloud storage providers +cockroachdb,tenant.consumption.external_io_ingress_bytes,gauge,Total number of bytes read from external services such as cloud storage providers +cockroachdb,tenant.consumption.kv_request_units,gauge,RU consumption attributable to KV +cockroachdb,tenant.consumption.kv_request_units.count,count,RU consumption attributable to KV +cockroachdb,tenant.consumption.pgwire_egress_bytes,gauge,Total number of bytes transferred from a SQL pod to the client +cockroachdb,tenant.consumption.read_batches,gauge,Total number of KV read batches +cockroachdb,tenant.consumption.read_bytes,gauge,Total number of bytes read from KV +cockroachdb,tenant.consumption.read_requests,gauge,Total number of KV read requests +cockroachdb,tenant.consumption.request_units,gauge,Total RU consumption +cockroachdb,tenant.consumption.request_units.count,count,Total RU consumption +cockroachdb,tenant.consumption.sql_pods_cpu_seconds,gauge,"Total amount of CPU used by SQL pods +Shown as second" +cockroachdb,tenant.consumption.write_batches,gauge,Total number of KV write batches +cockroachdb,tenant.consumption.write_bytes,gauge,Total number of bytes written to KV +cockroachdb,tenant.consumption.write_requests,gauge,Total number of KV write requests +cockroachdb,timeseries.write.bytes,count,"[OpenMetrics v1] Total size in bytes of metric samples written to disk +Shown as byte" +cockroachdb,timeseries.write.bytes.count,count,"[OpenMetrics v2] Total size in bytes of metric samples written to disk +Shown as byte" +cockroachdb,timeseries.write.errors,count,"[OpenMetrics v1] Total errors encountered while attempting to write metrics to disk +Shown as error" +cockroachdb,timeseries.write.errors.count,count,"[OpenMetrics v2] Total errors encountered while attempting to write metrics to disk +Shown as error" +cockroachdb,timeseries.write.samples,count,[OpenMetrics v1] Total number of metric samples written to disk +cockroachdb,timeseries.write.samples.count,count,[OpenMetrics v2] Total number of metric samples written to disk +cockroachdb,totalbytes,gauge,"[OpenMetrics v1 & v2] Total number of bytes taken up by keys and values including non-live data +Shown as byte" +cockroachdb,tscache.skl.pages,gauge,Number of pages in the timestamp cache +cockroachdb,tscache.skl.read.pages,gauge,[OpenMetrics v1 & v2] Number of pages in the read timestamp cache +cockroachdb,tscache.skl.read.rotations,count,[OpenMetrics v1] Number of page rotations in the read timestamp cache +cockroachdb,tscache.skl.read.rotations.count,count,[OpenMetrics v2] Number of page rotations in the read timestamp cache +cockroachdb,tscache.skl.rotations.count,count,Number of page rotations in the timestamp cache +cockroachdb,tscache.skl.write.pages,gauge,[OpenMetrics v1 & v2] Number of pages in the write timestamp cache +cockroachdb,tscache.skl.write.rotations,count,[OpenMetrics v1] Number of page rotations in the write timestamp cache 
+cockroachdb,tscache.skl.write.rotations.count,count,[OpenMetrics v2] Number of page rotations in the write timestamp cache +cockroachdb,txn.abandons,count,[OpenMetrics v1] Number of abandoned KV transactions +cockroachdb,txn.abandons.count,count,[OpenMetrics v2] Number of abandoned KV transactions +cockroachdb,txn.aborts,count,[OpenMetrics v1] Number of aborted KV transactions +cockroachdb,txn.aborts.count,count,[OpenMetrics v2] Number of aborted KV transactions +cockroachdb,txn.autoretries,count,[OpenMetrics v1] Number of automatic retries to avoid serializable restarts +cockroachdb,txn.autoretries.count,count,[OpenMetrics v2] Number of automatic retries to avoid serializable restarts +cockroachdb,txn.commit_waits.before_commit_trigger.count,count,Number of KV transactions that had to commit-wait on the server before committing because they had a commit trigger +cockroachdb,txn.commit_waits.count,count,Number of KV transactions that had to commit-wait on commit in order to ensure linearizability. This generally happens to transactions writing to global ranges. +cockroachdb,txn.commits,count,[OpenMetrics v1] Number of committed KV transactions (including 1PC) +cockroachdb,txn.commits.count,count,[OpenMetrics v2] Number of committed KV transactions (including 1PC) +cockroachdb,txn.commits1PC,count,[OpenMetrics v1] Number of committed one-phase KV transactions +cockroachdb,txn.commits1PC.count,count,[OpenMetrics v2] Number of committed one-phase KV transactions +cockroachdb,txn.condensed_intent_spans.count,count,KV transactions that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans_gauge for a gauge of such transactions currently running. +cockroachdb,txn.condensed_intent_spans_gauge,gauge,KV transactions currently running that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans for a perpetual counter/rate. +cockroachdb,txn.condensed_intent_spans_rejected.count,count,KV transactions that have been aborted because they exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). Rejection is caused by kv.transaction.reject_over_max_intents_budget. +cockroachdb,txn.durations,gauge,[OpenMetrics v1] KV transaction durations in nanoseconds +cockroachdb,txn.durations.bucket,count,[OpenMetrics v2] KV transaction durations in nanoseconds +cockroachdb,txn.durations.count,count,[OpenMetrics v2] KV transaction durations in nanoseconds +cockroachdb,txn.durations.sum,count,[OpenMetrics v2] KV transaction durations in nanoseconds +cockroachdb,txn.parallelcommits.auto_retries.count,count,Number of commit tries after successful failed parallel commit attempts +cockroachdb,txn.parallelcommits.count,count,Number of KV transaction parallel commit attempts +cockroachdb,txn.refresh.auto_retries.count,count,Number of request retries after successful client-side refreshes +cockroachdb,txn.refresh.fail.count,count,Number of failed client-side transaction refreshes +cockroachdb,txn.refresh.fail_with_condensed_spans.count,count,"Number of failed client-side refreshes for transactions whose read tracking lost fidelity because of condensing. Such a failure could be a false conflict. Failures counted here are also counted in txn.refresh.fail, and the respective transactions are also counted in txn.refresh.memory_limit_exceeded." 
+cockroachdb,txn.refresh.memory_limit_exceeded.count,count,"Number of transaction which exceed the refresh span bytes limit, causing their read spans to be condensed" +cockroachdb,txn.refresh.success.count,count,"Number of successful client-side transaction refreshes. A refresh may be preemptive or reactive. A reactive refresh is performed after a request throws an error because a refresh is needed for it to succeed. In these cases, the request will be re-issued as an auto-retry (see txn.refresh.auto_retries) after the refresh succeeds." +cockroachdb,txn.refresh.success_server_side.count,count,Number of successful server-side transaction refreshes +cockroachdb,txn.restarts,gauge,[OpenMetrics v1] Number of restarted KV transactions +cockroachdb,txn.restarts.asyncwritefailure.count,count,Number of restarts due to async consensus writes that failed to leave intents +cockroachdb,txn.restarts.bucket,count,[OpenMetrics v2] Number of restarted KV transactions +cockroachdb,txn.restarts.commitdeadlineexceeded.count,count,Number of restarts due to a transaction exceeding its deadline +cockroachdb,txn.restarts.count,count,[OpenMetrics v2] Number of restarted KV transactions +cockroachdb,txn.restarts.deleterange,count,[OpenMetrics v1] Number of restarts due to a forwarded commit timestamp and a DeleteRange command +cockroachdb,txn.restarts.deleterange.count,count,[OpenMetrics v2] Number of restarts due to a forwarded commit timestamp and a DeleteRange command +cockroachdb,txn.restarts.possiblereplay,count,[OpenMetrics v1] Number of restarts due to possible replays of command batches at the storage layer +cockroachdb,txn.restarts.possiblereplay.count,count,[OpenMetrics v2] Number of restarts due to possible replays of command batches at the storage layer +cockroachdb,txn.restarts.readwithinuncertainty.count,count,Number of restarts due to reading a new value within the uncertainty interval +cockroachdb,txn.restarts.serializable,count,[OpenMetrics v1] Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE +cockroachdb,txn.restarts.serializable.count,count,[OpenMetrics v2] Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE +cockroachdb,txn.restarts.sum,count,[OpenMetrics v2] Number of restarted KV transactions +cockroachdb,txn.restarts.txnaborted.count,count,Number of restarts due to an abort by a concurrent transaction (usually due to deadlock) +cockroachdb,txn.restarts.txnpush.count,count,Number of restarts due to a transaction push failure +cockroachdb,txn.restarts.unknown.count,count,Number of restarts due to a unknown reasons +cockroachdb,txn.restarts.writetooold,count,[OpenMetrics v1] Number of restarts due to a concurrent writer committing first +cockroachdb,txn.restarts.writetooold.count,count,[OpenMetrics v2] Number of restarts due to a concurrent writer committing first +cockroachdb,txn.restarts.writetoooldmulti.count,count,Number of restarts due to multiple concurrent writers committing first +cockroachdb,txn.rollbacks.async.failed.count,count,Number of KV transaction that failed to send abort asynchronously which is not always retried +cockroachdb,txn.rollbacks.failed.count,count,Number of KV transaction that failed to send final abort +cockroachdb,txn.server_side.1PC.failure.count,count,Number of batches that attempted to commit using 1PC and failed +cockroachdb,txn.server_side.1PC.success.count,count,Number of batches that attempted to commit using 1PC and succeeded 
+cockroachdb,txn.server_side_retry.read_evaluation.failure.count,count,Number of read batches that were not successfully refreshed server side +cockroachdb,txn.server_side_retry.read_evaluation.success.count,count,Number of read batches that were successfully refreshed server side +cockroachdb,txn.server_side_retry.uncertainty_interval_error.failure.count,count,Number of batches that ran into uncertainty interval errors that were not successfully refreshed server side +cockroachdb,txn.server_side_retry.uncertainty_interval_error.success.count,count,Number of batches that ran into uncertainty interval errors that were successfully refreshed server side +cockroachdb,txn.server_side_retry.write_evaluation.failure.count,count,Number of write batches that were not successfully refreshed server side +cockroachdb,txn.server_side_retry.write_evaluation.success.count,count,Number of write batches that were successfully refreshed server side +cockroachdb,txnrecovery.attempts.count,count,Number of transaction recovery attempts executed +cockroachdb,txnrecovery.attempts.pending,gauge,Number of transaction recovery attempts currently in-flight +cockroachdb,txnrecovery.failures.count,count,Number of transaction recovery attempts that failed +cockroachdb,txnrecovery.successes.aborted.count,count,Number of transaction recovery attempts that aborted a transaction +cockroachdb,txnrecovery.successes.committed.count,count,Number of transaction recovery attempts that committed a transaction +cockroachdb,txnrecovery.successes.pending.count,count,Number of transaction recovery attempts that left a transaction pending +cockroachdb,txnwaitqueue.deadlocks.count,count,Number of deadlocks detected by the txn wait queue +cockroachdb,txnwaitqueue.deadlocks_total.count,count,Number of deadlocks detected by the txn wait queue +cockroachdb,txnwaitqueue.pushee.waiting,gauge,Number of pushees on the txn wait queue +cockroachdb,txnwaitqueue.pusher.slow,gauge,The total number of cases where a pusher waited more than the excessive wait threshold +cockroachdb,txnwaitqueue.pusher.wait_time.bucket,count,"Histogram of durations spent in queue by pushers +Shown as nanosecond" +cockroachdb,txnwaitqueue.pusher.wait_time.count,count,"Histogram of durations spent in queue by pushers +Shown as nanosecond" +cockroachdb,txnwaitqueue.pusher.wait_time.sum,count,"Histogram of durations spent in queue by pushers +Shown as nanosecond" +cockroachdb,txnwaitqueue.pusher.waiting,gauge,Number of pushers on the txn wait queue +cockroachdb,txnwaitqueue.query.wait_time.bucket,count,"Histogram of durations spent in queue by queries +Shown as nanosecond" +cockroachdb,txnwaitqueue.query.wait_time.count,count,"Histogram of durations spent in queue by queries +Shown as nanosecond" +cockroachdb,txnwaitqueue.query.wait_time.sum,count,"Histogram of durations spent in queue by queries +Shown as nanosecond" +cockroachdb,txnwaitqueue.query.waiting,gauge,Number of transaction status queries waiting for an updated transaction record +cockroachdb,valbytes,gauge,"[OpenMetrics v1 & v2] Number of bytes taken up by values +Shown as byte" +cockroachdb,valcount,gauge,[OpenMetrics v1 & v2] Count of all values \ No newline at end of file diff --git a/src/current/_data/v25.3/metrics/datadog-crdb-dedicated.csv b/src/current/_data/v25.3/metrics/datadog-crdb-dedicated.csv new file mode 100644 index 00000000000..b78a8595406 --- /dev/null +++ b/src/current/_data/v25.3/metrics/datadog-crdb-dedicated.csv @@ -0,0 +1,475 @@ +prefix,datadog_id,type,description
+crdb_dedicated,addsstable.applications,count,"Number of SSTable ingestions applied i.e. applied by Replicas. Shown as operation +Shown as operation" +crdb_dedicated,addsstable.copies,count,"number of SSTable ingestions that required copying files during application. Shown as operation +Shown as operation" +crdb_dedicated,addsstable.proposals,count,"Number of SSTable ingestions proposed i.e. sent to Raft by lease holders. Shown as operation +Shown as operation" +crdb_dedicated,admission.wait.sum.kv,count,"Total wait time in micros for requests within the KV layer +Shown as microsecond" +crdb_dedicated,admission.wait.sum.kv.stores,count,"Total wait time in micros for write requests within the KV layer +Shown as microsecond" +crdb_dedicated,admission.wait.sum.sql.kv.response,count,"Total wait time in micros for responses between the KV and SQL layer +Shown as microsecond" +crdb_dedicated,admission.wait.sum.sql.sql.response,count,"Total wait time in micros for responses within the SQL layer when receiving DistSQL responses +Shown as microsecond" +crdb_dedicated,capacity,gauge,"Total storage capacity. Shown as byte +Shown as byte" +crdb_dedicated,capacity.available,gauge,"Available storage capacity. Shown as byte +Shown as byte" +crdb_dedicated,capacity.reserved,gauge,"Capacity reserved for snapshots. Shown as byte +Shown as byte" +crdb_dedicated,capacity.used,gauge,"Used storage capacity. Shown as byte +Shown as byte" +crdb_dedicated,changefeed.backfill.count,gauge,"Number of changefeeds currently executing backfill. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.backfill.pending.ranges,gauge,"Number of ranges in an ongoing backfill that are yet to be fully emitted. Shown as count +Shown as unit" +crdb_dedicated,changefeed.commit.latency,gauge,"Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink. If the sink batches events, then the difference between the oldest event in the batch and acknowledgement is recorded. Excludes latency during backfill. Shown as nanoseconds. +Shown as unit" +crdb_dedicated,changefeed.emitted.messages,count,"Messages emitted by all feeds. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.error.retries,count,"Total retryable errors encountered by all changefeeds. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.failures,count,"Total number of changefeed jobs which have failed. Shown as count. +Shown as unit" +crdb_dedicated,changefeed.max.behind.nanos,gauge,"Largest commit-to-emit duration of any running feed. Shown as nanoseconds. +Shown as nanosecond" +crdb_dedicated,changefeed.message.size.hist,gauge,"Histogram of message sizes for changefeeds. Shown as bytes. +Shown as byte" +crdb_dedicated,changefeed.running,gauge,"Number of currently running changefeeds, including sinkless. Shown as count. +Shown as unit" +crdb_dedicated,clock.offset.meannanos,gauge,"Mean clock offset with other nodes in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,clock.offset.stddevnanos,gauge,"Stdddev clock offset with other nodes in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,distsender.batches,count,Number of batches processed +crdb_dedicated,distsender.batches.partial,count,Number of partial batches processed +crdb_dedicated,distsender.errors.notleaseholder,count,"Number of NotLeaseHolderErrors encountered. 
Shown as error +Shown as error" +crdb_dedicated,distsender.rpc.sent,count,"Number of RPCs sent +Shown as request" +crdb_dedicated,distsender.rpc.sent.local,count,"Number of local RPCs sent +Shown as request" +crdb_dedicated,distsender.rpc.sent.nextreplicaerror,count,"Number of RPCs sent due to per-replica errors. Shown as error +Shown as request" +crdb_dedicated,exec.error,count,"Number of batch KV requests that failed to execute on this node. Shown as request +Shown as request" +crdb_dedicated,exec.latency,count,"Latency in nanoseconds of batch KV requests executed on this node. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,exec.success,count,"Number of batch KV requests executed successfully on this node. Shown as request +Shown as request" +crdb_dedicated,gcbytesage,gauge,"Cumulative age of non-live data in seconds. Shown as second +Shown as second" +crdb_dedicated,gossip.bytes.received,count,"Number of received gossip bytes. Shown as byte +Shown as byte" +crdb_dedicated,gossip.bytes.sent,count,"Number of sent gossip bytes. Shown as byte +Shown as byte" +crdb_dedicated,gossip.connections.incoming,gauge,"Number of active incoming gossip connections. Shown as connection +Shown as connection" +crdb_dedicated,gossip.connections.outgoing,gauge,"Number of active outgoing gossip connections. Shown as connection +Shown as connection" +crdb_dedicated,gossip.connections.refused,count,"Number of refused incoming gossip connections. Shown as connection +Shown as connection" +crdb_dedicated,gossip.infos.received,count,"Number of received gossip Info objects +Shown as message" +crdb_dedicated,gossip.infos.sent,count,"Number of sent gossip Info objects +Shown as message" +crdb_dedicated,intentage,gauge,"Cumulative age of intents in seconds. Shown as second +Shown as second" +crdb_dedicated,intentbytes,gauge,"Number of bytes in intent KV pairs. Shown as byte +Shown as byte" +crdb_dedicated,intentcount,gauge,"Count of intent keys. Shown as key +Shown as key" +crdb_dedicated,jobs.changefeed.resume.retry.error,count,"Number of changefeed jobs which failed with a retriable error. Shown as count. +Shown as unit" +crdb_dedicated,keybytes,gauge,"Number of bytes taken up by keys. Shown as byte +Shown as byte" +crdb_dedicated,keycount,gauge,"Count of all keys. Shown as key +Shown as key" +crdb_dedicated,leases.epoch,gauge,"Number of replica leaseholders using epoch-based leases +Shown as unit" +crdb_dedicated,leases.error,count,"Number of failed lease requests. Shown as request +Shown as request" +crdb_dedicated,leases.expiration,gauge,"Number of replica leaseholders using expiration-based leases +Shown as unit" +crdb_dedicated,leases.success,count,"Number of successful lease requests. Shown as request +Shown as request" +crdb_dedicated,leases.transfers.error,count,"Number of failed lease transfers +Shown as error" +crdb_dedicated,leases.transfers.success,count,"Number of successful lease transfers +Shown as success" +crdb_dedicated,livebytes,gauge,"Number of bytes of live data keys plus values. Shown as byte +Shown as byte" +crdb_dedicated,livecount,gauge,"Count of live keys. Shown as key +Shown as key" +crdb_dedicated,liveness.epochincrements,count,"Number of times this node has incremented its liveness epoch +Shown as unit" +crdb_dedicated,liveness.heartbeatfailures,count,"Number of failed node liveness heartbeats from this node +Shown as unit" +crdb_dedicated,liveness.heartbeatlatency,count,"Node liveness heartbeat latency in nanoseconds. 
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,liveness.heartbeatsuccesses,count,"Number of successful node liveness heartbeats from this node +Shown as unit" +crdb_dedicated,liveness.livenodes,gauge,"Number of live nodes in the cluster will be 0 if this node is not itself live +Shown as unit" +crdb_dedicated,queue.consistency.pending,gauge,"Number of pending replicas in the consistency checker queue +Shown as unit" +crdb_dedicated,queue.consistency.process.failure,count,"Number of replicas which failed processing in the consistency checker queue +Shown as unit" +crdb_dedicated,queue.consistency.process.success,count,"Number of replicas successfully processed by the consistency checker queue +Shown as success" +crdb_dedicated,queue.consistency.processingnanos,count,"Nanoseconds spent processing replicas in the consistency checker queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.gc.info.abortspanconsidered,count,"Number of AbortSpan entries old enough to be considered for removal +Shown as transaction" +crdb_dedicated,queue.gc.info.abortspangcnum,count,"Number of AbortSpan entries fit for removal +Shown as transaction" +crdb_dedicated,queue.gc.info.abortspanscanned,count,"Number of transactions present in the AbortSpan scanned from the engine. Shown as transaction +Shown as transaction" +crdb_dedicated,queue.gc.info.intentsconsidered,count,"Number of ‘old’ intents +Shown as transaction" +crdb_dedicated,queue.gc.info.intenttxns,count,"Number of associated distinct transactions. Shown as transaction +Shown as key" +crdb_dedicated,queue.gc.info.numkeysaffected,count,"Number of keys with GC’able data. Shown as key +Shown as key" +crdb_dedicated,queue.gc.info.pushtxn,count,"Number of attempted pushes +Shown as attempt" +crdb_dedicated,queue.gc.info.resolvesuccess,count,"Number of successful intent resolutions +Shown as success" +crdb_dedicated,queue.gc.info.resolvetotal,count,"Number of attempted intent resolutions +Shown as attempt" +crdb_dedicated,queue.gc.info.transactionspangcaborted,count,"Number of GC’able entries corresponding to aborted txns +Shown as unit" +crdb_dedicated,queue.gc.info.transactionspangccommitted,count,"Number of GC’able entries corresponding to committed txns +Shown as commit" +crdb_dedicated,queue.gc.info.transactionspangcpending,count,"Number of GC’able entries corresponding to pending txns +Shown as unit" +crdb_dedicated,queue.gc.info.transactionspanscanned,count,"Number of entries in transaction spans scanned from the engine +Shown as unit" +crdb_dedicated,queue.gc.pending,gauge,"Number of pending replicas in the GC queue +Shown as unit" +crdb_dedicated,queue.gc.process.failure,count,"Number of replicas which failed processing in the GC queue +Shown as unit" +crdb_dedicated,queue.gc.process.success,count,"Number of replicas successfully processed by the GC queue +Shown as success" +crdb_dedicated,queue.gc.processingnanos,count,"Nanoseconds spent processing replicas in the GC queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.raftlog.pending,gauge,"Number of pending replicas in the Raft log queue +Shown as unit" +crdb_dedicated,queue.raftlog.process.failure,count,"Number of replicas which failed processing in the Raft log queue +Shown as unit" +crdb_dedicated,queue.raftlog.process.success,count,"Number of replicas successfully processed by the Raft log queue +Shown as unit" +crdb_dedicated,queue.raftlog.processingnanos,count,"Nanoseconds spent processing replicas in the Raft log queue. 
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.raftsnapshot.pending,gauge,"Number of pending replicas in the Raft repair queue +Shown as unit" +crdb_dedicated,queue.raftsnapshot.process.failure,count,"Number of replicas which failed processing in the Raft repair queue +Shown as unit" +crdb_dedicated,queue.raftsnapshot.process.success,count,"Number of replicas successfully processed by the Raft repair queue +Shown as unit" +crdb_dedicated,queue.raftsnapshot.processingnanos,count,"Nanoseconds spent processing replicas in the Raft repair queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.replicagc.pending,gauge,"Number of pending replicas in the replica GC queue +Shown as unit" +crdb_dedicated,queue.replicagc.process.failure,count,"Number of replicas which failed processing in the replica GC queue +Shown as unit" +crdb_dedicated,queue.replicagc.process.success,count,"Number of replicas successfully processed by the replica GC queue +Shown as unit" +crdb_dedicated,queue.replicagc.processingnanos,count,"Nanoseconds spent processing replicas in the replica GC queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.replicagc.removereplica,count,"Number of replica removals attempted by the replica gc queue +Shown as unit" +crdb_dedicated,queue.replicate.addreplica,count,"Number of replica additions attempted by the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.pending,gauge,"Number of pending replicas in the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.process.failure,count,"Number of replicas which failed processing in the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.process.success,count,"Number of replicas successfully processed by the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.processingnanos,count,"Nanoseconds spent processing replicas in the replicate queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.replicate.purgatory,gauge,"Number of replicas in the replicate queue’s purgatory, awaiting allocation options +Shown as unit" +crdb_dedicated,queue.replicate.rebalancereplica,count,"Number of replica rebalancer-initiated additions attempted by the replicate queue +Shown as unit" +crdb_dedicated,queue.replicate.removedeadreplica,count,"Number of dead replica removals attempted by the replicate queue typically in response to a node outage +Shown as unit" +crdb_dedicated,queue.replicate.removereplica,count,"Number of replica removals attempted by the replicate queue typically in response to a rebalancer-initiated addition +Shown as unit" +crdb_dedicated,queue.replicate.transferlease,count,"Number of range lease transfers attempted by the replicate queue +Shown as unit" +crdb_dedicated,queue.split.pending,gauge,"Number of pending replicas in the split queue +Shown as unit" +crdb_dedicated,queue.split.process.failure,count,"Number of replicas which failed processing in the split queue +Shown as unit" +crdb_dedicated,queue.split.process.success,count,"Number of replicas successfully processed by the split queue +Shown as unit" +crdb_dedicated,queue.split.processingnanos,count,"Nanoseconds spent processing replicas in the split queue. 
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,queue.tsmaintenance.pending,gauge,"Number of pending replicas in the timeseries maintenance queue +Shown as unit" +crdb_dedicated,queue.tsmaintenance.process.failure,count,"Number of replicas which failed processing in the timeseries maintenance queue +Shown as unit" +crdb_dedicated,queue.tsmaintenance.process.success,count,"Number of replicas successfully processed by the timeseries maintenance queue +Shown as unit" +crdb_dedicated,queue.tsmaintenance.processingnanos,count,"Nanoseconds spent processing replicas in the timeseries maintenance queue. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.commandsapplied,count,"Count of Raft commands applied. Shown as command +Shown as command" +crdb_dedicated,raft.enqueued.pending,gauge,"Number of pending outgoing messages in the Raft Transport queue +Shown as unit" +crdb_dedicated,raft.heartbeats.pending,gauge,"Number of pending heartbeats and responses waiting to be coalesced +Shown as unit" +crdb_dedicated,raft.process.commandcommit.latency,count,"Latency histogram in nanoseconds for committing Raft commands. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.process.logcommit.latency,count,"Latency histogram in nanoseconds for committing Raft log entries. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.process.tickingnanos,count,"Nanoseconds spent in store.processRaft processing replica.Tick. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.process.workingnanos,count,"Nanoseconds spent in store.processRaft working. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,raft.rcvd.app,count,"Number of MsgApp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.appresp,count,"Number of MsgAppResp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.dropped,count,"Number of dropped incoming Raft messages +Shown as message" +crdb_dedicated,raft.rcvd.heartbeat,count,"Number of coalesced, if enabled MsgHeartbeat messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.heartbeatresp,count,"Number of coalesced, if enabled MsgHeartbeatResp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.prevote,count,"Number of MsgPreVote messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.prevoteresp,count,"Number of MsgPreVoteResp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.prop,count,"Number of MsgProp messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.snap,count,"Number of MsgSnap messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.timeoutnow,count,"Number of MsgTimeoutNow messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.transferleader,count,"Number of MsgTransferLeader messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.vote,count,"Number of MsgVote messages received by this store +Shown as message" +crdb_dedicated,raft.rcvd.voteresp,count,"Number of MsgVoteResp messages received by this store +Shown as message" +crdb_dedicated,raft.ticks,count,"Number of Raft ticks queued +Shown as unit" +crdb_dedicated,raftlog.behind,gauge,"Number of Raft log entries followers on other stores are behind. Shown as entry +Shown as unit" +crdb_dedicated,raftlog.truncated,count,"Number of Raft log entries truncated. 
Shown as entry +Shown as unit" +crdb_dedicated,range.adds,count,"Number of range additions +Shown as unit" +crdb_dedicated,range.raftleadertransfers,count,"Number of raft leader transfers +Shown as unit" +crdb_dedicated,range.removes,count,"Number of range removals +Shown as unit" +crdb_dedicated,range.snapshots.generated,count,"Number of generated snapshots +Shown as unit" +crdb_dedicated,range.splits,count,"Number of range splits +Shown as unit" +crdb_dedicated,ranges,gauge,"Number of ranges +Shown as unit" +crdb_dedicated,ranges.overreplicated,gauge,"Number of ranges with more live replicas than the replication target +Shown as unit" +crdb_dedicated,ranges.unavailable,gauge,"Number of ranges with fewer live replicas than needed for quorum +Shown as unit" +crdb_dedicated,ranges.underreplicated,gauge,"Number of ranges with fewer live replicas than the replication target +Shown as unit" +crdb_dedicated,rebalancing.writespersecond,gauge,"Number of keys written i.e. applied by raft per second to the store, averaged over a large time period as used in rebalancing decisions. Shown as key +Shown as unit" +crdb_dedicated,replicas,gauge,"Number of replicas +Shown as unit" +crdb_dedicated,replicas.leaders,gauge,"Number of raft leaders +Shown as unit" +crdb_dedicated,replicas.leaders.not_leaseholders,gauge,"Number of replicas that are Raft leaders whose range lease is held by another store +Shown as unit" +crdb_dedicated,replicas.leaseholders,gauge,"Number of lease holders +Shown as unit" +crdb_dedicated,replicas.quiescent,gauge,"Number of quiesced replicas +Shown as unit" +crdb_dedicated,replicas.reserved,gauge,"Number of replicas reserved for snapshots +Shown as unit" +crdb_dedicated,requests.backpressure.split,gauge,"Number of backpressured writes waiting on a Range split +Shown as unit" +crdb_dedicated,requests.slow.distsender,gauge,"Number of requests that have been stuck for a long time in the dist sender. Shown as request +Shown as request" +crdb_dedicated,requests.slow.lease,gauge,"Number of requests that have been stuck for a long time acquiring a lease. Shown as request +Shown as request" +crdb_dedicated,requests.slow.raft,gauge,"Number of requests that have been stuck for a long time in raft. Shown as request +Shown as request" +crdb_dedicated,rocksdb.block.cache.hits,gauge,"Count of block cache hits +Shown as hit" +crdb_dedicated,rocksdb.block.cache.misses,gauge,"Count of block cache misses +Shown as miss" +crdb_dedicated,rocksdb.block.cache.pinned.usage,gauge,"Bytes pinned by the block cache. Shown as byte +Shown as byte" +crdb_dedicated,rocksdb.block.cache.usage,gauge,"Bytes used by the block cache. Shown as byte +Shown as byte" +crdb_dedicated,rocksdb.bloom_filter.prefix.checked,gauge,"Number of times the bloom filter was checked +Shown as unit" +crdb_dedicated,rocksdb.bloom_filter.prefix.useful,gauge,"Number of times the bloom filter helped avoid iterator creation +Shown as unit" +crdb_dedicated,rocksdb.compactions,gauge,"Number of table compactions +Shown as unit" +crdb_dedicated,rocksdb.flushes,gauge,"Number of table flushes +Shown as flush" +crdb_dedicated,rocksdb.memtable.total.size,gauge,"Current size of memtable in bytes. Shown as byte +Shown as byte" +crdb_dedicated,rocksdb.num_sstables,gauge,"Number of rocksdb SSTables. Shown as table +Shown as table" +crdb_dedicated,rocksdb.read.amplification,gauge,"Number of disk reads per query. 
Shown as read +Shown as read" +crdb_dedicated,rocksdb.table.readers.mem.estimate,gauge,"Memory used by index and filter blocks +Shown as unit" +crdb_dedicated,round_trip.latency,count,"Distribution of round-trip latencies with other nodes in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.bytesin,count,"Number of sql bytes received. Shown as byte +Shown as byte" +crdb_dedicated,sql.bytesout,count,"Number of sql bytes sent. Shown as byte +Shown as byte" +crdb_dedicated,sql.conn.latency,count,"Latency to establish and authenticate a SQL connection. Shown as nanoseconds. +Shown as nanosecond" +crdb_dedicated,sql.conns,gauge,"Number of active sql connections. Shown as connection +Shown as connection" +crdb_dedicated,sql.ddl.count,count,"Number of SQL DDL statements +Shown as query" +crdb_dedicated,sql.delete.count,count,"Number of SQL DELETE statements +Shown as query" +crdb_dedicated,sql.distsql.contended.queries.count,count,"Number of SQL queries that experienced contention. Shown as count. +Shown as query" +crdb_dedicated,sql.distsql.exec.latency,count,"Latency in nanoseconds of DistSQL statement execution. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.distsql.flows.active,gauge,"Number of distributed SQL flows currently active +Shown as query" +crdb_dedicated,sql.distsql.flows.total,count,"Number of distributed SQL flows executed +Shown as query" +crdb_dedicated,sql.distsql.queries.active,gauge,"Number of distributed SQL queries currently active +Shown as query" +crdb_dedicated,sql.distsql.queries.total,count,"Number of distributed SQL queries executed +Shown as query" +crdb_dedicated,sql.distsql.select.count,count,"Number of DistSQL SELECT statements +Shown as unit" +crdb_dedicated,sql.distsql.service.latency,count,"Latency in nanoseconds of DistSQL request execution. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.exec.latency,count,"Latency in nanoseconds of SQL statement execution. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.failure.count,count,"Number of statements resulting in a planning or runtime error. Shown as count. +Shown as unit" +crdb_dedicated,sql.full.scan.count,count,"Number of full table or index scans. Shown as count. +Shown as unit" +crdb_dedicated,sql.insert.count,count,"Number of SQL INSERT statements +Shown as unit" +crdb_dedicated,sql.mem.distsql.current,gauge,"Current sql statement memory usage for distsql +Shown as unit" +crdb_dedicated,sql.mem.distsql.max,count,"Memory usage per sql statement for distsql +Shown as unit" +crdb_dedicated,sql.mem.internal.session.current,gauge,"Current sql session memory usage for internal +Shown as unit" +crdb_dedicated,sql.mem.internal.session.max,count,"Memory usage per sql session for internal +Shown as unit" +crdb_dedicated,sql.mem.internal.txn.current,gauge,"Current sql transaction memory usage for internal +Shown as unit" +crdb_dedicated,sql.mem.internal.txn.max,count,"Memory usage per sql transaction for internal +Shown as unit" +crdb_dedicated,sql.misc.count,count,"Number of other SQL statements +Shown as query" +crdb_dedicated,sql.new_conns.count,count,"Number of SQL connections created +Shown as connection" +crdb_dedicated,sql.query.count,count,"Number of SQL queries +Shown as query" +crdb_dedicated,sql.select.count,count,"Number of SQL SELECT statements +Shown as query" +crdb_dedicated,sql.service.latency,count,"Latency in nanoseconds of SQL request execution. 
Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sql.statements.active,gauge,"Number of currently active user SQL statements. Shown as count. +Shown as unit" +crdb_dedicated,sql.txn.abort.count,count,"Number of SQL transaction ABORT statements +Shown as unit" +crdb_dedicated,sql.txn.begin.count,count,"Number of SQL transaction BEGIN statements +Shown as unit" +crdb_dedicated,sql.txn.commit.count,count,"Number of SQL transaction COMMIT statements +Shown as unit" +crdb_dedicated,sql.txn.latency,count,"Latency of SQL transactions. Shown as nanoseconds. +Shown as unit" +crdb_dedicated,sql.txn.rollback.count,count,"Number of SQL transaction ROLLBACK statements +Shown as unit" +crdb_dedicated,sql.txns.open,gauge,"Number of currently open SQL transactions. Shown as count. +Shown as unit" +crdb_dedicated,sql.update.count,count,"Number of SQL UPDATE statements +Shown as unit" +crdb_dedicated,sys.cgo.allocbytes,gauge,"Current bytes of memory allocated by cgo. Shown as byte +Shown as byte" +crdb_dedicated,sys.cgo.totalbytes,gauge,"Total bytes of memory allocated by cgo, but not released. Shown as byte +Shown as byte" +crdb_dedicated,sys.cgocalls,gauge,"Total number of cgo calls +Shown as unit" +crdb_dedicated,sys.cpu.combined.percent.normalized,gauge,"Current user+system cpu percentage, normalized 0-1 by number of cores. +Shown as fraction" +crdb_dedicated,sys.cpu.sys.ns,gauge,"Total system cpu time in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sys.cpu.sys.percent,gauge,"Current system cpu percentage +Shown as core" +crdb_dedicated,sys.cpu.user.ns,gauge,"Total user cpu time in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sys.cpu.user.percent,gauge,"Current user cpu percentage. Shown as percent +Shown as core" +crdb_dedicated,sys.fd.open,gauge,"Process open file descriptors +Shown as unit" +crdb_dedicated,sys.fd.softlimit,gauge,"Process open FD soft limit +Shown as unit" +crdb_dedicated,sys.gc.count,gauge,"Total number of GC runs +Shown as garbage collection" +crdb_dedicated,sys.gc.pause.ns,gauge,"Total GC pause in nanoseconds. Shown as nanosecond +Shown as nanosecond" +crdb_dedicated,sys.gc.pause.percent,gauge,"Current GC pause percentage. Shown as fraction +Shown as fraction" +crdb_dedicated,sys.go.allocbytes,gauge,"Current bytes of memory allocated by go. Shown as byte +Shown as byte" +crdb_dedicated,sys.go.totalbytes,gauge,"Total bytes of memory allocated by go, but not released. Shown as byte +Shown as byte" +crdb_dedicated,sys.goroutines,gauge,"Current number of goroutines +Shown as unit" +crdb_dedicated,sys.host.net.recv.bytes,gauge,"Bytes received on all network interfaces since this process started. +Shown as byte" +crdb_dedicated,sys.host.net.send.bytes,gauge,"Bytes sent on all network interfaces since this process started. +Shown as byte" +crdb_dedicated,sys.rss,gauge,"Current process RSS +Shown as unit" +crdb_dedicated,sys.uptime,gauge,"Process uptime in seconds. Shown as second +Shown as second" +crdb_dedicated,sysbytes,gauge,"Number of bytes in system KV pairs. Shown as byte +Shown as byte" +crdb_dedicated,syscount,gauge,"Count of system KV pairs +Shown as unit" +crdb_dedicated,timeseries.write.bytes,count,"Total size in bytes of metric samples written to disk. Shown as byte +Shown as byte" +crdb_dedicated,timeseries.write.errors,count,"Total errors encountered while attempting to write metrics to disk. 
Shown as error +Shown as error" +crdb_dedicated,timeseries.write.samples,count,"Total number of metric samples written to disk +Shown as unit" +crdb_dedicated,totalbytes,gauge,"Total number of bytes taken up by keys and values including non-live data. Shown as byte +Shown as byte" +crdb_dedicated,txn.aborts,count,"Number of aborted KV transactions +Shown as unit" +crdb_dedicated,txn.commits,count,"Number of committed KV transactions including 1PC +Shown as commit" +crdb_dedicated,txn.commits1PC,count,"Number of committed one-phase KV transactions +Shown as commit" +crdb_dedicated,txn.durations,count,"KV transaction durations in nanoseconds +Shown as nanosecond" +crdb_dedicated,txn.restarts,count,"Number of restarted KV transactions +Shown as unit" +crdb_dedicated,txn.restarts.serializable,count,"Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE +Shown as unit" +crdb_dedicated,txn.restarts.writetooold,count,"Number of restarts due to a concurrent writer committing first +Shown as unit" +crdb_dedicated,valbytes,gauge,"Number of bytes taken up by values. Shown as byte +Shown as byte" +crdb_dedicated,valcount,gauge,"Count of all values +Shown as unit" \ No newline at end of file diff --git a/src/current/_data/v25.3/metrics/metrics.yml b/src/current/_data/v25.3/metrics/metrics-cloud.yml similarity index 100% rename from src/current/_data/v25.3/metrics/metrics.yml rename to src/current/_data/v25.3/metrics/metrics-cloud.yml diff --git a/src/current/_data/v25.3/metrics/metrics.yaml b/src/current/_data/v25.3/metrics/metrics.yaml new file mode 100644 index 00000000000..7fe4416b4ae --- /dev/null +++ b/src/current/_data/v25.3/metrics/metrics.yaml @@ -0,0 +1,17558 @@ +layers: +- name: APPLICATION + categories: + - name: CHANGEFEEDS + metrics: + - name: changefeed.commit_latency + exported_name: changefeed_commit_latency + description: 'Event commit latency: a difference between event MVCC timestamp and the time it was acknowledged by the downstream sink. If the sink batches events, then the difference between the oldest event in the batch and acknowledgement is recorded; Excludes latency during backfill' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the end-to-end lag between a committed change and that change applied at the destination. + essential: true + - name: changefeed.emitted_bytes + exported_name: changefeed_emitted_bytes + description: Bytes emitted by all feeds + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the throughput bytes being streamed from the CockroachDB cluster. + essential: true + - name: changefeed.emitted_messages + exported_name: changefeed_emitted_messages + description: Messages emitted by all feeds + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the rate of changes being streamed from the CockroachDB cluster. 
+ essential: true + - name: changefeed.error_retries + exported_name: changefeed_error_retries + description: Total retryable errors encountered by all changefeeds + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric tracks transient changefeed errors. Alert on "too many" errors, such as 50 retries in 15 minutes. For example, during a rolling upgrade this counter will increase because the changefeed jobs will restart following node restarts. There is an exponential backoff, up to 10 minutes. But if there is no rolling upgrade in process or other cluster maintenance, and the error rate is high, investigate the changefeed job. + essential: true + - name: changefeed.failures + exported_name: changefeed_failures + description: Total number of changefeed jobs which have failed + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric tracks the permanent changefeed job failures that the jobs system will not try to restart. Any increase in this counter should be investigated. An alert on this metric is recommended. + essential: true + - name: changefeed.running + exported_name: changefeed_running + description: Number of currently running changefeeds, including sinkless + y_axis_label: Changefeeds + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric tracks the total number of all running changefeeds. + essential: true + - name: jobs.changefeed.currently_paused + exported_name: jobs_changefeed_currently_paused + labeled_name: 'jobs{name: changefeed, status: currently_paused}' + description: Number of changefeed jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a changefeed job in a paused state for an extended period of time. Changefeed jobs should not be paused for a long time because the protected timestamp prevents garbage collection. + essential: true + - name: jobs.changefeed.protected_age_sec + exported_name: jobs_changefeed_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: changefeed}' + description: The age of the oldest PTS record protected by changefeed jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + how_to_use: Changefeeds use protected timestamps to protect the data from being garbage collected. Ensure the protected timestamp age does not significantly exceed the GC TTL zone configuration. Alert on this metric if the protected timestamp age is greater than 3 times the GC TTL. + essential: true + - name: DISTRIBUTED + metrics: + - name: distsender.errors.notleaseholder + exported_name: distsender_errors_notleaseholder + description: Number of NotLeaseHolderErrors encountered from replica-addressed RPCs + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Errors of this type are normal during elastic cluster topology changes when leaseholders are actively rebalancing. They are automatically retried. However they may create occasional response time spikes. In that case, this metric may provide the explanation of the cause. 
+ essential: true + - name: distsender.rpc.sent.nextreplicaerror + exported_name: distsender_rpc_sent_nextreplicaerror + description: Number of replica-addressed RPCs sent due to per-replica errors + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: RPC errors do not necessarily indicate a problem. This metric tracks remote procedure calls that return a status value other than "success". A non-success status of an RPC should not be misconstrued as a network transport issue. It is database code logic executed on another cluster node. The non-success status is a result of an orderly execution of an RPC that reports a specific logical condition. + essential: true + - name: NETWORKING + metrics: + - name: clock-offset.meannanos + exported_name: clock_offset_meannanos + description: Mean clock offset with other nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric gives the node's clock skew. In a well-configured environment, the actual clock skew would be in the sub-millisecond range. A skew exceeding 5 ms is likely due to an NTP service misconfiguration. Reducing the actual clock skew reduces the probability of uncertainty related conflicts and corresponding retries, which has a positive impact on workload performance. Conversely, a larger actual clock skew increases the probability of retries due to uncertainty conflicts, with potentially measurable adverse effects on workload performance. + essential: true + - name: rpc.connection.avg_round_trip_latency + exported_name: rpc_connection_avg_round_trip_latency + description: | + Sum of exponentially weighted moving average of round-trip latencies, as measured through a gRPC RPC. + + Dividing this Gauge by rpc.connection.healthy gives an approximation of average + latency, but the top-level round-trip-latency histogram is more useful. Instead, + users should consult the label families of this metric if they are available + (which requires prometheus and the cluster setting 'server.child_metrics.enabled'); + these provide per-peer moving averages. + + This metric does not track failed connection. A failed connection's contribution + is reset to zero. + y_axis_label: Latency + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric is helpful in understanding general network issues outside of CockroachDB that could be impacting the user’s workload. + essential: true + - name: rpc.connection.failures + exported_name: rpc_connection_failures + description: | + Counter of failed connections. + + This includes both the event in which a healthy connection terminates as well as + unsuccessful reconnection attempts. + + Connections that are terminated as part of local node shutdown are excluded. + Decommissioned peers are excluded. + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: See Description. + essential: true + - name: rpc.connection.healthy + exported_name: rpc_connection_healthy + description: Gauge of current connections in a healthy state (i.e. bidirectionally connected and heartbeating) + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description.
+ essential: true + - name: rpc.connection.healthy_nanos + exported_name: rpc_connection_healthy_nanos + description: |- + Gauge of nanoseconds of healthy connection time + + On the prometheus endpoint scraped with the cluster setting 'server.child_metrics.enabled' set, + the constituent parts of this metric are available on a per-peer basis and one can read off + for how long a given peer has been connected + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This can be useful for monitoring the stability and health of connections within your CockroachDB cluster. + essential: true + - name: rpc.connection.heartbeats + exported_name: rpc_connection_heartbeats + description: Counter of successful heartbeats. + y_axis_label: Heartbeats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: See Description. + essential: true + - name: rpc.connection.unhealthy + exported_name: rpc_connection_unhealthy + description: Gauge of current connections in an unhealthy state (not bidirectionally connected or heartbeating) + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: If the value of this metric is greater than 0, this could indicate a network partition. + essential: true + - name: rpc.connection.unhealthy_nanos + exported_name: rpc_connection_unhealthy_nanos + description: |- + Gauge of nanoseconds of unhealthy connection time. + + On the prometheus endpoint scraped with the cluster setting 'server.child_metrics.enabled' set, + the constituent parts of this metric are available on a per-peer basis and one can read off + for how long a given peer has been unreachable + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: If this duration is greater than 0, this could indicate how long a network partition has been occurring. + essential: true + - name: SQL + metrics: + - name: jobs.auto_create_stats.currently_paused + exported_name: jobs_auto_create_stats_currently_paused + labeled_name: 'jobs{name: auto_create_stats, status: currently_paused}' + description: Number of auto_create_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric is a high-level indicator that automatically generated statistics jobs are paused which can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. + essential: true + - name: jobs.auto_create_stats.currently_running + exported_name: jobs_auto_create_stats_currently_running + labeled_name: 'jobs{type: auto_create_stats, status: currently_running}' + description: Number of auto_create_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric tracks the number of active automatically generated statistics jobs that could also be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. 
+ essential: true + - name: jobs.auto_create_stats.resume_failed + exported_name: jobs_auto_create_stats_resume_failed + labeled_name: 'jobs.resume{name: auto_create_stats, status: failed}' + description: Number of auto_create_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is a high-level indicator that automatically generated table statistics is failing. Failed statistic creation can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. + essential: true + - name: jobs.backup.currently_paused + exported_name: jobs_backup_currently_paused + labeled_name: 'jobs{name: backup, status: currently_paused}' + description: Number of backup jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a backup job in a paused state for an extended period of time. In functional areas, a paused job can hold resources or have concurrency impact or some other negative consequence. Paused backup may break the recovery point objective (RPO). + essential: true + - name: jobs.backup.currently_running + exported_name: jobs_backup_currently_running + labeled_name: 'jobs{type: backup, status: currently_running}' + description: Number of backup jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.create_stats.currently_running + exported_name: jobs_create_stats_currently_running + labeled_name: 'jobs{type: create_stats, status: currently_running}' + description: Number of create_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric tracks the number of active create statistics jobs that may be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. + essential: true + - name: schedules.BACKUP.failed + exported_name: schedules_BACKUP_failed + labeled_name: 'schedules{name: BACKUP, status: failed}' + description: Number of BACKUP jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Monitor this metric and investigate backup job failures. + essential: true + - name: schedules.BACKUP.last-completed-time + exported_name: schedules_BACKUP_last_completed_time + description: The unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric + y_axis_label: Jobs + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + how_to_use: "Monitor this metric to ensure that backups are\n\t\t\t\t\t\tmeeting the recovery point objective (RPO). Each node\n\t\t\t\t\t\texports the time that it last completed a backup on behalf\n\t\t\t\t\t\tof the schedule. If a node is restarted, it will report 0\n\t\t\t\t\t\tuntil it completes a backup. 
If all nodes are restarted,\n\t\t\t\t\t\tmax() is 0 until a node completes a backup.\n\n\t\t\t\t\t\tTo make use of this metric, first, from each node, take the maximum\n\t\t\t\t\t\tover a rolling window equal to or greater than the backup frequency,\n\t\t\t\t\t\tand then take the maximum of those values across nodes. For example\n\t\t\t\t\t\twith a backup frequency of 60 minutes, monitor time() -\n\t\t\t\t\t\tmax_across_nodes(max_over_time(schedules_BACKUP_last_completed_time,\n\t\t\t\t\t\t60min))." + essential: true + - name: sql.conn.failures + exported_name: sql_conn_failures + description: Number of SQL connection failures + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is incremented whenever a connection attempt fails for any reason, including timeouts. + essential: true + - name: sql.conn.latency + exported_name: sql_conn_latency + description: Latency to establish and authenticate a SQL connection + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: These metrics characterize the database connection latency which can affect the application performance, for example, by having slow startup times. Connection failures are not recorded in these metrics. + essential: true + - name: sql.conns + exported_name: sql_conns + description: Number of open SQL connections + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric shows the number of connections as well as the distribution, or balancing, of connections across cluster nodes. An imbalance can lead to nodes becoming overloaded. Review Connection Pooling. + essential: true + - name: sql.ddl.count + exported_name: sql_ddl_count + description: Number of SQL DDL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. + essential: true + - name: sql.ddl.count.internal + exported_name: sql_ddl_count_internal + description: Number of SQL DDL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.delete.count + exported_name: sql_delete_count + labeled_name: 'sql.count{query_type: delete}' + description: Number of SQL DELETE statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. 
To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. + essential: true + - name: sql.delete.count.internal + exported_name: sql_delete_count_internal + labeled_name: 'sql.count{query_type: delete, query_internal: true}' + description: Number of SQL DELETE statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.contended_queries.count + exported_name: sql_distsql_contended_queries_count + description: Number of SQL queries that experienced contention + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is incremented whenever there is a non-trivial amount of contention experienced by a statement whether read-write or write-write conflicts. Monitor this metric to correlate possible workload performance issues to contention conflicts. + essential: true + - name: sql.failure.count + exported_name: sql_failure_count + description: Number of statements resulting in a planning or runtime error + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is a high-level indicator of workload and application degradation with query failures. Use the Insights page to find failed executions with their error code to troubleshoot or use application-level logs, if instrumented, to determine the cause of error. + essential: true + - name: sql.failure.count.internal + exported_name: sql_failure_count_internal + description: Number of statements resulting in a planning or runtime error (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.full.scan.count + exported_name: sql_full_scan_count + description: Number of full table or index scans + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is a high-level indicator of potentially suboptimal query plans in the workload that may require index tuning and maintenance. To identify the statements with a full table scan, use SHOW FULL TABLE SCAN or the SQL Activity Statements page with the corresponding metric time frame. The Statements page also includes explain plans and index recommendations. Not all full scans are necessarily bad especially over smaller tables. + essential: true + - name: sql.full.scan.count.internal + exported_name: sql_full_scan_count_internal + description: Number of full table or index scans (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insert.count + exported_name: sql_insert_count + labeled_name: 'sql.count{query_type: insert}' + description: Number of SQL INSERT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. 
For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. + essential: true + - name: sql.insert.count.internal + exported_name: sql_insert_count_internal + labeled_name: 'sql.count{query_type: insert, query_internal: true}' + description: Number of SQL INSERT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.mem.root.current + exported_name: sql_mem_root_current + description: Current sql statement memory usage for root + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: This metric shows how memory set aside for temporary materializations, such as hash tables and intermediary result sets, is utilized. Use this metric to optimize memory allocations based on long term observations. The maximum amount is set with --max_sql_memory. If the utilization of sql memory is persistently low, perhaps some portion of this memory allocation can be shifted to --cache. + essential: true + - name: sql.new_conns + exported_name: sql_new_conns + description: Number of SQL connections created + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: The rate of this metric shows how frequently new connections are being established. This can be useful in determining if a high rate of incoming new connections is causing additional load on the server due to a misconfigured application. + essential: true + - name: sql.select.count + exported_name: sql_select_count + labeled_name: 'sql.count{query_type: select}' + description: Number of SQL SELECT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. + essential: true + - name: sql.select.count.internal + exported_name: sql_select_count_internal + labeled_name: 'sql.count{query_type: select, query_internal: true}' + description: Number of SQL SELECT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.service.latency + exported_name: sql_service_latency + description: Latency of SQL request execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: These high-level metrics reflect workload performance. Monitor these metrics to understand latency over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. 
The Statements page has P90 Latency and P99 latency columns to enable correlation with this metric. + essential: true + - name: sql.service.latency.internal + exported_name: sql_service_latency_internal + description: Latency of SQL request execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.statements.active + exported_name: sql_statements_active + description: Number of currently active user SQL statements + y_axis_label: Active Statements + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This high-level metric reflects workload volume. + essential: true + - name: sql.statements.active.internal + exported_name: sql_statements_active_internal + description: Number of currently active user SQL statements (internal queries) + y_axis_label: SQL Internal Statements + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.txn.abort.count + exported_name: sql_txn_abort_count + description: Number of SQL transaction abort errors + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload performance. A persistently high number of SQL transaction abort errors may negatively impact the workload performance and needs to be investigated. + essential: true + - name: sql.txn.abort.count.internal + exported_name: sql_txn_abort_count_internal + description: Number of SQL transaction abort errors (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.begin.count + exported_name: sql_txn_begin_count + description: Number of SQL transaction BEGIN statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reflects workload volume by counting explicit transactions. Use this metric to determine whether explicit transactions can be refactored as implicit transactions (individual statements). + essential: true + - name: sql.txn.begin.count.internal + exported_name: sql_txn_begin_count_internal + description: Number of SQL transaction BEGIN statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit.count + exported_name: sql_txn_commit_count + description: Number of SQL transaction COMMIT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric shows the number of transactions that completed successfully. This metric can be used as a proxy to measure the number of successful explicit transactions. 
+ essential: true + - name: sql.txn.commit.count.internal + exported_name: sql_txn_commit_count_internal + description: Number of SQL transaction COMMIT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.latency + exported_name: sql_txn_latency + description: Latency of SQL transactions + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: These high-level metrics provide a latency histogram of all executed SQL transactions. These metrics provide an overview of the current SQL workload. + essential: true + - name: sql.txn.latency.internal + exported_name: sql_txn_latency_internal + description: Latency of SQL transactions (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.txn.rollback.count + exported_name: sql_txn_rollback_count + description: Number of SQL transaction ROLLBACK statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric shows the number of orderly transaction rollbacks. A persistently high number of rollbacks may negatively impact the workload performance and needs to be investigated. + essential: true + - name: sql.txn.rollback.count.internal + exported_name: sql_txn_rollback_count_internal + description: Number of SQL transaction ROLLBACK statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txns.open + exported_name: sql_txns_open + description: Number of currently open user SQL transactions + y_axis_label: Open SQL Transactions + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric should roughly correspond to the number of cores * 4. If this metric is consistently larger, scale out the cluster. + essential: true + - name: sql.txns.open.internal + exported_name: sql_txns_open_internal + description: Number of currently open user SQL transactions (internal queries) + y_axis_label: SQL Internal Statements + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.update.count + exported_name: sql_update_count + labeled_name: 'sql.count{query_type: update}' + description: Number of SQL UPDATE statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the SQL Activity pages to investigate interesting outliers or patterns. For example, on the Transactions page and the Statements page, sort on the Execution Count column. To find problematic sessions, on the Sessions page, sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. 
+ essential: true + - name: sql.update.count.internal + exported_name: sql_update_count_internal + labeled_name: 'sql.count{query_type: update, query_internal: true}' + description: Number of SQL UPDATE statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.serializable + exported_name: txn_restarts_serializable + description: Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: txn.restarts.txnaborted + exported_name: txn_restarts_txnaborted + description: Number of restarts due to an abort by a concurrent transaction (usually due to deadlock) + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: The errors tracked by this metric are generally due to deadlocks. Deadlocks can often be prevented with a considered transaction design. Identify the conflicting transactions involved in the deadlocks, then, if possible, redesign the business logic implementation prone to deadlocks. + essential: true + - name: txn.restarts.txnpush + exported_name: txn_restarts_txnpush + description: Number of restarts due to a transaction push failure + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: txn.restarts.unknown + exported_name: txn_restarts_unknown + description: Number of restarts due to unknown reasons + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: txn.restarts.writetooold + exported_name: txn_restarts_writetooold + description: Number of restarts due to a concurrent writer committing first + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review transaction contention best practices and performance tuning recipes. 
Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. + essential: true + - name: TTL + metrics: + - name: jobs.row_level_ttl.currently_paused + exported_name: jobs_row_level_ttl_currently_paused + labeled_name: 'jobs{name: row_level_ttl, status: currently_paused}' + description: Number of row_level_ttl jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor this metric to ensure the Row Level TTL job does not remain paused inadvertently for an extended period. + essential: true + - name: jobs.row_level_ttl.currently_running + exported_name: jobs_row_level_ttl_currently_running + labeled_name: 'jobs{type: row_level_ttl, status: currently_running}' + description: Number of row_level_ttl jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: Monitor this metric to ensure there are not too many Row Level TTL jobs running at the same time. Generally, this metric should be in the low single digits. + essential: true + - name: jobs.row_level_ttl.delete_duration + exported_name: jobs_row_level_ttl_delete_duration + description: Duration for delete requests during row level TTL. + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.num_active_spans + exported_name: jobs_row_level_ttl_num_active_spans + description: Number of active spans the TTL job is deleting from. + y_axis_label: num_active_spans + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.resume_completed + exported_name: jobs_row_level_ttl_resume_completed + labeled_name: 'jobs.resume{name: row_level_ttl, status: completed}' + description: Number of row_level_ttl jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: If Row Level TTL is enabled, this metric should be nonzero and correspond to the ttl_cron setting that was chosen. If this metric is zero, it means the job is not running. + essential: true + - name: jobs.row_level_ttl.resume_failed + exported_name: jobs_row_level_ttl_resume_failed + labeled_name: 'jobs.resume{name: row_level_ttl, status: failed}' + description: Number of row_level_ttl jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric should remain at zero. Repeated errors mean the Row Level TTL job is not deleting data. + essential: true + - name: jobs.row_level_ttl.rows_deleted + exported_name: jobs_row_level_ttl_rows_deleted + description: Number of rows deleted by the row level TTL job. + y_axis_label: num_rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Correlate this metric with the metric jobs.row_level_ttl.rows_selected to ensure all the rows that should be deleted are actually getting deleted. + essential: true + - name: jobs.row_level_ttl.rows_selected + exported_name: jobs_row_level_ttl_rows_selected + description: Number of rows selected for deletion by the row level TTL job. 
+ y_axis_label: num_rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Correlate this metric with the metric jobs.row_level_ttl.rows_deleted to ensure all the rows that should be deleted are actually getting deleted. + essential: true + - name: jobs.row_level_ttl.select_duration + exported_name: jobs_row_level_ttl_select_duration + description: Duration for select requests during row level TTL. + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.span_total_duration + exported_name: jobs_row_level_ttl_span_total_duration + description: Duration for processing a span during row level TTL. + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.total_expired_rows + exported_name: jobs_row_level_ttl_total_expired_rows + description: Approximate number of rows that have expired the TTL on the TTL table. + y_axis_label: total_expired_rows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: jobs.row_level_ttl.total_rows + exported_name: jobs_row_level_ttl_total_rows + description: Approximate number of rows on the TTL table. + y_axis_label: total_rows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: See Description. + essential: true + - name: schedules.scheduled-row-level-ttl-executor.failed + exported_name: schedules_scheduled_row_level_ttl_executor_failed + labeled_name: 'schedules{name: scheduled-row-level-ttl-executor, status: failed}' + description: Number of scheduled-row-level-ttl-executor jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Monitor this metric to ensure the Row Level TTL job is running. If it is non-zero, it means the job could not be created. 
+ essential: true + - name: UNSET + metrics: + - name: auth.cert.conn.latency + exported_name: auth_cert_conn_latency + description: Latency to establish and authenticate a SQL connection using certificate + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.gss.conn.latency + exported_name: auth_gss_conn_latency + description: Latency to establish and authenticate a SQL connection using GSS + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.jwt.conn.latency + exported_name: auth_jwt_conn_latency + description: Latency to establish and authenticate a SQL connection using JWT Token + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.ldap.conn.latency + exported_name: auth_ldap_conn_latency + description: Latency to establish and authenticate a SQL connection using LDAP + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.password.conn.latency + exported_name: auth_password_conn_latency + description: Latency to establish and authenticate a SQL connection using password + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: auth.scram.conn.latency + exported_name: auth_scram_conn_latency + description: Latency to establish and authenticate a SQL connection using SCRAM + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: backup.last-failed-time.kms-inaccessible + exported_name: backup_last_failed_time_kms_inaccessible + description: The unix timestamp of the most recent failure of backup due to errKMSInaccessible by a backup specified as maintaining this metric + y_axis_label: Jobs + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: changefeed.admit_latency + exported_name: changefeed_admit_latency + description: 'Event admission latency: a difference between event MVCC timestamp and the time it was admitted into changefeed pipeline; Note: this metric includes the time spent waiting until event can be processed due to backpressure or time spent resolving schema descriptors. 
Also note, this metric excludes latency during backfill' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.aggregator_progress + exported_name: changefeed_aggregator_progress + description: The earliest timestamp up to which any aggregator is guaranteed to have emitted all values for + y_axis_label: Unix Timestamp Nanoseconds + type: GAUGE + unit: TIMESTAMP_NS + aggregation: AVG + derivative: NONE + - name: changefeed.backfill_count + exported_name: changefeed_backfill_count + description: Number of changefeeds currently executing backfill + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.backfill_pending_ranges + exported_name: changefeed_backfill_pending_ranges + description: Number of ranges in an ongoing backfill that are yet to be fully emitted + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.batch_reduction_count + exported_name: changefeed_batch_reduction_count + description: Number of times a changefeed aggregator node attempted to reduce the size of message batches it emitted to the sink + y_axis_label: Batch Size Reductions + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.allocated_mem + exported_name: changefeed_buffer_entries_allocated_mem + description: Current quota pool memory allocation + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.allocated_mem.aggregator + exported_name: changefeed_buffer_entries_allocated_mem_aggregator + description: Current quota pool memory allocation - between the kvfeed and the sink + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.allocated_mem.rangefeed + exported_name: changefeed_buffer_entries_allocated_mem_rangefeed + description: Current quota pool memory allocation - between the rangefeed and the kvfeed + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.buffer_entries.flush + exported_name: changefeed_buffer_entries_flush + description: Number of flush elements added to the buffer + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.flush.aggregator + exported_name: changefeed_buffer_entries_flush_aggregator + description: Number of flush elements added to the buffer - between the kvfeed and the sink + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.flush.rangefeed + exported_name: changefeed_buffer_entries_flush_rangefeed + description: Number of flush elements added to the buffer - between the rangefeed and the kvfeed + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.in + exported_name: changefeed_buffer_entries_in + description: Total entries entering the buffer between raft and changefeed sinks + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.in.aggregator + exported_name: changefeed_buffer_entries_in_aggregator + description: Total entries entering the buffer between raft and changefeed sinks - between the kvfeed 
and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.in.rangefeed + exported_name: changefeed_buffer_entries_in_rangefeed + description: Total entries entering the buffer between raft and changefeed sinks - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.kv + exported_name: changefeed_buffer_entries_kv + description: Number of kv elements added to the buffer + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.kv.aggregator + exported_name: changefeed_buffer_entries_kv_aggregator + description: Number of kv elements added to the buffer - between the kvfeed and the sink + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.kv.rangefeed + exported_name: changefeed_buffer_entries_kv_rangefeed + description: Number of kv elements added to the buffer - between the rangefeed and the kvfeed + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.out + exported_name: changefeed_buffer_entries_out + description: Total entries leaving the buffer between raft and changefeed sinks + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.out.aggregator + exported_name: changefeed_buffer_entries_out_aggregator + description: Total entries leaving the buffer between raft and changefeed sinks - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.out.rangefeed + exported_name: changefeed_buffer_entries_out_rangefeed + description: Total entries leaving the buffer between raft and changefeed sinks - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.released + exported_name: changefeed_buffer_entries_released + description: Total entries processed, emitted and acknowledged by the sinks + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.released.aggregator + exported_name: changefeed_buffer_entries_released_aggregator + description: Total entries processed, emitted and acknowledged by the sinks - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.released.rangefeed + exported_name: changefeed_buffer_entries_released_rangefeed + description: Total entries processed, emitted and acknowledged by the sinks - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.resolved + exported_name: changefeed_buffer_entries_resolved + description: Number of resolved elements added to the buffer + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
changefeed.buffer_entries.resolved.aggregator + exported_name: changefeed_buffer_entries_resolved_aggregator + description: Number of resolved elements added to the buffer - between the kvfeed and the sink + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries.resolved.rangefeed + exported_name: changefeed_buffer_entries_resolved_rangefeed + description: Number of resolved elements added to the buffer - between the rangefeed and the kvfeed + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.acquired + exported_name: changefeed_buffer_entries_mem_acquired + description: Total amount of memory acquired for entries as they enter the system + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.acquired.aggregator + exported_name: changefeed_buffer_entries_mem_acquired_aggregator + description: Total amount of memory acquired for entries as they enter the system - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.acquired.rangefeed + exported_name: changefeed_buffer_entries_mem_acquired_rangefeed + description: Total amount of memory acquired for entries as they enter the system - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.released + exported_name: changefeed_buffer_entries_mem_released + description: Total amount of memory released by the entries after they have been emitted + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.released.aggregator + exported_name: changefeed_buffer_entries_mem_released_aggregator + description: Total amount of memory released by the entries after they have been emitted - between the kvfeed and the sink + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_entries_mem.released.rangefeed + exported_name: changefeed_buffer_entries_mem_released_rangefeed + description: Total amount of memory released by the entries after they have been emitted - between the rangefeed and the kvfeed + y_axis_label: Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_pushback_nanos + exported_name: changefeed_buffer_pushback_nanos + description: Total time spent waiting while the buffer was full + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_pushback_nanos.aggregator + exported_name: changefeed_buffer_pushback_nanos_aggregator + description: Total time spent waiting while the buffer was full - between the kvfeed and the sink + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.buffer_pushback_nanos.rangefeed + exported_name: changefeed_buffer_pushback_nanos_rangefeed + description: Total time spent waiting while the buffer was full - between the rangefeed and the kvfeed + y_axis_label: 
Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.bytes.messages_pushback_nanos + exported_name: changefeed_bytes_messages_pushback_nanos + description: Total time spent throttled for bytes quota + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.checkpoint.create_nanos + exported_name: changefeed_checkpoint_create_nanos + description: Time it takes to create a changefeed checkpoint + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint.span_count + exported_name: changefeed_checkpoint_span_count + description: Number of spans in a changefeed checkpoint + y_axis_label: Spans + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint.timestamp_count + exported_name: changefeed_checkpoint_timestamp_count + description: Number of unique timestamps in a changefeed checkpoint + y_axis_label: Timestamps + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint.total_bytes + exported_name: changefeed_checkpoint_total_bytes + description: Total size of a changefeed checkpoint + y_axis_label: Bytes + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint_hist_nanos + exported_name: changefeed_checkpoint_hist_nanos + description: Time spent checkpointing changefeed progress + y_axis_label: Changefeeds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.checkpoint_progress + exported_name: changefeed_checkpoint_progress + description: The earliest timestamp of any changefeed's persisted checkpoint (values prior to this timestamp will never need to be re-emitted) + y_axis_label: Unix Timestamp Nanoseconds + type: GAUGE + unit: TIMESTAMP_NS + aggregation: AVG + derivative: NONE + - name: changefeed.cloudstorage_buffered_bytes + exported_name: changefeed_cloudstorage_buffered_bytes + description: The number of bytes buffered in cloudstorage sink files which have not been emitted yet + y_axis_label: Bytes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.emitted_batch_sizes + exported_name: changefeed_emitted_batch_sizes + description: Size of batches emitted by all feeds + y_axis_label: Number of Messages in Batch + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.filtered_messages + exported_name: changefeed_filtered_messages + description: Messages filtered out by all feeds. This count does not include the number of messages that may be filtered due to the range constraints. 
+ y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.flush.messages_pushback_nanos + exported_name: changefeed_flush_messages_pushback_nanos + description: Total time spent throttled for flush quota + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.flush_hist_nanos + exported_name: changefeed_flush_hist_nanos + description: Time spent flushing messages across all changefeeds + y_axis_label: Changefeeds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.flushed_bytes + exported_name: changefeed_flushed_bytes + description: Bytes emitted by all feeds; maybe different from changefeed.emitted_bytes when compression is enabled + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.flushes + exported_name: changefeed_flushes + description: Total flushes across all feeds + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.forwarded_resolved_messages + exported_name: changefeed_forwarded_resolved_messages + description: Resolved timestamps forwarded from the change aggregator to the change frontier + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.frontier_updates + exported_name: changefeed_frontier_updates + description: Number of change frontier updates across all feeds + y_axis_label: Updates + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.internal_retry_message_count + exported_name: changefeed_internal_retry_message_count + description: Number of messages for which an attempt to retry them within an aggregator node was made + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.kafka_throttling_hist_nanos + exported_name: changefeed_kafka_throttling_hist_nanos + description: Time spent in throttling due to exceeding kafka quota + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.lagging_ranges + exported_name: changefeed_lagging_ranges + description: The number of ranges considered to be lagging behind + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.max_behind_nanos + exported_name: changefeed_max_behind_nanos + description: The most any changefeed's persisted checkpoint is behind the present + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.message_size_hist + exported_name: changefeed_message_size_hist + description: Message size histogram + y_axis_label: Bytes + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: changefeed.messages.messages_pushback_nanos + exported_name: changefeed_messages_messages_pushback_nanos + description: Total time spent throttled for messages quota + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.network.bytes_in + exported_name: changefeed_network_bytes_in + description: The number of bytes received from the network by changefeeds + y_axis_label: Bytes + type: COUNTER + unit: 
COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.network.bytes_out + exported_name: changefeed_network_bytes_out + description: The number of bytes sent over the network by changefeeds + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.nprocs_consume_event_nanos + exported_name: changefeed_nprocs_consume_event_nanos + description: Total time spent waiting to add an event to the parallel consumer + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.nprocs_flush_nanos + exported_name: changefeed_nprocs_flush_nanos + description: Total time spent idle waiting for the parallel consumer to flush + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.nprocs_in_flight_count + exported_name: changefeed_nprocs_in_flight_count + description: Number of buffered events in the parallel consumer + y_axis_label: Count of Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_in_flight_keys + exported_name: changefeed_parallel_io_in_flight_keys + description: The number of keys currently in-flight which may contend with batches pending to be emitted + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_pending_rows + exported_name: changefeed_parallel_io_pending_rows + description: Number of rows which are blocked from being sent due to conflicting in-flight keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_queue_nanos + exported_name: changefeed_parallel_io_queue_nanos + description: Time that outgoing requests to the sink spend waiting in a queue due to in-flight requests with conflicting keys + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.parallel_io_result_queue_nanos + exported_name: changefeed_parallel_io_result_queue_nanos + description: Time that incoming results from the sink spend waiting in parallel io emitter before they are acknowledged by the changefeed + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.queue_time_nanos + exported_name: changefeed_queue_time_nanos + description: Time KV event spent waiting to be processed + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.schema_registry.registrations + exported_name: changefeed_schema_registry_registrations + description: Number of registration attempts with the schema registry + y_axis_label: Registrations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.schema_registry.retry_count + exported_name: changefeed_schema_registry_retry_count + description: Number of retries encountered when sending requests to the schema registry + y_axis_label: Retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.schemafeed.table_history_scans + exported_name: changefeed_schemafeed_table_history_scans + description: The number of table history scans during polling + y_axis_label: Counts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: changefeed.schemafeed.table_metadata_nanos + exported_name: changefeed_schemafeed_table_metadata_nanos + description: Time blocked while verifying table metadata histories + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.sink_batch_hist_nanos + exported_name: changefeed_sink_batch_hist_nanos + description: Time spent batched in the sink buffer before being flushed and acknowledged + y_axis_label: Changefeeds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.sink_errors + exported_name: changefeed_sink_errors + description: Number of changefeed errors caused by the sink + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.sink_io_inflight + exported_name: changefeed_sink_io_inflight + description: The number of keys currently inflight as IO requests being sent to the sink + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.size_based_flushes + exported_name: changefeed_size_based_flushes + description: Total size based flushes across all feeds + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.stage.checkpoint_job_progress.latency + exported_name: changefeed_stage_checkpoint_job_progress_latency + description: 'Latency of the changefeed stage: checkpointing job progress' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.downstream_client_send.latency + exported_name: changefeed_stage_downstream_client_send_latency + description: 'Latency of the changefeed stage: flushing messages from the sink''s client to its downstream. This includes sends that failed for most but not all sinks.' 
+ y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.emit_row.latency + exported_name: changefeed_stage_emit_row_latency + description: 'Latency of the changefeed stage: emitting row to sink' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.encode.latency + exported_name: changefeed_stage_encode_latency + description: 'Latency of the changefeed stage: encoding data' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.kv_feed_buffer.latency + exported_name: changefeed_stage_kv_feed_buffer_latency + description: 'Latency of the changefeed stage: waiting to buffer kv events' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.kv_feed_wait_for_table_event.latency + exported_name: changefeed_stage_kv_feed_wait_for_table_event_latency + description: 'Latency of the changefeed stage: waiting for a table schema event to join to the kv event' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.rangefeed_buffer_checkpoint.latency + exported_name: changefeed_stage_rangefeed_buffer_checkpoint_latency + description: 'Latency of the changefeed stage: buffering rangefeed checkpoint events' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.stage.rangefeed_buffer_value.latency + exported_name: changefeed_stage_rangefeed_buffer_value_latency + description: 'Latency of the changefeed stage: buffering rangefeed value events' + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.total_ranges + exported_name: changefeed_total_ranges + description: The total number of ranges being watched by changefeed aggregators + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: changefeed.usage.error_count + exported_name: changefeed_usage_error_count + description: Count of errors encountered while generating usage metrics for changefeeds + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: changefeed.usage.query_duration + exported_name: changefeed_usage_query_duration + description: Time taken by the queries used to generate usage metrics for changefeeds + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: changefeed.usage.table_bytes + exported_name: changefeed_usage_table_bytes + description: Aggregated number of bytes of data per table watched by changefeeds + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: clock-offset.medianabsdevnanos + exported_name: clock_offset_medianabsdevnanos + description: Median Absolute Deviation (MAD) with other nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: clock-offset.mediannanos + exported_name: clock_offset_mediannanos + description: Median clock offset with other nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: clock-offset.stddevnanos + exported_name: clock_offset_stddevnanos + description: Stddev clock offset with other 
nodes + y_axis_label: Clock Offset + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: cloud.conns_opened + exported_name: cloud_conns_opened + description: HTTP connections opened by cloud operations + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.conns_reused + exported_name: cloud_conns_reused + description: HTTP connections reused by cloud operations + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.listing_results + exported_name: cloud_listing_results + description: Listing results by all cloud operations + y_axis_label: Results + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.listings + exported_name: cloud_listings + description: Listing operations by all cloud operations + y_axis_label: Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.open_readers + exported_name: cloud_open_readers + description: Currently open readers for cloud IO + y_axis_label: Readers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: cloud.open_writers + exported_name: cloud_open_writers + description: Currently open writers for cloud IO + y_axis_label: Writers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: cloud.read_bytes + exported_name: cloud_read_bytes + description: Bytes read from all cloud operations + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.readers_opened + exported_name: cloud_readers_opened + description: Readers opened by all cloud operations + y_axis_label: Files + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.tls_handshakes + exported_name: cloud_tls_handshakes + description: TLS handshakes done by cloud operations + y_axis_label: Handshakes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.write_bytes + exported_name: cloud_write_bytes + description: Bytes written by all cloud operations + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cloud.writers_opened + exported_name: cloud_writers_opened + description: Writers opened by all cloud operations + y_axis_label: files + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: cluster.preserve-downgrade-option.last-updated + exported_name: cluster_preserve_downgrade_option_last_updated + description: Unix timestamp of last updated time for cluster.preserve_downgrade_option + y_axis_label: Timestamp + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: distsender.batch_requests.cross_region.bytes + exported_name: distsender_batch_requests_cross_region_bytes + description: "Total byte count of replica-addressed batch requests processed cross\n\t\tregion when region tiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_requests.cross_zone.bytes + exported_name: distsender_batch_requests_cross_zone_bytes + description: "Total byte count of replica-addressed batch requests processed cross\n\t\tzone within the same region when zone tiers are configured. 
If region tiers\n\t\tare not set, it is assumed to be within the same region. To ensure accurate\n\t\tmonitoring of cross-zone data transfer, region and zone tiers should be\n\t\tconsistently configured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_requests.replica_addressed.bytes + exported_name: distsender_batch_requests_replica_addressed_bytes + description: Total byte count of replica-addressed batch requests processed + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_responses.cross_region.bytes + exported_name: distsender_batch_responses_cross_region_bytes + description: "Total byte count of replica-addressed batch responses received cross\n\t\tregion when region tiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_responses.cross_zone.bytes + exported_name: distsender_batch_responses_cross_zone_bytes + description: "Total byte count of replica-addressed batch responses received cross\n\t\tzone within the same region when zone tiers are configured. If region tiers\n\t\tare not set, it is assumed to be within the same region. To ensure accurate\n\t\tmonitoring of cross-zone data transfer, region and zone tiers should be\n\t\tconsistently configured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batch_responses.replica_addressed.bytes + exported_name: distsender_batch_responses_replica_addressed_bytes + description: Total byte count of replica-addressed batch responses received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches + exported_name: distsender_batches + description: Number of batches processed + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.async.in_progress + exported_name: distsender_batches_async_in_progress + description: Number of partial batches currently being executed asynchronously + y_axis_label: Partial Batches + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.batches.async.sent + exported_name: distsender_batches_async_sent + description: Number of partial batches sent asynchronously + y_axis_label: Partial Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.async.throttled + exported_name: distsender_batches_async_throttled + description: Number of partial batches not sent asynchronously due to throttling + y_axis_label: Partial Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.async.throttled_cumulative_duration_nanos + exported_name: distsender_batches_async_throttled_cumulative_duration_nanos + description: Cumulative duration of partial batches being throttled (in nanoseconds) + y_axis_label: Throttled Duration + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.batches.partial + exported_name: distsender_batches_partial + description: Number of partial batches processed after being divided on range boundaries + y_axis_label: Partial Batches + 
type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.count + exported_name: distsender_circuit_breaker_replicas_count + description: Number of replicas currently tracked by DistSender circuit breakers + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.circuit_breaker.replicas.probes.failure + exported_name: distsender_circuit_breaker_replicas_probes_failure + description: Cumulative number of failed DistSender replica circuit breaker probes + y_axis_label: Probes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.probes.running + exported_name: distsender_circuit_breaker_replicas_probes_running + description: Number of currently running DistSender replica circuit breaker probes + y_axis_label: Probes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.circuit_breaker.replicas.probes.success + exported_name: distsender_circuit_breaker_replicas_probes_success + description: Cumulative number of successful DistSender replica circuit breaker probes + y_axis_label: Probes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.requests.cancelled + exported_name: distsender_circuit_breaker_replicas_requests_cancelled + description: Cumulative number of requests cancelled when DistSender replica circuit breakers trip + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.requests.rejected + exported_name: distsender_circuit_breaker_replicas_requests_rejected + description: Cumulative number of requests rejected by tripped DistSender replica circuit breakers + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.circuit_breaker.replicas.tripped + exported_name: distsender_circuit_breaker_replicas_tripped + description: Number of DistSender replica circuit breakers currently tripped + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.circuit_breaker.replicas.tripped_events + exported_name: distsender_circuit_breaker_replicas_tripped_events + description: Cumulative number of DistSender replica circuit breakers tripped over time + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.errors.inleasetransferbackoffs + exported_name: distsender_errors_inleasetransferbackoffs + description: Number of times backed off due to NotLeaseHolderErrors during lease transfer + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.catchup_ranges + exported_name: distsender_rangefeed_catchup_ranges + description: | + Number of ranges in catchup mode + + This counts the number of ranges with an active rangefeed that are performing catchup scan. 
+ y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangefeed.catchup_ranges_waiting_client_side + exported_name: distsender_rangefeed_catchup_ranges_waiting_client_side + description: Number of ranges waiting on the client-side limiter to perform catchup scans + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangefeed.error_catchup_ranges + exported_name: distsender_rangefeed_error_catchup_ranges + description: Number of ranges in catchup mode which experienced an error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.local_ranges + exported_name: distsender_rangefeed_local_ranges + description: Number of ranges connected to local node. + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangefeed.restart_ranges + exported_name: distsender_rangefeed_restart_ranges + description: Number of ranges that were restarted due to transient errors + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.logical_ops_missing + exported_name: distsender_rangefeed_retry_logical_ops_missing + description: Number of ranges that encountered retryable LOGICAL_OPS_MISSING error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.manual_range_split + exported_name: distsender_rangefeed_retry_manual_range_split + description: Number of ranges that encountered retryable MANUAL_RANGE_SPLIT error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.no_leaseholder + exported_name: distsender_rangefeed_retry_no_leaseholder + description: Number of ranges that encountered retryable NO_LEASEHOLDER error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.node_not_found + exported_name: distsender_rangefeed_retry_node_not_found + description: Number of ranges that encountered retryable node not found error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.raft_snapshot + exported_name: distsender_rangefeed_retry_raft_snapshot + description: Number of ranges that encountered retryable RAFT_SNAPSHOT error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_key_mismatch + exported_name: distsender_rangefeed_retry_range_key_mismatch + description: Number of ranges that encountered retryable range key mismatch error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_merged + exported_name: distsender_rangefeed_retry_range_merged + description: Number of ranges that encountered retryable RANGE_MERGED error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_not_found + exported_name: distsender_rangefeed_retry_range_not_found + description: Number of ranges that encountered retryable range not 
found error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.range_split + exported_name: distsender_rangefeed_retry_range_split + description: Number of ranges that encountered retryable RANGE_SPLIT error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.rangefeed_closed + exported_name: distsender_rangefeed_retry_rangefeed_closed + description: Number of ranges that encountered retryable RANGEFEED_CLOSED error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.replica_removed + exported_name: distsender_rangefeed_retry_replica_removed + description: Number of ranges that encountered retryable REPLICA_REMOVED error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.send + exported_name: distsender_rangefeed_retry_send + description: Number of ranges that encountered retryable send error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.slow_consumer + exported_name: distsender_rangefeed_retry_slow_consumer + description: Number of ranges that encountered retryable SLOW_CONSUMER error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.store_not_found + exported_name: distsender_rangefeed_retry_store_not_found + description: Number of ranges that encountered retryable store not found error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.retry.unknown + exported_name: distsender_rangefeed_retry_unknown + description: Number of ranges that encountered retryable unknown error + y_axis_label: Ranges + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rangefeed.total_ranges + exported_name: distsender_rangefeed_total_ranges + description: | + Number of ranges executing rangefeed + + This counts the number of ranges with an active rangefeed. + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: distsender.rangelookups + exported_name: distsender_rangelookups + description: Number of range lookups + y_axis_label: Range Lookups + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.addsstable.sent + exported_name: distsender_rpc_addsstable_sent + description: |- + Number of AddSSTable requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminchangereplicas.sent + exported_name: distsender_rpc_adminchangereplicas_sent + description: |- + Number of AdminChangeReplicas requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminmerge.sent + exported_name: distsender_rpc_adminmerge_sent + description: |- + Number of AdminMerge requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminrelocaterange.sent + exported_name: distsender_rpc_adminrelocaterange_sent + description: |- + Number of AdminRelocateRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminscatter.sent + exported_name: distsender_rpc_adminscatter_sent + description: |- + Number of AdminScatter requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminsplit.sent + exported_name: distsender_rpc_adminsplit_sent + description: |- + Number of AdminSplit requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.admintransferlease.sent + exported_name: distsender_rpc_admintransferlease_sent + description: |- + Number of AdminTransferLease requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.adminunsplit.sent + exported_name: distsender_rpc_adminunsplit_sent + description: |- + Number of AdminUnsplit requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.barrier.sent + exported_name: distsender_rpc_barrier_sent + description: |- + Number of Barrier requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.checkconsistency.sent + exported_name: distsender_rpc_checkconsistency_sent + description: |- + Number of CheckConsistency requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.clearrange.sent + exported_name: distsender_rpc_clearrange_sent + description: |- + Number of ClearRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.computechecksum.sent + exported_name: distsender_rpc_computechecksum_sent + description: |- + Number of ComputeChecksum requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.conditionalput.sent + exported_name: distsender_rpc_conditionalput_sent + description: |- + Number of ConditionalPut requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.delete.sent + exported_name: distsender_rpc_delete_sent + description: |- + Number of Delete requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.deleterange.sent + exported_name: distsender_rpc_deleterange_sent + description: |- + Number of DeleteRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.endtxn.sent + exported_name: distsender_rpc_endtxn_sent + description: |- + Number of EndTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.ambiguousresulterrtype + exported_name: distsender_rpc_err_ambiguousresulterrtype + description: | + Number of AmbiguousResultErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.batchtimestampbeforegcerrtype + exported_name: distsender_rpc_err_batchtimestampbeforegcerrtype + description: | + Number of BatchTimestampBeforeGCErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.communicationerrtype + exported_name: distsender_rpc_err_communicationerrtype + description: | + Number of CommunicationErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. 
Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.conditionfailederrtype + exported_name: distsender_rpc_err_conditionfailederrtype + description: | + Number of ConditionFailedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(0) + exported_name: distsender_rpc_err_errordetailtype_0_ + description: | + Number of ErrorDetailType(0) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(15) + exported_name: distsender_rpc_err_errordetailtype_15_ + description: | + Number of ErrorDetailType(15) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(19) + exported_name: distsender_rpc_err_errordetailtype_19_ + description: | + Number of ErrorDetailType(19) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(20) + exported_name: distsender_rpc_err_errordetailtype_20_ + description: | + Number of ErrorDetailType(20) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(21) + exported_name: distsender_rpc_err_errordetailtype_21_ + description: | + Number of ErrorDetailType(21) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(23) + exported_name: distsender_rpc_err_errordetailtype_23_ + description: | + Number of ErrorDetailType(23) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(24) + exported_name: distsender_rpc_err_errordetailtype_24_ + description: | + Number of ErrorDetailType(24) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(29) + exported_name: distsender_rpc_err_errordetailtype_29_ + description: | + Number of ErrorDetailType(29) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(30) + exported_name: distsender_rpc_err_errordetailtype_30_ + description: | + Number of ErrorDetailType(30) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.errordetailtype(33) + exported_name: distsender_rpc_err_errordetailtype_33_ + description: | + Number of ErrorDetailType(33) errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. 
Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.exclusionviolationerrtype + exported_name: distsender_rpc_err_exclusionviolationerrtype + description: | + Number of ExclusionViolationErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.indeterminatecommiterrtype + exported_name: distsender_rpc_err_indeterminatecommiterrtype + description: | + Number of IndeterminateCommitErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.integeroverflowerrtype + exported_name: distsender_rpc_err_integeroverflowerrtype + description: | + Number of IntegerOverflowErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.intentmissingerrtype + exported_name: distsender_rpc_err_intentmissingerrtype + description: | + Number of IntentMissingErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.internalerrtype + exported_name: distsender_rpc_err_internalerrtype + description: | + Number of InternalErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.invalidleaseerrtype + exported_name: distsender_rpc_err_invalidleaseerrtype + description: | + Number of InvalidLeaseErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.leaserejectederrtype + exported_name: distsender_rpc_err_leaserejectederrtype + description: | + Number of LeaseRejectedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.lockconflicterrtype + exported_name: distsender_rpc_err_lockconflicterrtype + description: | + Number of LockConflictErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.mergeinprogresserrtype + exported_name: distsender_rpc_err_mergeinprogresserrtype + description: | + Number of MergeInProgressErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.mintimestampboundunsatisfiableerrtype + exported_name: distsender_rpc_err_mintimestampboundunsatisfiableerrtype + description: | + Number of MinTimestampBoundUnsatisfiableErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.mvcchistorymutationerrtype + exported_name: distsender_rpc_err_mvcchistorymutationerrtype + description: | + Number of MVCCHistoryMutationErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. 
Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.nodeunavailableerrtype + exported_name: distsender_rpc_err_nodeunavailableerrtype + description: | + Number of NodeUnavailableErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.notleaseholdererrtype + exported_name: distsender_rpc_err_notleaseholdererrtype + description: | + Number of NotLeaseHolderErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.oprequirestxnerrtype + exported_name: distsender_rpc_err_oprequirestxnerrtype + description: | + Number of OpRequiresTxnErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.optimisticevalconflictserrtype + exported_name: distsender_rpc_err_optimisticevalconflictserrtype + description: | + Number of OptimisticEvalConflictsErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.proxyfailederrtype + exported_name: distsender_rpc_err_proxyfailederrtype + description: | + Number of ProxyFailedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.raftgroupdeletederrtype + exported_name: distsender_rpc_err_raftgroupdeletederrtype + description: | + Number of RaftGroupDeletedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.rangefeedretryerrtype + exported_name: distsender_rpc_err_rangefeedretryerrtype + description: | + Number of RangeFeedRetryErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.rangekeymismatcherrtype + exported_name: distsender_rpc_err_rangekeymismatcherrtype + description: | + Number of RangeKeyMismatchErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.rangenotfounderrtype + exported_name: distsender_rpc_err_rangenotfounderrtype + description: | + Number of RangeNotFoundErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.readwithinuncertaintyintervalerrtype + exported_name: distsender_rpc_err_readwithinuncertaintyintervalerrtype + description: | + Number of ReadWithinUncertaintyIntervalErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.refreshfailederrtype + exported_name: distsender_rpc_err_refreshfailederrtype + description: | + Number of RefreshFailedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. 
Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.replicacorruptionerrtype + exported_name: distsender_rpc_err_replicacorruptionerrtype + description: | + Number of ReplicaCorruptionErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.replicatooolderrtype + exported_name: distsender_rpc_err_replicatooolderrtype + description: | + Number of ReplicaTooOldErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.replicaunavailableerrtype + exported_name: distsender_rpc_err_replicaunavailableerrtype + description: | + Number of ReplicaUnavailableErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.storenotfounderrtype + exported_name: distsender_rpc_err_storenotfounderrtype + description: | + Number of StoreNotFoundErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionabortederrtype + exported_name: distsender_rpc_err_transactionabortederrtype + description: | + Number of TransactionAbortedErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. 
+ y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionpusherrtype + exported_name: distsender_rpc_err_transactionpusherrtype + description: | + Number of TransactionPushErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionretryerrtype + exported_name: distsender_rpc_err_transactionretryerrtype + description: | + Number of TransactionRetryErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionretrywithprotorefresherrtype + exported_name: distsender_rpc_err_transactionretrywithprotorefresherrtype + description: | + Number of TransactionRetryWithProtoRefreshErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.transactionstatuserrtype + exported_name: distsender_rpc_err_transactionstatuserrtype + description: | + Number of TransactionStatusErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.txnalreadyencounterederrtype + exported_name: distsender_rpc_err_txnalreadyencounterederrtype + description: | + Number of TxnAlreadyEncounteredErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.unsupportedrequesterrtype + exported_name: distsender_rpc_err_unsupportedrequesterrtype + description: | + Number of UnsupportedRequestErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. 
Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.writeintenterrtype + exported_name: distsender_rpc_err_writeintenterrtype + description: | + Number of WriteIntentErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.err.writetooolderrtype + exported_name: distsender_rpc_err_writetooolderrtype + description: | + Number of WriteTooOldErrType errors received replica-bound RPCs + + This counts how often error of the specified type was received back from replicas + as part of executing possibly range-spanning requests. Failures to reach the target + replica will be accounted for as 'roachpb.CommunicationErrType' and unclassified + errors as 'roachpb.InternalErrType'. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.excise.sent + exported_name: distsender_rpc_excise_sent + description: |- + Number of Excise requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.export.sent + exported_name: distsender_rpc_export_sent + description: |- + Number of Export requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.gc.sent + exported_name: distsender_rpc_gc_sent + description: |- + Number of GC requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.get.sent + exported_name: distsender_rpc_get_sent + description: |- + Number of Get requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.heartbeattxn.sent + exported_name: distsender_rpc_heartbeattxn_sent + description: |- + Number of HeartbeatTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.increment.sent + exported_name: distsender_rpc_increment_sent + description: |- + Number of Increment requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.isspanempty.sent + exported_name: distsender_rpc_isspanempty_sent + description: |- + Number of IsSpanEmpty requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.leaseinfo.sent + exported_name: distsender_rpc_leaseinfo_sent + description: |- + Number of LeaseInfo requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.linkexternalsstable.sent + exported_name: distsender_rpc_linkexternalsstable_sent + description: |- + Number of LinkExternalSSTable requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.merge.sent + exported_name: distsender_rpc_merge_sent + description: |- + Number of Merge requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.migrate.sent + exported_name: distsender_rpc_migrate_sent + description: |- + Number of Migrate requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.probe.sent + exported_name: distsender_rpc_probe_sent + description: |- + Number of Probe requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.err + exported_name: distsender_rpc_proxy_err + description: Number of attempts by a gateway to proxy a request which resulted in a failure. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.forward.err + exported_name: distsender_rpc_proxy_forward_err + description: Number of attempts on a follower replica to proxy a request which resulted in a failure. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.forward.sent + exported_name: distsender_rpc_proxy_forward_sent + description: Number of attempts on a follower replica to proxy a request to an unreachable leaseholder. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.proxy.sent + exported_name: distsender_rpc_proxy_sent + description: Number of attempts by a gateway to proxy a request to an unreachable leaseholder via a follower replica. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.pushtxn.sent + exported_name: distsender_rpc_pushtxn_sent + description: |- + Number of PushTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.put.sent + exported_name: distsender_rpc_put_sent + description: |- + Number of Put requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.queryintent.sent + exported_name: distsender_rpc_queryintent_sent + description: |- + Number of QueryIntent requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.querylocks.sent + exported_name: distsender_rpc_querylocks_sent + description: |- + Number of QueryLocks requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.queryresolvedtimestamp.sent + exported_name: distsender_rpc_queryresolvedtimestamp_sent + description: |- + Number of QueryResolvedTimestamp requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.querytxn.sent + exported_name: distsender_rpc_querytxn_sent + description: |- + Number of QueryTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.rangestats.sent + exported_name: distsender_rpc_rangestats_sent + description: |- + Number of RangeStats requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.recomputestats.sent + exported_name: distsender_rpc_recomputestats_sent + description: |- + Number of RecomputeStats requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.recovertxn.sent + exported_name: distsender_rpc_recovertxn_sent + description: |- + Number of RecoverTxn requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.refresh.sent + exported_name: distsender_rpc_refresh_sent + description: |- + Number of Refresh requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.refreshrange.sent + exported_name: distsender_rpc_refreshrange_sent + description: |- + Number of RefreshRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.requestlease.sent + exported_name: distsender_rpc_requestlease_sent + description: |- + Number of RequestLease requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.resolveintent.sent + exported_name: distsender_rpc_resolveintent_sent + description: |- + Number of ResolveIntent requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.resolveintentrange.sent + exported_name: distsender_rpc_resolveintentrange_sent + description: |- + Number of ResolveIntentRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.reversescan.sent + exported_name: distsender_rpc_reversescan_sent + description: |- + Number of ReverseScan requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.revertrange.sent + exported_name: distsender_rpc_revertrange_sent + description: |- + Number of RevertRange requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.scan.sent + exported_name: distsender_rpc_scan_sent + description: |- + Number of Scan requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. 
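+ # Naming convention, inferred from the entries in this file rather than stated in it: exported_name is
+ # the Prometheus-safe form of name, with '.' and '-' replaced by '_',
+ # e.g. distsender.rpc.reversescan.sent -> distsender_rpc_reversescan_sent.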
+ y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.sent + exported_name: distsender_rpc_sent + description: Number of replica-addressed RPCs sent + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.sent.local + exported_name: distsender_rpc_sent_local + description: Number of replica-addressed RPCs sent through the local-server optimization + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.subsume.sent + exported_name: distsender_rpc_subsume_sent + description: |- + Number of Subsume requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.transferlease.sent + exported_name: distsender_rpc_transferlease_sent + description: |- + Number of TransferLease requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.truncatelog.sent + exported_name: distsender_rpc_truncatelog_sent + description: |- + Number of TruncateLog requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.rpc.writebatch.sent + exported_name: distsender_rpc_writebatch_sent + description: |- + Number of WriteBatch requests processed. + + This counts the requests in batches handed to DistSender, not the RPCs + sent to individual Ranges as a result. + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: distsender.slow.replicarpcs + exported_name: distsender_slow_replicarpcs + description: |- + Number of slow replica-bound RPCs. + + Note that this is not a good signal for KV health. The remote side of the + RPCs tracked here may experience contention, so an end user can easily + cause values for this metric to be emitted by leaving a transaction open + for a long time and contending with it using a second transaction. 
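+ # Reading of the chart-hint fields (an interpretation, not part of the generated metadata): COUNTER
+ # metrics carry derivative: NON_NEGATIVE_DERIVATIVE, so charts plot their per-interval rate of change,
+ # while GAUGE metrics carry derivative: NONE and are plotted as raw values; aggregation: AVG appears to
+ # average the series across the nodes reporting it.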
+ y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.adopt_iterations + exported_name: jobs_adopt_iterations + description: number of job-adopt iterations performed by the registry + y_axis_label: iterations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.currently_idle + exported_name: jobs_auto_config_env_runner_currently_idle + labeled_name: 'jobs{type: auto_config_env_runner, status: currently_idle}' + description: Number of auto_config_env_runner jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.currently_paused + exported_name: jobs_auto_config_env_runner_currently_paused + labeled_name: 'jobs{name: auto_config_env_runner, status: currently_paused}' + description: Number of auto_config_env_runner jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.currently_running + exported_name: jobs_auto_config_env_runner_currently_running + labeled_name: 'jobs{type: auto_config_env_runner, status: currently_running}' + description: Number of auto_config_env_runner jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.expired_pts_records + exported_name: jobs_auto_config_env_runner_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_config_env_runner}' + description: Number of expired protected timestamp records owned by auto_config_env_runner jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.fail_or_cancel_completed + exported_name: jobs_auto_config_env_runner_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: completed}' + description: Number of auto_config_env_runner jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.fail_or_cancel_failed + exported_name: jobs_auto_config_env_runner_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: failed}' + description: Number of auto_config_env_runner jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.fail_or_cancel_retry_error + exported_name: jobs_auto_config_env_runner_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_config_env_runner, status: retry_error}' + description: Number of auto_config_env_runner jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.protected_age_sec + exported_name: jobs_auto_config_env_runner_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_config_env_runner}' + description: The age of the oldest PTS record protected by auto_config_env_runner jobs 
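+ # The jobs.<job_type>.* entries below repeat the same per-job-type pattern. The labeled_name field
+ # appears to give the label-based form (e.g. jobs{type: ..., status: ...}) under which the same counts
+ # can be exported; that reading is inferred from the field itself and is not stated elsewhere in this file.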
+ y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.protected_record_count + exported_name: jobs_auto_config_env_runner_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_config_env_runner}' + description: Number of protected timestamp records held by auto_config_env_runner jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_env_runner.resume_completed + exported_name: jobs_auto_config_env_runner_resume_completed + labeled_name: 'jobs.resume{name: auto_config_env_runner, status: completed}' + description: Number of auto_config_env_runner jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.resume_failed + exported_name: jobs_auto_config_env_runner_resume_failed + labeled_name: 'jobs.resume{name: auto_config_env_runner, status: failed}' + description: Number of auto_config_env_runner jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_env_runner.resume_retry_error + exported_name: jobs_auto_config_env_runner_resume_retry_error + labeled_name: 'jobs.resume{name: auto_config_env_runner, status: retry_error}' + description: Number of auto_config_env_runner jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.currently_idle + exported_name: jobs_auto_config_runner_currently_idle + labeled_name: 'jobs{type: auto_config_runner, status: currently_idle}' + description: Number of auto_config_runner jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.currently_paused + exported_name: jobs_auto_config_runner_currently_paused + labeled_name: 'jobs{name: auto_config_runner, status: currently_paused}' + description: Number of auto_config_runner jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.currently_running + exported_name: jobs_auto_config_runner_currently_running + labeled_name: 'jobs{type: auto_config_runner, status: currently_running}' + description: Number of auto_config_runner jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.expired_pts_records + exported_name: jobs_auto_config_runner_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_config_runner}' + description: Number of expired protected timestamp records owned by auto_config_runner jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.fail_or_cancel_completed + exported_name: jobs_auto_config_runner_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: completed}' + description: Number of auto_config_runner jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.fail_or_cancel_failed + exported_name: jobs_auto_config_runner_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: failed}' + description: Number of auto_config_runner jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.fail_or_cancel_retry_error + exported_name: jobs_auto_config_runner_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_config_runner, status: retry_error}' + description: Number of auto_config_runner jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.protected_age_sec + exported_name: jobs_auto_config_runner_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_config_runner}' + description: The age of the oldest PTS record protected by auto_config_runner jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.protected_record_count + exported_name: jobs_auto_config_runner_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_config_runner}' + description: Number of protected timestamp records held by auto_config_runner jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_runner.resume_completed + exported_name: jobs_auto_config_runner_resume_completed + labeled_name: 'jobs.resume{name: auto_config_runner, status: completed}' + description: Number of auto_config_runner jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.resume_failed + exported_name: jobs_auto_config_runner_resume_failed + labeled_name: 'jobs.resume{name: auto_config_runner, status: failed}' + description: Number of auto_config_runner jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_runner.resume_retry_error + exported_name: jobs_auto_config_runner_resume_retry_error + labeled_name: 'jobs.resume{name: auto_config_runner, status: retry_error}' + description: Number of auto_config_runner jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.currently_idle + exported_name: jobs_auto_config_task_currently_idle + labeled_name: 'jobs{type: auto_config_task, status: currently_idle}' + description: Number of auto_config_task jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.currently_paused + exported_name: jobs_auto_config_task_currently_paused + labeled_name: 'jobs{name: auto_config_task, status: currently_paused}' + description: Number of auto_config_task jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.currently_running + exported_name: 
jobs_auto_config_task_currently_running + labeled_name: 'jobs{type: auto_config_task, status: currently_running}' + description: Number of auto_config_task jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.expired_pts_records + exported_name: jobs_auto_config_task_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_config_task}' + description: Number of expired protected timestamp records owned by auto_config_task jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.fail_or_cancel_completed + exported_name: jobs_auto_config_task_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: completed}' + description: Number of auto_config_task jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.fail_or_cancel_failed + exported_name: jobs_auto_config_task_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: failed}' + description: Number of auto_config_task jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.fail_or_cancel_retry_error + exported_name: jobs_auto_config_task_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_config_task, status: retry_error}' + description: Number of auto_config_task jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.protected_age_sec + exported_name: jobs_auto_config_task_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_config_task}' + description: The age of the oldest PTS record protected by auto_config_task jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.protected_record_count + exported_name: jobs_auto_config_task_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_config_task}' + description: Number of protected timestamp records held by auto_config_task jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_config_task.resume_completed + exported_name: jobs_auto_config_task_resume_completed + labeled_name: 'jobs.resume{name: auto_config_task, status: completed}' + description: Number of auto_config_task jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.resume_failed + exported_name: jobs_auto_config_task_resume_failed + labeled_name: 'jobs.resume{name: auto_config_task, status: failed}' + description: Number of auto_config_task jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_config_task.resume_retry_error + exported_name: jobs_auto_config_task_resume_retry_error + labeled_name: 
'jobs.resume{name: auto_config_task, status: retry_error}' + description: Number of auto_config_task jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.currently_idle + exported_name: jobs_auto_create_partial_stats_currently_idle + labeled_name: 'jobs{type: auto_create_partial_stats, status: currently_idle}' + description: Number of auto_create_partial_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.currently_paused + exported_name: jobs_auto_create_partial_stats_currently_paused + labeled_name: 'jobs{name: auto_create_partial_stats, status: currently_paused}' + description: Number of auto_create_partial_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.currently_running + exported_name: jobs_auto_create_partial_stats_currently_running + labeled_name: 'jobs{type: auto_create_partial_stats, status: currently_running}' + description: Number of auto_create_partial_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.expired_pts_records + exported_name: jobs_auto_create_partial_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_create_partial_stats}' + description: Number of expired protected timestamp records owned by auto_create_partial_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.fail_or_cancel_completed + exported_name: jobs_auto_create_partial_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: completed}' + description: Number of auto_create_partial_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.fail_or_cancel_failed + exported_name: jobs_auto_create_partial_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: failed}' + description: Number of auto_create_partial_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.fail_or_cancel_retry_error + exported_name: jobs_auto_create_partial_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_create_partial_stats, status: retry_error}' + description: Number of auto_create_partial_stats jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.protected_age_sec + exported_name: jobs_auto_create_partial_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_create_partial_stats}' + description: The age of the oldest PTS record protected by auto_create_partial_stats jobs + y_axis_label: seconds 
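+ # Protected-timestamp (PTS) metrics, per job type (paraphrasing the descriptions): protected_record_count
+ # is how many PTS records the job type currently holds, protected_age_sec is the age of the oldest such
+ # record, and expired_pts_records counts expired records still owned by those jobs. A steadily growing
+ # protected_age_sec generally indicates that MVCC garbage collection is being held back for that job type.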
+ type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.protected_record_count + exported_name: jobs_auto_create_partial_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_create_partial_stats}' + description: Number of protected timestamp records held by auto_create_partial_stats jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_partial_stats.resume_completed + exported_name: jobs_auto_create_partial_stats_resume_completed + labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: completed}' + description: Number of auto_create_partial_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.resume_failed + exported_name: jobs_auto_create_partial_stats_resume_failed + labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: failed}' + description: Number of auto_create_partial_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_partial_stats.resume_retry_error + exported_name: jobs_auto_create_partial_stats_resume_retry_error + labeled_name: 'jobs.resume{name: auto_create_partial_stats, status: retry_error}' + description: Number of auto_create_partial_stats jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.currently_idle + exported_name: jobs_auto_create_stats_currently_idle + labeled_name: 'jobs{type: auto_create_stats, status: currently_idle}' + description: Number of auto_create_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_stats.expired_pts_records + exported_name: jobs_auto_create_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_create_stats}' + description: Number of expired protected timestamp records owned by auto_create_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.fail_or_cancel_completed + exported_name: jobs_auto_create_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: completed}' + description: Number of auto_create_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.fail_or_cancel_failed + exported_name: jobs_auto_create_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: failed}' + description: Number of auto_create_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.fail_or_cancel_retry_error + exported_name: jobs_auto_create_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_create_stats, status: retry_error}' + description: Number of auto_create_stats jobs 
which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.protected_age_sec + exported_name: jobs_auto_create_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_create_stats}' + description: The age of the oldest PTS record protected by auto_create_stats jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_stats.protected_record_count + exported_name: jobs_auto_create_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_create_stats}' + description: Number of protected timestamp records held by auto_create_stats jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_create_stats.resume_completed + exported_name: jobs_auto_create_stats_resume_completed + labeled_name: 'jobs.resume{name: auto_create_stats, status: completed}' + description: Number of auto_create_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_create_stats.resume_retry_error + exported_name: jobs_auto_create_stats_resume_retry_error + labeled_name: 'jobs.resume{name: auto_create_stats, status: retry_error}' + description: Number of auto_create_stats jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.currently_idle + exported_name: jobs_auto_schema_telemetry_currently_idle + labeled_name: 'jobs{type: auto_schema_telemetry, status: currently_idle}' + description: Number of auto_schema_telemetry jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.currently_paused + exported_name: jobs_auto_schema_telemetry_currently_paused + labeled_name: 'jobs{name: auto_schema_telemetry, status: currently_paused}' + description: Number of auto_schema_telemetry jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.currently_running + exported_name: jobs_auto_schema_telemetry_currently_running + labeled_name: 'jobs{type: auto_schema_telemetry, status: currently_running}' + description: Number of auto_schema_telemetry jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.expired_pts_records + exported_name: jobs_auto_schema_telemetry_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_schema_telemetry}' + description: Number of expired protected timestamp records owned by auto_schema_telemetry jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.fail_or_cancel_completed + exported_name: jobs_auto_schema_telemetry_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: completed}' + description: Number of auto_schema_telemetry jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.fail_or_cancel_failed + exported_name: jobs_auto_schema_telemetry_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: failed}' + description: Number of auto_schema_telemetry jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.fail_or_cancel_retry_error + exported_name: jobs_auto_schema_telemetry_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_schema_telemetry, status: retry_error}' + description: Number of auto_schema_telemetry jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.protected_age_sec + exported_name: jobs_auto_schema_telemetry_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_schema_telemetry}' + description: The age of the oldest PTS record protected by auto_schema_telemetry jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.protected_record_count + exported_name: jobs_auto_schema_telemetry_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_schema_telemetry}' + description: Number of protected timestamp records held by auto_schema_telemetry jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_schema_telemetry.resume_completed + exported_name: jobs_auto_schema_telemetry_resume_completed + labeled_name: 'jobs.resume{name: auto_schema_telemetry, status: completed}' + description: Number of auto_schema_telemetry jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.resume_failed + exported_name: jobs_auto_schema_telemetry_resume_failed + labeled_name: 'jobs.resume{name: auto_schema_telemetry, status: failed}' + description: Number of auto_schema_telemetry jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_schema_telemetry.resume_retry_error + exported_name: jobs_auto_schema_telemetry_resume_retry_error + labeled_name: 'jobs.resume{name: auto_schema_telemetry, status: retry_error}' + description: Number of auto_schema_telemetry jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.currently_idle + exported_name: jobs_auto_span_config_reconciliation_currently_idle + labeled_name: 'jobs{type: auto_span_config_reconciliation, status: currently_idle}' + description: Number of auto_span_config_reconciliation jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.currently_paused + exported_name: jobs_auto_span_config_reconciliation_currently_paused + labeled_name: 'jobs{name: auto_span_config_reconciliation, status: currently_paused}' + 
description: Number of auto_span_config_reconciliation jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.currently_running + exported_name: jobs_auto_span_config_reconciliation_currently_running + labeled_name: 'jobs{type: auto_span_config_reconciliation, status: currently_running}' + description: Number of auto_span_config_reconciliation jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.expired_pts_records + exported_name: jobs_auto_span_config_reconciliation_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_span_config_reconciliation}' + description: Number of expired protected timestamp records owned by auto_span_config_reconciliation jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.fail_or_cancel_completed + exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: completed}' + description: Number of auto_span_config_reconciliation jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.fail_or_cancel_failed + exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: failed}' + description: Number of auto_span_config_reconciliation jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.fail_or_cancel_retry_error + exported_name: jobs_auto_span_config_reconciliation_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_span_config_reconciliation, status: retry_error}' + description: Number of auto_span_config_reconciliation jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.protected_age_sec + exported_name: jobs_auto_span_config_reconciliation_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_span_config_reconciliation}' + description: The age of the oldest PTS record protected by auto_span_config_reconciliation jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.protected_record_count + exported_name: jobs_auto_span_config_reconciliation_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_span_config_reconciliation}' + description: Number of protected timestamp records held by auto_span_config_reconciliation jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_span_config_reconciliation.resume_completed + exported_name: jobs_auto_span_config_reconciliation_resume_completed + labeled_name: 'jobs.resume{name: auto_span_config_reconciliation, status: 
completed}' + description: Number of auto_span_config_reconciliation jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.resume_failed + exported_name: jobs_auto_span_config_reconciliation_resume_failed + labeled_name: 'jobs.resume{name: auto_span_config_reconciliation, status: failed}' + description: Number of auto_span_config_reconciliation jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_span_config_reconciliation.resume_retry_error + exported_name: jobs_auto_span_config_reconciliation_resume_retry_error + labeled_name: 'jobs.resume{name: auto_span_config_reconciliation, status: retry_error}' + description: Number of auto_span_config_reconciliation jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.currently_idle + exported_name: jobs_auto_sql_stats_compaction_currently_idle + labeled_name: 'jobs{type: auto_sql_stats_compaction, status: currently_idle}' + description: Number of auto_sql_stats_compaction jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.currently_paused + exported_name: jobs_auto_sql_stats_compaction_currently_paused + labeled_name: 'jobs{name: auto_sql_stats_compaction, status: currently_paused}' + description: Number of auto_sql_stats_compaction jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.currently_running + exported_name: jobs_auto_sql_stats_compaction_currently_running + labeled_name: 'jobs{type: auto_sql_stats_compaction, status: currently_running}' + description: Number of auto_sql_stats_compaction jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.expired_pts_records + exported_name: jobs_auto_sql_stats_compaction_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_sql_stats_compaction}' + description: Number of expired protected timestamp records owned by auto_sql_stats_compaction jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.fail_or_cancel_completed + exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: completed}' + description: Number of auto_sql_stats_compaction jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.fail_or_cancel_failed + exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: failed}' + description: Number of auto_sql_stats_compaction jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: 
AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.fail_or_cancel_retry_error + exported_name: jobs_auto_sql_stats_compaction_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_sql_stats_compaction, status: retry_error}' + description: Number of auto_sql_stats_compaction jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.protected_age_sec + exported_name: jobs_auto_sql_stats_compaction_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_sql_stats_compaction}' + description: The age of the oldest PTS record protected by auto_sql_stats_compaction jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.protected_record_count + exported_name: jobs_auto_sql_stats_compaction_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_sql_stats_compaction}' + description: Number of protected timestamp records held by auto_sql_stats_compaction jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_sql_stats_compaction.resume_completed + exported_name: jobs_auto_sql_stats_compaction_resume_completed + labeled_name: 'jobs.resume{name: auto_sql_stats_compaction, status: completed}' + description: Number of auto_sql_stats_compaction jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.resume_failed + exported_name: jobs_auto_sql_stats_compaction_resume_failed + labeled_name: 'jobs.resume{name: auto_sql_stats_compaction, status: failed}' + description: Number of auto_sql_stats_compaction jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_sql_stats_compaction.resume_retry_error + exported_name: jobs_auto_sql_stats_compaction_resume_retry_error + labeled_name: 'jobs.resume{name: auto_sql_stats_compaction, status: retry_error}' + description: Number of auto_sql_stats_compaction jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.currently_idle + exported_name: jobs_auto_update_sql_activity_currently_idle + labeled_name: 'jobs{type: auto_update_sql_activity, status: currently_idle}' + description: Number of auto_update_sql_activity jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.currently_paused + exported_name: jobs_auto_update_sql_activity_currently_paused + labeled_name: 'jobs{name: auto_update_sql_activity, status: currently_paused}' + description: Number of auto_update_sql_activity jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.currently_running + exported_name: jobs_auto_update_sql_activity_currently_running + labeled_name: 'jobs{type: auto_update_sql_activity, status: currently_running}' + description: Number of auto_update_sql_activity jobs currently 
running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.expired_pts_records + exported_name: jobs_auto_update_sql_activity_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: auto_update_sql_activity}' + description: Number of expired protected timestamp records owned by auto_update_sql_activity jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.fail_or_cancel_completed + exported_name: jobs_auto_update_sql_activity_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: completed}' + description: Number of auto_update_sql_activity jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.fail_or_cancel_failed + exported_name: jobs_auto_update_sql_activity_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: failed}' + description: Number of auto_update_sql_activity jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.fail_or_cancel_retry_error + exported_name: jobs_auto_update_sql_activity_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: auto_update_sql_activity, status: retry_error}' + description: Number of auto_update_sql_activity jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.protected_age_sec + exported_name: jobs_auto_update_sql_activity_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: auto_update_sql_activity}' + description: The age of the oldest PTS record protected by auto_update_sql_activity jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.protected_record_count + exported_name: jobs_auto_update_sql_activity_protected_record_count + labeled_name: 'jobs.protected_record_count{type: auto_update_sql_activity}' + description: Number of protected timestamp records held by auto_update_sql_activity jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.auto_update_sql_activity.resume_completed + exported_name: jobs_auto_update_sql_activity_resume_completed + labeled_name: 'jobs.resume{name: auto_update_sql_activity, status: completed}' + description: Number of auto_update_sql_activity jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.auto_update_sql_activity.resume_failed + exported_name: jobs_auto_update_sql_activity_resume_failed + labeled_name: 'jobs.resume{name: auto_update_sql_activity, status: failed}' + description: Number of auto_update_sql_activity jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
jobs.auto_update_sql_activity.resume_retry_error + exported_name: jobs_auto_update_sql_activity_resume_retry_error + labeled_name: 'jobs.resume{name: auto_update_sql_activity, status: retry_error}' + description: Number of auto_update_sql_activity jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.currently_idle + exported_name: jobs_backup_currently_idle + labeled_name: 'jobs{type: backup, status: currently_idle}' + description: Number of backup jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.backup.expired_pts_records + exported_name: jobs_backup_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: backup}' + description: Number of expired protected timestamp records owned by backup jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.fail_or_cancel_completed + exported_name: jobs_backup_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: backup, status: completed}' + description: Number of backup jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.fail_or_cancel_failed + exported_name: jobs_backup_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: backup, status: failed}' + description: Number of backup jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.fail_or_cancel_retry_error + exported_name: jobs_backup_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: backup, status: retry_error}' + description: Number of backup jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.protected_age_sec + exported_name: jobs_backup_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: backup}' + description: The age of the oldest PTS record protected by backup jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.backup.protected_record_count + exported_name: jobs_backup_protected_record_count + labeled_name: 'jobs.protected_record_count{type: backup}' + description: Number of protected timestamp records held by backup jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.backup.resume_completed + exported_name: jobs_backup_resume_completed + labeled_name: 'jobs.resume{name: backup, status: completed}' + description: Number of backup jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.resume_failed + exported_name: jobs_backup_resume_failed + labeled_name: 'jobs.resume{name: backup, status: failed}' + description: Number of backup jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.backup.resume_retry_error 
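+ # For each job type, resume_completed / resume_failed / resume_retry_error (and the matching
+ # fail_or_cancel_* counters) split outcomes into success, non-retriable failure, and retriable error,
+ # so their sum roughly totals the resume (or failure/cancelation) attempts that reached an outcome.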
+ exported_name: jobs_backup_resume_retry_error + labeled_name: 'jobs.resume{name: backup, status: retry_error}' + description: Number of backup jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.currently_idle + exported_name: jobs_changefeed_currently_idle + labeled_name: 'jobs{type: changefeed, status: currently_idle}' + description: Number of changefeed jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.changefeed.currently_running + exported_name: jobs_changefeed_currently_running + labeled_name: 'jobs{type: changefeed, status: currently_running}' + description: Number of changefeed jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.changefeed.expired_pts_records + exported_name: jobs_changefeed_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: changefeed}' + description: Number of expired protected timestamp records owned by changefeed jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.fail_or_cancel_completed + exported_name: jobs_changefeed_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: completed}' + description: Number of changefeed jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.fail_or_cancel_failed + exported_name: jobs_changefeed_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: failed}' + description: Number of changefeed jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.fail_or_cancel_retry_error + exported_name: jobs_changefeed_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: changefeed, status: retry_error}' + description: Number of changefeed jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.protected_record_count + exported_name: jobs_changefeed_protected_record_count + labeled_name: 'jobs.protected_record_count{type: changefeed}' + description: Number of protected timestamp records held by changefeed jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.changefeed.resume_completed + exported_name: jobs_changefeed_resume_completed + labeled_name: 'jobs.resume{name: changefeed, status: completed}' + description: Number of changefeed jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.resume_failed + exported_name: jobs_changefeed_resume_failed + labeled_name: 'jobs.resume{name: changefeed, status: failed}' + description: Number of changefeed jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: jobs.changefeed.resume_retry_error + exported_name: jobs_changefeed_resume_retry_error + labeled_name: 'jobs.resume{name: changefeed, status: retry_error}' + description: Number of changefeed jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.claimed_jobs + exported_name: jobs_claimed_jobs + description: number of jobs claimed in job-adopt iterations + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.currently_idle + exported_name: jobs_create_stats_currently_idle + labeled_name: 'jobs{type: create_stats, status: currently_idle}' + description: Number of create_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.create_stats.currently_paused + exported_name: jobs_create_stats_currently_paused + labeled_name: 'jobs{name: create_stats, status: currently_paused}' + description: Number of create_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.create_stats.expired_pts_records + exported_name: jobs_create_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: create_stats}' + description: Number of expired protected timestamp records owned by create_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.fail_or_cancel_completed + exported_name: jobs_create_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: completed}' + description: Number of create_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.fail_or_cancel_failed + exported_name: jobs_create_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: failed}' + description: Number of create_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.fail_or_cancel_retry_error + exported_name: jobs_create_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: create_stats, status: retry_error}' + description: Number of create_stats jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.protected_age_sec + exported_name: jobs_create_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: create_stats}' + description: The age of the oldest PTS record protected by create_stats jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.create_stats.protected_record_count + exported_name: jobs_create_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: create_stats}' + description: Number of protected timestamp records held by create_stats jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: 
NONE + - name: jobs.create_stats.resume_completed + exported_name: jobs_create_stats_resume_completed + labeled_name: 'jobs.resume{name: create_stats, status: completed}' + description: Number of create_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.resume_failed + exported_name: jobs_create_stats_resume_failed + labeled_name: 'jobs.resume{name: create_stats, status: failed}' + description: Number of create_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.create_stats.resume_retry_error + exported_name: jobs_create_stats_resume_retry_error + labeled_name: 'jobs.resume{name: create_stats, status: retry_error}' + description: Number of create_stats jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.currently_idle + exported_name: jobs_history_retention_currently_idle + labeled_name: 'jobs{type: history_retention, status: currently_idle}' + description: Number of history_retention jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.currently_paused + exported_name: jobs_history_retention_currently_paused + labeled_name: 'jobs{name: history_retention, status: currently_paused}' + description: Number of history_retention jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.currently_running + exported_name: jobs_history_retention_currently_running + labeled_name: 'jobs{type: history_retention, status: currently_running}' + description: Number of history_retention jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.expired_pts_records + exported_name: jobs_history_retention_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: history_retention}' + description: Number of expired protected timestamp records owned by history_retention jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.fail_or_cancel_completed + exported_name: jobs_history_retention_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: history_retention, status: completed}' + description: Number of history_retention jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.fail_or_cancel_failed + exported_name: jobs_history_retention_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: history_retention, status: failed}' + description: Number of history_retention jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.fail_or_cancel_retry_error + exported_name: jobs_history_retention_fail_or_cancel_retry_error + labeled_name: 
'jobs.fail_or_cancel{name: history_retention, status: retry_error}' + description: Number of history_retention jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.protected_age_sec + exported_name: jobs_history_retention_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: history_retention}' + description: The age of the oldest PTS record protected by history_retention jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.protected_record_count + exported_name: jobs_history_retention_protected_record_count + labeled_name: 'jobs.protected_record_count{type: history_retention}' + description: Number of protected timestamp records held by history_retention jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.history_retention.resume_completed + exported_name: jobs_history_retention_resume_completed + labeled_name: 'jobs.resume{name: history_retention, status: completed}' + description: Number of history_retention jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.resume_failed + exported_name: jobs_history_retention_resume_failed + labeled_name: 'jobs.resume{name: history_retention, status: failed}' + description: Number of history_retention jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.history_retention.resume_retry_error + exported_name: jobs_history_retention_resume_retry_error + labeled_name: 'jobs.resume{name: history_retention, status: retry_error}' + description: Number of history_retention jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.currently_idle + exported_name: jobs_hot_ranges_logger_currently_idle + labeled_name: 'jobs{type: hot_ranges_logger, status: currently_idle}' + description: Number of hot_ranges_logger jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.currently_paused + exported_name: jobs_hot_ranges_logger_currently_paused + labeled_name: 'jobs{name: hot_ranges_logger, status: currently_paused}' + description: Number of hot_ranges_logger jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.currently_running + exported_name: jobs_hot_ranges_logger_currently_running + labeled_name: 'jobs{type: hot_ranges_logger, status: currently_running}' + description: Number of hot_ranges_logger jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.expired_pts_records + exported_name: jobs_hot_ranges_logger_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: hot_ranges_logger}' + description: Number of expired protected timestamp records owned by hot_ranges_logger jobs + y_axis_label: records + type: COUNTER + unit: COUNT + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.fail_or_cancel_completed + exported_name: jobs_hot_ranges_logger_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: completed}' + description: Number of hot_ranges_logger jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.fail_or_cancel_failed + exported_name: jobs_hot_ranges_logger_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: failed}' + description: Number of hot_ranges_logger jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.fail_or_cancel_retry_error + exported_name: jobs_hot_ranges_logger_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: hot_ranges_logger, status: retry_error}' + description: Number of hot_ranges_logger jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.protected_age_sec + exported_name: jobs_hot_ranges_logger_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: hot_ranges_logger}' + description: The age of the oldest PTS record protected by hot_ranges_logger jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.protected_record_count + exported_name: jobs_hot_ranges_logger_protected_record_count + labeled_name: 'jobs.protected_record_count{type: hot_ranges_logger}' + description: Number of protected timestamp records held by hot_ranges_logger jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.hot_ranges_logger.resume_completed + exported_name: jobs_hot_ranges_logger_resume_completed + labeled_name: 'jobs.resume{name: hot_ranges_logger, status: completed}' + description: Number of hot_ranges_logger jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.resume_failed + exported_name: jobs_hot_ranges_logger_resume_failed + labeled_name: 'jobs.resume{name: hot_ranges_logger, status: failed}' + description: Number of hot_ranges_logger jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.hot_ranges_logger.resume_retry_error + exported_name: jobs_hot_ranges_logger_resume_retry_error + labeled_name: 'jobs.resume{name: hot_ranges_logger, status: retry_error}' + description: Number of hot_ranges_logger jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.currently_idle + exported_name: jobs_import_currently_idle + labeled_name: 'jobs{type: import, status: currently_idle}' + description: Number of import jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
jobs.import.currently_paused + exported_name: jobs_import_currently_paused + labeled_name: 'jobs{name: import, status: currently_paused}' + description: Number of import jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import.currently_running + exported_name: jobs_import_currently_running + labeled_name: 'jobs{type: import, status: currently_running}' + description: Number of import jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import.expired_pts_records + exported_name: jobs_import_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: import}' + description: Number of expired protected timestamp records owned by import jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.fail_or_cancel_completed + exported_name: jobs_import_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: import, status: completed}' + description: Number of import jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.fail_or_cancel_failed + exported_name: jobs_import_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: import, status: failed}' + description: Number of import jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.fail_or_cancel_retry_error + exported_name: jobs_import_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: import, status: retry_error}' + description: Number of import jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.protected_age_sec + exported_name: jobs_import_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: import}' + description: The age of the oldest PTS record protected by import jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.import.protected_record_count + exported_name: jobs_import_protected_record_count + labeled_name: 'jobs.protected_record_count{type: import}' + description: Number of protected timestamp records held by import jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import.resume_completed + exported_name: jobs_import_resume_completed + labeled_name: 'jobs.resume{name: import, status: completed}' + description: Number of import jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.resume_failed + exported_name: jobs_import_resume_failed + labeled_name: 'jobs.resume{name: import, status: failed}' + description: Number of import jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import.resume_retry_error + exported_name: jobs_import_resume_retry_error + labeled_name: 'jobs.resume{name: import, status: 
retry_error}' + description: Number of import jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.currently_idle + exported_name: jobs_import_rollback_currently_idle + labeled_name: 'jobs{type: import_rollback, status: currently_idle}' + description: Number of import_rollback jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.currently_paused + exported_name: jobs_import_rollback_currently_paused + labeled_name: 'jobs{name: import_rollback, status: currently_paused}' + description: Number of import_rollback jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.currently_running + exported_name: jobs_import_rollback_currently_running + labeled_name: 'jobs{type: import_rollback, status: currently_running}' + description: Number of import_rollback jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.expired_pts_records + exported_name: jobs_import_rollback_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: import_rollback}' + description: Number of expired protected timestamp records owned by import_rollback jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.fail_or_cancel_completed + exported_name: jobs_import_rollback_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: completed}' + description: Number of import_rollback jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.fail_or_cancel_failed + exported_name: jobs_import_rollback_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: failed}' + description: Number of import_rollback jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.fail_or_cancel_retry_error + exported_name: jobs_import_rollback_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: import_rollback, status: retry_error}' + description: Number of import_rollback jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.protected_age_sec + exported_name: jobs_import_rollback_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: import_rollback}' + description: The age of the oldest PTS record protected by import_rollback jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.protected_record_count + exported_name: jobs_import_rollback_protected_record_count + labeled_name: 'jobs.protected_record_count{type: import_rollback}' + description: Number of protected timestamp records held by import_rollback jobs + y_axis_label: records + type: GAUGE + 
unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.import_rollback.resume_completed + exported_name: jobs_import_rollback_resume_completed + labeled_name: 'jobs.resume{name: import_rollback, status: completed}' + description: Number of import_rollback jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.resume_failed + exported_name: jobs_import_rollback_resume_failed + labeled_name: 'jobs.resume{name: import_rollback, status: failed}' + description: Number of import_rollback jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.import_rollback.resume_retry_error + exported_name: jobs_import_rollback_resume_retry_error + labeled_name: 'jobs.resume{name: import_rollback, status: retry_error}' + description: Number of import_rollback jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.currently_idle + exported_name: jobs_key_visualizer_currently_idle + labeled_name: 'jobs{type: key_visualizer, status: currently_idle}' + description: Number of key_visualizer jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.currently_paused + exported_name: jobs_key_visualizer_currently_paused + labeled_name: 'jobs{name: key_visualizer, status: currently_paused}' + description: Number of key_visualizer jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.currently_running + exported_name: jobs_key_visualizer_currently_running + labeled_name: 'jobs{type: key_visualizer, status: currently_running}' + description: Number of key_visualizer jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.expired_pts_records + exported_name: jobs_key_visualizer_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: key_visualizer}' + description: Number of expired protected timestamp records owned by key_visualizer jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.fail_or_cancel_completed + exported_name: jobs_key_visualizer_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: key_visualizer, status: completed}' + description: Number of key_visualizer jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.fail_or_cancel_failed + exported_name: jobs_key_visualizer_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: key_visualizer, status: failed}' + description: Number of key_visualizer jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.fail_or_cancel_retry_error + exported_name: jobs_key_visualizer_fail_or_cancel_retry_error + labeled_name: 
'jobs.fail_or_cancel{name: key_visualizer, status: retry_error}' + description: Number of key_visualizer jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.protected_age_sec + exported_name: jobs_key_visualizer_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: key_visualizer}' + description: The age of the oldest PTS record protected by key_visualizer jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.protected_record_count + exported_name: jobs_key_visualizer_protected_record_count + labeled_name: 'jobs.protected_record_count{type: key_visualizer}' + description: Number of protected timestamp records held by key_visualizer jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.key_visualizer.resume_completed + exported_name: jobs_key_visualizer_resume_completed + labeled_name: 'jobs.resume{name: key_visualizer, status: completed}' + description: Number of key_visualizer jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.resume_failed + exported_name: jobs_key_visualizer_resume_failed + labeled_name: 'jobs.resume{name: key_visualizer, status: failed}' + description: Number of key_visualizer jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.key_visualizer.resume_retry_error + exported_name: jobs_key_visualizer_resume_retry_error + labeled_name: 'jobs.resume{name: key_visualizer, status: retry_error}' + description: Number of key_visualizer jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.currently_idle + exported_name: jobs_logical_replication_currently_idle + labeled_name: 'jobs{type: logical_replication, status: currently_idle}' + description: Number of logical_replication jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.currently_paused + exported_name: jobs_logical_replication_currently_paused + labeled_name: 'jobs{name: logical_replication, status: currently_paused}' + description: Number of logical_replication jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.currently_running + exported_name: jobs_logical_replication_currently_running + labeled_name: 'jobs{type: logical_replication, status: currently_running}' + description: Number of logical_replication jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.expired_pts_records + exported_name: jobs_logical_replication_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: logical_replication}' + description: Number of expired protected timestamp records owned by logical_replication jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.fail_or_cancel_completed + exported_name: jobs_logical_replication_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: completed}' + description: Number of logical_replication jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.fail_or_cancel_failed + exported_name: jobs_logical_replication_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: failed}' + description: Number of logical_replication jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.fail_or_cancel_retry_error + exported_name: jobs_logical_replication_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: logical_replication, status: retry_error}' + description: Number of logical_replication jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.protected_age_sec + exported_name: jobs_logical_replication_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: logical_replication}' + description: The age of the oldest PTS record protected by logical_replication jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.protected_record_count + exported_name: jobs_logical_replication_protected_record_count + labeled_name: 'jobs.protected_record_count{type: logical_replication}' + description: Number of protected timestamp records held by logical_replication jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.logical_replication.resume_completed + exported_name: jobs_logical_replication_resume_completed + labeled_name: 'jobs.resume{name: logical_replication, status: completed}' + description: Number of logical_replication jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.resume_failed + exported_name: jobs_logical_replication_resume_failed + labeled_name: 'jobs.resume{name: logical_replication, status: failed}' + description: Number of logical_replication jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.logical_replication.resume_retry_error + exported_name: jobs_logical_replication_resume_retry_error + labeled_name: 'jobs.resume{name: logical_replication, status: retry_error}' + description: Number of logical_replication jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.metrics.task_failed + exported_name: jobs_metrics_task_failed + description: Number of metrics poller tasks that failed + y_axis_label: errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.currently_idle + exported_name: 
jobs_migration_currently_idle + labeled_name: 'jobs{type: migration, status: currently_idle}' + description: Number of migration jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.currently_paused + exported_name: jobs_migration_currently_paused + labeled_name: 'jobs{name: migration, status: currently_paused}' + description: Number of migration jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.currently_running + exported_name: jobs_migration_currently_running + labeled_name: 'jobs{type: migration, status: currently_running}' + description: Number of migration jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.expired_pts_records + exported_name: jobs_migration_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: migration}' + description: Number of expired protected timestamp records owned by migration jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.fail_or_cancel_completed + exported_name: jobs_migration_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: migration, status: completed}' + description: Number of migration jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.fail_or_cancel_failed + exported_name: jobs_migration_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: migration, status: failed}' + description: Number of migration jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.fail_or_cancel_retry_error + exported_name: jobs_migration_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: migration, status: retry_error}' + description: Number of migration jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.protected_age_sec + exported_name: jobs_migration_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: migration}' + description: The age of the oldest PTS record protected by migration jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.migration.protected_record_count + exported_name: jobs_migration_protected_record_count + labeled_name: 'jobs.protected_record_count{type: migration}' + description: Number of protected timestamp records held by migration jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.migration.resume_completed + exported_name: jobs_migration_resume_completed + labeled_name: 'jobs.resume{name: migration, status: completed}' + description: Number of migration jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.resume_failed + exported_name: 
jobs_migration_resume_failed + labeled_name: 'jobs.resume{name: migration, status: failed}' + description: Number of migration jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.migration.resume_retry_error + exported_name: jobs_migration_resume_retry_error + labeled_name: 'jobs.resume{name: migration, status: retry_error}' + description: Number of migration jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.currently_idle + exported_name: jobs_mvcc_statistics_update_currently_idle + labeled_name: 'jobs{type: mvcc_statistics_update, status: currently_idle}' + description: Number of mvcc_statistics_update jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.currently_paused + exported_name: jobs_mvcc_statistics_update_currently_paused + labeled_name: 'jobs{name: mvcc_statistics_update, status: currently_paused}' + description: Number of mvcc_statistics_update jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.currently_running + exported_name: jobs_mvcc_statistics_update_currently_running + labeled_name: 'jobs{type: mvcc_statistics_update, status: currently_running}' + description: Number of mvcc_statistics_update jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.expired_pts_records + exported_name: jobs_mvcc_statistics_update_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: mvcc_statistics_update}' + description: Number of expired protected timestamp records owned by mvcc_statistics_update jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.fail_or_cancel_completed + exported_name: jobs_mvcc_statistics_update_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: completed}' + description: Number of mvcc_statistics_update jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.fail_or_cancel_failed + exported_name: jobs_mvcc_statistics_update_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: failed}' + description: Number of mvcc_statistics_update jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.fail_or_cancel_retry_error + exported_name: jobs_mvcc_statistics_update_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: mvcc_statistics_update, status: retry_error}' + description: Number of mvcc_statistics_update jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
jobs.mvcc_statistics_update.protected_age_sec + exported_name: jobs_mvcc_statistics_update_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: mvcc_statistics_update}' + description: The age of the oldest PTS record protected by mvcc_statistics_update jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.protected_record_count + exported_name: jobs_mvcc_statistics_update_protected_record_count + labeled_name: 'jobs.protected_record_count{type: mvcc_statistics_update}' + description: Number of protected timestamp records held by mvcc_statistics_update jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.mvcc_statistics_update.resume_completed + exported_name: jobs_mvcc_statistics_update_resume_completed + labeled_name: 'jobs.resume{name: mvcc_statistics_update, status: completed}' + description: Number of mvcc_statistics_update jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.resume_failed + exported_name: jobs_mvcc_statistics_update_resume_failed + labeled_name: 'jobs.resume{name: mvcc_statistics_update, status: failed}' + description: Number of mvcc_statistics_update jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.mvcc_statistics_update.resume_retry_error + exported_name: jobs_mvcc_statistics_update_resume_retry_error + labeled_name: 'jobs.resume{name: mvcc_statistics_update, status: retry_error}' + description: Number of mvcc_statistics_update jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.currently_idle + exported_name: jobs_new_schema_change_currently_idle + labeled_name: 'jobs{type: new_schema_change, status: currently_idle}' + description: Number of new_schema_change jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.currently_paused + exported_name: jobs_new_schema_change_currently_paused + labeled_name: 'jobs{name: new_schema_change, status: currently_paused}' + description: Number of new_schema_change jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.currently_running + exported_name: jobs_new_schema_change_currently_running + labeled_name: 'jobs{type: new_schema_change, status: currently_running}' + description: Number of new_schema_change jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.expired_pts_records + exported_name: jobs_new_schema_change_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: new_schema_change}' + description: Number of expired protected timestamp records owned by new_schema_change jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.fail_or_cancel_completed + exported_name: jobs_new_schema_change_fail_or_cancel_completed + labeled_name: 
'jobs.fail_or_cancel{name: new_schema_change, status: completed}' + description: Number of new_schema_change jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.fail_or_cancel_failed + exported_name: jobs_new_schema_change_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: new_schema_change, status: failed}' + description: Number of new_schema_change jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.fail_or_cancel_retry_error + exported_name: jobs_new_schema_change_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: new_schema_change, status: retry_error}' + description: Number of new_schema_change jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.protected_age_sec + exported_name: jobs_new_schema_change_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: new_schema_change}' + description: The age of the oldest PTS record protected by new_schema_change jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.protected_record_count + exported_name: jobs_new_schema_change_protected_record_count + labeled_name: 'jobs.protected_record_count{type: new_schema_change}' + description: Number of protected timestamp records held by new_schema_change jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.new_schema_change.resume_completed + exported_name: jobs_new_schema_change_resume_completed + labeled_name: 'jobs.resume{name: new_schema_change, status: completed}' + description: Number of new_schema_change jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.resume_failed + exported_name: jobs_new_schema_change_resume_failed + labeled_name: 'jobs.resume{name: new_schema_change, status: failed}' + description: Number of new_schema_change jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.new_schema_change.resume_retry_error + exported_name: jobs_new_schema_change_resume_retry_error + labeled_name: 'jobs.resume{name: new_schema_change, status: retry_error}' + description: Number of new_schema_change jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.currently_idle + exported_name: jobs_poll_jobs_stats_currently_idle + labeled_name: 'jobs{type: poll_jobs_stats, status: currently_idle}' + description: Number of poll_jobs_stats jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.currently_paused + exported_name: jobs_poll_jobs_stats_currently_paused + labeled_name: 'jobs{name: poll_jobs_stats, status: currently_paused}' + description: 
Number of poll_jobs_stats jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.currently_running + exported_name: jobs_poll_jobs_stats_currently_running + labeled_name: 'jobs{type: poll_jobs_stats, status: currently_running}' + description: Number of poll_jobs_stats jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.expired_pts_records + exported_name: jobs_poll_jobs_stats_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: poll_jobs_stats}' + description: Number of expired protected timestamp records owned by poll_jobs_stats jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.fail_or_cancel_completed + exported_name: jobs_poll_jobs_stats_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: completed}' + description: Number of poll_jobs_stats jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.fail_or_cancel_failed + exported_name: jobs_poll_jobs_stats_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: failed}' + description: Number of poll_jobs_stats jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.fail_or_cancel_retry_error + exported_name: jobs_poll_jobs_stats_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: poll_jobs_stats, status: retry_error}' + description: Number of poll_jobs_stats jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.protected_age_sec + exported_name: jobs_poll_jobs_stats_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: poll_jobs_stats}' + description: The age of the oldest PTS record protected by poll_jobs_stats jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.protected_record_count + exported_name: jobs_poll_jobs_stats_protected_record_count + labeled_name: 'jobs.protected_record_count{type: poll_jobs_stats}' + description: Number of protected timestamp records held by poll_jobs_stats jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.poll_jobs_stats.resume_completed + exported_name: jobs_poll_jobs_stats_resume_completed + labeled_name: 'jobs.resume{name: poll_jobs_stats, status: completed}' + description: Number of poll_jobs_stats jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.resume_failed + exported_name: jobs_poll_jobs_stats_resume_failed + labeled_name: 'jobs.resume{name: poll_jobs_stats, status: failed}' + description: Number of poll_jobs_stats jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG 
+ derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.poll_jobs_stats.resume_retry_error + exported_name: jobs_poll_jobs_stats_resume_retry_error + labeled_name: 'jobs.resume{name: poll_jobs_stats, status: retry_error}' + description: Number of poll_jobs_stats jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.currently_idle + exported_name: jobs_replication_stream_ingestion_currently_idle + labeled_name: 'jobs{type: replication_stream_ingestion, status: currently_idle}' + description: Number of replication_stream_ingestion jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.currently_paused + exported_name: jobs_replication_stream_ingestion_currently_paused + labeled_name: 'jobs{name: replication_stream_ingestion, status: currently_paused}' + description: Number of replication_stream_ingestion jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.currently_running + exported_name: jobs_replication_stream_ingestion_currently_running + labeled_name: 'jobs{type: replication_stream_ingestion, status: currently_running}' + description: Number of replication_stream_ingestion jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.expired_pts_records + exported_name: jobs_replication_stream_ingestion_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: replication_stream_ingestion}' + description: Number of expired protected timestamp records owned by replication_stream_ingestion jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.fail_or_cancel_completed + exported_name: jobs_replication_stream_ingestion_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: completed}' + description: Number of replication_stream_ingestion jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.fail_or_cancel_failed + exported_name: jobs_replication_stream_ingestion_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: failed}' + description: Number of replication_stream_ingestion jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.fail_or_cancel_retry_error + exported_name: jobs_replication_stream_ingestion_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_ingestion, status: retry_error}' + description: Number of replication_stream_ingestion jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.protected_age_sec + 
exported_name: jobs_replication_stream_ingestion_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: replication_stream_ingestion}' + description: The age of the oldest PTS record protected by replication_stream_ingestion jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.protected_record_count + exported_name: jobs_replication_stream_ingestion_protected_record_count + labeled_name: 'jobs.protected_record_count{type: replication_stream_ingestion}' + description: Number of protected timestamp records held by replication_stream_ingestion jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_ingestion.resume_completed + exported_name: jobs_replication_stream_ingestion_resume_completed + labeled_name: 'jobs.resume{name: replication_stream_ingestion, status: completed}' + description: Number of replication_stream_ingestion jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.resume_failed + exported_name: jobs_replication_stream_ingestion_resume_failed + labeled_name: 'jobs.resume{name: replication_stream_ingestion, status: failed}' + description: Number of replication_stream_ingestion jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_ingestion.resume_retry_error + exported_name: jobs_replication_stream_ingestion_resume_retry_error + labeled_name: 'jobs.resume{name: replication_stream_ingestion, status: retry_error}' + description: Number of replication_stream_ingestion jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.currently_idle + exported_name: jobs_replication_stream_producer_currently_idle + labeled_name: 'jobs{type: replication_stream_producer, status: currently_idle}' + description: Number of replication_stream_producer jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.currently_paused + exported_name: jobs_replication_stream_producer_currently_paused + labeled_name: 'jobs{name: replication_stream_producer, status: currently_paused}' + description: Number of replication_stream_producer jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.currently_running + exported_name: jobs_replication_stream_producer_currently_running + labeled_name: 'jobs{type: replication_stream_producer, status: currently_running}' + description: Number of replication_stream_producer jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.expired_pts_records + exported_name: jobs_replication_stream_producer_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: replication_stream_producer}' + description: Number of expired protected timestamp records owned by replication_stream_producer jobs + y_axis_label: records + type: COUNTER + 
unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.fail_or_cancel_completed + exported_name: jobs_replication_stream_producer_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: completed}' + description: Number of replication_stream_producer jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.fail_or_cancel_failed + exported_name: jobs_replication_stream_producer_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: failed}' + description: Number of replication_stream_producer jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.fail_or_cancel_retry_error + exported_name: jobs_replication_stream_producer_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: replication_stream_producer, status: retry_error}' + description: Number of replication_stream_producer jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.protected_age_sec + exported_name: jobs_replication_stream_producer_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: replication_stream_producer}' + description: The age of the oldest PTS record protected by replication_stream_producer jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.protected_record_count + exported_name: jobs_replication_stream_producer_protected_record_count + labeled_name: 'jobs.protected_record_count{type: replication_stream_producer}' + description: Number of protected timestamp records held by replication_stream_producer jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.replication_stream_producer.resume_completed + exported_name: jobs_replication_stream_producer_resume_completed + labeled_name: 'jobs.resume{name: replication_stream_producer, status: completed}' + description: Number of replication_stream_producer jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.resume_failed + exported_name: jobs_replication_stream_producer_resume_failed + labeled_name: 'jobs.resume{name: replication_stream_producer, status: failed}' + description: Number of replication_stream_producer jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.replication_stream_producer.resume_retry_error + exported_name: jobs_replication_stream_producer_resume_retry_error + labeled_name: 'jobs.resume{name: replication_stream_producer, status: retry_error}' + description: Number of replication_stream_producer jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
jobs.restore.currently_idle + exported_name: jobs_restore_currently_idle + labeled_name: 'jobs{type: restore, status: currently_idle}' + description: Number of restore jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.restore.currently_paused + exported_name: jobs_restore_currently_paused + labeled_name: 'jobs{name: restore, status: currently_paused}' + description: Number of restore jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.restore.currently_running + exported_name: jobs_restore_currently_running + labeled_name: 'jobs{type: restore, status: currently_running}' + description: Number of restore jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.restore.expired_pts_records + exported_name: jobs_restore_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: restore}' + description: Number of expired protected timestamp records owned by restore jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.fail_or_cancel_completed + exported_name: jobs_restore_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: restore, status: completed}' + description: Number of restore jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.fail_or_cancel_failed + exported_name: jobs_restore_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: restore, status: failed}' + description: Number of restore jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.fail_or_cancel_retry_error + exported_name: jobs_restore_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: restore, status: retry_error}' + description: Number of restore jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.protected_age_sec + exported_name: jobs_restore_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: restore}' + description: The age of the oldest PTS record protected by restore jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.restore.protected_record_count + exported_name: jobs_restore_protected_record_count + labeled_name: 'jobs.protected_record_count{type: restore}' + description: Number of protected timestamp records held by restore jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.restore.resume_completed + exported_name: jobs_restore_resume_completed + labeled_name: 'jobs.resume{name: restore, status: completed}' + description: Number of restore jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.resume_failed + exported_name: jobs_restore_resume_failed + labeled_name: 
'jobs.resume{name: restore, status: failed}' + description: Number of restore jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.restore.resume_retry_error + exported_name: jobs_restore_resume_retry_error + labeled_name: 'jobs.resume{name: restore, status: retry_error}' + description: Number of restore jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.resumed_claimed_jobs + exported_name: jobs_resumed_claimed_jobs + description: number of claimed-jobs resumed in job-adopt iterations + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.currently_idle + exported_name: jobs_row_level_ttl_currently_idle + labeled_name: 'jobs{type: row_level_ttl, status: currently_idle}' + description: Number of row_level_ttl jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.row_level_ttl.expired_pts_records + exported_name: jobs_row_level_ttl_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: row_level_ttl}' + description: Number of expired protected timestamp records owned by row_level_ttl jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.fail_or_cancel_completed + exported_name: jobs_row_level_ttl_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: completed}' + description: Number of row_level_ttl jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.fail_or_cancel_failed + exported_name: jobs_row_level_ttl_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: failed}' + description: Number of row_level_ttl jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.fail_or_cancel_retry_error + exported_name: jobs_row_level_ttl_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: row_level_ttl, status: retry_error}' + description: Number of row_level_ttl jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.num_delete_batch_retries + exported_name: jobs_row_level_ttl_num_delete_batch_retries + description: Number of times the row level TTL job had to reduce the delete batch size and retry. 
+ y_axis_label: num_retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.row_level_ttl.protected_age_sec + exported_name: jobs_row_level_ttl_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: row_level_ttl}' + description: The age of the oldest PTS record protected by row_level_ttl jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.row_level_ttl.protected_record_count + exported_name: jobs_row_level_ttl_protected_record_count + labeled_name: 'jobs.protected_record_count{type: row_level_ttl}' + description: Number of protected timestamp records held by row_level_ttl jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.row_level_ttl.resume_retry_error + exported_name: jobs_row_level_ttl_resume_retry_error + labeled_name: 'jobs.resume{name: row_level_ttl, status: retry_error}' + description: Number of row_level_ttl jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.running_non_idle + exported_name: jobs_running_non_idle + description: number of running jobs that are not idle + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.currently_idle + exported_name: jobs_schema_change_currently_idle + labeled_name: 'jobs{type: schema_change, status: currently_idle}' + description: Number of schema_change jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.currently_paused + exported_name: jobs_schema_change_currently_paused + labeled_name: 'jobs{name: schema_change, status: currently_paused}' + description: Number of schema_change jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.currently_running + exported_name: jobs_schema_change_currently_running + labeled_name: 'jobs{type: schema_change, status: currently_running}' + description: Number of schema_change jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.expired_pts_records + exported_name: jobs_schema_change_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: schema_change}' + description: Number of expired protected timestamp records owned by schema_change jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.fail_or_cancel_completed + exported_name: jobs_schema_change_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: completed}' + description: Number of schema_change jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.fail_or_cancel_failed + exported_name: jobs_schema_change_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: failed}' + description: Number of schema_change jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.fail_or_cancel_retry_error + exported_name: jobs_schema_change_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: schema_change, status: retry_error}' + description: Number of schema_change jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.protected_age_sec + exported_name: jobs_schema_change_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: schema_change}' + description: The age of the oldest PTS record protected by schema_change jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.protected_record_count + exported_name: jobs_schema_change_protected_record_count + labeled_name: 'jobs.protected_record_count{type: schema_change}' + description: Number of protected timestamp records held by schema_change jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change.resume_completed + exported_name: jobs_schema_change_resume_completed + labeled_name: 'jobs.resume{name: schema_change, status: completed}' + description: Number of schema_change jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.resume_failed + exported_name: jobs_schema_change_resume_failed + labeled_name: 'jobs.resume{name: schema_change, status: failed}' + description: Number of schema_change jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change.resume_retry_error + exported_name: jobs_schema_change_resume_retry_error + labeled_name: 'jobs.resume{name: schema_change, status: retry_error}' + description: Number of schema_change jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.currently_idle + exported_name: jobs_schema_change_gc_currently_idle + labeled_name: 'jobs{type: schema_change_gc, status: currently_idle}' + description: Number of schema_change_gc jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.currently_paused + exported_name: jobs_schema_change_gc_currently_paused + labeled_name: 'jobs{name: schema_change_gc, status: currently_paused}' + description: Number of schema_change_gc jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.currently_running + exported_name: jobs_schema_change_gc_currently_running + labeled_name: 'jobs{type: schema_change_gc, status: currently_running}' + description: Number of schema_change_gc jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.expired_pts_records + exported_name: jobs_schema_change_gc_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: schema_change_gc}' + description: Number of expired protected timestamp records owned 
by schema_change_gc jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.fail_or_cancel_completed + exported_name: jobs_schema_change_gc_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: completed}' + description: Number of schema_change_gc jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.fail_or_cancel_failed + exported_name: jobs_schema_change_gc_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: failed}' + description: Number of schema_change_gc jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.fail_or_cancel_retry_error + exported_name: jobs_schema_change_gc_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: schema_change_gc, status: retry_error}' + description: Number of schema_change_gc jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.protected_age_sec + exported_name: jobs_schema_change_gc_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: schema_change_gc}' + description: The age of the oldest PTS record protected by schema_change_gc jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.protected_record_count + exported_name: jobs_schema_change_gc_protected_record_count + labeled_name: 'jobs.protected_record_count{type: schema_change_gc}' + description: Number of protected timestamp records held by schema_change_gc jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.schema_change_gc.resume_completed + exported_name: jobs_schema_change_gc_resume_completed + labeled_name: 'jobs.resume{name: schema_change_gc, status: completed}' + description: Number of schema_change_gc jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.resume_failed + exported_name: jobs_schema_change_gc_resume_failed + labeled_name: 'jobs.resume{name: schema_change_gc, status: failed}' + description: Number of schema_change_gc jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.schema_change_gc.resume_retry_error + exported_name: jobs_schema_change_gc_resume_retry_error + labeled_name: 'jobs.resume{name: schema_change_gc, status: retry_error}' + description: Number of schema_change_gc jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.currently_idle + exported_name: jobs_sql_activity_flush_currently_idle + labeled_name: 'jobs{type: sql_activity_flush, status: currently_idle}' + description: Number of sql_activity_flush jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + 
type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.currently_paused + exported_name: jobs_sql_activity_flush_currently_paused + labeled_name: 'jobs{name: sql_activity_flush, status: currently_paused}' + description: Number of sql_activity_flush jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.currently_running + exported_name: jobs_sql_activity_flush_currently_running + labeled_name: 'jobs{type: sql_activity_flush, status: currently_running}' + description: Number of sql_activity_flush jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.expired_pts_records + exported_name: jobs_sql_activity_flush_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: sql_activity_flush}' + description: Number of expired protected timestamp records owned by sql_activity_flush jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.fail_or_cancel_completed + exported_name: jobs_sql_activity_flush_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: sql_activity_flush, status: completed}' + description: Number of sql_activity_flush jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.fail_or_cancel_failed + exported_name: jobs_sql_activity_flush_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: sql_activity_flush, status: failed}' + description: Number of sql_activity_flush jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.fail_or_cancel_retry_error + exported_name: jobs_sql_activity_flush_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: sql_activity_flush, status: retry_error}' + description: Number of sql_activity_flush jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.protected_age_sec + exported_name: jobs_sql_activity_flush_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: sql_activity_flush}' + description: The age of the oldest PTS record protected by sql_activity_flush jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.protected_record_count + exported_name: jobs_sql_activity_flush_protected_record_count + labeled_name: 'jobs.protected_record_count{type: sql_activity_flush}' + description: Number of protected timestamp records held by sql_activity_flush jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.sql_activity_flush.resume_completed + exported_name: jobs_sql_activity_flush_resume_completed + labeled_name: 'jobs.resume{name: sql_activity_flush, status: completed}' + description: Number of sql_activity_flush jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: 
AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.resume_failed + exported_name: jobs_sql_activity_flush_resume_failed + labeled_name: 'jobs.resume{name: sql_activity_flush, status: failed}' + description: Number of sql_activity_flush jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.sql_activity_flush.resume_retry_error + exported_name: jobs_sql_activity_flush_resume_retry_error + labeled_name: 'jobs.resume{name: sql_activity_flush, status: retry_error}' + description: Number of sql_activity_flush jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.currently_idle + exported_name: jobs_standby_read_ts_poller_currently_idle + labeled_name: 'jobs{type: standby_read_ts_poller, status: currently_idle}' + description: Number of standby_read_ts_poller jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.currently_paused + exported_name: jobs_standby_read_ts_poller_currently_paused + labeled_name: 'jobs{name: standby_read_ts_poller, status: currently_paused}' + description: Number of standby_read_ts_poller jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.currently_running + exported_name: jobs_standby_read_ts_poller_currently_running + labeled_name: 'jobs{type: standby_read_ts_poller, status: currently_running}' + description: Number of standby_read_ts_poller jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.expired_pts_records + exported_name: jobs_standby_read_ts_poller_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: standby_read_ts_poller}' + description: Number of expired protected timestamp records owned by standby_read_ts_poller jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.fail_or_cancel_completed + exported_name: jobs_standby_read_ts_poller_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: completed}' + description: Number of standby_read_ts_poller jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.fail_or_cancel_failed + exported_name: jobs_standby_read_ts_poller_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: failed}' + description: Number of standby_read_ts_poller jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.fail_or_cancel_retry_error + exported_name: jobs_standby_read_ts_poller_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: standby_read_ts_poller, status: retry_error}' + description: Number of standby_read_ts_poller jobs which failed with a retriable error 
on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.protected_age_sec + exported_name: jobs_standby_read_ts_poller_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: standby_read_ts_poller}' + description: The age of the oldest PTS record protected by standby_read_ts_poller jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.protected_record_count + exported_name: jobs_standby_read_ts_poller_protected_record_count + labeled_name: 'jobs.protected_record_count{type: standby_read_ts_poller}' + description: Number of protected timestamp records held by standby_read_ts_poller jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.standby_read_ts_poller.resume_completed + exported_name: jobs_standby_read_ts_poller_resume_completed + labeled_name: 'jobs.resume{name: standby_read_ts_poller, status: completed}' + description: Number of standby_read_ts_poller jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.resume_failed + exported_name: jobs_standby_read_ts_poller_resume_failed + labeled_name: 'jobs.resume{name: standby_read_ts_poller, status: failed}' + description: Number of standby_read_ts_poller jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.standby_read_ts_poller.resume_retry_error + exported_name: jobs_standby_read_ts_poller_resume_retry_error + labeled_name: 'jobs.resume{name: standby_read_ts_poller, status: retry_error}' + description: Number of standby_read_ts_poller jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.currently_idle + exported_name: jobs_typedesc_schema_change_currently_idle + labeled_name: 'jobs{type: typedesc_schema_change, status: currently_idle}' + description: Number of typedesc_schema_change jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.currently_paused + exported_name: jobs_typedesc_schema_change_currently_paused + labeled_name: 'jobs{name: typedesc_schema_change, status: currently_paused}' + description: Number of typedesc_schema_change jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.currently_running + exported_name: jobs_typedesc_schema_change_currently_running + labeled_name: 'jobs{type: typedesc_schema_change, status: currently_running}' + description: Number of typedesc_schema_change jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.expired_pts_records + exported_name: jobs_typedesc_schema_change_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: typedesc_schema_change}' + description: Number of expired protected timestamp records owned by typedesc_schema_change jobs + y_axis_label: records + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.fail_or_cancel_completed + exported_name: jobs_typedesc_schema_change_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: completed}' + description: Number of typedesc_schema_change jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.fail_or_cancel_failed + exported_name: jobs_typedesc_schema_change_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: failed}' + description: Number of typedesc_schema_change jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.fail_or_cancel_retry_error + exported_name: jobs_typedesc_schema_change_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: typedesc_schema_change, status: retry_error}' + description: Number of typedesc_schema_change jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.protected_age_sec + exported_name: jobs_typedesc_schema_change_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: typedesc_schema_change}' + description: The age of the oldest PTS record protected by typedesc_schema_change jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.protected_record_count + exported_name: jobs_typedesc_schema_change_protected_record_count + labeled_name: 'jobs.protected_record_count{type: typedesc_schema_change}' + description: Number of protected timestamp records held by typedesc_schema_change jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.typedesc_schema_change.resume_completed + exported_name: jobs_typedesc_schema_change_resume_completed + labeled_name: 'jobs.resume{name: typedesc_schema_change, status: completed}' + description: Number of typedesc_schema_change jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.resume_failed + exported_name: jobs_typedesc_schema_change_resume_failed + labeled_name: 'jobs.resume{name: typedesc_schema_change, status: failed}' + description: Number of typedesc_schema_change jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.typedesc_schema_change.resume_retry_error + exported_name: jobs_typedesc_schema_change_resume_retry_error + labeled_name: 'jobs.resume{name: typedesc_schema_change, status: retry_error}' + description: Number of typedesc_schema_change jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.currently_idle + exported_name: jobs_update_table_metadata_cache_currently_idle + labeled_name: 'jobs{type: 
update_table_metadata_cache, status: currently_idle}' + description: Number of update_table_metadata_cache jobs currently considered Idle and can be freely shut down + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.currently_paused + exported_name: jobs_update_table_metadata_cache_currently_paused + labeled_name: 'jobs{name: update_table_metadata_cache, status: currently_paused}' + description: Number of update_table_metadata_cache jobs currently considered Paused + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.currently_running + exported_name: jobs_update_table_metadata_cache_currently_running + labeled_name: 'jobs{type: update_table_metadata_cache, status: currently_running}' + description: Number of update_table_metadata_cache jobs currently running in Resume or OnFailOrCancel state + y_axis_label: jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.expired_pts_records + exported_name: jobs_update_table_metadata_cache_expired_pts_records + labeled_name: 'jobs.expired_pts_records{type: update_table_metadata_cache}' + description: Number of expired protected timestamp records owned by update_table_metadata_cache jobs + y_axis_label: records + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.fail_or_cancel_completed + exported_name: jobs_update_table_metadata_cache_fail_or_cancel_completed + labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: completed}' + description: Number of update_table_metadata_cache jobs which successfully completed their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.fail_or_cancel_failed + exported_name: jobs_update_table_metadata_cache_fail_or_cancel_failed + labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: failed}' + description: Number of update_table_metadata_cache jobs which failed with a non-retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.fail_or_cancel_retry_error + exported_name: jobs_update_table_metadata_cache_fail_or_cancel_retry_error + labeled_name: 'jobs.fail_or_cancel{name: update_table_metadata_cache, status: retry_error}' + description: Number of update_table_metadata_cache jobs which failed with a retriable error on their failure or cancelation process + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.protected_age_sec + exported_name: jobs_update_table_metadata_cache_protected_age_sec + labeled_name: 'jobs.protected_age_sec{type: update_table_metadata_cache}' + description: The age of the oldest PTS record protected by update_table_metadata_cache jobs + y_axis_label: seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.protected_record_count + exported_name: jobs_update_table_metadata_cache_protected_record_count + labeled_name: 'jobs.protected_record_count{type: update_table_metadata_cache}' + description: Number of protected timestamp 
records held by update_table_metadata_cache jobs + y_axis_label: records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: jobs.update_table_metadata_cache.resume_completed + exported_name: jobs_update_table_metadata_cache_resume_completed + labeled_name: 'jobs.resume{name: update_table_metadata_cache, status: completed}' + description: Number of update_table_metadata_cache jobs which successfully resumed to completion + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.resume_failed + exported_name: jobs_update_table_metadata_cache_resume_failed + labeled_name: 'jobs.resume{name: update_table_metadata_cache, status: failed}' + description: Number of update_table_metadata_cache jobs which failed with a non-retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: jobs.update_table_metadata_cache.resume_retry_error + exported_name: jobs_update_table_metadata_cache_resume_retry_error + labeled_name: 'jobs.resume{name: update_table_metadata_cache, status: retry_error}' + description: Number of update_table_metadata_cache jobs which failed with a retriable error + y_axis_label: jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.errors + exported_name: kv_protectedts_reconciliation_errors + description: number of errors encountered during reconciliation runs on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.num_runs + exported_name: kv_protectedts_reconciliation_num_runs + description: number of successful reconciliation runs on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.records_processed + exported_name: kv_protectedts_reconciliation_records_processed + description: number of records processed without error during reconciliation on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.protectedts.reconciliation.records_removed + exported_name: kv_protectedts_reconciliation_records_removed + description: number of records removed during reconciliation runs on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.streamer.batches.in_progress + exported_name: kv_streamer_batches_in_progress + description: Number of BatchRequests in progress across all KV Streamer operators + y_axis_label: Batches + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.streamer.batches.sent + exported_name: kv_streamer_batches_sent + description: Number of BatchRequests sent across all KV Streamer operators + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.streamer.batches.throttled + exported_name: kv_streamer_batches_throttled + description: Number of BatchRequests currently being throttled due to reaching the concurrency limit, across all KV Streamer operators + y_axis_label: Batches + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.streamer.operators.active + exported_name: kv_streamer_operators_active + description: Number of KV 
Streamer operators currently in use + y_axis_label: Operators + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.batch_hist_nanos + exported_name: logical_replication_batch_hist_nanos + description: Time spent per row flushing a batch + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.catchup_ranges + exported_name: logical_replication_catchup_ranges + description: Source side ranges undergoing catch up scans (inaccurate with multiple LDR jobs) + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.catchup_ranges_by_label + exported_name: logical_replication_catchup_ranges_by_label + description: Source side ranges undergoing catch up scans + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.checkpoint_events_ingested + exported_name: logical_replication_checkpoint_events_ingested + description: Checkpoint events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.commit_latency + exported_name: logical_replication_commit_latency + description: 'Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. If we batch events, then the difference between the oldest event in the batch and flush is recorded' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.events_dlqed + exported_name: logical_replication_events_dlqed + description: Row update events sent to DLQ + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_age + exported_name: logical_replication_events_dlqed_age + description: Row update events sent to DLQ due to reaching the maximum time allowed in the retry queue + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_by_label + exported_name: logical_replication_events_dlqed_by_label + description: Row update events sent to DLQ by label + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_errtype + exported_name: logical_replication_events_dlqed_errtype + description: Row update events sent to DLQ due to an error not considered retryable + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_dlqed_space + exported_name: logical_replication_events_dlqed_space + description: Row update events sent to DLQ due to capacity of the retry queue + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_ingested + exported_name: logical_replication_events_ingested + description: Events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_ingested_by_label + exported_name: logical_replication_events_ingested_by_label + description: Events ingested by all replication jobs by label + 
y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_initial_failure + exported_name: logical_replication_events_initial_failure + description: Failed attempts to apply an incoming row update + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_initial_success + exported_name: logical_replication_events_initial_success + description: Successful applications of an incoming row update + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_retry_failure + exported_name: logical_replication_events_retry_failure + description: Failed re-attempts to apply a row update + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.events_retry_success + exported_name: logical_replication_events_retry_success + description: Row update events applied after one or more retries + y_axis_label: Failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.kv.update_too_old + exported_name: logical_replication_kv_update_too_old + description: Total number of updates that were not applied because they were too old + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.kv.value_refreshes + exported_name: logical_replication_kv_value_refreshes + description: Total number of batches that refreshed the previous value + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.logical_bytes + exported_name: logical_replication_logical_bytes + description: Logical bytes (sum of keys + values) received by all replication jobs + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.replan_count + exported_name: logical_replication_replan_count + description: Total number of dist sql replanning events + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: logical_replication.replicated_time_by_label + exported_name: logical_replication_replicated_time_by_label + description: Replicated time of the logical replication stream by label + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.replicated_time_seconds + exported_name: logical_replication_replicated_time_seconds + description: The replicated time of the logical replication stream in seconds since the unix epoch. 
+ y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: logical_replication.retry_queue_bytes + exported_name: logical_replication_retry_queue_bytes + description: Logical bytes (sum of keys+values) in the retry queue + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: logical_replication.retry_queue_events + exported_name: logical_replication_retry_queue_events + description: Row update events in the retry queue + y_axis_label: Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.scanning_ranges + exported_name: logical_replication_scanning_ranges + description: Source side ranges undergoing an initial scan (inaccurate with multiple LDR jobs) + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: logical_replication.scanning_ranges_by_label + exported_name: logical_replication_scanning_ranges_by_label + description: Source side ranges undergoing an initial scan + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: obs.tablemetadata.update_job.duration + exported_name: obs_tablemetadata_update_job_duration + description: Time spent running the update table metadata job. + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: obs.tablemetadata.update_job.errors + exported_name: obs_tablemetadata_update_job_errors + description: The total number of errors that have been emitted from the update table metadata job. + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: obs.tablemetadata.update_job.runs + exported_name: obs_tablemetadata_update_job_runs + description: The total number of runs of the update table metadata job. + y_axis_label: Executions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: obs.tablemetadata.update_job.table_updates + exported_name: obs_tablemetadata_update_job_table_updates + description: The total number of rows that have been updated in system.table_metadata + y_axis_label: Rows Updated + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.admit_latency + exported_name: physical_replication_admit_latency + description: 'Event admission latency: a difference between event MVCC timestamp and the time it was admitted into ingestion processor' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.commit_latency + exported_name: physical_replication_commit_latency + description: 'Event commit latency: a difference between event MVCC timestamp and the time it was flushed into disk. 
If we batch events, then the difference between the oldest event in the batch and flush is recorded' + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.distsql_replan_count + exported_name: physical_replication_distsql_replan_count + description: Total number of dist sql replanning events + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.events_ingested + exported_name: physical_replication_events_ingested + description: Events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.failover_progress + exported_name: physical_replication_failover_progress + description: The number of ranges left to revert in order to complete an inflight cutover + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: physical_replication.flush_hist_nanos + exported_name: physical_replication_flush_hist_nanos + description: Time spent flushing messages across all replication streams + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.flushes + exported_name: physical_replication_flushes + description: Total flushes across all replication jobs + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.logical_bytes + exported_name: physical_replication_logical_bytes + description: Logical bytes (sum of keys + values) ingested by all replication jobs + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.replicated_time_seconds + exported_name: physical_replication_replicated_time_seconds + description: The replicated time of the physical replication stream in seconds since the unix epoch. + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: physical_replication.resolved_events_ingested + exported_name: physical_replication_resolved_events_ingested + description: Resolved events ingested by all replication jobs + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: physical_replication.running + exported_name: physical_replication_running + description: Number of currently running replication streams + y_axis_label: Replication Streams + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.distsender + exported_name: requests_slow_distsender + description: |- + Number of range-bound RPCs currently stuck or retrying for a long time. + + Note that this is not a good signal for KV health. The remote side of the + RPCs tracked here may experience contention, so an end user can easily + cause values for this metric to be emitted by leaving a transaction open + for a long time and contending with it using a second transaction. + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: round-trip-latency + exported_name: round_trip_latency + description: | + Distribution of round-trip latencies with other nodes. + + This only reflects successful heartbeats and measures gRPC overhead as well as + possible head-of-line blocking. 
Elevated values in this metric may hint at + network issues and/or saturation, but they are no proof of them. CPU overload + can similarly elevate this metric. The operator should look towards OS-level + metrics such as packet loss, retransmits, etc, to conclusively diagnose network + issues. Heartbeats are not very frequent (~seconds), so they may not capture + rare or short-lived degradations. + y_axis_label: Round-trip time + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: rpc.client.bytes.egress + exported_name: rpc_client_bytes_egress + description: Counter of TCP bytes sent via gRPC on connections we initiated. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.client.bytes.ingress + exported_name: rpc_client_bytes_ingress + description: Counter of TCP bytes received via gRPC on connections we initiated. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.connection.connected + exported_name: rpc_connection_connected + description: | + Counter of TCP level connected connections. + + This metric is the number of gRPC connections from the TCP level. Unlike rpc.connection.healthy + this metric does not take into account whether the application has been able to heartbeat + over this connection. + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rpc.connection.inactive + exported_name: rpc_connection_inactive + description: Gauge of current connections in an inactive state and pending deletion; these are not healthy but are not tracked as unhealthy either because there is reason to believe that the connection is no longer relevant,for example if the node has since been seen under a new address + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rpc.server.request.duration.nanos + exported_name: rpc_server_request_duration_nanos + description: Duration of an grpc request in nanoseconds. 
+ y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.last-completed-time-by-virtual_cluster + exported_name: schedules_BACKUP_last_completed_time_by_virtual_cluster + description: The unix timestamp of the most recently completed host scheduled backup by virtual cluster specified as maintaining this metric + y_axis_label: Jobs + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.protected_age_sec + exported_name: schedules_BACKUP_protected_age_sec + labeled_name: 'schedules.protected_age_sec{name: BACKUP}' + description: The age of the oldest PTS record protected by BACKUP schedules + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.protected_record_count + exported_name: schedules_BACKUP_protected_record_count + labeled_name: 'schedules.protected_record_count{name: BACKUP}' + description: Number of PTS records held by BACKUP schedules + y_axis_label: Records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.BACKUP.started + exported_name: schedules_BACKUP_started + labeled_name: 'schedules{name: BACKUP, status: started}' + description: Number of BACKUP jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.BACKUP.succeeded + exported_name: schedules_BACKUP_succeeded + labeled_name: 'schedules{name: BACKUP, status: succeeded}' + description: Number of BACKUP jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.CHANGEFEED.failed + exported_name: schedules_CHANGEFEED_failed + labeled_name: 'schedules{name: CHANGEFEED, status: failed}' + description: Number of CHANGEFEED jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.CHANGEFEED.started + exported_name: schedules_CHANGEFEED_started + labeled_name: 'schedules{name: CHANGEFEED, status: started}' + description: Number of CHANGEFEED jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.CHANGEFEED.succeeded + exported_name: schedules_CHANGEFEED_succeeded + labeled_name: 'schedules{name: CHANGEFEED, status: succeeded}' + description: Number of CHANGEFEED jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.error + exported_name: schedules_error + description: Number of schedules which did not execute successfully + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.malformed + exported_name: schedules_malformed + description: Number of malformed schedules + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.round.jobs-started + exported_name: schedules_round_jobs_started + description: The number of jobs started + y_axis_label: Jobs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.round.reschedule-skip + exported_name: schedules_round_reschedule_skip + description: The number of schedules rescheduled due to SKIP policy + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
schedules.round.reschedule-wait + exported_name: schedules_round_reschedule_wait + description: The number of schedules rescheduled due to WAIT policy + y_axis_label: Schedules + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: schedules.scheduled-row-level-ttl-executor.started + exported_name: schedules_scheduled_row_level_ttl_executor_started + labeled_name: 'schedules{name: scheduled-row-level-ttl-executor, status: started}' + description: Number of scheduled-row-level-ttl-executor jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-row-level-ttl-executor.succeeded + exported_name: schedules_scheduled_row_level_ttl_executor_succeeded + labeled_name: 'schedules{name: scheduled-row-level-ttl-executor, status: succeeded}' + description: Number of scheduled-row-level-ttl-executor jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-schema-telemetry-executor.failed + exported_name: schedules_scheduled_schema_telemetry_executor_failed + labeled_name: 'schedules{name: scheduled-schema-telemetry-executor, status: failed}' + description: Number of scheduled-schema-telemetry-executor jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-schema-telemetry-executor.started + exported_name: schedules_scheduled_schema_telemetry_executor_started + labeled_name: 'schedules{name: scheduled-schema-telemetry-executor, status: started}' + description: Number of scheduled-schema-telemetry-executor jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-schema-telemetry-executor.succeeded + exported_name: schedules_scheduled_schema_telemetry_executor_succeeded + labeled_name: 'schedules{name: scheduled-schema-telemetry-executor, status: succeeded}' + description: Number of scheduled-schema-telemetry-executor jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-sql-stats-compaction-executor.failed + exported_name: schedules_scheduled_sql_stats_compaction_executor_failed + labeled_name: 'schedules{name: scheduled-sql-stats-compaction-executor, status: failed}' + description: Number of scheduled-sql-stats-compaction-executor jobs failed + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-sql-stats-compaction-executor.started + exported_name: schedules_scheduled_sql_stats_compaction_executor_started + labeled_name: 'schedules{name: scheduled-sql-stats-compaction-executor, status: started}' + description: Number of scheduled-sql-stats-compaction-executor jobs started + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: schedules.scheduled-sql-stats-compaction-executor.succeeded + exported_name: schedules_scheduled_sql_stats_compaction_executor_succeeded + labeled_name: 'schedules{name: scheduled-sql-stats-compaction-executor, status: succeeded}' + description: Number of scheduled-sql-stats-compaction-executor jobs succeeded + y_axis_label: Jobs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
server.http.request.duration.nanos + exported_name: server_http_request_duration_nanos + description: Duration of an HTTP request in nanoseconds. + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.bytesin + exported_name: sql_bytesin + description: Number of SQL bytes received + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.bytesout + exported_name: sql_bytesout + description: Number of SQL bytes sent + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.count + exported_name: sql_call_stored_proc_count + description: Number of successfully executed stored procedure calls + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.count.internal + exported_name: sql_call_stored_proc_count_internal + description: Number of successfully executed stored procedure calls (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.started.count + exported_name: sql_call_stored_proc_started_count + description: Number of invocation of stored procedures via CALL statements + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.call_stored_proc.started.count.internal + exported_name: sql_call_stored_proc_started_count_internal + description: Number of invocation of stored procedures via CALL statements (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.conns_waiting_to_hash + exported_name: sql_conns_waiting_to_hash + description: Number of SQL connection attempts that are being throttled in order to limit password hashing concurrency + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.contention.resolver.failed_resolutions + exported_name: sql_contention_resolver_failed_resolutions + description: Number of failed transaction ID resolution attempts + y_axis_label: Failed transaction ID resolution count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.contention.resolver.queue_size + exported_name: sql_contention_resolver_queue_size + description: Length of queued unresolved contention events + y_axis_label: Queue length + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.contention.resolver.retries + exported_name: sql_contention_resolver_retries + description: Number of times transaction id resolution has been retried + y_axis_label: Retry count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.contention.txn_id_cache.miss + exported_name: sql_contention_txn_id_cache_miss + description: Number of cache misses + y_axis_label: Cache miss + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.contention.txn_id_cache.read + exported_name: sql_contention_txn_id_cache_read + description: Number of cache read + y_axis_label: Cache read + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.count + exported_name: 
sql_copy_count + description: Number of COPY SQL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.count.internal + exported_name: sql_copy_count_internal + description: Number of COPY SQL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.count + exported_name: sql_copy_nonatomic_count + description: Number of non-atomic COPY SQL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.count.internal + exported_name: sql_copy_nonatomic_count_internal + description: Number of non-atomic COPY SQL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.started.count + exported_name: sql_copy_nonatomic_started_count + description: Number of non-atomic COPY SQL statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.nonatomic.started.count.internal + exported_name: sql_copy_nonatomic_started_count_internal + description: Number of non-atomic COPY SQL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.started.count + exported_name: sql_copy_started_count + description: Number of COPY SQL statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.copy.started.count.internal + exported_name: sql_copy_started_count_internal + description: Number of COPY SQL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.count + exported_name: sql_crud_query_count + description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.count.internal + exported_name: sql_crud_query_count_internal + description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.started.count + exported_name: sql_crud_query_started_count + description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.crud_query.started.count.internal + exported_name: sql_crud_query_started_count_internal + description: Number of SQL SELECT, INSERT, UPDATE, DELETE statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.ddl.started.count + exported_name: sql_ddl_started_count + description: Number of SQL DDL statements started + 
y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.ddl.started.count.internal + exported_name: sql_ddl_started_count_internal + description: Number of SQL DDL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.delete.started.count + exported_name: sql_delete_started_count + description: Number of SQL DELETE statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.delete.started.count.internal + exported_name: sql_delete_started_count_internal + description: Number of SQL DELETE statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.disk.distsql.current + exported_name: sql_disk_distsql_current + description: Current sql statement disk usage for distsql + y_axis_label: Disk + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.disk.distsql.max + exported_name: sql_disk_distsql_max + description: Disk usage per sql statement for distsql + y_axis_label: Disk + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.disk.distsql.spilled.bytes.read + exported_name: sql_disk_distsql_spilled_bytes_read + description: Number of bytes read from temporary disk storage as a result of spilling + y_axis_label: Disk + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.disk.distsql.spilled.bytes.written + exported_name: sql_disk_distsql_spilled_bytes_written + description: Number of bytes written to temporary disk storage as a result of spilling + y_axis_label: Disk + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.cumulative_contention_nanos + exported_name: sql_distsql_cumulative_contention_nanos + description: Cumulative contention across all queries (in nanoseconds) + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.dist_query_rerun_locally.count + exported_name: sql_distsql_dist_query_rerun_locally_count + description: Total number of cases when distributed query error resulted in a local rerun + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.dist_query_rerun_locally.failure_count + exported_name: sql_distsql_dist_query_rerun_locally_failure_count + description: Total number of cases when the local rerun of a distributed query resulted in an error + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.distributed_exec.count + exported_name: sql_distsql_distributed_exec_count + description: Number of invocations of the execution engine executed with full or partial distribution (multiple of which may occur for a single SQL statement) + y_axis_label: DistSQL runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.exec.latency + exported_name: sql_distsql_exec_latency + description: Latency of DistSQL statement execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - 
name: sql.distsql.exec.latency.internal + exported_name: sql_distsql_exec_latency_internal + description: Latency of DistSQL statement execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.distsql.flows.active + exported_name: sql_distsql_flows_active + description: Number of distributed SQL flows currently active + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.distsql.flows.total + exported_name: sql_distsql_flows_total + description: Number of distributed SQL flows executed + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.queries.active + exported_name: sql_distsql_queries_active + description: Number of invocations of the execution engine currently active (multiple of which may occur for a single SQL statement) + y_axis_label: DistSQL runs + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.distsql.queries.spilled + exported_name: sql_distsql_queries_spilled + description: Number of queries that have spilled to disk + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.queries.total + exported_name: sql_distsql_queries_total + description: Number of invocations of the execution engine executed (multiple of which may occur for a single SQL statement) + y_axis_label: DistSQL runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.count + exported_name: sql_distsql_select_count + description: Number of SELECT statements planned to be distributed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.count.internal + exported_name: sql_distsql_select_count_internal + description: Number of SELECT statements planned to be distributed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.distributed_exec.count + exported_name: sql_distsql_select_distributed_exec_count + description: Number of SELECT statements that were distributed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.select.distributed_exec.count.internal + exported_name: sql_distsql_select_distributed_exec_count_internal + description: Number of SELECT statements that were distributed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.distsql.service.latency + exported_name: sql_distsql_service_latency + description: Latency of DistSQL request execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.distsql.service.latency.internal + exported_name: sql_distsql_service_latency_internal + description: Latency of DistSQL request execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.distsql.vec.openfds + exported_name: sql_distsql_vec_openfds + description: Current number of open file descriptors used by vectorized external storage + y_axis_label: Files 
+ type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.exec.latency + exported_name: sql_exec_latency + description: Latency of SQL statement execution + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.consistent + exported_name: sql_exec_latency_consistent + description: Latency of SQL statement execution of non-historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.consistent.internal + exported_name: sql_exec_latency_consistent_internal + description: Latency of SQL statement execution of non-historical queries (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.detail + exported_name: sql_exec_latency_detail + description: Latency of SQL statement execution, by statement fingerprint + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.detail.internal + exported_name: sql_exec_latency_detail_internal + description: Latency of SQL statement execution, by statement fingerprint (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.historical + exported_name: sql_exec_latency_historical + description: Latency of SQL statement execution of historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.historical.internal + exported_name: sql_exec_latency_historical_internal + description: Latency of SQL statement execution of historical queries (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.exec.latency.internal + exported_name: sql_exec_latency_internal + description: Latency of SQL statement execution (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.feature_flag_denial + exported_name: sql_feature_flag_denial + description: Counter of the number of statements denied by a feature flag + y_axis_label: Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.full_scan_rejected.count + exported_name: sql_guardrails_full_scan_rejected_count + description: Number of full table or index scans that have been rejected because of `disallow_full_table_scans` guardrail + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.full_scan_rejected.count.internal + exported_name: sql_guardrails_full_scan_rejected_count_internal + description: Number of full table or index scans that have been rejected because of `disallow_full_table_scans` guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.max_row_size_err.count + exported_name: sql_guardrails_max_row_size_err_count + description: Number of rows observed violating sql.guardrails.max_row_size_err + y_axis_label: Rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
sql.guardrails.max_row_size_err.count.internal + exported_name: sql_guardrails_max_row_size_err_count_internal + description: Number of rows observed violating sql.guardrails.max_row_size_err (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.max_row_size_log.count + exported_name: sql_guardrails_max_row_size_log_count + description: Number of rows observed violating sql.guardrails.max_row_size_log + y_axis_label: Rows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.max_row_size_log.count.internal + exported_name: sql_guardrails_max_row_size_log_count_internal + description: Number of rows observed violating sql.guardrails.max_row_size_log (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_err.count + exported_name: sql_guardrails_transaction_rows_read_err_count + description: Number of transactions errored because of transaction_rows_read_err guardrail + y_axis_label: Errored transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_err.count.internal + exported_name: sql_guardrails_transaction_rows_read_err_count_internal + description: Number of transactions errored because of transaction_rows_read_err guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_log.count + exported_name: sql_guardrails_transaction_rows_read_log_count + description: Number of transactions logged because of transaction_rows_read_log guardrail + y_axis_label: Logged transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_read_log.count.internal + exported_name: sql_guardrails_transaction_rows_read_log_count_internal + description: Number of transactions logged because of transaction_rows_read_log guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_err.count + exported_name: sql_guardrails_transaction_rows_written_err_count + description: Number of transactions errored because of transaction_rows_written_err guardrail + y_axis_label: Errored transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_err.count.internal + exported_name: sql_guardrails_transaction_rows_written_err_count_internal + description: Number of transactions errored because of transaction_rows_written_err guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_log.count + exported_name: sql_guardrails_transaction_rows_written_log_count + description: Number of transactions logged because of transaction_rows_written_log guardrail + y_axis_label: Logged transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.guardrails.transaction_rows_written_log.count.internal 
+ exported_name: sql_guardrails_transaction_rows_written_log_count_internal + description: Number of transactions logged because of transaction_rows_written_log guardrail (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_schema_cache.hits + exported_name: sql_hydrated_schema_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_schema_cache.misses + exported_name: sql_hydrated_schema_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_table_cache.hits + exported_name: sql_hydrated_table_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_table_cache.misses + exported_name: sql_hydrated_table_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_type_cache.hits + exported_name: sql_hydrated_type_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_type_cache.misses + exported_name: sql_hydrated_type_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_udf_cache.hits + exported_name: sql_hydrated_udf_cache_hits + description: counter on the number of cache hits + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.hydrated_udf_cache.misses + exported_name: sql_hydrated_udf_cache_misses + description: counter on the number of cache misses + y_axis_label: reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insert.started.count + exported_name: sql_insert_started_count + description: Number of SQL INSERT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insert.started.count.internal + exported_name: sql_insert_started_count_internal + description: Number of SQL INSERT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insights.anomaly_detection.evictions + exported_name: sql_insights_anomaly_detection_evictions + description: Evictions of fingerprint latency summaries due to memory pressure + y_axis_label: Evictions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.insights.anomaly_detection.fingerprints + exported_name: sql_insights_anomaly_detection_fingerprints + description: Current number of statement fingerprints being monitored for anomaly detection + y_axis_label: Fingerprints + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.insights.anomaly_detection.memory + exported_name: 
sql_insights_anomaly_detection_memory + description: Current memory used to support anomaly detection + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.leases.active + exported_name: sql_leases_active + description: The number of outstanding SQL schema leases. + y_axis_label: Outstanding leases + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.expired + exported_name: sql_leases_expired + description: The number of outstanding session based SQL schema leases expired. + y_axis_label: Leases expired because of a new version + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_initial_version + exported_name: sql_leases_long_wait_for_initial_version + description: The number of wait for initial version routines taking more than the lease duration. + y_axis_label: Number of wait for initial version routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_no_version + exported_name: sql_leases_long_wait_for_no_version + description: The number of wait for no versions that are taking more than the lease duration. + y_axis_label: Number of wait for long wait for no version routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_one_version + exported_name: sql_leases_long_wait_for_one_version + description: The number of wait for one versions that are taking more than the lease duration. + y_axis_label: Number of wait for long wait for one version routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.long_wait_for_two_version_invariant + exported_name: sql_leases_long_wait_for_two_version_invariant + description: The number of two version invariant waits that are taking more than the lease duration. + y_axis_label: Number of two version invariant wait routines executing + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.leases.waiting_to_expire + exported_name: sql_leases_waiting_to_expire + description: The number of outstanding session based SQL schema leases with expiry. 
+ y_axis_label: Outstanding Leases Waiting to Expire + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.mem.bulk.current + exported_name: sql_mem_bulk_current + description: Current sql statement memory usage for bulk operations + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.bulk.max + exported_name: sql_mem_bulk_max + description: Memory usage per sql statement for bulk operations + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.conns.current + exported_name: sql_mem_conns_current + description: Current sql statement memory usage for conns + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.conns.max + exported_name: sql_mem_conns_max + description: Memory usage per sql statement for conns + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.distsql.current + exported_name: sql_mem_distsql_current + description: Current sql statement memory usage for distsql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.distsql.max + exported_name: sql_mem_distsql_max + description: Memory usage per sql statement for distsql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.current + exported_name: sql_mem_internal_current + description: Current sql statement memory usage for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.max + exported_name: sql_mem_internal_max + description: Memory usage per sql statement for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.current + exported_name: sql_mem_internal_session_current + description: Current sql session memory usage for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.max + exported_name: sql_mem_internal_session_max + description: Memory usage per sql session for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.prepared.current + exported_name: sql_mem_internal_session_prepared_current + description: Current sql session memory usage by prepared statements for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.session.prepared.max + exported_name: sql_mem_internal_session_prepared_max + description: Memory usage by prepared statements per sql session for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.txn.current + exported_name: sql_mem_internal_txn_current + description: Current sql transaction memory usage for internal + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.internal.txn.max + exported_name: sql_mem_internal_txn_max + description: Memory usage per sql transaction for internal + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.root.max + exported_name: sql_mem_root_max + description: Memory usage per sql statement for root + y_axis_label: Memory + type: 
HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.current + exported_name: sql_mem_sql_current + description: Current sql statement memory usage for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.max + exported_name: sql_mem_sql_max + description: Memory usage per sql statement for sql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.current + exported_name: sql_mem_sql_session_current + description: Current sql session memory usage for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.max + exported_name: sql_mem_sql_session_max + description: Memory usage per sql session for sql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.prepared.current + exported_name: sql_mem_sql_session_prepared_current + description: Current sql session memory usage by prepared statements for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.session.prepared.max + exported_name: sql_mem_sql_session_prepared_max + description: Memory usage by prepared statements per sql session for sql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.txn.current + exported_name: sql_mem_sql_txn_current + description: Current sql transaction memory usage for sql + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.mem.sql.txn.max + exported_name: sql_mem_sql_txn_max + description: Memory usage per sql transaction for sql + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.misc.count + exported_name: sql_misc_count + description: Number of other SQL statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.misc.count.internal + exported_name: sql_misc_count_internal + description: Number of other SQL statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.misc.started.count + exported_name: sql_misc_started_count + description: Number of other SQL statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.misc.started.count.internal + exported_name: sql_misc_started_count_internal + description: Number of other SQL statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.hits + exported_name: sql_optimizer_plan_cache_hits + description: Number of non-prepared statements for which a cached plan was used + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.hits.internal + exported_name: sql_optimizer_plan_cache_hits_internal + description: Number of non-prepared statements for which a cached plan was used (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + 
derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.misses + exported_name: sql_optimizer_plan_cache_misses + description: Number of non-prepared statements for which a cached plan was not used + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.optimizer.plan_cache.misses.internal + exported_name: sql_optimizer_plan_cache_misses_internal + description: Number of non-prepared statements for which a cached plan was not used (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pgwire.pipeline.count + exported_name: sql_pgwire_pipeline_count + description: Number of pgwire commands received by the server that have not yet begun processing + y_axis_label: Commands + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.pgwire_cancel.ignored + exported_name: sql_pgwire_cancel_ignored + description: Number of pgwire query cancel requests that were ignored due to rate limiting + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pgwire_cancel.successful + exported_name: sql_pgwire_cancel_successful + description: Number of pgwire query cancel requests that were successful + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pgwire_cancel.total + exported_name: sql_pgwire_cancel_total + description: Number of pgwire query cancel requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.bytesin + exported_name: sql_pre_serve_bytesin + description: Number of SQL bytes received prior to routing the connection to the target SQL server + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.bytesout + exported_name: sql_pre_serve_bytesout + description: Number of SQL bytes sent prior to routing the connection to the target SQL server + y_axis_label: SQL Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.conn.failures + exported_name: sql_pre_serve_conn_failures + description: Number of SQL connection failures prior to routing the connection to the target SQL server + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.pre_serve.mem.cur + exported_name: sql_pre_serve_mem_cur + description: Current memory usage for SQL connections prior to routing the connection to the target SQL server + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.pre_serve.mem.max + exported_name: sql_pre_serve_mem_max + description: Memory usage for SQL connections prior to routing the connection to the target SQL server + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.pre_serve.new_conns + exported_name: sql_pre_serve_new_conns + description: Number of SQL connections created prior to routing the connection to the target SQL server + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.count + exported_name: sql_query_count + description: Number of SQL operations started 
including queries, and transaction control statements + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.count.internal + exported_name: sql_query_count_internal + description: Number of SQL operations started including queries, and transaction control statements (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.started.count + exported_name: sql_query_started_count + description: Number of SQL operations started including queries, and transaction control statements + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.started.count.internal + exported_name: sql_query_started_count_internal + description: Number of SQL operations started including queries, and transaction control statements (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.unique.count + exported_name: sql_query_unique_count + description: Cardinality estimate of the set of statement fingerprints + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.query.unique.count.internal + exported_name: sql_query_unique_count_internal + description: Cardinality estimate of the set of statement fingerprints (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.count + exported_name: sql_restart_savepoint_count + description: Number of `SAVEPOINT cockroach_restart` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.count.internal + exported_name: sql_restart_savepoint_count_internal + description: Number of `SAVEPOINT cockroach_restart` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.release.count + exported_name: sql_restart_savepoint_release_count + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.release.count.internal + exported_name: sql_restart_savepoint_release_count_internal + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.release.started.count + exported_name: sql_restart_savepoint_release_started_count + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.release.started.count.internal + exported_name: sql_restart_savepoint_release_started_count_internal + description: Number of `RELEASE SAVEPOINT cockroach_restart` statements started (internal queries) + 
y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.count + exported_name: sql_restart_savepoint_rollback_count + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.count.internal + exported_name: sql_restart_savepoint_rollback_count_internal + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.started.count + exported_name: sql_restart_savepoint_rollback_started_count + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.rollback.started.count.internal + exported_name: sql_restart_savepoint_rollback_started_count_internal + description: Number of `ROLLBACK TO SAVEPOINT cockroach_restart` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.started.count + exported_name: sql_restart_savepoint_started_count + description: Number of `SAVEPOINT cockroach_restart` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.restart_savepoint.started.count.internal + exported_name: sql_restart_savepoint_started_count_internal + description: Number of `SAVEPOINT cockroach_restart` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.count + exported_name: sql_savepoint_count + description: Number of SQL SAVEPOINT statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.count.internal + exported_name: sql_savepoint_count_internal + description: Number of SQL SAVEPOINT statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.count + exported_name: sql_savepoint_release_count + description: Number of `RELEASE SAVEPOINT` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.count.internal + exported_name: sql_savepoint_release_count_internal + description: Number of `RELEASE SAVEPOINT` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.started.count + exported_name: sql_savepoint_release_started_count + description: Number of `RELEASE SAVEPOINT` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.release.started.count.internal + exported_name: sql_savepoint_release_started_count_internal + description: Number of `RELEASE SAVEPOINT` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.count + exported_name: sql_savepoint_rollback_count + description: Number of `ROLLBACK TO SAVEPOINT` statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.count.internal + exported_name: sql_savepoint_rollback_count_internal + description: Number of `ROLLBACK TO SAVEPOINT` statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.started.count + exported_name: sql_savepoint_rollback_started_count + description: Number of `ROLLBACK TO SAVEPOINT` statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.rollback.started.count.internal + exported_name: sql_savepoint_rollback_started_count_internal + description: Number of `ROLLBACK TO SAVEPOINT` statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.started.count + exported_name: sql_savepoint_started_count + description: Number of SQL SAVEPOINT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.savepoint.started.count.internal + exported_name: sql_savepoint_started_count_internal + description: Number of SQL SAVEPOINT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.schema.invalid_objects + exported_name: sql_schema_invalid_objects + description: Gauge of detected invalid objects within the system.descriptor table (measured by querying crdb_internal.invalid_objects) + y_axis_label: Objects + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.schema_changer.object_count + exported_name: sql_schema_changer_object_count + description: Counter of the number of objects in the cluster + y_axis_label: Objects + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.select.started.count + exported_name: sql_select_started_count + description: Number of SQL SELECT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.select.started.count.internal + exported_name: sql_select_started_count_internal + description: Number of SQL SELECT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.service.latency.consistent + exported_name: sql_service_latency_consistent + description: Latency of SQL request execution of non-historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: 
sql.service.latency.consistent.internal + exported_name: sql_service_latency_consistent_internal + description: Latency of SQL request execution of non-historical queries (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.service.latency.historical + exported_name: sql_service_latency_historical + description: Latency of SQL request execution of historical queries + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.service.latency.historical.internal + exported_name: sql_service_latency_historical_internal + description: Latency of SQL request execution of historical queries (internal queries) + y_axis_label: SQL Internal Statements + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.statement_timeout.count + exported_name: sql_statement_timeout_count + description: Count of statements that failed because they exceeded the statement timeout + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.statement_timeout.count.internal + exported_name: sql_statement_timeout_count_internal + description: Count of statements that failed because they exceeded the statement timeout (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.statements.auto_retry.count + exported_name: sql_statements_auto_retry_count + description: Number of SQL statement automatic retries + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.statements.auto_retry.count.internal + exported_name: sql_statements_auto_retry_count_internal + description: Number of SQL statement automatic retries (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.activity.update.latency + exported_name: sql_stats_activity_update_latency + description: The latency of updates made by the SQL activity updater job. 
Includes failed update attempts + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.stats.activity.updates.failed + exported_name: sql_stats_activity_updates_failed + description: Number of update attempts made by the SQL activity updater job that failed with errors + y_axis_label: failed updates + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.activity.updates.successful + exported_name: sql_stats_activity_updates_successful + description: Number of successful updates made by the SQL activity updater job + y_axis_label: successful updates + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.cleanup.rows_removed + exported_name: sql_stats_cleanup_rows_removed + description: Number of stale statistics rows that are removed + y_axis_label: SQL Stats Cleanup + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.discarded.current + exported_name: sql_stats_discarded_current + description: Number of fingerprint statistics being discarded + y_axis_label: Discarded SQL Stats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flush.done_signals.ignored + exported_name: sql_stats_flush_done_signals_ignored + description: Number of times the SQL Stats activity update job ignored the signal sent to it indicating a flush has completed + y_axis_label: flush done signals ignored + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flush.fingerprint.count + exported_name: sql_stats_flush_fingerprint_count + description: The number of unique statement and transaction fingerprints included in the SQL Stats flush + y_axis_label: statement & transaction fingerprints + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flush.latency + exported_name: sql_stats_flush_latency + description: The latency of SQL Stats flushes to persistent storage. 
Includes failed flush attempts + y_axis_label: nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.stats.flushes.failed + exported_name: sql_stats_flushes_failed + description: Number of attempted SQL Stats flushes that failed with errors + y_axis_label: failed flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.flushes.successful + exported_name: sql_stats_flushes_successful + description: Number of times SQL Stats are flushed successfully to persistent storage + y_axis_label: successful flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.stats.mem.current + exported_name: sql_stats_mem_current + description: Current memory usage for fingerprint storage + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.mem.max + exported_name: sql_stats_mem_max + description: Memory usage for fingerprint storage + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.reported.mem.current + exported_name: sql_stats_reported_mem_current + description: Current memory usage for reported fingerprint storage + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.reported.mem.max + exported_name: sql_stats_reported_mem_max + description: Memory usage for reported fingerprint storage + y_axis_label: Memory + type: HISTOGRAM + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sql.stats.txn_stats_collection.duration + exported_name: sql_stats_txn_stats_collection_duration + description: Time took in nanoseconds to collect transaction stats + y_axis_label: SQL Transaction Stats Collection Overhead + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sql.temp_object_cleaner.active_cleaners + exported_name: sql_temp_object_cleaner_active_cleaners + description: number of cleaner tasks currently running on this node + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.temp_object_cleaner.schemas_deletion_error + exported_name: sql_temp_object_cleaner_schemas_deletion_error + description: number of errored schema deletions by the temp object cleaner on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.temp_object_cleaner.schemas_deletion_success + exported_name: sql_temp_object_cleaner_schemas_deletion_success + description: number of successful schema deletions by the temp object cleaner on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.temp_object_cleaner.schemas_to_delete + exported_name: sql_temp_object_cleaner_schemas_to_delete + description: number of schemas to be deleted by the temp object cleaner on this node + y_axis_label: Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.transaction_timeout.count + exported_name: sql_transaction_timeout_count + description: Count of statements that failed because they exceeded the transaction timeout + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.transaction_timeout.count.internal + exported_name: sql_transaction_timeout_count_internal + 
description: Count of statements that failed because they exceeded the transaction timeout (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.auto_retry.count + exported_name: sql_txn_auto_retry_count + description: Number of SQL transaction automatic retries + y_axis_label: SQL Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.auto_retry.count.internal + exported_name: sql_txn_auto_retry_count_internal + description: Number of SQL transaction automatic retries (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.begin.started.count + exported_name: sql_txn_begin_started_count + description: Number of SQL transaction BEGIN statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.begin.started.count.internal + exported_name: sql_txn_begin_started_count_internal + description: Number of SQL transaction BEGIN statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit.started.count + exported_name: sql_txn_commit_started_count + description: Number of SQL transaction COMMIT statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit.started.count.internal + exported_name: sql_txn_commit_started_count_internal + description: Number of SQL transaction COMMIT statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit_prepared.count + exported_name: sql_txn_commit_prepared_count + description: Number of SQL COMMIT PREPARED statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit_prepared.count.internal + exported_name: sql_txn_commit_prepared_count_internal + description: Number of SQL COMMIT PREPARED statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit_prepared.started.count + exported_name: sql_txn_commit_prepared_started_count + description: Number of SQL COMMIT PREPARED statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.commit_prepared.started.count.internal + exported_name: sql_txn_commit_prepared_started_count_internal + description: Number of SQL COMMIT PREPARED statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.contended.count + exported_name: sql_txn_contended_count + description: Number of SQL transactions experienced contention + y_axis_label: Contention + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.contended.count.internal + exported_name: sql_txn_contended_count_internal + description: 
Number of SQL transactions experienced contention (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.count + exported_name: sql_txn_prepare_count + description: Number of SQL PREPARE TRANSACTION statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.count.internal + exported_name: sql_txn_prepare_count_internal + description: Number of SQL PREPARE TRANSACTION statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.started.count + exported_name: sql_txn_prepare_started_count + description: Number of SQL PREPARE TRANSACTION statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.prepare.started.count.internal + exported_name: sql_txn_prepare_started_count_internal + description: Number of SQL PREPARE TRANSACTION statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback.started.count + exported_name: sql_txn_rollback_started_count + description: Number of SQL transaction ROLLBACK statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback.started.count.internal + exported_name: sql_txn_rollback_started_count_internal + description: Number of SQL transaction ROLLBACK statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.count + exported_name: sql_txn_rollback_prepared_count + description: Number of SQL ROLLBACK PREPARED statements successfully executed + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.count.internal + exported_name: sql_txn_rollback_prepared_count_internal + description: Number of SQL ROLLBACK PREPARED statements successfully executed (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.started.count + exported_name: sql_txn_rollback_prepared_started_count + description: Number of SQL ROLLBACK PREPARED statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.rollback_prepared.started.count.internal + exported_name: sql_txn_rollback_prepared_started_count_internal + description: Number of SQL ROLLBACK PREPARED statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.txn.upgraded_iso_level.count + exported_name: sql_txn_upgraded_iso_level_count + description: Number of times a weak isolation level was automatically upgraded to a stronger one + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
sql.txn.upgraded_iso_level.count.internal + exported_name: sql_txn_upgraded_iso_level_count_internal + description: Number of times a weak isolation level was automatically upgraded to a stronger one (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.update.started.count + exported_name: sql_update_started_count + description: Number of SQL UPDATE statements started + y_axis_label: SQL Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.update.started.count.internal + exported_name: sql_update_started_count_internal + description: Number of SQL UPDATE statements started (internal queries) + y_axis_label: SQL Internal Statements + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sql.vecindex.pending_splits_merges + exported_name: sql_vecindex_pending_splits_merges + description: Total number of vector index splits and merges waiting to be processed + y_axis_label: Pending Splits/Merges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sql.vecindex.successful_splits + exported_name: sql_vecindex_successful_splits + description: Total number of vector index partitions split without error + y_axis_label: Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.is_alive.cache_hits + exported_name: sqlliveness_is_alive_cache_hits + description: Number of calls to IsAlive that return from the cache + y_axis_label: Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.is_alive.cache_misses + exported_name: sqlliveness_is_alive_cache_misses + description: Number of calls to IsAlive that do not return from the cache + y_axis_label: Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.sessions_deleted + exported_name: sqlliveness_sessions_deleted + description: Number of expired sessions which have been deleted + y_axis_label: Sessions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.sessions_deletion_runs + exported_name: sqlliveness_sessions_deletion_runs + description: Number of calls to delete sessions which have been performed + y_axis_label: Sessions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.write_failures + exported_name: sqlliveness_write_failures + description: Number of update or insert calls which have failed + y_axis_label: Writes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sqlliveness.write_successes + exported_name: sqlliveness_write_successes + description: Number of update or insert calls successfully performed + y_axis_label: Writes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.cost_client.blocked_requests + exported_name: tenant_cost_client_blocked_requests + description: Number of requests currently blocked by the rate limiter + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: tenant.sql_usage.cross_region_network_ru + exported_name: tenant_sql_usage_cross_region_network_ru + description: Total number of RUs charged for cross-region network traffic + y_axis_label: Request Units + 
type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.estimated_cpu_seconds + exported_name: tenant_sql_usage_estimated_cpu_seconds + description: Estimated amount of CPU consumed by a virtual cluster + y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.estimated_kv_cpu_seconds + exported_name: tenant_sql_usage_estimated_kv_cpu_seconds + description: Estimated amount of CPU consumed by a virtual cluster, in the KV layer + y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.estimated_replication_bytes + exported_name: tenant_sql_usage_estimated_replication_bytes + description: Total number of estimated bytes for KV replication traffic + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.external_io_egress_bytes + exported_name: tenant_sql_usage_external_io_egress_bytes + description: Total number of bytes written to external services such as cloud storage providers + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.external_io_ingress_bytes + exported_name: tenant_sql_usage_external_io_ingress_bytes + description: Total number of bytes read from external services such as cloud storage providers + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.kv_request_units + exported_name: tenant_sql_usage_kv_request_units + description: RU consumption attributable to KV + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.pgwire_egress_bytes + exported_name: tenant_sql_usage_pgwire_egress_bytes + description: Total number of bytes transferred from a SQL pod to the client + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.provisioned_vcpus + exported_name: tenant_sql_usage_provisioned_vcpus + description: Number of vcpus available to the virtual cluster + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: tenant.sql_usage.read_batches + exported_name: tenant_sql_usage_read_batches + description: Total number of KV read batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.read_bytes + exported_name: tenant_sql_usage_read_bytes + description: Total number of bytes read from KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.read_requests + exported_name: tenant_sql_usage_read_requests + description: Total number of KV read requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.request_units + exported_name: tenant_sql_usage_request_units + description: RU consumption + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.sql_pods_cpu_seconds + exported_name: tenant_sql_usage_sql_pods_cpu_seconds + description: Total amount of CPU used by SQL pods + 
y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.write_batches + exported_name: tenant_sql_usage_write_batches + description: Total number of KV write batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.write_bytes + exported_name: tenant_sql_usage_write_bytes + description: Total number of bytes written to KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.sql_usage.write_requests + exported_name: tenant_sql_usage_write_requests + description: Total number of KV write requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.aborts + exported_name: txn_aborts + description: Number of aborted KV transactions + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commit_waits + exported_name: txn_commit_waits + description: Number of KV transactions that had to commit-wait on commit in order to ensure linearizability. This generally happens to transactions writing to global ranges. + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commits + exported_name: txn_commits + description: Number of committed KV transactions (including 1PC) + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commits1PC + exported_name: txn_commits1PC + description: Number of KV transaction one-phase commits + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commits_read_only + exported_name: txn_commits_read_only + description: Number of read only KV transaction commits + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.condensed_intent_spans + exported_name: txn_condensed_intent_spans + description: KV transactions that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans_gauge for a gauge of such transactions currently running. + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.condensed_intent_spans_gauge + exported_name: txn_condensed_intent_spans_gauge + description: KV transactions currently running that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans for a perpetual counter/rate. + y_axis_label: KV Transactions + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txn.condensed_intent_spans_rejected + exported_name: txn_condensed_intent_spans_rejected + description: KV transactions that have been aborted because they exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). Rejection is caused by kv.transaction.reject_over_max_intents_budget. 
+ y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.count_limit_on_response + exported_name: txn_count_limit_on_response + description: KV transactions that have exceeded the count limit on a response + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.count_limit_rejected + exported_name: txn_count_limit_rejected + description: KV transactions that have been aborted because they exceeded the max number of writes and locking reads allowed + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.durations + exported_name: txn_durations + description: KV transaction durations + y_axis_label: KV Txn Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: txn.inflight_locks_over_tracking_budget + exported_name: txn_inflight_locks_over_tracking_budget + description: KV transactions whose in-flight writes and locking reads have exceeded the intent tracking memory budget (kv.transaction.max_intents_bytes). + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.parallelcommits + exported_name: txn_parallelcommits + description: Number of KV transaction parallel commits + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.parallelcommits.auto_retries + exported_name: txn_parallelcommits_auto_retries + description: Number of commit tries after successful failed parallel commit attempts + y_axis_label: Retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.prepares + exported_name: txn_prepares + description: Number of prepared KV transactions + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.auto_retries + exported_name: txn_refresh_auto_retries + description: Number of request retries after successful client-side refreshes + y_axis_label: Retries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.fail + exported_name: txn_refresh_fail + description: Number of failed client-side transaction refreshes + y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.fail_with_condensed_spans + exported_name: txn_refresh_fail_with_condensed_spans + description: Number of failed client-side refreshes for transactions whose read tracking lost fidelity because of condensing. Such a failure could be a false conflict. Failures counted here are also counted in txn.refresh.fail, and the respective transactions are also counted in txn.refresh.memory_limit_exceeded. 
+ y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.memory_limit_exceeded + exported_name: txn_refresh_memory_limit_exceeded + description: Number of transactions which exceed the refresh span bytes limit, causing their read spans to be condensed + y_axis_label: Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.success + exported_name: txn_refresh_success + description: Number of successful client-side transaction refreshes. A refresh may be preemptive or reactive. A reactive refresh is performed after a request throws an error because a refresh is needed for it to succeed. In these cases, the request will be re-issued as an auto-retry (see txn.refresh.auto_retries) after the refresh succeeds. + y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.refresh.success_server_side + exported_name: txn_refresh_success_server_side + description: Number of successful server-side transaction refreshes + y_axis_label: Refreshes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts + exported_name: txn_restarts + description: Number of restarted KV transactions + y_axis_label: KV Transactions + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txn.restarts.asyncwritefailure + exported_name: txn_restarts_asyncwritefailure + description: Number of restarts due to async consensus writes that failed to leave intents + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.commitdeadlineexceeded + exported_name: txn_restarts_commitdeadlineexceeded + description: Number of restarts due to a transaction exceeding its deadline + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.exclusionviolation + exported_name: txn_restarts_exclusionviolation + description: Number of restarts due to an exclusion violation + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.restarts.readwithinuncertainty + exported_name: txn_restarts_readwithinuncertainty + description: Number of restarts due to reading a new value within the uncertainty interval + y_axis_label: Restarted Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.rollbacks.async.failed + exported_name: txn_rollbacks_async_failed + description: Number of KV transactions that failed to send abort asynchronously, which is not always retried + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.rollbacks.failed + exported_name: txn_rollbacks_failed + description: Number of KV transactions that failed to send final abort + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.write_buffering.batches.fully_handled + exported_name: txn_write_buffering_batches_fully_handled + description: Number of KV batches that were fully handled by the write buffer (not sent to KV) + y_axis_label: KV Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - 
name: txn.write_buffering.disabled_after_buffering + exported_name: txn_write_buffering_disabled_after_buffering + description: Number of KV transactions that disabled write buffering after buffering some writes but before an EndTxn request + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.write_buffering.memory_limit_exceeded + exported_name: txn_write_buffering_memory_limit_exceeded + description: Number of KV transactions that exceeded the write buffering memory limit + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.write_buffering.num_enabled + exported_name: txn_write_buffering_num_enabled + description: Number of KV transactions that enabled buffered writes + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE +- name: SERVER + categories: + - name: HARDWARE + metrics: + - name: sys.cpu.combined.percent-normalized + exported_name: sys_cpu_combined_percent_normalized + description: Current user+system cpu percentage consumed by the CRDB process, normalized 0-1 by number of cores + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU utilization percentage by the CockroachDB process. \n\t\tIf it is equal to 1 (or 100%), then the CPU is overloaded. The CockroachDB process should \n\t\tnot be running with over 80% utilization for extended periods of time (hours). This metric \n\t\tis used in the DB Console CPU Percent graph." + essential: true + - name: sys.cpu.host.combined.percent-normalized + exported_name: sys_cpu_host_combined_percent_normalized + description: Current user+system cpu percentage across the whole machine, normalized 0-1 by number of cores + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU utilization percentage of the\n\t\tunderlying server, virtual machine, or container hosting the\n\t\tCockroachDB process. It includes CPU usage from both CockroachDB\n\t\tand non-CockroachDB processes. It also accounts for time spent\n\t\tprocessing hardware (irq) and software (softirq) interrupts, as\n\t\twell as nice time, which represents low-priority user-mode\n\t\tactivity.\n\n A value of 1 (or 100%) indicates that the CPU is overloaded. Avoid\n running the CockroachDB process in an environment where the CPU\n remains overloaded for extended periods (e.g. multiple hours). This\n metric appears in the DB Console on the Host CPU Percent graph." + essential: true + - name: sys.cpu.sys.percent + exported_name: sys_cpu_sys_percent + description: Current system cpu percentage consumed by the CRDB process + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU usage percentage at the system\n\t\t(Linux kernel) level by the CockroachDB process only. This is\n\t\tsimilar to the Linux top command output. The metric value can be\n\t\tmore than 1 (or 100%) on multi-core systems. It is best to combine\n\t\tuser and system metrics." 
+ essential: true + - name: sys.cpu.user.percent + exported_name: sys_cpu_user_percent + description: Current user cpu percentage consumed by the CRDB process + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the CPU usage percentage at the user\n\t\tlevel by the CockroachDB process only. This is similar to the Linux\n\t\ttop command output. The metric value can be more than 1 (or 100%)\n\t\ton multi-core systems. It is best to combine user and system\n\t\tmetrics." + essential: true + - name: sys.host.disk.iopsinprogress + exported_name: sys_host_disk_iopsinprogress + description: IO operations currently in progress on this host (as reported by the OS) + y_axis_label: Operations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric gives the average queue length of the storage device. It characterizes the storage device's performance capability. All I/O performance metrics are Linux counters and correspond to the avgqu-sz in the Linux iostat command output. You need to view the device queue graph in the context of the actual read/write IOPS and MBPS metrics that show the actual device utilization. If the device is not keeping up, the queue will grow. Values over 10 are bad. Values around 5 mean the device is working hard trying to keep up. For internal (on chassis) NVMe devices, the queue values are typically 0. For network connected devices, such as AWS EBS volumes, the normal operating range of values is 1 to 2. Spikes in values are OK. They indicate an I/O spike where the device fell behind and then caught up. End users may experience inconsistent response times, but there should be no cluster stability issues. If the queue is greater than 5 for an extended period of time and IOPS or MBPS are low, then the storage is most likely not provisioned per Cockroach Labs guidance. In AWS EBS, it is commonly an EBS type, such as gp2, not suitable as database primary storage. If I/O is low and the queue is low, the most likely scenario is that the CPU is lacking and not driving I/O. One such case is a cluster with nodes with only 2 vcpus which is not supported sizing for production deployments. There are quite a few background processes in the database that take CPU away from the workload, so the workload is just not getting the CPU. Review storage and disk I/O. + essential: true + - name: sys.host.disk.read.bytes + exported_name: sys_host_disk_read_bytes + description: Bytes read from all disks since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device read throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. + essential: true + - name: sys.host.disk.read.count + exported_name: sys_host_disk_read_count + description: Disk read operations across all disks since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device read IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. 
+ essential: true + - name: sys.host.disk.write.bytes + exported_name: sys_host_disk_write_bytes + description: Bytes written to all disks since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device write throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. + essential: true + - name: sys.host.disk.write.count + exported_name: sys_host_disk_write_count + description: Disk write operations across all disks since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the effective storage device write IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. + essential: true + - name: sys.host.net.recv.bytes + exported_name: sys_host_net_recv_bytes + description: Bytes received on all network interfaces since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives the node's ingress/egress network transfer rates for flat sections which may indicate insufficiently provisioned networking or high error rates. CockroachDB is using a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect. + essential: true + - name: sys.host.net.send.bytes + exported_name: sys_host_net_send_bytes + description: Bytes sent on all network interfaces since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives the node's ingress/egress network transfer rates for flat sections which may indicate insufficiently provisioned networking or high error rates. CockroachDB is using a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect. + essential: true + - name: sys.rss + exported_name: sys_rss + description: Current process RSS + y_axis_label: RSS + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: "This metric gives the amount of RAM used by the\n\t\tCockroachDB process. Persistently low values over an extended\n\t\tperiod of time suggest there is underutilized memory that can be\n\t\tput to work with adjusted settings for --cache or --max_sql_memory\n\t\tor both. Conversely, a high utilization, even if a temporary spike,\n\t\tindicates an increased risk of Out-of-memory (OOM) crash\n\t\t(particularly since the swap is generally disabled)." + essential: true + - name: sys.runnable.goroutines.per.cpu + exported_name: sys_runnable_goroutines_per_cpu + description: Average number of goroutines that are waiting to run, normalized by number of cores + y_axis_label: goroutines + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: If this metric has a value over 30, it indicates a CPU overload. If the condition lasts a short period of time (a few seconds), the database users are likely to experience inconsistent response times. 
If the condition persists for an extended period of time (tens of seconds, or minutes) the cluster may start developing stability issues. Review CPU planning. + essential: true + - name: sys.uptime + exported_name: sys_uptime + description: Process uptime + y_axis_label: Uptime + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric measures the length of time, in seconds, that the CockroachDB process has been running. Monitor this metric to detect events such as node restarts, which may require investigation or intervention. + essential: true + - name: UNSET + metrics: + - name: build.timestamp + exported_name: build_timestamp + description: Build information + y_axis_label: Build Time + type: GAUGE + unit: TIMESTAMP_SEC + aggregation: AVG + derivative: NONE + - name: go.scheduler_latency + exported_name: go_scheduler_latency + description: Go scheduling latency + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: log.buffered.messages.dropped + exported_name: log_buffered_messages_dropped + description: Count of log messages that are dropped by buffered log sinks. When CRDB attempts to buffer a log message in a buffered log sink whose buffer is already full, it drops the oldest buffered messages to make space for the new message + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.conn.attempts + exported_name: log_fluent_sink_conn_attempts + description: Number of connection attempts experienced by fluent-server logging sinks + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.conn.errors + exported_name: log_fluent_sink_conn_errors + description: Number of connection errors experienced by fluent-server logging sinks + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.write.attempts + exported_name: log_fluent_sink_write_attempts + description: Number of write attempts experienced by fluent-server logging sinks + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.fluent.sink.write.errors + exported_name: log_fluent_sink_write_errors + description: Number of write errors experienced by fluent-server logging sinks + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: log.messages.count + exported_name: log_messages_count + description: Count of messages logged on the node since startup. Note that this does not measure the fan-out of single log messages to the various configured logging sinks. 
+ y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cgo.allocbytes + exported_name: sys_cgo_allocbytes + description: Current bytes of memory allocated by cgo + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.cgo.totalbytes + exported_name: sys_cgo_totalbytes + description: Total bytes of memory allocated by cgo, but not released + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.cgocalls + exported_name: sys_cgocalls + description: Total number of cgo calls + y_axis_label: cgo Calls + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cpu.now.ns + exported_name: sys_cpu_now_ns + description: The time when CPU measurements were taken, as nanoseconds since epoch + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cpu.sys.ns + exported_name: sys_cpu_sys_ns + description: Total system cpu time consumed by the CRDB process + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.cpu.user.ns + exported_name: sys_cpu_user_ns + description: Total user cpu time consumed by the CRDB process + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.fd.open + exported_name: sys_fd_open + description: Process open file descriptors + y_axis_label: File Descriptors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sys.fd.softlimit + exported_name: sys_fd_softlimit + description: Process open FD soft limit + y_axis_label: File Descriptors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sys.gc.assist.ns + exported_name: sys_gc_assist_ns + description: Estimated total CPU time user goroutines spent to assist the GC process + y_axis_label: CPU Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.gc.count + exported_name: sys_gc_count + description: Total number of GC runs + y_axis_label: GC Runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.gc.pause.ns + exported_name: sys_gc_pause_ns + description: Total GC pause + y_axis_label: GC Pause + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.gc.pause.percent + exported_name: sys_gc_pause_percent + description: Current GC pause percentage + y_axis_label: GC Pause + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: sys.gc.stop.ns + exported_name: sys_gc_stop_ns + description: Estimated GC stop-the-world stopping latencies + y_axis_label: GC Stopping + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sys.go.allocbytes + exported_name: sys_go_allocbytes + description: Current bytes of memory allocated by go + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.heap.allocbytes + exported_name: sys_go_heap_allocbytes + description: Cumulative bytes allocated for heap objects. 
+ y_axis_label: Memory + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.go.heap.heapfragmentbytes + exported_name: sys_go_heap_heapfragmentbytes + description: Total heap fragmentation bytes, derived from bytes in in-use spans minus bytes allocated + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.heap.heapreleasedbytes + exported_name: sys_go_heap_heapreleasedbytes + description: Total bytes returned to the OS from heap. + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.heap.heapreservedbytes + exported_name: sys_go_heap_heapreservedbytes + description: Total bytes reserved by heap, derived from bytes in idle (unused) spans subtracts bytes returned to the OS + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.pause.other.ns + exported_name: sys_go_pause_other_ns + description: Estimated non-GC-related total pause time + y_axis_label: Non-GC Pause + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sys.go.stack.systembytes + exported_name: sys_go_stack_systembytes + description: Stack memory obtained from the OS. + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.go.stop.other.ns + exported_name: sys_go_stop_other_ns + description: Estimated non-GC-related stop-the-world stopping latencies + y_axis_label: Non-GC Stopping + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: sys.go.totalbytes + exported_name: sys_go_totalbytes + description: Total bytes of memory allocated by go, but not released + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: sys.goroutines + exported_name: sys_goroutines + description: Current number of goroutines + y_axis_label: goroutines + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: sys.host.disk.io.time + exported_name: sys_host_disk_io_time + description: Time spent reading from or writing to all disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.disk.read.time + exported_name: sys_host_disk_read_time + description: Time spent reading from all disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.disk.weightedio.time + exported_name: sys_host_disk_weightedio_time + description: Weighted time spent reading from or writing to all disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.disk.write.time + exported_name: sys_host_disk_write_time + description: Time spent writing to all disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.recv.drop + exported_name: sys_host_net_recv_drop + description: Receiving packets that got dropped on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
sys.host.net.recv.err + exported_name: sys_host_net_recv_err + description: Error receiving packets on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.recv.packets + exported_name: sys_host_net_recv_packets + description: Packets received on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.send.drop + exported_name: sys_host_net_send_drop + description: Sending packets that got dropped on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.send.err + exported_name: sys_host_net_send_err + description: Error on sending packets on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.host.net.send.packets + exported_name: sys_host_net_send_packets + description: Packets sent on all network interfaces since this process started (as reported by the OS) + y_axis_label: Packets + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sys.totalmem + exported_name: sys_totalmem + description: Total memory (both free and used) + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE +- name: STORAGE + categories: + - name: OVERLOAD + metrics: + - name: admission.wait_durations.kv + exported_name: admission_wait_durations_kv + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric shows if CPU utilization-based admission control feature is working effectively or potentially overaggressive. This is a latency histogram of how much delay was added to the workload due to throttling. If observing over 100ms waits for over 5 seconds while there was excess capacity available, then the admission control is overly aggressive. + essential: true + - name: admission.wait_durations.kv-stores + exported_name: admission_wait_durations_kv_stores + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: This metric shows if I/O utilization-based admission control feature is working effectively or potentially overaggressive. This is a latency histogram of how much delay was added to the workload due to throttling. If observing over 100ms waits for over 5 seconds while there was excess capacity available, then the admission control is overly aggressive. + essential: true + - name: REPLICATION + metrics: + - name: leases.transfers.success + exported_name: leases_transfers_success + description: Number of successful lease transfers + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: A high number of lease transfers is not a negative or positive signal, rather it is a reflection of the elastic cluster activities. For example, this metric is high during cluster topology changes. 
A high value is often the reason for NotLeaseHolderErrors which are normal and expected during rebalancing. Observing this metric may provide a confirmation of the cause of such errors. + essential: true + - name: liveness.heartbeatlatency + exported_name: liveness_heartbeatlatency + description: Node liveness heartbeat latency + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: If this metric exceeds 1 second, it is a sign of cluster instability. + essential: true + - name: liveness.livenodes + exported_name: liveness_livenodes + description: Number of live nodes in the cluster (will be 0 if this node is not itself live) + y_axis_label: Nodes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This is a critical metric that tracks the live nodes in the cluster. + essential: true + - name: queue.replicate.replacedecommissioningreplica.error + exported_name: queue_replicate_replacedecommissioningreplica_error + description: Number of failed decommissioning replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Refer to Decommission the node. + essential: true + - name: range.merges + exported_name: range_merges + description: Number of range merges + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric indicates how fast a workload is scaling down. Merges are Cockroach's optimization for performance. This metric indicates that there have been deletes in the workload. + essential: true + - name: range.splits + exported_name: range_splits + description: Number of range splits + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric indicates how fast a workload is scaling up. Spikes can indicate resource hotspots since the split heuristic is based on QPS. To understand whether hotspots are an issue and with which tables and indexes they are occurring, correlate this metric with other metrics such as CPU usage, such as sys.cpu.combined.percent-normalized, or use the Hot Ranges page. + essential: true + - name: ranges + exported_name: ranges + description: Number of ranges + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric provides a measure of the scale of the data size. + essential: true + - name: ranges.unavailable + exported_name: ranges_unavailable + description: Number of ranges with fewer live replicas than needed for quorum + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric is an indicator of replication issues. It shows whether the cluster is unhealthy and can impact workload. If an entire range is unavailable, then it will be unable to process queries. + essential: true + - name: ranges.underreplicated + exported_name: ranges_underreplicated + description: Number of ranges with fewer live replicas than the replication target + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric is an indicator of replication issues. It shows whether the cluster has data that is not conforming to resilience goals. 
The next step is to determine the corresponding database object, such as the table or index, of these under-replicated ranges and whether the under-replication is temporarily expected. Use the statement SELECT table_name, index_name FROM [SHOW RANGES WITH INDEXES] WHERE range_id = {id of under-replicated range}; + essential: true + - name: rebalancing.cpunanospersecond + exported_name: rebalancing_cpunanospersecond + description: Average CPU nanoseconds spent on processing replica operations in the last 30 minutes. + y_axis_label: Nanoseconds/Sec + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: A high value of this metric could indicate that one of the store's replicas is part of a hot range. + essential: true + - name: rebalancing.lease.transfers + exported_name: rebalancing_lease_transfers + description: Number of lease transfers motivated by store-level load imbalances + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. + essential: true + - name: rebalancing.queriespersecond + exported_name: rebalancing_queriespersecond + description: Number of kv-level requests received per second by the store, considering the last 30 minutes, as used in rebalancing decisions. + y_axis_label: Queries/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric shows hotspots along the queries per second (QPS) dimension. It provides insights into the ongoing rebalancing activities. + essential: true + - name: rebalancing.range.rebalances + exported_name: rebalancing_range_rebalances + description: Number of range rebalance operations motivated by store-level load imbalances + y_axis_label: Range Rebalances + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. + essential: true + - name: rebalancing.replicas.cpunanospersecond + exported_name: rebalancing_replicas_cpunanospersecond + description: Histogram of average CPU nanoseconds spent on processing replica operations in the last 30 minutes. + y_axis_label: Nanoseconds/Sec + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: 'A high value of this metric could indicate that one of the store''s replicas is part of a hot range. See also the non-histogram variant: rebalancing.cpunanospersecond.' + essential: true + - name: rebalancing.replicas.queriespersecond + exported_name: rebalancing_replicas_queriespersecond + description: Histogram of average kv-level requests received per second by replicas on the store in the last 30 minutes. + y_axis_label: Queries/Sec + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: 'A high value of this metric could indicate that one of the store''s replicas is part of a hot range. See also: rebalancing_replicas_cpunanospersecond.' 
+ essential: true + - name: replicas + exported_name: replicas + description: Number of replicas + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric provides an essential characterization of the data distribution across cluster nodes. + essential: true + - name: replicas.leaseholders + exported_name: replicas_leaseholders + description: Number of lease holders + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + how_to_use: This metric provides an essential characterization of the data processing points across cluster nodes. + essential: true + - name: SQL + metrics: + - name: txnwaitqueue.deadlocks_total + exported_name: txnwaitqueue_deadlocks_total + description: Number of deadlocks detected by the txn wait queue + y_axis_label: Deadlocks + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: Alert on this metric if its value is greater than zero, especially if transaction throughput is lower than expected. Applications should be able to detect and recover from deadlock errors. However, transaction performance and throughput can be maximized if the application logic avoids deadlock conditions in the first place, for example, by keeping transactions as short as possible. + essential: true + - name: STORAGE + metrics: + - name: admission.io.overload + exported_name: admission_io_overload + description: 1-normalized float indicating whether IO admission control considers the store as overloaded with respect to compaction out of L0 (considers sub-level and file counts). + y_axis_label: Threshold + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + how_to_use: If the value of this metric exceeds 1, then it indicates overload. You can also look at the metrics `storage.l0-num-files`, `storage.l0-sublevels` or `rocksdb.read-amplification` directly. A healthy LSM shape is defined as "read-amp < 20" and "L0-files < 1000", looking at cluster settings `admission.l0_sub_level_count_overload_threshold` and `admission.l0_file_count_overload_threshold` respectively. + essential: true + - name: capacity + exported_name: capacity + description: Total storage capacity + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: 'This metric gives total storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space).' + essential: true + - name: capacity.available + exported_name: capacity_available + description: Available storage capacity + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: 'This metric gives available storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space).' + essential: true + - name: capacity.used + exported_name: capacity_used + description: Used storage capacity + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + how_to_use: 'This metric gives used storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space).' 
+ essential: true + - name: rocksdb.block.cache.hits + exported_name: rocksdb_block_cache_hits + description: Count of block cache hits + y_axis_label: Cache Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives hits to block cache, which is reserved memory. It is allocated upon the start of a node process by the `--cache` flag and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload. + essential: true + - name: rocksdb.block.cache.misses + exported_name: rocksdb_block_cache_misses + description: Count of block cache misses + y_axis_label: Cache Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric gives misses to block cache, which is reserved memory. It is allocated upon the start of a node process by the `--cache` flag and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload. + essential: true + - name: rocksdb.compactions + exported_name: rocksdb_compactions + description: Number of table compactions + y_axis_label: Compactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports the number of a node's LSM compactions. If the number of compactions remains elevated while the LSM health does not improve, compactions are not keeping up with the workload. If the condition persists for an extended period, the cluster will initially exhibit performance issues that will eventually escalate into stability issues. + essential: true + - name: storage.wal.fsync.latency + exported_name: storage_wal_fsync_latency + description: The write ahead log fsync latency + y_axis_label: Fsync Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + how_to_use: If this value is greater than `100ms`, it is an indication of a disk stall. To mitigate the effects of disk stalls, consider deploying your cluster with WAL failover configured. + essential: true + - name: storage.write-stalls + exported_name: storage_write_stalls + description: Number of instances of intentional write stalls to backpressure incoming writes + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + how_to_use: This metric reports actual disk stall events. Ideally, investigate all reports of disk stalls. As a practical guideline, one stall per minute is not likely to have a material impact on workload beyond an occasional increase in response time. However, one stall per second should be viewed as problematic and investigated actively. It is particularly problematic if the rate persists over an extended period of time, and worse, if it is increasing. + essential: true + - name: UNSET + metrics: + - name: abortspanbytes + exported_name: abortspanbytes + description: Number of bytes in the abort span + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: addsstable.applications + exported_name: addsstable_applications + description: Number of SSTable ingestions applied (i.e. applied by Replicas) + y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.aswrites + exported_name: addsstable_aswrites + description: | + Number of SSTables ingested as normal writes. 
+ + These AddSSTable requests do not count towards the addsstable metrics + 'proposals', 'applications', or 'copies', as they are not ingested as AddSSTable + Raft commands, but rather normal write commands. However, if these requests get + throttled they do count towards 'delay.total' and 'delay.enginebackpressure'. + y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.copies + exported_name: addsstable_copies + description: number of SSTable ingestions that required copying files during application + y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.delay.enginebackpressure + exported_name: addsstable_delay_enginebackpressure + description: Amount by which evaluation of AddSSTable requests was delayed by storage-engine backpressure + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.delay.total + exported_name: addsstable_delay_total + description: Amount by which evaluation of AddSSTable requests was delayed + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: addsstable.proposals + exported_name: addsstable_proposals + description: Number of SSTable ingestions proposed (i.e. sent to Raft by lease holders) + y_axis_label: Ingestions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-cpu + exported_name: admission_admitted_elastic_cpu + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-cpu.bulk-normal-pri + exported_name: admission_admitted_elastic_cpu_bulk_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-cpu.normal-pri + exported_name: admission_admitted_elastic_cpu_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-stores + exported_name: admission_admitted_elastic_stores + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-stores.bulk-low-pri + exported_name: admission_admitted_elastic_stores_bulk_low_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.elastic-stores.bulk-normal-pri + exported_name: admission_admitted_elastic_stores_bulk_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv + exported_name: admission_admitted_kv + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores + exported_name: admission_admitted_kv_stores + description: Number of requests admitted + y_axis_label: Requests + type: 
COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.high-pri + exported_name: admission_admitted_kv_stores_high_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.locking-normal-pri + exported_name: admission_admitted_kv_stores_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.normal-pri + exported_name: admission_admitted_kv_stores_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv-stores.user-high-pri + exported_name: admission_admitted_kv_stores_user_high_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv.high-pri + exported_name: admission_admitted_kv_high_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv.locking-normal-pri + exported_name: admission_admitted_kv_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.kv.normal-pri + exported_name: admission_admitted_kv_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-kv-response + exported_name: admission_admitted_sql_kv_response + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-kv-response.locking-normal-pri + exported_name: admission_admitted_sql_kv_response_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-kv-response.normal-pri + exported_name: admission_admitted_sql_kv_response_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-sql-response + exported_name: admission_admitted_sql_sql_response + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-sql-response.locking-normal-pri + exported_name: admission_admitted_sql_sql_response_locking_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.admitted.sql-sql-response.normal-pri + exported_name: admission_admitted_sql_sql_response_normal_pri + description: Number of requests admitted + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
admission.elastic_cpu.acquired_nanos + exported_name: admission_elastic_cpu_acquired_nanos + description: Total CPU nanoseconds acquired by elastic work + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.available_nanos + exported_name: admission_elastic_cpu_available_nanos + description: Instantaneous available CPU nanoseconds per second ignoring utilization limit + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.max_available_nanos + exported_name: admission_elastic_cpu_max_available_nanos + description: Maximum available CPU nanoseconds per second ignoring utilization limit + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.nanos_exhausted_duration + exported_name: admission_elastic_cpu_nanos_exhausted_duration + description: Total duration when elastic CPU nanoseconds were exhausted, in micros + y_axis_label: Microseconds + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.over_limit_durations + exported_name: admission_elastic_cpu_over_limit_durations + description: Measurement of how much over the prescribed limit elastic requests ran (not recorded if requests don't run over) + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.pre_work_nanos + exported_name: admission_elastic_cpu_pre_work_nanos + description: Total CPU nanoseconds spent doing pre-work, before doing elastic work + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.returned_nanos + exported_name: admission_elastic_cpu_returned_nanos + description: Total CPU nanoseconds returned by elastic work + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.elastic_cpu.utilization + exported_name: admission_elastic_cpu_utilization + description: CPU utilization by elastic work + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: admission.elastic_cpu.utilization_limit + exported_name: admission_elastic_cpu_utilization_limit + description: Utilization limit set for the elastic CPU work + y_axis_label: CPU Time + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: admission.errored.elastic-cpu + exported_name: admission_errored_elastic_cpu + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-cpu.bulk-normal-pri + exported_name: admission_errored_elastic_cpu_bulk_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-cpu.normal-pri + exported_name: admission_errored_elastic_cpu_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-stores + exported_name: admission_errored_elastic_stores + 
description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-stores.bulk-low-pri + exported_name: admission_errored_elastic_stores_bulk_low_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.elastic-stores.bulk-normal-pri + exported_name: admission_errored_elastic_stores_bulk_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv + exported_name: admission_errored_kv + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores + exported_name: admission_errored_kv_stores + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.high-pri + exported_name: admission_errored_kv_stores_high_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.locking-normal-pri + exported_name: admission_errored_kv_stores_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.normal-pri + exported_name: admission_errored_kv_stores_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv-stores.user-high-pri + exported_name: admission_errored_kv_stores_user_high_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv.high-pri + exported_name: admission_errored_kv_high_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv.locking-normal-pri + exported_name: admission_errored_kv_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.kv.normal-pri + exported_name: admission_errored_kv_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-kv-response + exported_name: admission_errored_sql_kv_response + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-kv-response.locking-normal-pri + exported_name: 
admission_errored_sql_kv_response_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-kv-response.normal-pri + exported_name: admission_errored_sql_kv_response_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-sql-response + exported_name: admission_errored_sql_sql_response + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-sql-response.locking-normal-pri + exported_name: admission_errored_sql_sql_response_locking_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.errored.sql-sql-response.normal-pri + exported_name: admission_errored_sql_sql_response_normal_pri + description: Number of requests not admitted due to error + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.cpu_load_long_period_duration.kv + exported_name: admission_granter_cpu_load_long_period_duration_kv + description: Total duration when CPULoad was being called with a long period, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.cpu_load_short_period_duration.kv + exported_name: admission_granter_cpu_load_short_period_duration_kv + description: Total duration when CPULoad was being called with a short period, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.elastic_io_tokens_available.kv + exported_name: admission_granter_elastic_io_tokens_available_kv + description: Number of tokens available + y_axis_label: Tokens + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.granter.elastic_io_tokens_exhausted_duration.kv + exported_name: admission_granter_elastic_io_tokens_exhausted_duration_kv + description: Total duration when Elastic IO tokens were exhausted, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.io_tokens_available.kv + exported_name: admission_granter_io_tokens_available_kv + description: Number of tokens available + y_axis_label: Tokens + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.granter.io_tokens_bypassed.kv + exported_name: admission_granter_io_tokens_bypassed_kv + description: Total number of tokens taken by work bypassing admission control (for example, follower writes without flow control) + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.io_tokens_exhausted_duration.kv + exported_name: admission_granter_io_tokens_exhausted_duration_kv + description: Total duration when IO tokens were exhausted, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - 
name: admission.granter.io_tokens_returned.kv + exported_name: admission_granter_io_tokens_returned_kv + description: Total number of tokens returned + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.io_tokens_taken.kv + exported_name: admission_granter_io_tokens_taken_kv + description: Total number of tokens taken + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.slot_adjuster_decrements.kv + exported_name: admission_granter_slot_adjuster_decrements_kv + description: Number of decrements of the total KV slots + y_axis_label: Slots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.slot_adjuster_increments.kv + exported_name: admission_granter_slot_adjuster_increments_kv + description: Number of increments of the total KV slots + y_axis_label: Slots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.slots_exhausted_duration.kv + exported_name: admission_granter_slots_exhausted_duration_kv + description: Total duration when KV slots were exhausted, in micros + y_axis_label: Microseconds + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.granter.total_slots.kv + exported_name: admission_granter_total_slots_kv + description: Total slots for kv work + y_axis_label: Slots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.granter.used_slots.kv + exported_name: admission_granter_used_slots_kv + description: Used slots + y_axis_label: Slots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.l0_compacted_bytes.kv + exported_name: admission_l0_compacted_bytes_kv + description: Total bytes compacted out of L0 (used to generate IO tokens) + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.l0_tokens_produced.kv + exported_name: admission_l0_tokens_produced_kv + description: Total bytes produced for L0 writes + y_axis_label: Tokens + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.raft.paused_replicas + exported_name: admission_raft_paused_replicas + description: |- + Number of followers (i.e. Replicas) to which replication is currently paused to help them recover from I/O overload. + + Such Replicas will be ignored for the purposes of proposal quota, and will not + receive replication traffic. They are essentially treated as offline for the + purpose of replication. This serves as a crude form of admission control. + + The count is emitted by the leaseholder of each range. + y_axis_label: Followers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.raft.paused_replicas_dropped_msgs + exported_name: admission_raft_paused_replicas_dropped_msgs + description: |- + Number of messages dropped instead of being sent to paused replicas. + + The messages are dropped to help these replicas to recover from I/O overload. 
+ y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-cpu + exported_name: admission_requested_elastic_cpu + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-cpu.bulk-normal-pri + exported_name: admission_requested_elastic_cpu_bulk_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-cpu.normal-pri + exported_name: admission_requested_elastic_cpu_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-stores + exported_name: admission_requested_elastic_stores + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-stores.bulk-low-pri + exported_name: admission_requested_elastic_stores_bulk_low_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.elastic-stores.bulk-normal-pri + exported_name: admission_requested_elastic_stores_bulk_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv + exported_name: admission_requested_kv + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores + exported_name: admission_requested_kv_stores + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.high-pri + exported_name: admission_requested_kv_stores_high_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.locking-normal-pri + exported_name: admission_requested_kv_stores_locking_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.normal-pri + exported_name: admission_requested_kv_stores_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv-stores.user-high-pri + exported_name: admission_requested_kv_stores_user_high_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv.high-pri + exported_name: admission_requested_kv_high_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv.locking-normal-pri + exported_name: admission_requested_kv_locking_normal_pri + description: Number of requests + y_axis_label: 
Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.kv.normal-pri + exported_name: admission_requested_kv_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-kv-response + exported_name: admission_requested_sql_kv_response + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-kv-response.locking-normal-pri + exported_name: admission_requested_sql_kv_response_locking_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-kv-response.normal-pri + exported_name: admission_requested_sql_kv_response_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-sql-response + exported_name: admission_requested_sql_sql_response + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-sql-response.locking-normal-pri + exported_name: admission_requested_sql_sql_response_locking_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.requested.sql-sql-response.normal-pri + exported_name: admission_requested_sql_sql_response_normal_pri + description: Number of requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: admission.scheduler_latency_listener.p99_nanos + exported_name: admission_scheduler_latency_listener_p99_nanos + description: The scheduling latency at p99 as observed by the scheduler latency listener + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-cpu + exported_name: admission_wait_durations_elastic_cpu + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-cpu.bulk-normal-pri + exported_name: admission_wait_durations_elastic_cpu_bulk_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-cpu.normal-pri + exported_name: admission_wait_durations_elastic_cpu_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-stores + exported_name: admission_wait_durations_elastic_stores + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-stores.bulk-low-pri + exported_name: admission_wait_durations_elastic_stores_bulk_low_pri + description: Wait time 
durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.elastic-stores.bulk-normal-pri + exported_name: admission_wait_durations_elastic_stores_bulk_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.high-pri + exported_name: admission_wait_durations_kv_stores_high_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.locking-normal-pri + exported_name: admission_wait_durations_kv_stores_locking_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.normal-pri + exported_name: admission_wait_durations_kv_stores_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv-stores.user-high-pri + exported_name: admission_wait_durations_kv_stores_user_high_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv.high-pri + exported_name: admission_wait_durations_kv_high_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv.locking-normal-pri + exported_name: admission_wait_durations_kv_locking_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.kv.normal-pri + exported_name: admission_wait_durations_kv_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.snapshot_ingest + exported_name: admission_wait_durations_snapshot_ingest + description: Wait time for snapshot ingest requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-kv-response + exported_name: admission_wait_durations_sql_kv_response + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-kv-response.locking-normal-pri + exported_name: admission_wait_durations_sql_kv_response_locking_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-kv-response.normal-pri + exported_name: admission_wait_durations_sql_kv_response_normal_pri + description: Wait time durations for requests that waited + 
y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-sql-response + exported_name: admission_wait_durations_sql_sql_response + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-sql-response.locking-normal-pri + exported_name: admission_wait_durations_sql_sql_response_locking_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_durations.sql-sql-response.normal-pri + exported_name: admission_wait_durations_sql_sql_response_normal_pri + description: Wait time durations for requests that waited + y_axis_label: Wait time Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-cpu + exported_name: admission_wait_queue_length_elastic_cpu + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-cpu.bulk-normal-pri + exported_name: admission_wait_queue_length_elastic_cpu_bulk_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-cpu.normal-pri + exported_name: admission_wait_queue_length_elastic_cpu_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-stores + exported_name: admission_wait_queue_length_elastic_stores + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-stores.bulk-low-pri + exported_name: admission_wait_queue_length_elastic_stores_bulk_low_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.elastic-stores.bulk-normal-pri + exported_name: admission_wait_queue_length_elastic_stores_bulk_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv + exported_name: admission_wait_queue_length_kv + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores + exported_name: admission_wait_queue_length_kv_stores + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores.high-pri + exported_name: admission_wait_queue_length_kv_stores_high_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores.locking-normal-pri + exported_name: admission_wait_queue_length_kv_stores_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: 
admission.wait_queue_length.kv-stores.normal-pri + exported_name: admission_wait_queue_length_kv_stores_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv-stores.user-high-pri + exported_name: admission_wait_queue_length_kv_stores_user_high_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv.high-pri + exported_name: admission_wait_queue_length_kv_high_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv.locking-normal-pri + exported_name: admission_wait_queue_length_kv_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.kv.normal-pri + exported_name: admission_wait_queue_length_kv_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-kv-response + exported_name: admission_wait_queue_length_sql_kv_response + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-kv-response.locking-normal-pri + exported_name: admission_wait_queue_length_sql_kv_response_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-kv-response.normal-pri + exported_name: admission_wait_queue_length_sql_kv_response_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-sql-response + exported_name: admission_wait_queue_length_sql_sql_response + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-sql-response.locking-normal-pri + exported_name: admission_wait_queue_length_sql_sql_response_locking_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: admission.wait_queue_length.sql-sql-response.normal-pri + exported_name: admission_wait_queue_length_sql_sql_response_normal_pri + description: Length of wait queue + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: batch_requests.bytes + exported_name: batch_requests_bytes + description: Total byte count of batch requests processed + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_requests.cross_region.bytes + exported_name: batch_requests_cross_region_bytes + description: "Total byte count of batch requests processed cross region when region\n\t\ttiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_requests.cross_zone.bytes + exported_name: batch_requests_cross_zone_bytes + description: "Total bytes of batch requests processed cross zones within the same\n\t\tregion when 
zone tiers are configured. If region tiers are not set, it is\n\t\tassumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_responses.bytes + exported_name: batch_responses_bytes + description: Total byte count of batch responses received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_responses.cross_region.bytes + exported_name: batch_responses_cross_region_bytes + description: "Total byte count of batch responses received cross region when region\n\t\ttiers are configured" + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: batch_responses.cross_zone.bytes + exported_name: batch_responses_cross_zone_bytes + description: "Total bytes of batch responses received cross zones within the same\n\t\tregion when zone tiers are configured. If region tiers are not set, it is\n\t\tassumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: capacity.reserved + exported_name: capacity_reserved + description: Capacity reserved for snapshots + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: exec.error + exported_name: exec_error + description: |- + Number of batch KV requests that failed to execute on this node. + + This count excludes transaction restart/abort errors. However, it will include + other errors expected during normal operation, such as ConditionFailedError. + This metric is thus not an indicator of KV health. + y_axis_label: Batch KV Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: exec.latency + exported_name: exec_latency + description: | + Latency of batch KV requests (including errors) executed on this node. + + This measures requests already addressed to a single replica, from the moment + at which they arrive at the internal gRPC endpoint to the moment at which the + response (or an error) is returned. + + This latency includes in particular commit waits, conflict resolution and replication, + and end-users can easily produce high measurements via long-running transactions that + conflict with foreground traffic. This metric thus does not provide a good signal for + understanding the health of the KV layer. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: exec.success + exported_name: exec_success + description: | + Number of batch KV requests executed successfully on this node. + + A request is considered to have executed 'successfully' if it either returns a result + or a transaction restart/abort error. 
+ y_axis_label: Batch KV Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: exportrequest.delay.total + exported_name: exportrequest_delay_total + description: Amount by which evaluation of Export requests was delayed + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: follower_reads.success_count + exported_name: follower_reads_success_count + description: Number of reads successfully processed by any replica + y_axis_label: Read Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gcbytesage + exported_name: gcbytesage + description: Cumulative age of non-live data + y_axis_label: Age + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: gossip.bytes.received + exported_name: gossip_bytes_received + description: Number of received gossip bytes + y_axis_label: Gossip Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.bytes.sent + exported_name: gossip_bytes_sent + description: Number of sent gossip bytes + y_axis_label: Gossip Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.callbacks.pending + exported_name: gossip_callbacks_pending + description: Number of gossip callbacks waiting to be processed + y_axis_label: Callbacks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: gossip.callbacks.pending_duration + exported_name: gossip_callbacks_pending_duration + description: Duration of gossip callback queueing to be processed + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: gossip.callbacks.processed + exported_name: gossip_callbacks_processed + description: Number of gossip callbacks processed + y_axis_label: Callbacks + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.callbacks.processing_duration + exported_name: gossip_callbacks_processing_duration + description: Duration of gossip callback processing + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: gossip.connections.incoming + exported_name: gossip_connections_incoming + description: Number of active incoming gossip connections + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: gossip.connections.outgoing + exported_name: gossip_connections_outgoing + description: Number of active outgoing gossip connections + y_axis_label: Connections + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: gossip.connections.refused + exported_name: gossip_connections_refused + description: Number of refused incoming gossip connections + y_axis_label: Connections + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.infos.received + exported_name: gossip_infos_received + description: Number of received gossip Info objects + y_axis_label: Infos + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.infos.sent + exported_name: gossip_infos_sent + description: Number of sent gossip Info objects + y_axis_label: Infos + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.messages.received + exported_name: 
gossip_messages_received + description: Number of received gossip messages + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: gossip.messages.sent + exported_name: gossip_messages_sent + description: Number of sent gossip messages + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intentage + exported_name: intentage + description: Cumulative age of locks + y_axis_label: Age + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: intentbytes + exported_name: intentbytes + description: Number of bytes in intent KV pairs + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: intentcount + exported_name: intentcount + description: Count of intent keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: intentresolver.async.throttled + exported_name: intentresolver_async_throttled + description: Number of intent resolution attempts not run asynchronously due to throttling + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intentresolver.finalized_txns.failed + exported_name: intentresolver_finalized_txns_failed + description: Number of finalized transaction cleanup failures. Transaction cleanup refers to the process of resolving all of a transactions intents and then garbage collecting its transaction record. + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intentresolver.intents.failed + exported_name: intentresolver_intents_failed + description: Number of intent resolution failures. The unit of measurement is a single intent, so if a batch of intent resolution requests fails, the metric will be incremented for each request in the batch. 
+ y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intents.abort-attempts + exported_name: intents_abort_attempts + description: Count of (point or range) non-poisoning intent abort evaluation attempts + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intents.poison-attempts + exported_name: intents_poison_attempts + description: Count of (point or range) poisoning intent abort evaluation attempts + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: intents.resolve-attempts + exported_name: intents_resolve_attempts + description: Count of (point or range) intent commit evaluation attempts + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: keybytes + exported_name: keybytes + description: Number of bytes taken up by keys + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: keycount + exported_name: keycount + description: Count of all keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.allocator.load_based_lease_transfers.cannot_find_better_candidate + exported_name: kv_allocator_load_based_lease_transfers_cannot_find_better_candidate + description: The number times the allocator determined that the lease was on the best possible replica + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.delta_not_significant + exported_name: kv_allocator_load_based_lease_transfers_delta_not_significant + description: The number times the allocator determined that the delta between the existing store and the best candidate was not significant + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.existing_not_overfull + exported_name: kv_allocator_load_based_lease_transfers_existing_not_overfull + description: The number times the allocator determined that the lease was not on an overfull store + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.follow_the_workload + exported_name: kv_allocator_load_based_lease_transfers_follow_the_workload + description: The number times the allocator determined that the lease should be transferred to another replica for locality. 
+ y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.missing_stats_for_existing_stores + exported_name: kv_allocator_load_based_lease_transfers_missing_stats_for_existing_stores + description: The number times the allocator was missing qps stats for the leaseholder + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_lease_transfers.should_transfer + exported_name: kv_allocator_load_based_lease_transfers_should_transfer + description: The number times the allocator determined that the lease should be transferred to another replica for better load distribution + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.cannot_find_better_candidate + exported_name: kv_allocator_load_based_replica_rebalancing_cannot_find_better_candidate + description: The number times the allocator determined that the range was on the best possible stores + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.delta_not_significant + exported_name: kv_allocator_load_based_replica_rebalancing_delta_not_significant + description: The number times the allocator determined that the delta between an existing store and the best replacement candidate was not high enough + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.existing_not_overfull + exported_name: kv_allocator_load_based_replica_rebalancing_existing_not_overfull + description: The number times the allocator determined that none of the range's replicas were on overfull stores + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.missing_stats_for_existing_store + exported_name: kv_allocator_load_based_replica_rebalancing_missing_stats_for_existing_store + description: The number times the allocator was missing the qps stats for the existing store + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.allocator.load_based_replica_rebalancing.should_transfer + exported_name: kv_allocator_load_based_replica_rebalancing_should_transfer + description: The number times the allocator determined that the replica should be rebalanced to another store for better load distribution + y_axis_label: Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.closed_timestamp.max_behind_nanos + exported_name: kv_closed_timestamp_max_behind_nanos + description: Largest latency between realtime and replica max closed timestamp + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lag_by_cluster_setting + exported_name: kv_closed_timestamp_policy_lag_by_cluster_setting + description: Number of ranges with LAG_BY_CLUSTER_SETTING closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_equal_or_greater_than_300ms + 
exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_equal_or_greater_than_300ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_EQUAL_OR_GREATER_THAN_300MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_100ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_100ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_100MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_120ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_120ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_120MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_140ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_140ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_140MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_160ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_160ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_160MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_180ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_180ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_180MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_200ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_200ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_200MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_20ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_20ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_20MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_220ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_220ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_220MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_240ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_240ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_240MS closed timestamp policy + y_axis_label: 
Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_260ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_260ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_260MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_280ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_280ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_280MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_300ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_300ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_300MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_40ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_40ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_40MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_60ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_60ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_60MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_latency_less_than_80ms + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_latency_less_than_80ms + description: Number of ranges with LEAD_FOR_GLOBAL_READS_LATENCY_LESS_THAN_80MS closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy.lead_for_global_reads_with_no_latency_info + exported_name: kv_closed_timestamp_policy_lead_for_global_reads_with_no_latency_info + description: Number of ranges with LEAD_FOR_GLOBAL_READS_WITH_NO_LATENCY_INFO closed timestamp policy + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.closed_timestamp.policy_change + exported_name: kv_closed_timestamp_policy_change + description: Number of times closed timestamp policy change occurred on ranges + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.closed_timestamp.policy_latency_info_missing + exported_name: kv_closed_timestamp_policy_latency_info_missing + description: Number of times closed timestamp policy refresh had to use hardcoded network RTT due to missing node latency info for one or more replicas + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.concurrency.avg_lock_hold_duration_nanos + exported_name: kv_concurrency_avg_lock_hold_duration_nanos + description: Average lock hold duration across locks currently held in lock tables. 
Does not include replicated locks (intents) that are not held in memory + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.avg_lock_wait_duration_nanos + exported_name: kv_concurrency_avg_lock_wait_duration_nanos + description: Average lock wait duration across requests currently waiting in lock wait-queues + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.latch_conflict_wait_durations + exported_name: kv_concurrency_latch_conflict_wait_durations + description: Durations in nanoseconds spent on latch acquisition waiting for conflicts with other latches + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.lock_wait_queue_waiters + exported_name: kv_concurrency_lock_wait_queue_waiters + description: Number of requests actively waiting in a lock wait-queue + y_axis_label: Lock-Queue Waiters + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.concurrency.locks + exported_name: kv_concurrency_locks + description: Number of active locks held in lock tables. Does not include replicated locks (intents) that are not held in memory + y_axis_label: Locks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.concurrency.locks_with_wait_queues + exported_name: kv_concurrency_locks_with_wait_queues + description: Number of active locks held in lock tables with active wait-queues + y_axis_label: Locks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.concurrency.max_lock_hold_duration_nanos + exported_name: kv_concurrency_max_lock_hold_duration_nanos + description: Maximum length of time any lock in a lock table is held. Does not include replicated locks (intents) that are not held in memory + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.max_lock_wait_duration_nanos + exported_name: kv_concurrency_max_lock_wait_duration_nanos + description: Maximum lock wait duration across requests currently waiting in lock wait-queues + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.concurrency.max_lock_wait_queue_waiters_for_lock + exported_name: kv_concurrency_max_lock_wait_queue_waiters_for_lock + description: Maximum number of requests actively waiting in any single lock wait-queue + y_axis_label: Lock-Queue Waiters + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.loadsplitter.cleardirection + exported_name: kv_loadsplitter_cleardirection + description: Load-based splitter observed an access direction greater than 80% left or right in the samples. + y_axis_label: Occurrences + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.loadsplitter.nosplitkey + exported_name: kv_loadsplitter_nosplitkey + description: Load-based splitter could not find a split key. + y_axis_label: Occurrences + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.loadsplitter.popularkey + exported_name: kv_loadsplitter_popularkey + description: Load-based splitter could not find a split key and the most popular sampled split key occurs in >= 25% of the samples. 
+ y_axis_label: Occurrences + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.planning_attempts + exported_name: kv_prober_planning_attempts + description: Number of attempts at planning out probes made; in order to probe KV we need to plan out which ranges to probe; + y_axis_label: Runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.planning_failures + exported_name: kv_prober_planning_failures + description: Number of attempts at planning out probes that failed; in order to probe KV we need to plan out which ranges to probe; if planning fails, then kvprober is not able to send probes to all ranges; consider alerting on this metric as a result + y_axis_label: Runs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.read.attempts + exported_name: kv_prober_read_attempts + description: Number of attempts made to read probe KV, regardless of outcome + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.read.failures + exported_name: kv_prober_read_failures + description: Number of attempts made to read probe KV that failed, whether due to error or timeout + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.read.latency + exported_name: kv_prober_read_latency + description: Latency of successful KV read probes + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.prober.write.attempts + exported_name: kv_prober_write_attempts + description: Number of attempts made to write probe KV, regardless of outcome + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.write.failures + exported_name: kv_prober_write_failures + description: Number of attempts made to write probe KV that failed, whether due to error or timeout + y_axis_label: Queries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.prober.write.latency + exported_name: kv_prober_write_latency + description: Latency of successful KV write probes + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.prober.write.quarantine.oldest_duration + exported_name: kv_prober_write_quarantine_oldest_duration + description: The duration that the oldest range in the write quarantine pool has remained + y_axis_label: Seconds + type: GAUGE + unit: SECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.budget_allocation_blocked + exported_name: kv_rangefeed_budget_allocation_blocked + description: Number of times RangeFeed waited for budget availability + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.budget_allocation_failed + exported_name: kv_rangefeed_budget_allocation_failed + description: Number of times RangeFeed failed because memory budget was exceeded + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.buffered_registrations + exported_name: kv_rangefeed_buffered_registrations + description: Number of active RangeFeed buffered registrations + y_axis_label: Registrations + type: GAUGE + unit: COUNT + 
aggregation: AVG + derivative: NONE + - name: kv.rangefeed.buffered_sender.queue_size + exported_name: kv_rangefeed_buffered_sender_queue_size + description: Number of entries in the buffered sender queue + y_axis_label: Pending Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.catchup_scan_nanos + exported_name: kv_rangefeed_catchup_scan_nanos + description: Time spent in RangeFeed catchup scan + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.closed_timestamp.slow_ranges + exported_name: kv_rangefeed_closed_timestamp_slow_ranges + description: Number of ranges that have a closed timestamp lagging by more than 5x target lag. Periodically re-calculated + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.closed_timestamp.slow_ranges.cancelled + exported_name: kv_rangefeed_closed_timestamp_slow_ranges_cancelled + description: Number of rangefeeds that were cancelled due to a chronically lagging closed timestamp + y_axis_label: Cancellation Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.closed_timestamp_max_behind_nanos + exported_name: kv_rangefeed_closed_timestamp_max_behind_nanos + description: Largest latency between realtime and replica max closed timestamp for replicas that have active rangefeeds on them + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mem_shared + exported_name: kv_rangefeed_mem_shared + description: Memory usage by rangefeeds + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mem_system + exported_name: kv_rangefeed_mem_system + description: Memory usage by rangefeeds on system ranges + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mux_stream_send.latency + exported_name: kv_rangefeed_mux_stream_send_latency + description: Latency of sending RangeFeed events to the client + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.mux_stream_send.slow_events + exported_name: kv_rangefeed_mux_stream_send_slow_events + description: Number of RangeFeed events that took longer than 10s to send to the client + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.output_loop_unbuffered_registration_nanos + exported_name: kv_rangefeed_output_loop_unbuffered_registration_nanos + description: Duration of the Rangefeed O(range) output loop goroutine. This is only applicable for unbuffered registrations since buffered registrations spawn long-lived goroutines.
+ y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.processors_goroutine + exported_name: kv_rangefeed_processors_goroutine + description: Number of active RangeFeed processors using goroutines + y_axis_label: Processors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.processors_scheduler + exported_name: kv_rangefeed_processors_scheduler + description: Number of active RangeFeed processors using scheduler + y_axis_label: Processors + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.registrations + exported_name: kv_rangefeed_registrations + description: Number of active RangeFeed registrations + y_axis_label: Registrations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduled_processor.queue_timeout + exported_name: kv_rangefeed_scheduled_processor_queue_timeout + description: Number of times the RangeFeed processor shutdown because of a queue send timeout + y_axis_label: Failure Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.rangefeed.scheduler.normal.latency + exported_name: kv_rangefeed_scheduler_normal_latency + description: KV RangeFeed normal scheduler latency + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduler.normal.queue_size + exported_name: kv_rangefeed_scheduler_normal_queue_size + description: Number of entries in the KV RangeFeed normal scheduler queue + y_axis_label: Pending Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduler.system.latency + exported_name: kv_rangefeed_scheduler_system_latency + description: KV RangeFeed system scheduler latency + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.scheduler.system.queue_size + exported_name: kv_rangefeed_scheduler_system_queue_size + description: Number of entries in the KV RangeFeed system scheduler queue + y_axis_label: Pending Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.rangefeed.unbuffered_registrations + exported_name: kv_rangefeed_unbuffered_registrations + description: Number of active RangeFeed unbuffered registrations + y_axis_label: Registrations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.replica_circuit_breaker.num_tripped_events + exported_name: kv_replica_circuit_breaker_num_tripped_events + description: Number of times the per-Replica circuit breakers tripped since process start. + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.replica_circuit_breaker.num_tripped_replicas + exported_name: kv_replica_circuit_breaker_num_tripped_replicas + description: | + Number of Replicas for which the per-Replica circuit breaker is currently tripped. + + A nonzero value indicates range or replica unavailability, and should be investigated. + Replicas in this state will fail-fast all inbound requests. 
+ y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.replica_read_batch_evaluate.dropped_latches_before_eval + exported_name: kv_replica_read_batch_evaluate_dropped_latches_before_eval + description: Number of times read-only batches dropped latches before evaluation. + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.replica_read_batch_evaluate.latency + exported_name: kv_replica_read_batch_evaluate_latency + description: |- + Execution duration for evaluating a BatchRequest on the read-only path after latches have been acquired. + + A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.replica_read_batch_evaluate.without_interleaving_iter + exported_name: kv_replica_read_batch_evaluate_without_interleaving_iter + description: Number of read-only batches evaluated without an intent interleaving iter. + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.replica_write_batch_evaluate.latency + exported_name: kv_replica_write_batch_evaluate_latency + description: |- + Execution duration for evaluating a BatchRequest on the read-write path after latches have been acquired. + + A measurement is recorded regardless of outcome (i.e. also in case of an error). If internal retries occur, each instance is recorded separately. + Note that the measurement does not include the duration for replicating the evaluated command. + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kv.split.estimated_stats + exported_name: kv_split_estimated_stats + description: Number of splits that computed estimated MVCC stats. + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.split.total_bytes_estimates + exported_name: kv_split_total_bytes_estimates + description: Number of total bytes difference between the pre-split and post-split MVCC stats. 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.current_blocked + exported_name: kv_tenant_rate_limit_current_blocked + description: Number of requests currently blocked by the rate limiter + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.tenant_rate_limit.num_tenants + exported_name: kv_tenant_rate_limit_num_tenants + description: Number of tenants currently being tracked + y_axis_label: Tenants + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kv.tenant_rate_limit.read_batches_admitted + exported_name: kv_tenant_rate_limit_read_batches_admitted + description: Number of read batches admitted by the rate limiter + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.read_bytes_admitted + exported_name: kv_tenant_rate_limit_read_bytes_admitted + description: Number of read bytes admitted by the rate limiter + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.read_requests_admitted + exported_name: kv_tenant_rate_limit_read_requests_admitted + description: Number of read requests admitted by the rate limiter + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.write_batches_admitted + exported_name: kv_tenant_rate_limit_write_batches_admitted + description: Number of write batches admitted by the rate limiter + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.write_bytes_admitted + exported_name: kv_tenant_rate_limit_write_bytes_admitted + description: Number of write bytes admitted by the rate limiter + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kv.tenant_rate_limit.write_requests_admitted + exported_name: kv_tenant_rate_limit_write_requests_admitted + description: Number of write requests admitted by the rate limiter + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.elastic.duration + exported_name: kvflowcontrol_eval_wait_elastic_duration + description: Latency histogram for time elastic requests spent waiting for flow tokens to evaluate + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.eval_wait.elastic.requests.admitted + exported_name: kvflowcontrol_eval_wait_elastic_requests_admitted + description: Number of elastic requests admitted by the flow controller + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.elastic.requests.bypassed + exported_name: kvflowcontrol_eval_wait_elastic_requests_bypassed + description: Number of waiting elastic requests that bypassed the flow controller due to the evaluating replica not being the leader + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.elastic.requests.errored + exported_name: kvflowcontrol_eval_wait_elastic_requests_errored + description: Number of elastic requests that errored out while waiting for flow tokens + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.elastic.requests.waiting + exported_name: kvflowcontrol_eval_wait_elastic_requests_waiting + description: Number of elastic requests waiting for flow tokens + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.eval_wait.regular.duration + exported_name: kvflowcontrol_eval_wait_regular_duration + description: Latency histogram for time regular requests spent waiting for flow tokens to evaluate + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.eval_wait.regular.requests.admitted + exported_name: kvflowcontrol_eval_wait_regular_requests_admitted + description: Number of regular requests admitted by the flow controller + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.regular.requests.bypassed + exported_name: kvflowcontrol_eval_wait_regular_requests_bypassed + description: Number of waiting regular requests that bypassed the flow controller due to the evaluating replica not being the leader + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.regular.requests.errored + exported_name: kvflowcontrol_eval_wait_regular_requests_errored + description: Number of regular requests that errored out while waiting for flow tokens + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.eval_wait.regular.requests.waiting + exported_name: kvflowcontrol_eval_wait_regular_requests_waiting + description: Number of regular requests waiting for flow tokens + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.range_controller.count + exported_name: kvflowcontrol_range_controller_count + description: Gauge of range flow controllers currently open; this should align with the number of leaders + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.send_queue.bytes + exported_name: kvflowcontrol_send_queue_bytes + description: Byte size of all raft entries queued for sending to followers, waiting on available elastic send tokens + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.send_queue.count + exported_name: kvflowcontrol_send_queue_count + description: Count of all raft entries queued for sending to followers, waiting on available elastic send tokens + y_axis_label: Bytes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.send_queue.prevent.count + exported_name: kvflowcontrol_send_queue_prevent_count + description: Counter of replication streams that were prevented from forming a send queue + y_axis_label: Preventions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.send_queue.scheduled.deducted_bytes + exported_name: kvflowcontrol_send_queue_scheduled_deducted_bytes + description: Gauge of elastic send token bytes already deducted by replication streams waiting on the scheduler + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + -
name: kvflowcontrol.send_queue.scheduled.force_flush + exported_name: kvflowcontrol_send_queue_scheduled_force_flush + description: Gauge of replication streams scheduled to force flush their send queue + y_axis_label: Scheduled force flushes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.elastic.blocked_count + exported_name: kvflowcontrol_streams_eval_elastic_blocked_count + description: Number of eval replication streams with no flow tokens available for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.elastic.total_count + exported_name: kvflowcontrol_streams_eval_elastic_total_count + description: Total number of eval replication streams for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.regular.blocked_count + exported_name: kvflowcontrol_streams_eval_regular_blocked_count + description: Number of eval replication streams with no flow tokens available for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.eval.regular.total_count + exported_name: kvflowcontrol_streams_eval_regular_total_count + description: Total number of eval replication streams for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.elastic.blocked_count + exported_name: kvflowcontrol_streams_send_elastic_blocked_count + description: Number of send replication streams with no flow tokens available for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.elastic.total_count + exported_name: kvflowcontrol_streams_send_elastic_total_count + description: Total number of send replication streams for elastic requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.regular.blocked_count + exported_name: kvflowcontrol_streams_send_regular_blocked_count + description: Number of send replication streams with no flow tokens available for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.streams.send.regular.total_count + exported_name: kvflowcontrol_streams_send_regular_total_count + description: Total number of send replication streams for regular requests + y_axis_label: Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.eval.elastic.available + exported_name: kvflowcontrol_tokens_eval_elastic_available + description: Flow eval tokens available for elastic requests, across all replication streams + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.eval.elastic.deducted + exported_name: kvflowcontrol_tokens_eval_elastic_deducted + description: Flow eval tokens deducted by elastic requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.elastic.returned + exported_name: kvflowcontrol_tokens_eval_elastic_returned + description: Flow eval tokens returned by elastic requests, across all replication streams + y_axis_label: Bytes + 
type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.elastic.returned.disconnect + exported_name: kvflowcontrol_tokens_eval_elastic_returned_disconnect + description: Flow eval tokens returned early by elastic requests due to disconnects, across all replication streams; this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.elastic.unaccounted + exported_name: kvflowcontrol_tokens_eval_elastic_unaccounted + description: Flow eval tokens returned by elastic requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.available + exported_name: kvflowcontrol_tokens_eval_regular_available + description: Flow eval tokens available for regular requests, across all replication streams + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.eval.regular.deducted + exported_name: kvflowcontrol_tokens_eval_regular_deducted + description: Flow eval tokens deducted by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.returned + exported_name: kvflowcontrol_tokens_eval_regular_returned + description: Flow eval tokens returned by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.returned.disconnect + exported_name: kvflowcontrol_tokens_eval_regular_returned_disconnect + description: Flow eval tokens returned early by regular requests due to disconnects, across all replication streams; this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.eval.regular.unaccounted + exported_name: kvflowcontrol_tokens_eval_regular_unaccounted + description: Flow eval tokens returned by regular requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.available + exported_name: kvflowcontrol_tokens_send_elastic_available + description: Flow send tokens available for elastic requests, across all replication streams + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.send.elastic.deducted + exported_name: kvflowcontrol_tokens_send_elastic_deducted + description: Flow send tokens deducted by elastic requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.deducted.force_flush_send_queue + exported_name: kvflowcontrol_tokens_send_elastic_deducted_force_flush_send_queue + description: Flow send tokens deducted by elastic requests, across all replication streams due to force flushing the stream's send queue + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.deducted.prevent_send_queue + exported_name: kvflowcontrol_tokens_send_elastic_deducted_prevent_send_queue + description: Flow send tokens deducted by elastic requests, across all replication streams to prevent forming a send queue + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.returned + exported_name: kvflowcontrol_tokens_send_elastic_returned + description: Flow send tokens returned by elastic requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.returned.disconnect + exported_name: kvflowcontrol_tokens_send_elastic_returned_disconnect + description: Flow send tokens returned early by elastic requests due to disconnects, across all replication streams; this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.elastic.unaccounted + exported_name: kvflowcontrol_tokens_send_elastic_unaccounted + description: Flow send tokens returned by elastic requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.available + exported_name: kvflowcontrol_tokens_send_regular_available + description: Flow send tokens available for regular requests, across all replication streams + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: kvflowcontrol.tokens.send.regular.deducted + exported_name: kvflowcontrol_tokens_send_regular_deducted + description: Flow send tokens deducted by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.deducted.prevent_send_queue + exported_name: kvflowcontrol_tokens_send_regular_deducted_prevent_send_queue + description: Flow send tokens deducted by regular requests, across all replication streams to prevent forming a send queue + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.returned + exported_name: kvflowcontrol_tokens_send_regular_returned + description: Flow send tokens returned by regular requests, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.returned.disconnect + exported_name: kvflowcontrol_tokens_send_regular_returned_disconnect + description: Flow send tokens returned early by regular requests due to disconnects, across all replication streams; this is a subset of returned tokens + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: kvflowcontrol.tokens.send.regular.unaccounted + exported_name: kvflowcontrol_tokens_send_regular_unaccounted + description: Flow send tokens returned by regular requests that were unaccounted for, across all replication streams + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.epoch + exported_name: leases_epoch + description: Number of replica
leaseholders using epoch-based leases + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.error + exported_name: leases_error + description: Number of failed lease requests + y_axis_label: Lease Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.expiration + exported_name: leases_expiration + description: Number of replica leaseholders using expiration-based leases + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.leader + exported_name: leases_leader + description: Number of replica leaseholders using leader leases + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.liveness + exported_name: leases_liveness + description: Number of replica leaseholders for the liveness range(s) + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.preferences.less-preferred + exported_name: leases_preferences_less_preferred + description: Number of replica leaseholders which satisfy a lease preference which is not the most preferred + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.preferences.violating + exported_name: leases_preferences_violating + description: Number of replica leaseholders which violate lease preferences + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: leases.requests.latency + exported_name: leases_requests_latency + description: Lease request latency (all types and outcomes, coalesced) + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: leases.success + exported_name: leases_success + description: Number of successful lease requests + y_axis_label: Lease Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.transfers.error + exported_name: leases_transfers_error + description: Number of failed lease transfers + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: leases.transfers.locks_written + exported_name: leases_transfers_locks_written + description: Number of locks written to storage during lease transfers + y_axis_label: Locks Written + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: livebytes + exported_name: livebytes + description: Number of bytes of live data (keys plus values) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: livecount + exported_name: livecount + description: Count of live keys + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: liveness.epochincrements + exported_name: liveness_epochincrements + description: Number of times this node has incremented its liveness epoch + y_axis_label: Epochs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: liveness.heartbeatfailures + exported_name: liveness_heartbeatfailures + description: Number of failed node liveness heartbeats from this node + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: liveness.heartbeatsinflight + exported_name: liveness_heartbeatsinflight + description: 
Number of in-flight liveness heartbeats from this node + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: liveness.heartbeatsuccesses + exported_name: liveness_heartbeatsuccesses + description: Number of successful node liveness heartbeats from this node + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: lockbytes + exported_name: lockbytes + description: Number of bytes taken up by replicated lock key-values (shared and exclusive strength, not intent strength) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: lockcount + exported_name: lockcount + description: Count of replicated locks (shared, exclusive, and intent strength) + y_axis_label: Locks + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: node-id + exported_name: node_id + description: node ID with labels for advertised RPC and HTTP addresses + y_axis_label: Node ID + type: GAUGE + unit: CONST + aggregation: AVG + derivative: NONE + - name: queue.consistency.pending + exported_name: queue_consistency_pending + description: Number of pending replicas in the consistency checker queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.consistency.process.failure + exported_name: queue_consistency_process_failure + description: Number of replicas which failed processing in the consistency checker queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.consistency.process.success + exported_name: queue_consistency_process_success + description: Number of replicas successfully processed by the consistency checker queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.consistency.processingnanos + exported_name: queue_consistency_processingnanos + description: Nanoseconds spent processing replicas in the consistency checker queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.abortspanconsidered + exported_name: queue_gc_info_abortspanconsidered + description: Number of AbortSpan entries old enough to be considered for removal + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.abortspangcnum + exported_name: queue_gc_info_abortspangcnum + description: Number of AbortSpan entries fit for removal + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.abortspanscanned + exported_name: queue_gc_info_abortspanscanned + description: Number of transactions present in the AbortSpan scanned from the engine + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.clearrangefailed + exported_name: queue_gc_info_clearrangefailed + description: Number of failed ClearRange operations during GC + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.clearrangesuccess + exported_name: queue_gc_info_clearrangesuccess + description: Number of successful ClearRange operations during GC + y_axis_label: Requests + type: COUNTER 
+ unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.enqueuehighpriority + exported_name: queue_gc_info_enqueuehighpriority + description: Number of replicas enqueued for GC with high priority + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.intentsconsidered + exported_name: queue_gc_info_intentsconsidered + description: Number of 'old' intents + y_axis_label: Intents + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.intenttxns + exported_name: queue_gc_info_intenttxns + description: Number of associated distinct transactions + y_axis_label: Txns + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.numkeysaffected + exported_name: queue_gc_info_numkeysaffected + description: Number of keys with GC'able data + y_axis_label: Keys + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.numrangekeysaffected + exported_name: queue_gc_info_numrangekeysaffected + description: Number of range keys GC'able + y_axis_label: Range Keys + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.pushtxn + exported_name: queue_gc_info_pushtxn + description: Number of attempted pushes + y_axis_label: Pushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.resolvefailed + exported_name: queue_gc_info_resolvefailed + description: Number of cleanup intent failures during GC + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.resolvesuccess + exported_name: queue_gc_info_resolvesuccess + description: Number of successful intent resolutions + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.resolvetotal + exported_name: queue_gc_info_resolvetotal + description: Number of attempted intent resolutions + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionresolvefailed + exported_name: queue_gc_info_transactionresolvefailed + description: Number of intent cleanup failures for local transactions during GC + y_axis_label: Intent Resolutions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangcaborted + exported_name: queue_gc_info_transactionspangcaborted + description: Number of GC'able entries corresponding to aborted txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangccommitted + exported_name: queue_gc_info_transactionspangccommitted + description: Number of GC'able entries corresponding to committed txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangcpending + exported_name: queue_gc_info_transactionspangcpending + description: Number of GC'able entries corresponding to pending txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
queue.gc.info.transactionspangcprepared + exported_name: queue_gc_info_transactionspangcprepared + description: Number of GC'able entries corresponding to prepared txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspangcstaging + exported_name: queue_gc_info_transactionspangcstaging + description: Number of GC'able entries corresponding to staging txns + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.info.transactionspanscanned + exported_name: queue_gc_info_transactionspanscanned + description: Number of entries in transaction spans scanned from the engine + y_axis_label: Txn Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.pending + exported_name: queue_gc_pending + description: Number of pending replicas in the MVCC GC queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.gc.process.failure + exported_name: queue_gc_process_failure + description: Number of replicas which failed processing in the MVCC GC queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.process.success + exported_name: queue_gc_process_success + description: Number of replicas successfully processed by the MVCC GC queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.gc.processingnanos + exported_name: queue_gc_processingnanos + description: Nanoseconds spent processing replicas in the MVCC GC queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.pending + exported_name: queue_lease_pending + description: Number of pending replicas in the replica lease queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.lease.process.failure + exported_name: queue_lease_process_failure + description: Number of replicas which failed processing in the replica lease queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.process.success + exported_name: queue_lease_process_success + description: Number of replicas successfully processed by the replica lease queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.processingnanos + exported_name: queue_lease_processingnanos + description: Nanoseconds spent processing replicas in the replica lease queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.lease.purgatory + exported_name: queue_lease_purgatory + description: Number of replicas in the lease queue's purgatory, awaiting lease transfer operations + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.merge.pending + exported_name: queue_merge_pending + description: Number of pending replicas in the merge queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.merge.process.failure + exported_name: queue_merge_process_failure + description: 
Number of replicas which failed processing in the merge queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.merge.process.success + exported_name: queue_merge_process_success + description: Number of replicas successfully processed by the merge queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.merge.processingnanos + exported_name: queue_merge_processingnanos + description: Nanoseconds spent processing replicas in the merge queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.merge.purgatory + exported_name: queue_merge_purgatory + description: Number of replicas in the merge queue's purgatory, waiting to become mergeable + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.raftlog.pending + exported_name: queue_raftlog_pending + description: Number of pending replicas in the Raft log queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.raftlog.process.failure + exported_name: queue_raftlog_process_failure + description: Number of replicas which failed processing in the Raft log queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftlog.process.success + exported_name: queue_raftlog_process_success + description: Number of replicas successfully processed by the Raft log queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftlog.processingnanos + exported_name: queue_raftlog_processingnanos + description: Nanoseconds spent processing replicas in the Raft log queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftsnapshot.pending + exported_name: queue_raftsnapshot_pending + description: Number of pending replicas in the Raft repair queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.raftsnapshot.process.failure + exported_name: queue_raftsnapshot_process_failure + description: Number of replicas which failed processing in the Raft repair queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftsnapshot.process.success + exported_name: queue_raftsnapshot_process_success + description: Number of replicas successfully processed by the Raft repair queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.raftsnapshot.processingnanos + exported_name: queue_raftsnapshot_processingnanos + description: Nanoseconds spent processing replicas in the Raft repair queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicagc.pending + exported_name: queue_replicagc_pending + description: Number of pending replicas in the replica GC queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.replicagc.process.failure + exported_name: queue_replicagc_process_failure + description: Number of replicas which failed processing 
in the replica GC queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicagc.process.success + exported_name: queue_replicagc_process_success + description: Number of replicas successfully processed by the replica GC queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicagc.processingnanos + exported_name: queue_replicagc_processingnanos + description: Nanoseconds spent processing replicas in the replica GC queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicagc.removereplica + exported_name: queue_replicagc_removereplica + description: Number of replica removals attempted by the replica GC queue + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addnonvoterreplica + exported_name: queue_replicate_addnonvoterreplica + description: Number of non-voter replica additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addreplica + exported_name: queue_replicate_addreplica + description: Number of replica additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addreplica.error + exported_name: queue_replicate_addreplica_error + description: Number of failed replica additions processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addreplica.success + exported_name: queue_replicate_addreplica_success + description: Number of successful replica additions processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.addvoterreplica + exported_name: queue_replicate_addvoterreplica + description: Number of voter replica additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.nonvoterpromotions + exported_name: queue_replicate_nonvoterpromotions + description: Number of non-voters promoted to voters by the replicate queue + y_axis_label: Promotions of Non Voters to Voters + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.pending + exported_name: queue_replicate_pending + description: Number of pending replicas in the replicate queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.replicate.process.failure + exported_name: queue_replicate_process_failure + description: Number of replicas which failed processing in the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.process.success + exported_name: queue_replicate_process_success + description: Number of replicas successfully processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: 
NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.processingnanos + exported_name: queue_replicate_processingnanos + description: Nanoseconds spent processing replicas in the replicate queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.purgatory + exported_name: queue_replicate_purgatory + description: Number of replicas in the replicate queue's purgatory, awaiting allocation options + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.replicate.rebalancenonvoterreplica + exported_name: queue_replicate_rebalancenonvoterreplica + description: Number of non-voter replica rebalancer-initiated additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.rebalancereplica + exported_name: queue_replicate_rebalancereplica + description: Number of replica rebalancer-initiated additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.rebalancevoterreplica + exported_name: queue_replicate_rebalancevoterreplica + description: Number of voter replica rebalancer-initiated additions attempted by the replicate queue + y_axis_label: Replica Additions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadnonvoterreplica + exported_name: queue_replicate_removedeadnonvoterreplica + description: Number of dead non-voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadreplica + exported_name: queue_replicate_removedeadreplica + description: Number of dead replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadreplica.error + exported_name: queue_replicate_removedeadreplica_error + description: Number of failed dead replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadreplica.success + exported_name: queue_replicate_removedeadreplica_success + description: Number of successful dead replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedeadvoterreplica + exported_name: queue_replicate_removedeadvoterreplica + description: Number of dead voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningnonvoterreplica + exported_name: queue_replicate_removedecommissioningnonvoterreplica + description: Number of decommissioning non-voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + 
unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningreplica + exported_name: queue_replicate_removedecommissioningreplica + description: Number of decommissioning replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningreplica.error + exported_name: queue_replicate_removedecommissioningreplica_error + description: Number of failed decommissioning replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningreplica.success + exported_name: queue_replicate_removedecommissioningreplica_success + description: Number of successful decommissioning replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removedecommissioningvoterreplica + exported_name: queue_replicate_removedecommissioningvoterreplica + description: Number of decommissioning voter replica removals attempted by the replicate queue (typically in response to a node outage) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removelearnerreplica + exported_name: queue_replicate_removelearnerreplica + description: Number of learner replica removals attempted by the replicate queue (typically due to internal race conditions) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removenonvoterreplica + exported_name: queue_replicate_removenonvoterreplica + description: Number of non-voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removereplica + exported_name: queue_replicate_removereplica + description: Number of replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removereplica.error + exported_name: queue_replicate_removereplica_error + description: Number of failed replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removereplica.success + exported_name: queue_replicate_removereplica_success + description: Number of successful replica removals processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.removevoterreplica + exported_name: queue_replicate_removevoterreplica + description: Number of voter replica removals attempted by the replicate queue (typically in response to a rebalancer-initiated addition) + y_axis_label: Replica Removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
queue.replicate.replacedeadreplica.error + exported_name: queue_replicate_replacedeadreplica_error + description: Number of failed dead replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.replacedeadreplica.success + exported_name: queue_replicate_replacedeadreplica_success + description: Number of successful dead replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.replacedecommissioningreplica.success + exported_name: queue_replicate_replacedecommissioningreplica_success + description: Number of successful decommissioning replica replacements processed by the replicate queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.transferlease + exported_name: queue_replicate_transferlease + description: Number of range lease transfers attempted by the replicate queue + y_axis_label: Lease Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.replicate.voterdemotions + exported_name: queue_replicate_voterdemotions + description: Number of voters demoted to non-voters by the replicate queue + y_axis_label: Demotions of Voters to Non Voters + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.load_based + exported_name: queue_split_load_based + description: Number of range splits due to a range being greater than the configured max range load + y_axis_label: Range Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.pending + exported_name: queue_split_pending + description: Number of pending replicas in the split queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.split.process.failure + exported_name: queue_split_process_failure + description: Number of replicas which failed processing in the split queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.process.success + exported_name: queue_split_process_success + description: Number of replicas successfully processed by the split queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.processingnanos + exported_name: queue_split_processingnanos + description: Nanoseconds spent processing replicas in the split queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.purgatory + exported_name: queue_split_purgatory + description: Number of replicas in the split queue's purgatory, waiting to become splittable + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.split.size_based + exported_name: queue_split_size_based + description: Number of range splits due to a range being greater than the configured max range size + y_axis_label: Range Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.split.span_config_based + exported_name: queue_split_span_config_based + description: Number 
of range splits due to span configuration + y_axis_label: Range Splits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.tsmaintenance.pending + exported_name: queue_tsmaintenance_pending + description: Number of pending replicas in the time series maintenance queue + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: queue.tsmaintenance.process.failure + exported_name: queue_tsmaintenance_process_failure + description: Number of replicas which failed processing in the time series maintenance queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.tsmaintenance.process.success + exported_name: queue_tsmaintenance_process_success + description: Number of replicas successfully processed by the time series maintenance queue + y_axis_label: Replicas + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: queue.tsmaintenance.processingnanos + exported_name: queue_tsmaintenance_processingnanos + description: Nanoseconds spent processing replicas in the time series maintenance queue + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commands.pending + exported_name: raft_commands_pending + description: |- + Number of Raft commands proposed and pending. + + The number of Raft commands that the leaseholders are tracking as in-flight. + These commands will be periodically reproposed until they are applied or until + they fail, either unequivocally or ambiguously. + y_axis_label: Commands + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.commands.proposed + exported_name: raft_commands_proposed + description: |- + Number of Raft commands proposed. + + The number of proposals and all kinds of reproposals made by leaseholders. This + metric approximates the number of commands submitted through Raft. + y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commands.reproposed.new-lai + exported_name: raft_commands_reproposed_new_lai + description: |- + Number of Raft commands re-proposed with a newer LAI. + + The number of Raft commands that leaseholders re-proposed with a modified LAI. + Such re-proposals happen for commands that are committed to Raft out of intended + order, and hence can not be applied as is. + y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commands.reproposed.unchanged + exported_name: raft_commands_reproposed_unchanged + description: |- + Number of Raft commands re-proposed without modification. + + The number of Raft commands that leaseholders re-proposed without modification. + Such re-proposals happen for commands that are not committed/applied within a + timeout, and have a high chance of being dropped. + y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.commandsapplied + exported_name: raft_commandsapplied + description: |- + Number of Raft commands applied. 
+ + This measurement is taken on the Raft apply loops of all Replicas (leaders and + followers alike), meaning that it does not measure the number of Raft commands + *proposed* (in the hypothetical extreme case, all Replicas may apply all commands + through snapshots, thus not increasing this metric at all). + Instead, it is a proxy for how much work is being done advancing the Replica + state machines on this node. + y_axis_label: Commands + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.dropped + exported_name: raft_dropped + description: Number of Raft proposals dropped (this counts individual raftpb.Entry, not raftpb.MsgProp) + y_axis_label: Proposals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.dropped_leader + exported_name: raft_dropped_leader + description: Number of Raft proposals dropped by a Replica that believes itself to be the leader; each update also increments `raft.dropped` (this counts individual raftpb.Entry, not raftpb.MsgProp) + y_axis_label: Proposals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.accesses + exported_name: raft_entrycache_accesses + description: Number of cache lookups in the Raft entry cache + y_axis_label: Accesses + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.bytes + exported_name: raft_entrycache_bytes + description: Aggregate size of all Raft entries in the Raft entry cache + y_axis_label: Entry Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.entrycache.hits + exported_name: raft_entrycache_hits + description: Number of successful cache lookups in the Raft entry cache + y_axis_label: Hits + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.read_bytes + exported_name: raft_entrycache_read_bytes + description: Counter of bytes in entries returned from the Raft entry cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.entrycache.size + exported_name: raft_entrycache_size + description: Number of Raft entries in the Raft entry cache + y_axis_label: Entry Count + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.flows.entered.state_probe + exported_name: raft_flows_entered_state_probe + description: The number of leader->peer flows transitioned to StateProbe + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.flows.entered.state_replicate + exported_name: raft_flows_entered_state_replicate + description: The number of leader->peer flows transitioned to StateReplicate + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.flows.entered.state_snapshot + exported_name: raft_flows_entered_state_snapshot + description: The number of leader->peer flows transitioned to StateSnapshot + y_axis_label: Flows + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.flows.state_probe + exported_name: raft_flows_state_probe + description: Number of leader->peer flows in StateProbe + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.flows.state_replicate + exported_name:
raft_flows_state_replicate + description: Number of leader->peer flows in StateReplicate + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.flows.state_snapshot + exported_name: raft_flows_state_snapshot + description: Number of leader->peer flows in StateSnapshot + y_axis_label: Flows + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.fortification.skipped_no_support + exported_name: raft_fortification_skipped_no_support + description: The number of fortification requests that were skipped (not sent) due to lack of store liveness support + y_axis_label: Skipped Fortifications + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.fortification_resp.accepted + exported_name: raft_fortification_resp_accepted + description: The number of accepted fortification responses. Calculated on the raft leader + y_axis_label: Accepted Fortification Responses + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.fortification_resp.rejected + exported_name: raft_fortification_resp_rejected + description: The number of rejected fortification responses. Calculated on the raft leader + y_axis_label: Rejected Fortification Responses + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.heartbeats.pending + exported_name: raft_heartbeats_pending + description: Number of pending heartbeats and responses waiting to be coalesced + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.loaded_entries.bytes + exported_name: raft_loaded_entries_bytes + description: Bytes allocated by raft Storage.Entries calls that are still kept in memory + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.loaded_entries.reserved.bytes + exported_name: raft_loaded_entries_reserved_bytes + description: Bytes allocated by raft Storage.Entries calls that are still kept in memory + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.process.applycommitted.latency + exported_name: raft_process_applycommitted_latency + description: |- + Latency histogram for applying all committed Raft commands in a Raft ready. + + This measures the end-to-end latency of applying all commands in a Raft ready. Note that + this closes over possibly multiple measurements of the 'raft.process.commandcommit.latency' + metric, which receives datapoints for each sub-batch processed in the process. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.commandcommit.latency + exported_name: raft_process_commandcommit_latency + description: | + Latency histogram for applying a batch of Raft commands to the state machine. + + This metric is misnamed: it measures the latency for *applying* a batch of + committed Raft commands to a Replica state machine. This requires only + non-durable I/O (except for replication configuration changes). + + Note that a "batch" in this context is really a sub-batch of the batch received + for application during raft ready handling. The + 'raft.process.applycommitted.latency' histogram is likely more suitable in most + cases, as it measures the total latency across all sub-batches (i.e. the sum of + commandcommit.latency for a complete batch). 
+ y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.handleready.latency + exported_name: raft_process_handleready_latency + description: | + Latency histogram for handling a Raft ready. + + This measures the end-to-end latency of the Raft state advancement loop, including: + - snapshot application + - SST ingestion + - durably appending to the Raft log (i.e. includes fsync) + - entry application (incl. replicated side effects, notably log truncation) + + These include work measured in 'raft.process.commandcommit.latency' and + 'raft.process.applycommitted.latency'. However, matching percentiles of these + metrics may be *higher* than handleready, since not every handleready cycle + leads to an update of the others. For example, under tpcc-100 on a single node, + the handleready count is approximately twice the logcommit count (and logcommit + count tracks closely with applycommitted count). + + High percentile outliers can be caused by individual large Raft commands or + storage layer blips. Lower percentile (e.g. 50th) increases are often driven by + CPU exhaustion or storage layer slowdowns. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.logcommit.latency + exported_name: raft_process_logcommit_latency + description: | + Latency histogram for committing Raft log entries to stable storage + + This measures the latency of durably committing a group of newly received Raft + entries as well as the HardState entry to disk. This excludes any data + processing, i.e. we measure purely the commit latency of the resulting Engine + write. Homogeneous bands of p50-p99 latencies (in the presence of regular Raft + traffic) make it likely that the storage layer is healthy. Spikes in the + latency bands can either hint at the presence of large sets of Raft entries + being received, or at performance issues at the storage layer. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.process.tickingnanos + exported_name: raft_process_tickingnanos + description: Nanoseconds spent in store.processRaft() processing replica.Tick() + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.process.workingnanos + exported_name: raft_process_workingnanos + description: | + Nanoseconds spent in store.processRaft() working. + + This is the sum of the measurements passed to the raft.process.handleready.latency + histogram.
+ y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.quota_pool.percent_used + exported_name: raft_quota_pool_percent_used + description: Histogram of proposal quota pool utilization (0-100) per leaseholder per metrics interval + y_axis_label: Percent + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.rcvd.app + exported_name: raft_rcvd_app + description: Number of MsgApp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.appresp + exported_name: raft_rcvd_appresp + description: Number of MsgAppResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.bytes + exported_name: raft_rcvd_bytes + description: "Number of bytes in Raft messages received by this store. Note\n\t\tthat this does not include raft snapshot received." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.cross_region.bytes + exported_name: raft_rcvd_cross_region_bytes + description: "Number of bytes received by this store for cross region Raft messages\n\t\twhen region tiers are configured. Note that this does not include raft\n\t\tsnapshot received." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.cross_zone.bytes + exported_name: raft_rcvd_cross_zone_bytes + description: "Number of bytes received by this store for cross zone, same region\n\t\tRaft messages when zone tiers are configured. If region tiers are not set,\n\t\tit is assumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes. Note that this does not include raft snapshot\n\t\treceived." 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.defortifyleader + exported_name: raft_rcvd_defortifyleader + description: Number of MsgDeFortifyLeader messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.dropped + exported_name: raft_rcvd_dropped + description: Number of incoming Raft messages dropped (due to queue length or size) + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.dropped_bytes + exported_name: raft_rcvd_dropped_bytes + description: Bytes of dropped incoming Raft messages + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.fortifyleader + exported_name: raft_rcvd_fortifyleader + description: Number of MsgFortifyLeader messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.fortifyleaderresp + exported_name: raft_rcvd_fortifyleaderresp + description: Number of MsgFortifyLeaderResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.heartbeat + exported_name: raft_rcvd_heartbeat + description: Number of (coalesced, if enabled) MsgHeartbeat messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.heartbeatresp + exported_name: raft_rcvd_heartbeatresp + description: Number of (coalesced, if enabled) MsgHeartbeatResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.prevote + exported_name: raft_rcvd_prevote + description: Number of MsgPreVote messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.prevoteresp + exported_name: raft_rcvd_prevoteresp + description: Number of MsgPreVoteResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.prop + exported_name: raft_rcvd_prop + description: Number of MsgProp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.queued_bytes + exported_name: raft_rcvd_queued_bytes + description: Number of bytes in messages currently waiting for raft processing + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.rcvd.snap + exported_name: raft_rcvd_snap + description: Number of MsgSnap messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.stepped_bytes + exported_name: raft_rcvd_stepped_bytes + description: | + Number of bytes in messages processed by Raft. + + Messages reflected here have been handed to Raft (via RawNode.Step). This does not imply that the + messages are no longer held in memory or that IO has been performed. 
Raft delegates IO activity to + Raft ready handling, which occurs asynchronously. Since handing messages to Raft serializes with + Raft ready handling and since the size of an entry is dominated by the contained pebble WriteBatch, + on average the rate at which this metric increases is a good proxy for the rate at which Raft ready + handling consumes writes. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.timeoutnow + exported_name: raft_rcvd_timeoutnow + description: Number of MsgTimeoutNow messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.transferleader + exported_name: raft_rcvd_transferleader + description: Number of MsgTransferLeader messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.vote + exported_name: raft_rcvd_vote + description: Number of MsgVote messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.rcvd.voteresp + exported_name: raft_rcvd_voteresp + description: Number of MsgVoteResp messages received by this store + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.replication.latency + exported_name: raft_replication_latency + description: |- + The duration elapsed between having evaluated a BatchRequest and it being + reflected in the proposer's state machine (i.e. having applied fully). + + This encompasses time spent in the quota pool, in replication (including + reproposals), and application, but notably *not* sequencing latency (i.e. + contention and latch acquisition). + + No measurement is recorded for read-only commands as well as read-write commands + which end up not writing (such as a DeleteRange on an empty span). Commands that + result in 'above-replication' errors (i.e. txn retries, etc) are similarly + excluded. Errors that arise while waiting for the in-flight replication result + or result from application of the command are included. + + Note also that usually, clients are signalled at the beginning of application, but + the recorded measurement captures the entirety of log application. + + The duration is always measured on the proposer, even if the Raft leader and + leaseholder are not colocated, or the request is proposed from a follower. + + Commands that use async consensus will still cause a measurement that reflects + the actual replication latency, despite returning early to the client. + y_axis_label: Latency + type: HISTOGRAM + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.scheduler.latency + exported_name: raft_scheduler_latency + description: | + Queueing durations for ranges waiting to be processed by the Raft scheduler. + + This histogram measures the delay from when a range is registered with the scheduler + for processing to when it is actually processed. This does not include the duration + of processing. + y_axis_label: Latency + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: raft.sent.bytes + exported_name: raft_sent_bytes + description: "Number of bytes in Raft messages sent by this store. Note that\n\t\tthis does not include raft snapshot sent."
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.sent.cross_region.bytes + exported_name: raft_sent_cross_region_bytes + description: "Number of bytes sent by this store for cross region Raft messages\n\t\twhen region\ttiers are configured. Note that this does not include raft\n\t\tsnapshot sent." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.sent.cross_zone.bytes + exported_name: raft_sent_cross_zone_bytes + description: "Number of bytes sent by this store for cross zone, same region Raft\n\t\tmessages when zone tiers are configured. If region tiers are not set, it is\n\t\tassumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes. Note that this does not include raft snapshot\n\t\tsent." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.storage.error + exported_name: raft_storage_error + description: Number of Raft storage errors + y_axis_label: Error Count + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.storage.read_bytes + exported_name: raft_storage_read_bytes + description: | + Counter of raftpb.Entry.Size() read from pebble for raft log entries. + + These are the bytes returned from the (raft.Storage).Entries method that were not + returned via the raft entry cache. This metric plus the raft.entrycache.read_bytes + metric represent the total bytes returned from the Entries method. + + Since pebble might serve these entries from the block cache, only a fraction of this + throughput might manifest in disk metrics. + + Entries tracked in this metric incur an unmarshalling-related CPU and memory + overhead that would not be incurred would the entries be served from the raft + entry cache. + + The bytes returned here do not correspond 1:1 to bytes read from pebble. This + metric measures the in-memory size of the raftpb.Entry, whereas we read its + encoded representation from pebble. As there is no compression involved, these + will generally be comparable. + + A common reason for elevated measurements on this metric is that a store is + falling behind on raft log application. The raft entry cache generally tracks + entries that were recently appended, so if log application falls behind the + cache will already have moved on to newer entries. 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.ticks + exported_name: raft_ticks + description: Number of Raft ticks queued + y_axis_label: Ticks + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.timeoutcampaign + exported_name: raft_timeoutcampaign + description: Number of Raft replicas campaigning after missed heartbeats from leader + y_axis_label: Elections called after timeout + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.flow-token-dispatches-dropped + exported_name: raft_transport_flow_token_dispatches_dropped + description: Number of flow token dispatches dropped by the Raft Transport + y_axis_label: Dispatches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.rcvd + exported_name: raft_transport_rcvd + description: Number of Raft messages received by the Raft Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.reverse-rcvd + exported_name: raft_transport_reverse_rcvd + description: |- + Messages received from the reverse direction of a stream. + + These messages should be rare. They are mostly informational, and are not actual + responses to Raft messages. Responses are received over another stream. + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.reverse-sent + exported_name: raft_transport_reverse_sent + description: |- + Messages sent in the reverse direction of a stream. + + These messages should be rare. They are mostly informational, and are not actual + responses to Raft messages. Responses are sent over another stream. + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.send-queue-bytes + exported_name: raft_transport_send_queue_bytes + description: |- + The total byte size of pending outgoing messages in the queue. + + The queue is composed of multiple bounded channels associated with different + peers. A size higher than the average baseline could indicate issues streaming + messages to at least one peer. Use this metric together with send-queue-size, to + have a fuller picture. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raft.transport.send-queue-size + exported_name: raft_transport_send_queue_size + description: |- + Number of pending outgoing messages in the Raft Transport queue. + + The queue is composed of multiple bounded channels associated with different + peers. The overall size of tens of thousands could indicate issues streaming + messages to at least one peer. Use this metric in conjunction with + send-queue-bytes. 
+ y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raft.transport.sends-dropped + exported_name: raft_transport_sends_dropped + description: Number of Raft message sends dropped by the Raft Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raft.transport.sent + exported_name: raft_transport_sent + description: Number of Raft messages sent by the Raft Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: raftlog.behind + exported_name: raftlog_behind + description: |- + Number of Raft log entries followers on other stores are behind. + + This gauge provides a view of the aggregate number of log entries the Raft leaders + on this node think the followers are behind. Since a raft leader may not always + have a good estimate for this information for all of its followers, and since + followers are expected to be behind (when they are not required as part of a + quorum) *and* the aggregate thus scales like the count of such followers, it is + difficult to meaningfully interpret this metric. + y_axis_label: Log Entries + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: raftlog.size.max + exported_name: raftlog_size_max + description: Approximate size of the largest Raft log on the store. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raftlog.size.total + exported_name: raftlog_size_total + description: Approximate size of all Raft logs on the store. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: raftlog.truncated + exported_name: raftlog_truncated + description: Number of Raft log entries truncated + y_axis_label: Log Entries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.adds + exported_name: range_adds + description: Number of range additions + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.raftleaderremovals + exported_name: range_raftleaderremovals + description: Number of times the current Raft leader was removed from a range + y_axis_label: Raft leader removals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.raftleadertransfers + exported_name: range_raftleadertransfers + description: Number of raft leader transfers + y_axis_label: Leader Transfers + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.recoveries + exported_name: range_recoveries + description: |- + Count of offline loss of quorum recovery operations performed on ranges. + + This count increments for every range recovered in an offline loss of quorum + recovery operation. The metric is updated when the node on which the survivor replica + is located starts following the recovery.
+ y_axis_label: Quorum Recoveries + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.removes + exported_name: range_removes + description: Number of range removals + y_axis_label: Range Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.applied-initial + exported_name: range_snapshots_applied_initial + description: Number of snapshots applied for initial upreplication + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.applied-non-voter + exported_name: range_snapshots_applied_non_voter + description: Number of snapshots applied by non-voter replicas + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.applied-voter + exported_name: range_snapshots_applied_voter + description: Number of snapshots applied by voter replicas + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-region.rcvd-bytes + exported_name: range_snapshots_cross_region_rcvd_bytes + description: Number of snapshot bytes received cross region by this store when region tiers are configured + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-region.sent-bytes + exported_name: range_snapshots_cross_region_sent_bytes + description: Number of snapshot bytes sent cross region by this store when region tiers are configured + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-zone.rcvd-bytes + exported_name: range_snapshots_cross_zone_rcvd_bytes + description: "Number of snapshot bytes received cross zone within the same region\n\t\tby this store when zone tiers are configured. If region tiers are not set,\n\t\tit is assumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.cross-zone.sent-bytes + exported_name: range_snapshots_cross_zone_sent_bytes + description: "Number of snapshot bytes sent cross zone within the same region by\n\t\tthis store when zone tiers are configured. If region tiers are not set, it\n\t\tis assumed to be within the same region. To ensure accurate monitoring of\n\t\tcross-zone data transfer, region and zone tiers should be consistently\n\t\tconfigured across all nodes." + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.delegate.failures + exported_name: range_snapshots_delegate_failures + description: | + Number of snapshots that were delegated to a different node and + resulted in failure on that delegate. There are numerous reasons a failure can + occur on a delegate such as timeout, the delegate Raft log being too far behind + or the delegate being too busy to send. 
+ y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.delegate.in-progress + exported_name: range_snapshots_delegate_in_progress + description: Number of delegated snapshots that are currently in-flight. + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.delegate.sent-bytes + exported_name: range_snapshots_delegate_sent_bytes + description: | + Bytes sent using a delegate. + + The number of bytes sent as a result of a delegate snapshot request + that was originated from a different node. This metric is useful in + evaluating the network savings of not sending cross region traffic. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.delegate.successes + exported_name: range_snapshots_delegate_successes + description: | + Number of snapshots that were delegated to a different node and + resulted in success on that delegate. This does not count self delegated snapshots. + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.generated + exported_name: range_snapshots_generated + description: Number of generated snapshots + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.rcvd-bytes + exported_name: range_snapshots_rcvd_bytes + description: Number of snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.rebalancing.rcvd-bytes + exported_name: range_snapshots_rebalancing_rcvd_bytes + description: Number of rebalancing snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.rebalancing.sent-bytes + exported_name: range_snapshots_rebalancing_sent_bytes + description: Number of rebalancing snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recovery.rcvd-bytes + exported_name: range_snapshots_recovery_rcvd_bytes + description: Number of raft recovery snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recovery.sent-bytes + exported_name: range_snapshots_recovery_sent_bytes + description: Number of raft recovery snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recv-failed + exported_name: range_snapshots_recv_failed + description: Number of range snapshot initialization messages that errored out on the recipient, typically before any data is transferred + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.recv-in-progress + exported_name: range_snapshots_recv_in_progress + description: Number of non-empty snapshots being received + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-queue + exported_name: range_snapshots_recv_queue + description: Number of snapshots queued to receive + y_axis_label: Snapshots + type: GAUGE + unit: 
COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-queue-bytes + exported_name: range_snapshots_recv_queue_bytes + description: Total size of all snapshots in the snapshot receive queue + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-total-in-progress + exported_name: range_snapshots_recv_total_in_progress + description: Number of total snapshots being received + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.recv-unusable + exported_name: range_snapshots_recv_unusable + description: Number of range snapshots that were fully transmitted but determined to be unnecessary or unusable + y_axis_label: Snapshots + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.send-in-progress + exported_name: range_snapshots_send_in_progress + description: Number of non-empty snapshots being sent + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.send-queue + exported_name: range_snapshots_send_queue + description: Number of snapshots queued to send + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.send-queue-bytes + exported_name: range_snapshots_send_queue_bytes + description: Total size of all snapshots in the snapshot send queue + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: range.snapshots.send-total-in-progress + exported_name: range_snapshots_send_total_in_progress + description: Number of total snapshots being sent + y_axis_label: Snapshots + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: range.snapshots.sent-bytes + exported_name: range_snapshots_sent_bytes + description: Number of snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.unknown.rcvd-bytes + exported_name: range_snapshots_unknown_rcvd_bytes + description: Number of unknown snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.unknown.sent-bytes + exported_name: range_snapshots_unknown_sent_bytes + description: Number of unknown snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.upreplication.rcvd-bytes + exported_name: range_snapshots_upreplication_rcvd_bytes + description: Number of upreplication snapshot bytes received + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: range.snapshots.upreplication.sent-bytes + exported_name: range_snapshots_upreplication_sent_bytes + description: Number of upreplication snapshot bytes sent + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rangekeybytes + exported_name: rangekeybytes + description: Number of bytes taken up by range keys (e.g. MVCC range tombstones) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rangekeycount + exported_name: rangekeycount + description: Count of all range keys (e.g.
MVCC range tombstones) + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: ranges.decommissioning + exported_name: ranges_decommissioning + description: Number of ranges with at least one replica on a decommissioning node + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: ranges.overreplicated + exported_name: ranges_overreplicated + description: Number of ranges with more live replicas than the replication target + y_axis_label: Ranges + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rangevalbytes + exported_name: rangevalbytes + description: Number of bytes taken up by range key values (e.g. MVCC range tombstones) + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rangevalcount + exported_name: rangevalcount + description: Count of all range key values (e.g. MVCC range tombstones) + y_axis_label: MVCC Values + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rebalancing.readbytespersecond + exported_name: rebalancing_readbytespersecond + description: Number of bytes read recently per second, considering the last 30 minutes. + y_axis_label: Bytes/Sec + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rebalancing.readspersecond + exported_name: rebalancing_readspersecond + description: Number of keys read recently per second, considering the last 30 minutes. + y_axis_label: Keys/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rebalancing.requestspersecond + exported_name: rebalancing_requestspersecond + description: Number of requests received recently per second, considering the last 30 minutes. + y_axis_label: Requests/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rebalancing.state.imbalanced_overfull_options_exhausted + exported_name: rebalancing_state_imbalanced_overfull_options_exhausted + description: Number of occurrences where this store was overfull but failed to shed load after exhausting available rebalance options + y_axis_label: Overfull Options Exhausted + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rebalancing.writebytespersecond + exported_name: rebalancing_writebytespersecond + description: Number of bytes written recently per second, considering the last 30 minutes. + y_axis_label: Bytes/Sec + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rebalancing.writespersecond + exported_name: rebalancing_writespersecond + description: Number of keys written (i.e. applied by raft) per second to the store, considering the last 30 minutes. + y_axis_label: Keys/Sec + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.asleep + exported_name: replicas_asleep + description: Number of asleep replicas. Similarly to quiesced replicas, asleep replicas do not tick in Raft.
+ y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.cpunanospersecond + exported_name: replicas_cpunanospersecond + description: Nanoseconds of CPU time in Replica request processing including evaluation but not replication + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: replicas.leaders + exported_name: replicas_leaders + description: Number of raft leaders + y_axis_label: Raft Leaders + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.leaders_invalid_lease + exported_name: replicas_leaders_invalid_lease + description: Number of replicas that are Raft leaders whose lease is invalid + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.leaders_not_fortified + exported_name: replicas_leaders_not_fortified + description: Number of replicas that are not fortified Raft leaders + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.leaders_not_leaseholders + exported_name: replicas_leaders_not_leaseholders + description: Number of replicas that are Raft leaders whose range lease is held by another store + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.quiescent + exported_name: replicas_quiescent + description: Number of quiesced replicas + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.reserved + exported_name: replicas_reserved + description: Number of replicas reserved for snapshots + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: replicas.uninitialized + exported_name: replicas_uninitialized + description: Number of uninitialized replicas; this does not include uninitialized replicas that can lie dormant in a persistent state. + y_axis_label: Replicas + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.backpressure.split + exported_name: requests_backpressure_split + description: | + Number of backpressured writes waiting on a Range split. + + A Range will backpressure (roughly) non-system traffic when the range is above + the configured size until the range splits. When the rate of this metric is + nonzero over extended periods of time, it should be investigated why splits are + not occurring. + y_axis_label: Writes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.latch + exported_name: requests_slow_latch + description: | + Number of requests that have been stuck for a long time acquiring latches. + + Latches moderate access to the KV keyspace for the purpose of evaluating and + replicating commands. A slow latch acquisition attempt is often caused by + another request holding and not releasing its latches in a timely manner. This + in turn can either be caused by a long delay in evaluation (for example, under + severe system overload) or by delays at the replication layer. + + This gauge registering a nonzero value usually indicates a serious problem and + should be investigated. + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.lease + exported_name: requests_slow_lease + description: | + Number of requests that have been stuck for a long time acquiring a lease.
+ + This gauge registering a nonzero value usually indicates range or replica + unavailability, and should be investigated. In the common case, we also + expect to see 'requests.slow.raft' register a nonzero value, indicating + that the lease requests are not getting a timely response from the replication + layer. + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: requests.slow.raft + exported_name: requests_slow_raft + description: | + Number of requests that have been stuck for a long time in the replication layer. + + An (evaluated) request has to pass through the replication layer, notably the + quota pool and raft. If it fails to do so within a highly permissive duration, + the gauge is incremented (and decremented again once the request is either + applied or returns an error). + + A nonzero value indicates range or replica unavailability, and should be investigated. + y_axis_label: Requests + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rocksdb.block.cache.usage + exported_name: rocksdb_block_cache_usage + description: Bytes used by the block cache + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rocksdb.bloom.filter.prefix.checked + exported_name: rocksdb_bloom_filter_prefix_checked + description: Number of times the bloom filter was checked + y_axis_label: Bloom Filter Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.bloom.filter.prefix.useful + exported_name: rocksdb_bloom_filter_prefix_useful + description: Number of times the bloom filter helped avoid iterator creation + y_axis_label: Bloom Filter Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.compacted-bytes-read + exported_name: rocksdb_compacted_bytes_read + description: Bytes read during compaction + y_axis_label: Bytes Read + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.compacted-bytes-written + exported_name: rocksdb_compacted_bytes_written + description: Bytes written during compaction + y_axis_label: Bytes Written + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.encryption.algorithm + exported_name: rocksdb_encryption_algorithm + description: Algorithm in use for encryption-at-rest, see storage/enginepb/key_registry.proto + y_axis_label: Encryption At Rest + type: GAUGE + unit: CONST + aggregation: AVG + derivative: NONE + - name: rocksdb.estimated-pending-compaction + exported_name: rocksdb_estimated_pending_compaction + description: Estimated pending compaction bytes + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rocksdb.flushed-bytes + exported_name: rocksdb_flushed_bytes + description: Bytes written during flush + y_axis_label: Bytes Written + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.flushes + exported_name: rocksdb_flushes + description: Number of table flushes + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.ingested-bytes + exported_name: rocksdb_ingested_bytes + description: Bytes ingested + y_axis_label: Bytes Ingested + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rocksdb.memtable.total-size + exported_name:
rocksdb_memtable_total_size + description: Current size of memtable in bytes + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rocksdb.num-sstables + exported_name: rocksdb_num_sstables + description: Number of storage engine SSTables + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rocksdb.read-amplification + exported_name: rocksdb_read_amplification + description: Number of disk reads per query + y_axis_label: Disk Reads per Query + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rocksdb.table-readers-mem-estimate + exported_name: rocksdb_table_readers_mem_estimate + description: Memory used by index and filter blocks + y_axis_label: Memory + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: rpc.batches.recv + exported_name: rpc_batches_recv + description: Number of batches processed + y_axis_label: Batches + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.addsstable.recv + exported_name: rpc_method_addsstable_recv + description: Number of AddSSTable requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminchangereplicas.recv + exported_name: rpc_method_adminchangereplicas_recv + description: Number of AdminChangeReplicas requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminmerge.recv + exported_name: rpc_method_adminmerge_recv + description: Number of AdminMerge requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminrelocaterange.recv + exported_name: rpc_method_adminrelocaterange_recv + description: Number of AdminRelocateRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminscatter.recv + exported_name: rpc_method_adminscatter_recv + description: Number of AdminScatter requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminsplit.recv + exported_name: rpc_method_adminsplit_recv + description: Number of AdminSplit requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.admintransferlease.recv + exported_name: rpc_method_admintransferlease_recv + description: Number of AdminTransferLease requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.adminunsplit.recv + exported_name: rpc_method_adminunsplit_recv + description: Number of AdminUnsplit requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.barrier.recv + exported_name: rpc_method_barrier_recv + description: Number of Barrier requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.checkconsistency.recv + exported_name: rpc_method_checkconsistency_recv + description: Number of CheckConsistency requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: 
AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.clearrange.recv + exported_name: rpc_method_clearrange_recv + description: Number of ClearRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.computechecksum.recv + exported_name: rpc_method_computechecksum_recv + description: Number of ComputeChecksum requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.conditionalput.recv + exported_name: rpc_method_conditionalput_recv + description: Number of ConditionalPut requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.delete.recv + exported_name: rpc_method_delete_recv + description: Number of Delete requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.deleterange.recv + exported_name: rpc_method_deleterange_recv + description: Number of DeleteRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.endtxn.recv + exported_name: rpc_method_endtxn_recv + description: Number of EndTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.excise.recv + exported_name: rpc_method_excise_recv + description: Number of Excise requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.export.recv + exported_name: rpc_method_export_recv + description: Number of Export requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.gc.recv + exported_name: rpc_method_gc_recv + description: Number of GC requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.get.recv + exported_name: rpc_method_get_recv + description: Number of Get requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.heartbeattxn.recv + exported_name: rpc_method_heartbeattxn_recv + description: Number of HeartbeatTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.increment.recv + exported_name: rpc_method_increment_recv + description: Number of Increment requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.isspanempty.recv + exported_name: rpc_method_isspanempty_recv + description: Number of IsSpanEmpty requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.leaseinfo.recv + exported_name: rpc_method_leaseinfo_recv + description: Number of LeaseInfo requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.linkexternalsstable.recv + exported_name: rpc_method_linkexternalsstable_recv + description: Number of LinkExternalSSTable requests 
processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.merge.recv + exported_name: rpc_method_merge_recv + description: Number of Merge requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.migrate.recv + exported_name: rpc_method_migrate_recv + description: Number of Migrate requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.probe.recv + exported_name: rpc_method_probe_recv + description: Number of Probe requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.pushtxn.recv + exported_name: rpc_method_pushtxn_recv + description: Number of PushTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.put.recv + exported_name: rpc_method_put_recv + description: Number of Put requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.queryintent.recv + exported_name: rpc_method_queryintent_recv + description: Number of QueryIntent requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.querylocks.recv + exported_name: rpc_method_querylocks_recv + description: Number of QueryLocks requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.queryresolvedtimestamp.recv + exported_name: rpc_method_queryresolvedtimestamp_recv + description: Number of QueryResolvedTimestamp requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.querytxn.recv + exported_name: rpc_method_querytxn_recv + description: Number of QueryTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.rangestats.recv + exported_name: rpc_method_rangestats_recv + description: Number of RangeStats requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.recomputestats.recv + exported_name: rpc_method_recomputestats_recv + description: Number of RecomputeStats requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.recovertxn.recv + exported_name: rpc_method_recovertxn_recv + description: Number of RecoverTxn requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.refresh.recv + exported_name: rpc_method_refresh_recv + description: Number of Refresh requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.refreshrange.recv + exported_name: rpc_method_refreshrange_recv + description: Number of RefreshRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.requestlease.recv + 
exported_name: rpc_method_requestlease_recv + description: Number of RequestLease requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.resolveintent.recv + exported_name: rpc_method_resolveintent_recv + description: Number of ResolveIntent requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.resolveintentrange.recv + exported_name: rpc_method_resolveintentrange_recv + description: Number of ResolveIntentRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.reversescan.recv + exported_name: rpc_method_reversescan_recv + description: Number of ReverseScan requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.revertrange.recv + exported_name: rpc_method_revertrange_recv + description: Number of RevertRange requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.scan.recv + exported_name: rpc_method_scan_recv + description: Number of Scan requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.subsume.recv + exported_name: rpc_method_subsume_recv + description: Number of Subsume requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.transferlease.recv + exported_name: rpc_method_transferlease_recv + description: Number of TransferLease requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.truncatelog.recv + exported_name: rpc_method_truncatelog_recv + description: Number of TruncateLog requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.method.writebatch.recv + exported_name: rpc_method_writebatch_recv + description: Number of WriteBatch requests processed + y_axis_label: RPCs + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: rpc.streams.mux_rangefeed.active + exported_name: rpc_streams_mux_rangefeed_active + description: Number of currently running MuxRangeFeed streams + y_axis_label: Streams + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: rpc.streams.mux_rangefeed.recv + exported_name: rpc_streams_mux_rangefeed_recv + description: Total number of MuxRangeFeed streams + y_axis_label: Streams + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: spanconfig.kvsubscriber.oldest_protected_record_nanos + exported_name: spanconfig_kvsubscriber_oldest_protected_record_nanos + description: Difference between the current time and the oldest protected timestamp (sudden drops indicate a record being released; an ever increasing number indicates that the oldest record is around and preventing GC if > configured GC TTL) + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: spanconfig.kvsubscriber.protected_record_count + exported_name: spanconfig_kvsubscriber_protected_record_count + description: Number of 
protected timestamp records, as seen by KV + y_axis_label: Records + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: spanconfig.kvsubscriber.update_behind_nanos + exported_name: spanconfig_kvsubscriber_update_behind_nanos + description: Difference between the current time and when the KVSubscriber received its last update (an ever increasing number indicates that we're no longer receiving updates) + y_axis_label: Nanoseconds + type: GAUGE + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: storage.batch-commit.commit-wait.duration + exported_name: storage_batch_commit_commit_wait_duration + description: Cumulative time spent waiting for WAL sync, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.count + exported_name: storage_batch_commit_count + description: Count of batch commits. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Commit Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.duration + exported_name: storage_batch_commit_duration + description: Cumulative time spent in batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.l0-stall.duration + exported_name: storage_batch_commit_l0_stall_duration + description: Cumulative time spent in a write stall due to high read amplification in L0, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.mem-stall.duration + exported_name: storage_batch_commit_mem_stall_duration + description: Cumulative time spent in a write stall due to too many memtables, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.sem-wait.duration + exported_name: storage_batch_commit_sem_wait_duration + description: Cumulative time spent in semaphore wait, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.wal-queue-wait.duration + exported_name: storage_batch_commit_wal_queue_wait_duration + description: Cumulative time spent waiting for memory blocks in the WAL queue, for batch commit. See storage.AggregatedBatchCommitStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.batch-commit.wal-rotation.duration + exported_name: storage_batch_commit_wal_rotation_duration + description: Cumulative time spent waiting for WAL rotation, for batch commit. See storage.AggregatedBatchCommitStats for details. 
+ y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.block-load.active + exported_name: storage_block_load_active + description: The number of sstable block loads currently in progress + y_axis_label: Block loads + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.block-load.queued + exported_name: storage_block_load_queued + description: 'The cumulative number of SSTable block loads that were delayed because too many loads were active (see also: `storage.block_load.node_max_active`)' + y_axis_label: Block loads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.checkpoints + exported_name: storage_checkpoints + description: |- + The number of checkpoint directories found in storage. + + This is the number of directories found in the auxiliary/checkpoints directory. + Each represents an immutable point-in-time storage engine checkpoint. They are + cheap (consisting mostly of hard links), but over time they effectively become a + full copy of the old state, which increases their relative cost. Checkpoints + must be deleted once acted upon (e.g. copied elsewhere or investigated). + + A likely cause of having a checkpoint is that one of the ranges in this store + had inconsistent data among its replicas. Such checkpoint directories are + located in auxiliary/checkpoints/rN_at_M, where N is the range ID, and M is the + Raft applied index at which this checkpoint was taken. + y_axis_label: Directories + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.compactions.cancelled.bytes + exported_name: storage_compactions_cancelled_bytes + description: Cumulative volume of data written to sstables during compactions that were ultimately cancelled due to a conflicting operation. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.cancelled.count + exported_name: storage_compactions_cancelled_count + description: Cumulative count of compactions that were cancelled before they completed due to a conflicting operation. + y_axis_label: Compactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.duration + exported_name: storage_compactions_duration + description: |- + Cumulative sum of all compaction durations. + + The rate of this value provides the effective compaction concurrency of a store, + which can be useful to determine whether the maximum compaction concurrency is + fully utilized. + y_axis_label: Processing Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.keys.pinned.bytes + exported_name: storage_compactions_keys_pinned_bytes + description: | + Cumulative size of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots. + + Various subsystems of CockroachDB take LSM snapshots to maintain a consistent view + of the database over an extended duration. In order to maintain the consistent view, + flushes and compactions within the storage engine must preserve keys that otherwise + would have been dropped. This increases write amplification, and introduces keys + that must be skipped during iteration. This metric records the cumulative number of + bytes preserved during flushes and compactions over the lifetime of the process. 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.compactions.keys.pinned.count + exported_name: storage_compactions_keys_pinned_count + description: | + Cumulative count of storage engine KVs written to sstables during flushes and compactions due to open LSM snapshots. + + Various subsystems of CockroachDB take LSM snapshots to maintain a consistent view + of the database over an extended duration. In order to maintain the consistent view, + flushes and compactions within the storage engine must preserve keys that otherwise + would have been dropped. This increases write amplification, and introduces keys + that must be skipped during iteration. This metric records the cumulative count of + KVs preserved during flushes and compactions over the lifetime of the process. + y_axis_label: Keys + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk-slow + exported_name: storage_disk_slow + description: Number of instances of disk operations taking longer than 10s + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk-stalled + exported_name: storage_disk_stalled + description: Number of instances of disk operations taking longer than 20s + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.io.time + exported_name: storage_disk_io_time + description: Time spent reading from or writing to the store's disk since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.iopsinprogress + exported_name: storage_disk_iopsinprogress + description: IO operations currently in progress on the store's disk (as reported by the OS) + y_axis_label: Operations + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.disk.read-max.bytespersecond + exported_name: storage_disk_read_max_bytespersecond + description: Maximum rate at which bytes were read from disk (as reported by the OS) + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.disk.read.bytes + exported_name: storage_disk_read_bytes + description: Bytes read from the store's disk since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.read.count + exported_name: storage_disk_read_count + description: Disk read operations on the store's disk since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.read.time + exported_name: storage_disk_read_time + description: Time spent reading from the store's disk since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.weightedio.time + exported_name: storage_disk_weightedio_time + description: Weighted time spent reading from or writing to the store's disk since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: 
storage.disk.write-max.bytespersecond + exported_name: storage_disk_write_max_bytespersecond + description: Maximum rate at which bytes were written to disk (as reported by the OS) + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.disk.write.bytes + exported_name: storage_disk_write_bytes + description: Bytes written to the store's disk since this process started (as reported by the OS) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.write.count + exported_name: storage_disk_write_count + description: Disk write operations on the store's disk since this process started (as reported by the OS) + y_axis_label: Operations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.disk.write.time + exported_name: storage_disk_write_time + description: Time spent writing to the store's disks since this process started (as reported by the OS) + y_axis_label: Time + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.ingest.count + exported_name: storage_flush_ingest_count + description: Flushes performing an ingest (flushable ingestions) + y_axis_label: Flushes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.ingest.table.bytes + exported_name: storage_flush_ingest_table_bytes + description: Bytes ingested via flushes (flushable ingestions) + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.ingest.table.count + exported_name: storage_flush_ingest_table_count + description: Tables ingested via flushes (flushable ingestions) + y_axis_label: Tables + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.flush.utilization + exported_name: storage_flush_utilization + description: The percentage of time the storage engine is actively flushing memtables to disk. + y_axis_label: Flush Utilization + type: GAUGE + unit: PERCENT + aggregation: AVG + derivative: NONE + - name: storage.ingest.count + exported_name: storage_ingest_count + description: Number of successful ingestions performed + y_axis_label: Events + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.initial_stats_complete + exported_name: storage_initial_stats_complete + description: Set to 1 when initial table stats collection is complete. + y_axis_label: Boolean + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.iterator.block-load.bytes + exported_name: storage_iterator_block_load_bytes + description: Bytes loaded by storage engine iterators (possibly cached). See storage.AggregatedIteratorStats for details. + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.block-load.cached-bytes + exported_name: storage_iterator_block_load_cached_bytes + description: Bytes loaded by storage engine iterators from the block cache. See storage.AggregatedIteratorStats for details. 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.block-load.read-duration + exported_name: storage_iterator_block_load_read_duration + description: Cumulative time storage engine iterators spent loading blocks from durable storage. See storage.AggregatedIteratorStats for details. + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-backup.block-load.bytes + exported_name: storage_iterator_category_backup_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-backup.block-load.cached-bytes + exported_name: storage_iterator_category_backup_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-backup.block-load.latency-sum + exported_name: storage_iterator_category_backup_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-batch-eval.block-load.bytes + exported_name: storage_iterator_category_batch_eval_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-batch-eval.block-load.cached-bytes + exported_name: storage_iterator_category_batch_eval_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-batch-eval.block-load.latency-sum + exported_name: storage_iterator_category_batch_eval_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-crdb-unknown.block-load.bytes + exported_name: storage_iterator_category_crdb_unknown_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-crdb-unknown.block-load.cached-bytes + exported_name: storage_iterator_category_crdb_unknown_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-crdb-unknown.block-load.latency-sum + exported_name: storage_iterator_category_crdb_unknown_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-intent-resolution.block-load.bytes + exported_name: storage_iterator_category_intent_resolution_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-intent-resolution.block-load.cached-bytes + exported_name: storage_iterator_category_intent_resolution_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-intent-resolution.block-load.latency-sum + exported_name: storage_iterator_category_intent_resolution_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-mvcc-gc.block-load.bytes + exported_name: storage_iterator_category_mvcc_gc_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-mvcc-gc.block-load.cached-bytes + exported_name: storage_iterator_category_mvcc_gc_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-mvcc-gc.block-load.latency-sum + exported_name: storage_iterator_category_mvcc_gc_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-compaction.block-load.bytes + exported_name: storage_iterator_category_pebble_compaction_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-compaction.block-load.cached-bytes + exported_name: storage_iterator_category_pebble_compaction_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-compaction.block-load.latency-sum + exported_name: storage_iterator_category_pebble_compaction_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-get.block-load.bytes + exported_name: storage_iterator_category_pebble_get_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-get.block-load.cached-bytes + exported_name: storage_iterator_category_pebble_get_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-get.block-load.latency-sum + exported_name: storage_iterator_category_pebble_get_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-ingest.block-load.bytes + exported_name: storage_iterator_category_pebble_ingest_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-ingest.block-load.cached-bytes + exported_name: storage_iterator_category_pebble_ingest_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-pebble-ingest.block-load.latency-sum + exported_name: storage_iterator_category_pebble_ingest_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-range-snap.block-load.bytes + exported_name: storage_iterator_category_range_snap_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-range-snap.block-load.cached-bytes + exported_name: storage_iterator_category_range_snap_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-range-snap.block-load.latency-sum + exported_name: storage_iterator_category_range_snap_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-rangefeed.block-load.bytes + exported_name: storage_iterator_category_rangefeed_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-rangefeed.block-load.cached-bytes + exported_name: storage_iterator_category_rangefeed_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-rangefeed.block-load.latency-sum + exported_name: storage_iterator_category_rangefeed_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-replication.block-load.bytes + exported_name: storage_iterator_category_replication_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-replication.block-load.cached-bytes + exported_name: storage_iterator_category_replication_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-replication.block-load.latency-sum + exported_name: storage_iterator_category_replication_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-background.block-load.bytes + exported_name: storage_iterator_category_scan_background_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). 
+ y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-background.block-load.cached-bytes + exported_name: storage_iterator_category_scan_background_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-background.block-load.latency-sum + exported_name: storage_iterator_category_scan_background_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-regular.block-load.bytes + exported_name: storage_iterator_category_scan_regular_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-regular.block-load.cached-bytes + exported_name: storage_iterator_category_scan_regular_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-scan-regular.block-load.latency-sum + exported_name: storage_iterator_category_scan_regular_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-unknown.block-load.bytes + exported_name: storage_iterator_category_unknown_block_load_bytes + description: Bytes loaded by storage sstable iterators (possibly cached). + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-unknown.block-load.cached-bytes + exported_name: storage_iterator_category_unknown_block_load_cached_bytes + description: Bytes loaded by storage sstable iterators from the block cache + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.category-unknown.block-load.latency-sum + exported_name: storage_iterator_category_unknown_block_load_latency_sum + description: Cumulative latency for loading bytes not in the block cache, by storage sstable iterators + y_axis_label: Latency + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.external.seeks + exported_name: storage_iterator_external_seeks + description: Cumulative count of seeks performed on storage engine iterators. See storage.AggregatedIteratorStats for details. + y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.external.steps + exported_name: storage_iterator_external_steps + description: Cumulative count of steps performed on storage engine iterators. See storage.AggregatedIteratorStats for details. 
+ y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.internal.seeks + exported_name: storage_iterator_internal_seeks + description: |- + Cumulative count of seeks performed internally within storage engine iterators. + + A value high relative to 'storage.iterator.external.seeks' + is a good indication that there's an accumulation of garbage + internally within the storage engine. + + See storage.AggregatedIteratorStats for details. + y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.iterator.internal.steps + exported_name: storage_iterator_internal_steps + description: |- + Cumulative count of steps performed internally within storage engine iterators. + + A value high relative to 'storage.iterator.external.steps' + is a good indication that there's an accumulation of garbage + internally within the storage engine. + + See storage.AggregatedIteratorStats for more details. + y_axis_label: Iterator Ops + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.keys.range-key-set.count + exported_name: storage_keys_range_key_set_count + description: Approximate count of RangeKeySet internal keys across the storage engine. + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.keys.tombstone.count + exported_name: storage_keys_tombstone_count + description: Approximate count of DEL, SINGLEDEL and RANGEDEL internal keys across the storage engine. + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l0-bytes-flushed + exported_name: storage_l0_bytes_flushed + description: Number of bytes flushed (from memtables) into Level 0 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l0-bytes-ingested + exported_name: storage_l0_bytes_ingested + description: Number of bytes ingested directly into Level 0 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l0-level-score + exported_name: storage_l0_level_score + description: Compaction score of level 0 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l0-level-size + exported_name: storage_l0_level_size + description: Size of the SSTables in level 0 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l0-num-files + exported_name: storage_l0_num_files + description: Number of SSTables in Level 0 + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l0-sublevels + exported_name: storage_l0_sublevels + description: Number of Level 0 sublevels + y_axis_label: Sublevels + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l1-bytes-ingested + exported_name: storage_l1_bytes_ingested + description: Number of bytes ingested directly into Level 1 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l1-level-score + exported_name: storage_l1_level_score + description: Compaction score of level 1 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l1-level-size + exported_name: 
storage_l1_level_size + description: Size of the SSTables in level 1 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l2-bytes-ingested + exported_name: storage_l2_bytes_ingested + description: Number of bytes ingested directly into Level 2 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l2-level-score + exported_name: storage_l2_level_score + description: Compaction score of level 2 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l2-level-size + exported_name: storage_l2_level_size + description: Size of the SSTables in level 2 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l3-bytes-ingested + exported_name: storage_l3_bytes_ingested + description: Number of bytes ingested directly into Level 3 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l3-level-score + exported_name: storage_l3_level_score + description: Compaction score of level 3 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l3-level-size + exported_name: storage_l3_level_size + description: Size of the SSTables in level 3 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l4-bytes-ingested + exported_name: storage_l4_bytes_ingested + description: Number of bytes ingested directly into Level 4 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l4-level-score + exported_name: storage_l4_level_score + description: Compaction score of level 4 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l4-level-size + exported_name: storage_l4_level_size + description: Size of the SSTables in level 4 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l5-bytes-ingested + exported_name: storage_l5_bytes_ingested + description: Number of bytes ingested directly into Level 5 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l5-level-score + exported_name: storage_l5_level_score + description: Compaction score of level 5 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l5-level-size + exported_name: storage_l5_level_size + description: Size of the SSTables in level 5 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.l6-bytes-ingested + exported_name: storage_l6_bytes_ingested + description: Number of bytes ingested directly into Level 6 + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.l6-level-score + exported_name: storage_l6_level_score + description: Compaction score of level 6 + y_axis_label: Score + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.l6-level-size + exported_name: storage_l6_level_size + description: Size of the SSTables in level 6 + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.marked-for-compaction-files + exported_name: 
storage_marked_for_compaction_files + description: Count of SSTables marked for compaction + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.point_deletions.bytes + exported_name: storage_point_deletions_bytes + description: | + Estimated file bytes that will be saved by compacting all point deletions. + + This is dependent on table stats collection, so can be very incomplete until + storage.initial_stats_complete becomes true. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.range_deletions.bytes + exported_name: storage_range_deletions_bytes + description: | + Estimated file bytes that will be saved by compacting all range deletions. + + This is dependent on table stats collection, so can be very incomplete until + storage.initial_stats_complete becomes true. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.secondary-cache.count + exported_name: storage_secondary_cache_count + description: The count of cache blocks in the secondary cache (not sstable blocks) + y_axis_label: Cache items + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.secondary-cache.evictions + exported_name: storage_secondary_cache_evictions + description: The number of times a cache block was evicted from the secondary cache + y_axis_label: Num evictions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-full-hit + exported_name: storage_secondary_cache_reads_full_hit + description: The number of reads where all data returned was read from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-multi-block + exported_name: storage_secondary_cache_reads_multi_block + description: The number of secondary cache reads that require reading data from 2+ cache blocks + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-multi-shard + exported_name: storage_secondary_cache_reads_multi_shard + description: The number of secondary cache reads that require reading data from 2+ shards + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-no-hit + exported_name: storage_secondary_cache_reads_no_hit + description: The number of reads where no data returned was read from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-partial-hit + exported_name: storage_secondary_cache_reads_partial_hit + description: The number of reads where some data returned was read from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.reads-total + exported_name: storage_secondary_cache_reads_total + description: The number of reads from the secondary cache + y_axis_label: Num reads + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.secondary-cache.size + exported_name: storage_secondary_cache_size + description: The number of sstable bytes stored in the secondary cache + 
y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.secondary-cache.write-back-failures + exported_name: storage_secondary_cache_write_back_failures + description: The number of times writing a cache block to the secondary cache failed + y_axis_label: Num failures + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.shared-storage.read + exported_name: storage_shared_storage_read + description: Bytes read from shared storage + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.shared-storage.write + exported_name: storage_shared_storage_write + description: Bytes written to external storage + y_axis_label: Bytes + type: COUNTER + unit: BYTES + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.single-delete.ineffectual + exported_name: storage_single_delete_ineffectual + description: Number of SingleDeletes that were ineffectual + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.single-delete.invariant-violation + exported_name: storage_single_delete_invariant_violation + description: Number of SingleDelete invariant violations + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.sstable.compression.none.count + exported_name: storage_sstable_compression_none_count + description: Count of SSTables that are uncompressed. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.compression.snappy.count + exported_name: storage_sstable_compression_snappy_count + description: Count of SSTables that have been compressed with the snappy compression algorithm. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.compression.unknown.count + exported_name: storage_sstable_compression_unknown_count + description: Count of SSTables that have an unknown compression algorithm. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.compression.zstd.count + exported_name: storage_sstable_compression_zstd_count + description: Count of SSTables that have been compressed with the zstd compression algorithm. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.remote.bytes + exported_name: storage_sstable_remote_bytes + description: Bytes in SSTables that are stored off-disk (remotely) in object storage. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.sstable.remote.count + exported_name: storage_sstable_remote_count + description: Count of SSTables that are stored off-disk (remotely) in object storage. + y_axis_label: SSTables + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.sstable.zombie.bytes + exported_name: storage_sstable_zombie_bytes + description: Bytes in SSTables that have been logically deleted, but can't yet be physically deleted because an open iterator may be reading them. 
+ y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.value_separation.blob_files.count + exported_name: storage_value_separation_blob_files_count + description: The number of blob files that are used to store separated values within the storage engine. + y_axis_label: Files + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.value_separation.blob_files.size + exported_name: storage_value_separation_blob_files_size + description: The size of the physical blob files that are used to store separated values within the storage engine. This sum is the physical post-compression sum of value_bytes.referenced and value_bytes.unreferenced. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.value_separation.value_bytes.referenced + exported_name: storage_value_separation_value_bytes_referenced + description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and referenced by a live sstable. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.value_separation.value_bytes.unreferenced + exported_name: storage_value_separation_value_bytes_unreferenced + description: The size of storage engine value bytes (pre-compression) that are stored separately in blob files and not referenced by any live sstable. These bytes are garbage that could be reclaimed by a compaction. + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storage.wal.bytes_in + exported_name: storage_wal_bytes_in + description: The number of logical bytes the storage engine has written to the WAL + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.bytes_written + exported_name: storage_wal_bytes_written + description: The number of bytes the storage engine has written to the WAL + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.primary.duration + exported_name: storage_wal_failover_primary_duration + description: Cumulative time spent writing to the primary WAL directory. Only populated when WAL failover is configured + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.secondary.duration + exported_name: storage_wal_failover_secondary_duration + description: Cumulative time spent writing to the secondary WAL directory. Only populated when WAL failover is configured + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.switch.count + exported_name: storage_wal_failover_switch_count + description: Count of the number of times WAL writing has switched from primary to secondary and vice versa. + y_axis_label: Events + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storage.wal.failover.write_and_sync.latency + exported_name: storage_wal_failover_write_and_sync_latency + description: The observed latency for writing and syncing to the write ahead log. 
Only populated when WAL failover is configured + y_axis_label: Nanoseconds + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: storage.write-amplification + exported_name: storage_write_amplification + description: |- + Running measure of write-amplification. + + Write amplification is measured as the ratio of bytes written to disk relative to the logical + bytes present in sstables, over the life of a store. This metric is a running average + of the write amplification as tracked by Pebble. + y_axis_label: Ratio of bytes written to logical bytes + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storage.write-stall-nanos + exported_name: storage_write_stall_nanos + description: Total write stall duration in nanos + y_axis_label: Nanoseconds + type: COUNTER + unit: NANOSECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.callbacks.processing_duration + exported_name: storeliveness_callbacks_processing_duration + description: Duration of support withdrawal callback processing + y_axis_label: Duration + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: storeliveness.heartbeat.failures + exported_name: storeliveness_heartbeat_failures + description: Number of Store Liveness heartbeats that failed to be sent out by the Store Liveness Support Manager + y_axis_label: Heartbeats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.heartbeat.successes + exported_name: storeliveness_heartbeat_successes + description: Number of Store Liveness heartbeats sent out by the Store Liveness Support Manager + y_axis_label: Heartbeats + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.message_handle.failures + exported_name: storeliveness_message_handle_failures + description: Number of incoming Store Liveness messages that failed to be handled by the Store Liveness Support Manager + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.message_handle.successes + exported_name: storeliveness_message_handle_successes + description: Number of incoming Store Liveness messages handled by the Store Liveness Support Manager + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.support_for.stores + exported_name: storeliveness_support_for_stores + description: Number of stores that the Store Liveness Support Manager has ever provided support for + y_axis_label: Stores + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.support_from.stores + exported_name: storeliveness_support_from_stores + description: Number of stores that the Store Liveness Support Manager is requesting support from by sending heartbeats + y_axis_label: Stores + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.support_withdraw.failures + exported_name: storeliveness_support_withdraw_failures + description: Number of times the Store Liveness Support Manager has encountered an error while withdrawing support for another store + y_axis_label: Support Withdrawals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.support_withdraw.successes + exported_name: storeliveness_support_withdraw_successes + description: 
Number of times the Store Liveness Support Manager has successfully withdrawn support for another store + y_axis_label: Support Withdrawals + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.receive-queue-bytes + exported_name: storeliveness_transport_receive_queue_bytes + description: Total byte size of pending incoming messages from Store Liveness Transport + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.receive-queue-size + exported_name: storeliveness_transport_receive_queue_size + description: Number of pending incoming messages from the Store Liveness Transport + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.receive_dropped + exported_name: storeliveness_transport_receive_dropped + description: Number of Store Liveness messages dropped by the Store Liveness Transport on the receiver side + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.received + exported_name: storeliveness_transport_received + description: Number of Store Liveness messages received by the Store Liveness Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.send-queue-bytes + exported_name: storeliveness_transport_send_queue_bytes + description: Total byte size of pending outgoing messages in all Store Liveness Transport per-store send queues + y_axis_label: Bytes + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.send-queue-idle + exported_name: storeliveness_transport_send_queue_idle + description: Number of Store Liveness Transport per-store send queues that have become idle due to no recently-sent messages + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.send-queue-size + exported_name: storeliveness_transport_send_queue_size + description: Number of pending outgoing messages in all Store Liveness Transport per-store send queues + y_axis_label: Messages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: storeliveness.transport.send_dropped + exported_name: storeliveness_transport_send_dropped + description: Number of Store Liveness messages dropped by the Store Liveness Transport on the sender side + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: storeliveness.transport.sent + exported_name: storeliveness_transport_sent + description: Number of Store Liveness messages sent by the Store Liveness Transport + y_axis_label: Messages + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: sysbytes + exported_name: sysbytes + description: Number of bytes in system KV pairs + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: syscount + exported_name: syscount + description: Count of system KV pairs + y_axis_label: Keys + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: tenant.consumption.cross_region_network_ru + exported_name: tenant_consumption_cross_region_network_ru + description: Total number of RUs charged for cross-region network traffic + 
y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.external_io_egress_bytes + exported_name: tenant_consumption_external_io_egress_bytes + description: Total number of bytes written to external services such as cloud storage providers + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.external_io_ingress_bytes + exported_name: tenant_consumption_external_io_ingress_bytes + description: Total number of bytes read from external services such as cloud storage providers + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.kv_request_units + exported_name: tenant_consumption_kv_request_units + description: RU consumption attributable to KV + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.pgwire_egress_bytes + exported_name: tenant_consumption_pgwire_egress_bytes + description: Total number of bytes transferred from a SQL pod to the client + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.read_batches + exported_name: tenant_consumption_read_batches + description: Total number of KV read batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.read_bytes + exported_name: tenant_consumption_read_bytes + description: Total number of bytes read from KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.read_requests + exported_name: tenant_consumption_read_requests + description: Total number of KV read requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.request_units + exported_name: tenant_consumption_request_units + description: Total RU consumption + y_axis_label: Request Units + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.sql_pods_cpu_seconds + exported_name: tenant_consumption_sql_pods_cpu_seconds + description: Total amount of CPU used by SQL pods + y_axis_label: CPU Seconds + type: COUNTER + unit: SECONDS + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.write_batches + exported_name: tenant_consumption_write_batches + description: Total number of KV write batches + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.write_bytes + exported_name: tenant_consumption_write_bytes + description: Total number of bytes written to KV + y_axis_label: Bytes + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: tenant.consumption.write_requests + exported_name: tenant_consumption_write_requests + description: Total number of KV write requests + y_axis_label: Requests + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: timeseries.write.bytes + exported_name: timeseries_write_bytes + description: Total size in bytes of metric samples written to disk + y_axis_label: Storage + type: COUNTER + unit: BYTES + 
aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: timeseries.write.errors + exported_name: timeseries_write_errors + description: Total errors encountered while attempting to write metrics to disk + y_axis_label: Errors + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: timeseries.write.samples + exported_name: timeseries_write_samples + description: Total number of metric samples written to disk + y_axis_label: Metric Samples + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: totalbytes + exported_name: totalbytes + description: Total number of bytes taken up by keys and values including non-live data + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: tscache.skl.pages + exported_name: tscache_skl_pages + description: Number of pages in the timestamp cache + y_axis_label: Pages + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: tscache.skl.rotations + exported_name: tscache_skl_rotations + description: Number of page rotations in the timestamp cache + y_axis_label: Page Rotations + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.commit_waits.before_commit_trigger + exported_name: txn_commit_waits_before_commit_trigger + description: Number of KV transactions that had to commit-wait on the server before committing because they had a commit trigger + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side.1PC.failure + exported_name: txn_server_side_1PC_failure + description: Number of batches that attempted to commit using 1PC and failed + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side.1PC.success + exported_name: txn_server_side_1PC_success + description: Number of batches that attempted to commit using 1PC and succeeded + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.read_evaluation.failure + exported_name: txn_server_side_retry_read_evaluation_failure + description: Number of read batches that were not successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.read_evaluation.success + exported_name: txn_server_side_retry_read_evaluation_success + description: Number of read batches that were successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.uncertainty_interval_error.failure + exported_name: txn_server_side_retry_uncertainty_interval_error_failure + description: Number of batches that ran into uncertainty interval errors that were not successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.uncertainty_interval_error.success + exported_name: txn_server_side_retry_uncertainty_interval_error_success + description: Number of batches that ran into uncertainty interval errors that were successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: 
AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.write_evaluation.failure + exported_name: txn_server_side_retry_write_evaluation_failure + description: Number of write batches that were not successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txn.server_side_retry.write_evaluation.success + exported_name: txn_server_side_retry_write_evaluation_success + description: Number of write batches that were successfully refreshed server side + y_axis_label: KV Transactions + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.attempts.pending + exported_name: txnrecovery_attempts_pending + description: Number of transaction recovery attempts currently in-flight + y_axis_label: Recovery Attempts + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnrecovery.attempts.total + exported_name: txnrecovery_attempts_total + description: Number of transaction recovery attempts executed + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.failures + exported_name: txnrecovery_failures + description: Number of transaction recovery attempts that failed + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.successes.aborted + exported_name: txnrecovery_successes_aborted + description: Number of transaction recovery attempts that aborted a transaction + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.successes.committed + exported_name: txnrecovery_successes_committed + description: Number of transaction recovery attempts that committed a transaction + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnrecovery.successes.pending + exported_name: txnrecovery_successes_pending + description: Number of transaction recovery attempts that left a transaction pending + y_axis_label: Recovery Attempts + type: COUNTER + unit: COUNT + aggregation: AVG + derivative: NON_NEGATIVE_DERIVATIVE + - name: txnwaitqueue.pushee.waiting + exported_name: txnwaitqueue_pushee_waiting + description: Number of pushees on the txn wait queue + y_axis_label: Waiting Pushees + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.pusher.slow + exported_name: txnwaitqueue_pusher_slow + description: The total number of cases where a pusher waited more than the excessive wait threshold + y_axis_label: Slow Pushers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.pusher.wait_time + exported_name: txnwaitqueue_pusher_wait_time + description: Histogram of durations spent in queue by pushers + y_axis_label: Pusher wait time + type: HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.pusher.waiting + exported_name: txnwaitqueue_pusher_waiting + description: Number of pushers on the txn wait queue + y_axis_label: Waiting Pushers + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.query.wait_time + exported_name: txnwaitqueue_query_wait_time + description: Histogram of durations spent in queue by queries + y_axis_label: Query wait time + type: 
HISTOGRAM + unit: NANOSECONDS + aggregation: AVG + derivative: NONE + - name: txnwaitqueue.query.waiting + exported_name: txnwaitqueue_query_waiting + description: Number of transaction status queries waiting for an updated transaction record + y_axis_label: Waiting Queries + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE + - name: valbytes + exported_name: valbytes + description: Number of bytes taken up by values + y_axis_label: Storage + type: GAUGE + unit: BYTES + aggregation: AVG + derivative: NONE + - name: valcount + exported_name: valcount + description: Count of all values + y_axis_label: MVCC Values + type: GAUGE + unit: COUNT + aggregation: AVG + derivative: NONE diff --git a/src/current/_includes/cockroachcloud/metrics-tab.md b/src/current/_includes/cockroachcloud/metrics-tab.md index 6e6e584284e..5ce3df2d25c 100644 --- a/src/current/_includes/cockroachcloud/metrics-tab.md +++ b/src/current/_includes/cockroachcloud/metrics-tab.md @@ -1,11 +1,11 @@ {% assign version = site.current_cloud_version | replace: ".", "" %} -{% assign graphs = site.data[version].metrics.metrics | where_exp: "graphs", "graphs.metric_ui_tab contains tab" | map: "metric_ui_graph" | uniq %} +{% assign graphs = site.data[version].metrics.metrics-cloud | where_exp: "graphs", "graphs.metric_ui_tab contains tab" | map: "metric_ui_graph" | uniq %} {% for g in graphs %} {% comment %} Iterate through the graphs. {% endcomment %} ## {{ g }} -{% assign metrics = site.data[version].metrics.metrics | where: "metric_ui_graph", g %} +{% assign metrics = site.data[version].metrics.metrics-cloud | where: "metric_ui_graph", g %} {% comment %} Fetch all metrics for given metric_ui_tab. {% endcomment %} diff --git a/src/current/_includes/v25.3/essential-alerts.md b/src/current/_includes/v25.3/essential-alerts.md index dbc9dddc8fe..6abfd7b475f 100644 --- a/src/current/_includes/v25.3/essential-alerts.md +++ b/src/current/_includes/v25.3/essential-alerts.md @@ -483,7 +483,7 @@ Changefeed has fallen behind. This is determined by the end-to-end lag between a Changefeed jobs should not be paused for a long time because [the protected timestamp prevents garbage collection]({% link {{ page.version.version }}/protect-changefeed-data.md %}). To protect against an operational error, this alert guards against an inadvertently forgotten pause. **Metric** -
[`jobs.changefeed.currently_paused`]({% link {{ page.version.version }}/essential-metrics-{{ include.deployment }}.md %}#changefeed-currently-paused) +
[`jobs.changefeed.currently_paused`]({% link {{ page.version.version }}/essential-metrics-{{ include.deployment }}.md %}#jobs-changefeed-currently-paused) **Rule**
WARNING: `jobs.changefeed.currently_paused` is greater than `0` for more than `15 minutes` diff --git a/src/current/_includes/v25.3/essential-metrics.md b/src/current/_includes/v25.3/essential-metrics.md index 7c958db3f50..b510a3a11e3 100644 --- a/src/current/_includes/v25.3/essential-metrics.md +++ b/src/current/_includes/v25.3/essential-metrics.md @@ -1,197 +1,285 @@ -These essential CockroachDB metrics enable you to build custom dashboards with the following tools: +{% assign version = page.version.version | replace: ".", "" %} +{% comment %}DEBUG: {{ version }}{% endcomment %} + +These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.core }} cluster. Use them to build custom dashboards with the following tools: + +{% comment %} STEP 1. Assign variables specific to deployment {% endcomment %} {% if include.deployment == 'self-hosted' %} -* [Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana) -* [Datadog Integration]({% link {{ page.version.version }}/datadog.md %}) - The [**Datadog Integration Metric Name**](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) column lists the corresponding Datadog metric which requires the `cockroachdb.` prefix. + {% assign metrics_datadog = site.data[version].metrics.datadog-cockroachdb %} + {% assign datadog_link = "https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics" %} + {% assign datadog_prefix = "cockroachdb" %} + {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,NETWORKING,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %} + +- [Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana) +- [Datadog Integration]({% link {{ page.version.version }}/datadog.md %}): The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}.` prefix. + {% elsif include.deployment == 'advanced' %} -* [Datadog integration]({% link cockroachcloud/tools-page.md %}#monitor-cockroachdb-cloud-with-datadog) - The [**Datadog Integration Metric Name**](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) column lists the corresponding Datadog metric which requires the `crdb_dedicated.` prefix. -* [Metrics export]({% link cockroachcloud/export-metrics-advanced.md %}) -{% endif %} + {% assign metrics_datadog = site.data[version].metrics.datadog-crdb-dedicated %} + {% assign datadog_link = "https://docs.datadoghq.com/integrations/cockroach-cloud/#metrics" %} + {% assign datadog_prefix = "crdb_dedicated" %} +{% comment %} Removed NETWORKING category for advanced deployment {% endcomment %} + {% assign category_order = "HARDWARE,STORAGE,OVERLOAD,DISTRIBUTED,REPLICATION,SQL,CHANGEFEEDS,TTL,UNSET," %} -The **Usage** column explains why each metric is important to visualize in a custom dashboard and how to make both practical and actionable use of the metric in a production deployment. - -## Platform - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| sys.cpu.combined.percent-normalized | sys.cpu.combined.percent.normalized | Current user+system CPU percentage consumed by the CRDB process, normalized by number of cores | This metric gives the CPU utilization percentage by the CockroachDB process. If it is equal to 1 (or 100%), then the CPU is overloaded. The CockroachDB process should not be running with over 80% utilization for extended periods of time (hours). This metric is used in the DB Console [**CPU Percent** graph]({% link {{ page.version.version }}/ui-hardware-dashboard.md %}#cpu-percent). | -| sys.cpu.host.combined.percent-normalized | NOT AVAILABLE | Current user+system CPU percentage consumed by all processes on the host OS, normalized by number of cores. If the CRDB process is run in a containerized environment, the host OS is the container since the CRDB process cannot inspect CPU usage beyond the container. | This metric gives the CPU utilization percentage of the underlying server, virtual machine, or container hosting the CockroachDB process. It includes CPU usage from both CockroachDB and non-CockroachDB processes. It also accounts for time spent processing hardware (`irq`) and software (`softirq`) interrupts, as well as `nice` time, which represents low-priority user-mode activity.

A value of 1 (or 100%) indicates that the CPU is overloaded. Avoid running the CockroachDB process in an environment where the CPU remains overloaded for extended periods (e.g. multiple hours). This metric appears in the DB Console on the **Host CPU Percent** graph. | -| sys.cpu.user.percent | sys.cpu.user.percent | Current user CPU percentage consumed by the CRDB process | This metric gives the CPU usage percentage at the user level by the CockroachDB process only. This is similar to the Linux `top` command output. The metric value can be more than 1 (or 100%) on multi-core systems. It is best to combine user and system metrics. | -| sys.cpu.sys.percent | sys.cpu.sys.percent | Current system CPU percentage consumed by the CRDB process | This metric gives the CPU usage percentage at the system (Linux kernel) level by the CockroachDB process only. This is similar to the Linux `top` command output. The metric value can be more than 1 (or 100%) on multi-core systems. It is best to combine user and system metrics. | -| sys.rss | sys.rss | Current process memory (RSS) | This metric gives the amount of RAM used by the CockroachDB process. Persistently low values over an extended period of time suggest there is underutilized memory that can be put to work with adjusted [settings for `--cache` or `--max_sql_memory`]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size) or both. Conversely, a high utilization, even if a temporary spike, indicates an increased risk of [Out-of-memory (OOM) crash]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#out-of-memory-oom-crash) (particularly since the [swap is generally disabled]({% link {{ page.version.version }}/recommended-production-settings.md %}#memory)). | -| sql.mem.root.current | {% if include.deployment == 'self-hosted' %}sql.mem.root.current |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Current sql statement memory usage for root | This metric shows how memory set aside for temporary materializations, such as hash tables and intermediary result sets, is utilized. Use this metric to optimize memory allocations based on long term observations. The maximum amount is set with [`--max_sql_memory`]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size). If the utilization of sql memory is persistently low, perhaps some portion of this memory allocation can be shifted to [`--cache`]({% link {{ page.version.version }}/recommended-production-settings.md %}#cache-and-sql-memory-size). | -| sys.host.disk.write.bytes | {% if include.deployment == 'self-hosted' %}sys.host.disk.write.bytes |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Bytes written to all disks since this process started | This metric reports the effective storage device write throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. | -| sys.host.disk.write.count | {% if include.deployment == 'self-hosted' %}sys.host.disk.write |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Disk write operations across all disks since this process started | This metric reports the effective storage device write IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. 
| -| sys.host.disk.read.bytes | {% if include.deployment == 'self-hosted' %}sys.host.disk.read.bytes |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Bytes read from all disks since this process started | This metric reports the effective storage device read throughput (MB/s) rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. | -| sys.host.disk.read.count | {% if include.deployment == 'self-hosted' %}sys.host.disk.read |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Disk read operations across all disks since this process started | This metric reports the effective storage device read IOPS rate. To confirm that storage is sufficiently provisioned, assess the I/O performance rates (IOPS and MBPS) in the context of the sys.host.disk.iopsinprogress metric. | -| sys.host.disk.iopsinprogress | {% if include.deployment == 'self-hosted' %}sys.host.disk.iopsinprogress |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} IO operations currently in progress on this host | This metric gives the average queue length of the storage device. It characterizes the storage device's performance capability. All I/O performance metrics are Linux counters and correspond to the `avgqu-sz` in the Linux `iostat` command output. You need to view the device queue graph in the context of the actual read/write IOPS and MBPS metrics that show the actual device utilization. If the device is not keeping up, the queue will grow. Values over 10 are bad. Values around 5 mean the device is working hard trying to keep up. For internal (on chassis) [NVMe](https://www.wikipedia.org/wiki/NVM_Express) devices, the queue values are typically 0. For network connected devices, such as [AWS EBS volumes](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html), the normal operating range of values is 1 to 2. Spikes in values are OK. They indicate an I/O spike where the device fell behind and then caught up. End users may experience inconsistent response times, but there should be no cluster stability issues. If the queue is greater than 5 for an extended period of time and IOPS or MBPS are low, then the storage is most likely not provisioned per Cockroach Labs guidance. In AWS EBS, it is commonly an EBS type, such as gp2, not suitable as database primary storage. If I/O is low and the queue is low, the most likely scenario is that the CPU is lacking and not driving I/O. One such case is a cluster with nodes with only 2 vcpus which is not supported [sizing]({% link {{ page.version.version }}/recommended-production-settings.md %}#sizing) for production deployments. There are quite a few background processes in the database that take CPU away from the workload, so the workload is just not getting the CPU. Review [storage and disk I/O]({% link {{ page.version.version }}/common-issues-to-monitor.md %}#storage-and-disk-i-o). | -| sys.host.net.recv.bytes | sys.host.net.recv.bytes | Bytes received on all network interfaces since this process started | This metric gives the node's ingress/egress network transfer rates for flat sections which may indicate insufficiently provisioned networking or high error rates. CockroachDB is using a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect. 
| -| sys.host.net.send.bytes | sys.host.net.send.bytes | Bytes sent on all network interfaces since this process started | This metric gives the node's ingress/egress network transfer rates for flat sections which may indicate insufficiently provisioned networking or high error rates. CockroachDB is using a reliable TCP/IP protocol, so errors result in delivery retries that create a "slow network" effect. | -| clock-offset.meannanos | clock.offset.meannanos | Mean clock offset with other nodes | This metric gives the node's clock skew. In a well-configured environment, the actual clock skew would be in the sub-millisecond range. A skew exceeding 5 ms is likely due to a NTP service mis-configuration. Reducing the actual clock skew reduces the probability of uncertainty related conflicts and corresponding retires which has a positive impact on workload performance. Conversely, a larger actual clock skew increases the probability of retries due to uncertainty conflicts, with potentially measurable adverse effects on workload performance. | - -## Storage - -
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| capacity | {% if include.deployment == 'self-hosted' %}capacity.total |{% elsif include.deployment == 'advanced' %}capacity |{% endif %} Total storage capacity | This metric gives total storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space). | -| capacity.available | capacity.available | Available storage capacity | This metric gives available storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space). | -| capacity.used | capacity.used | Used storage capacity | This metric gives used storage capacity. Measurements should comply with the following rule: CockroachDB storage volumes should not be utilized more than 60% (40% free space). | -| storage.wal.fsync.latency | {% if include.deployment == 'self-hosted' %}storage.wal.fsync.latency |{% elsif include.deployment == 'advanced' %}storage.wal.fsync.latency |{% endif %} This metric reports the latency of writes to the [WAL]({% link {{ page.version.version }}/architecture/storage-layer.md %}#memtable-and-write-ahead-log). | If this value is greater than `100ms`, it is an indication of a [disk stall]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#disk-stalls). To mitigate the effects of disk stalls, consider deploying your cluster with [WAL failover]({% link {{ page.version.version }}/wal-failover.md %}) configured. | -| storage.write-stalls | {% if include.deployment == 'self-hosted' %}storage.write.stalls |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of instances of intentional write stalls to backpressure incoming writes | This metric reports actual disk stall events. Ideally, investigate all reports of disk stalls. As a pratical guideline, one stall per minute is not likely to have a material impact on workload beyond an occasional increase in response time. However one stall per second should be viewed as problematic and investigated actively. It is particularly problematic if the rate persists over an extended period of time, and worse, if it is increasing. | -| rocksdb.compactions | rocksdb.compactions.total | Number of SST compactions | This metric reports the number of a node's [LSM compactions]({% link {{ page.version.version }}/common-issues-to-monitor.md %}#lsm-health). If the number of compactions remains elevated while the LSM health does not improve, compactions are not keeping up with the workload. If the condition persists for an extended period, the cluster will initially exhibit performance issues that will eventually escalate into stability issues. | -| rocksdb.block.cache.hits | rocksdb.block.cache.hits | Count of block cache hits | This metric gives hits to block cache which is reserved memory. It is allocated upon the start of a node process by the [`--cache` flag]({% link {{ page.version.version }}/cockroach-start.md %}#general) and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload. 
| -| rocksdb.block.cache.misses | rocksdb.block.cache.misses | Count of block cache misses | This metric gives misses to block cache which is reserved memory. It is allocated upon the start of a node process by the [`--cache` flag]({% link {{ page.version.version }}/cockroach-start.md %}#general) and never shrinks. By observing block cache hits and misses, you can fine-tune memory allocations in the node process for the demands of the workload. | - -## Health - -|
CockroachDB Metric Name
| {% if include.deployment == 'self-hosted' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|{% elsif include.deployment == 'advanced' %}
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics)
(add `crdb_dedicated.` prefix)
|{% endif %}
Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| sys.uptime | sys.uptime | Process uptime | This metric measures the length of time, in seconds, that the CockroachDB process has been running. Monitor this metric to detect events such as node restarts, which may require investigation or intervention. | -| admission.io.overload | {% if include.deployment == 'self-hosted' %}admission.io.overload |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} 1-normalized float indicating whether IO admission control considers the store as overloaded with respect to compaction out of L0 (considers sub-level and file counts). | If the value of this metric exceeds 1, then it indicates overload. You can also look at the metrics `storage.l0-num-files`, `storage.l0-sublevels` or `rocksdb.read-amplification` directly. A healthy LSM shape is defined as “read-amp < 20” and “L0-files < 1000”, looking at [cluster settings]({% link {{ page.version.version }}/cluster-settings.md %}) `admission.l0_sub_level_count_overload_threshold` and `admission.l0_file_count_overload_threshold` respectively. | -| admission.wait_durations.kv-p75 | {% if include.deployment == 'self-hosted' %}admission.wait.durations.kv |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Wait time durations for requests that waited | This metric shows if CPU utilization-based admission control feature is working effectively or potentially overaggressive. This is a latency histogram of how much delay was added to the workload due to throttling by CPU control. If observing over 100ms waits for over 5 seconds while there was excess CPU capacity available, then the admission control is overly aggressive. | -| admission.wait_durations.kv-stores-p75 | {% if include.deployment == 'self-hosted' %}admission.wait.durations.kv_stores |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Wait time durations for requests that waited | This metric shows if I/O utilization-based admission control feature is working effectively or potentially overaggressive. This is a latency histogram of how much delay was added to the workload due to throttling by I/O control. If observing over 100ms waits for over 5 seconds while there was excess I/O capacity available, then the admission control is overly aggressive. | -| sys.runnable.goroutines.per.cpu | {% if include.deployment == 'self-hosted' %}sys.runnable.goroutines.per_cpu |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Average number of goroutines that are waiting to run, normalized by number of cores | If this metric has a value over 30, it indicates a CPU overload. If the condition lasts a short period of time (a few seconds), the database users are likely to experience inconsistent response times. If the condition persists for an extended period of time (tens of seconds, or minutes) the cluster may start developing stability issues. Review [CPU planning]({% link {{ page.version.version }}/common-issues-to-monitor.md %}#cpu). 
+- [Datadog integration]({% link cockroachcloud/tools-page.md %}#monitor-cockroachdb-cloud-with-datadog): The [**Datadog Integration Metric Name**]({{ datadog_link }}) column lists the corresponding Datadog metric which requires the `{{ datadog_prefix }}.` prefix. +- [Metrics export]({% link cockroachcloud/export-metrics-advanced.md %}) -{% if include.deployment == 'self-hosted' %} -## Network - -|
CockroachDB Metric Name
|
[Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics)
(add `cockroachdb.` prefix)
|
Description
| Usage | -| ------------------------------------------------------ | --------------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| rpc.connection.avg_round_trip_latency | rpc.connection.avg_round_trip_latency | Sum of exponentially weighted moving average of round-trip latencies, as measured through a gRPC RPC. Dividing this gauge by `rpc.connection.healthy` gives an approximation of average latency, but the top-level round-trip-latency histogram is more useful. Instead, users should consult the label families of this metric if they are available (which requires Prometheus and the cluster setting `server.child_metrics.enabled`); these provide per-peer moving averages. This metric does not track failed connection. A failed connection's contribution is reset to zero. | This metric is helpful in understanding general network issues outside of CockroachDB that could be impacting the user’s workload. | -| rpc.connection.failures | rpc.connection.failures.count | Counter of failed connections. This includes both the event in which a healthy connection terminates as well as unsuccessful reconnection attempts. Connections that are terminated as part of local node shutdown are excluded. Decommissioned peers are excluded. | See Description. | -| rpc.connection.healthy | rpc.connection.healthy | Gauge of current connections in a healthy state (i.e., bidirectionally connected and heartbeating). | See Description. | -| rpc.connection.healthy_nanos | rpc.connection.healthy_nanos | Gauge of nanoseconds of healthy connection time. On the Prometheus endpoint scraped when the cluster setting `server.child_metrics.enabled` is set, this gauge allows you to see the duration for which a given peer has been connected in a healthy state. | This can be useful for monitoring the stability and health of connections within your CockroachDB cluster. | -| rpc.connection.heartbeats | rpc.connection.heartbeats.count | Counter of successful heartbeats. | See Description. | -| rpc.connection.unhealthy | rpc.connection.unhealthy | Gauge of current connections in an unhealthy state (not bidirectionally connected or heartbeating). | If the value of this metric is greater than 0, this could indicate a network partition. | -| rpc.connection.unhealthy_nanos | rpc.connection.unhealthy_nanos | Gauge of nanoseconds of unhealthy connection time. On the Prometheus endpoint scraped when the cluster setting `server.child_metrics.enabled` is set, this gauge allows you to see the duration for which a given peer has been unreachable. | If this duration is greater than 0, this could indicate how long a network partition has been occurring. | {% endif %} +The **Usage** column explains why each metric is important to visualize and how to make both practical and actionable use of the metric in a production deployment. + +{% assign layers = site.data[version].metrics.metrics.layers %} + +{% comment %} STEP 2. Create array of layer names {% endcomment %} +{% assign layer_names_string = "" %} +{% for layer in layers %} + {% assign layer_names_string = layer_names_string | append: layer.name | append: "," %} +{% endfor %} + +{% comment %}DEBUG: layer_names_string = {{ layer_names_string }}{% endcomment %} +{% assign layer_names_array = layer_names_string | split: "," %} + +{% comment %} STEP 3. 
Create array of unique category names {% endcomment %} +{% assign category_names_string = "" %} +{% for layer_name in layer_names_array %} + {% assign layer = layers | where_exp: "l", "l.name == layer_name" %} + {% comment %}DEBUG: layer_name = {{ layer_name }}{% endcomment %} + + {% for category in layer[0].categories %} + {% comment %}DEBUG: category.name = {{ category.name }}{% endcomment %} + {% unless category_names_string contains category.name %} + {% assign category_names_string = category_names_string | append: category.name | append: "," %} + {% endunless %} + + {% endfor %} + +{% endfor %} + +{% comment %} Order categories. NOTE: new categories may break this order; however, all relevant categories will still be displayed, though not in the desired order.{% endcomment %} +{% comment %}DEBUG: category_names_string = {{ category_names_string }}{% endcomment %} +{% assign category_names_string_ordered = category_names_string | replace: "CHANGEFEEDS,DISTRIBUTED,NETWORKING,SQL,TTL,UNSET,HARDWARE,OVERLOAD,REPLICATION,STORAGE,", category_order %} +{% comment %}DEBUG: category_names_string_ordered = {{ category_names_string_ordered }}{% endcomment %} +{% assign category_names_array = category_names_string_ordered | split: "," %} + +{% comment %} STEP 4. Create sections for each unique category. For example, both APPLICATION and STORAGE layers have a SQL category; however, only one SQL category will be created. {% endcomment %} +{% for category_name in category_names_array %} + {% if category_name != "" %} + + {% comment %} STEP 4a. Loop 1 to count essential metrics {% endcomment %} + {% assign essential_metrics_total = 0 %} + {% for layer_name in layer_names_array %} + + {% assign layer = layers | where_exp: "l", "l.name == layer_name" %} + {% assign category = layer[0].categories | where_exp: "c", "c.name == category_name" %} + {% assign essential_metrics = category[0].metrics | where: "essential", true %} + {% if essential_metrics.size > 0 %} + {% comment %}DEBUG: 1 {{ layer_name }} 2 {{ layer[0].name }} 3 {{ category[0].name }} {{ essential_metrics.size }}{% endcomment %} + {% assign essential_metrics_total = essential_metrics_total | plus: essential_metrics.size %} + {% endif %}{% comment %}if essential_metrics.size > 0{% endcomment %} + + {% endfor %}{% comment %}for layer in layer_names_array{% endcomment %} + + {% comment %} STEP 4b. Only create a section for a category if essential metrics exist. For example, the UNSET category does not have any essential metrics.{% endcomment %} + {% if essential_metrics_total > 0 %} + + {% comment %} Transform category_name to user-facing name. 
{% endcomment %} + {% if category_name == "HARDWARE" %}{% assign category_display_name = "Platform" %} + {% elsif category_name == "STORAGE" %}{% assign category_display_name = "Storage" %} + {% elsif category_name == "OVERLOAD" %}{% assign category_display_name = "Health" %} + {% elsif category_name == "NETWORKING" %}{% assign category_display_name = "Network" %} + {% elsif category_name == "DISTRIBUTED" %}{% assign category_display_name = "KV Distributed" %} + {% elsif category_name == "REPLICATION" %}{% assign category_display_name = "KV Replication" %} + {% elsif category_name == "CHANGEFEEDS" %}{% assign category_display_name = "Changefeeds" %} + {% elsif category_name == "TTL" %}{% assign category_display_name = "Row-level TTL" %} + {% else %}{% assign category_display_name = category_name %}{% comment %} For example, SQL {% endcomment %} + {% endif %} + +## {{ category_display_name }} +{% comment %}DEBUG: {{ essential_metrics_total }} essential metrics{% endcomment %} + +
+ + + + + + + + + + + {% comment %} STEP 4c. Loop 2 to create essential metric rows for category{% endcomment %} + {% for layer_name in layer_names_array %} + + {% assign layer = layers | where_exp: "l", "l.name == layer_name" %} + {% assign category = layer[0].categories | where_exp: "c", "c.name == category_name" %} + {% assign essential_metrics = category[0].metrics | where: "essential", true %} + {% comment %}DEBUG: 1 {{ layer_name }} 2 {{ layer[0].name }} 3 {{ category[0].name }}{% endcomment %} + + {% for metric in essential_metrics %} + {% comment %} STEP 4d. Exclude SQL metrics that will be placed in special categories {% endcomment %} + {% unless category_name == SQL %} + {% unless metric.name contains "backup" or metric.name contains "BACKUP" or metric.name contains "create_stats" %} + + {% comment %} Transforms to match datadog_id {% endcomment %} + {% assign input_metric = metric.name %} + {% assign match1 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "-", "." %} + {% assign match2 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | append: ".count" %} + {% assign match3 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "_", "." %} + {% assign match4 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "-", "_" | append: ".count" %} + {% assign match5 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | replace: "-", "_" %} + {% assign match6 = metrics_datadog | where: "datadog_id", input_metric | first %} + {% assign input_metric = metric.name | append: ".total" %} + {% assign match7 = metrics_datadog | where: "datadog_id", input_metric | first %} + + {% assign metric_link = metric.name | replace: "_", "-" | replace: ".", "-" %} + + + + + + + + + {% endunless %}{% comment %}unless metric.name contains "backup" or metric.name contains "BACKUP" or metric.name contains "create_stats"{% endcomment %} + {% endunless %}{% comment %}unless category_name == SQL{% endcomment %} + {% endfor %}{% comment %}for metric in essential_metrics{% endcomment %} + {% endfor %}{% comment %}for layer in layer_names_array{% endcomment %} + + +
+<table>
+  <thead>
+    <tr>
+      <th>CockroachDB Metric Name</th>
+      <th>[Datadog Integration Metric Name]({{ datadog_link }}) (add `{{ datadog_prefix }}.` prefix)</th>
+      <th>Description</th>
+      <th>Usage</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>{% if metric.labeled_name %}metrics endpoint:<br>{{ metric.labeled_name }}{% endif %}</td>
+      <td>{% if match1 %}{% comment %}Match1:{% endcomment %}{{ match1.datadog_id }}
+          {% elsif match2 %}{% comment %}Match2:{% endcomment %}{{ match2.datadog_id }}
+          {% elsif match3 %}{% comment %}Match3:{% endcomment %}{{ match3.datadog_id }}
+          {% elsif match4 %}{% comment %}Match4:{% endcomment %}{{ match4.datadog_id }}
+          {% elsif match5 %}{% comment %}Match5:{% endcomment %}{{ match5.datadog_id }}
+          {% elsif match6 %}{% comment %}Match6:{% endcomment %}{{ match6.datadog_id }}
+          {% elsif match7 %}{% comment %}Match7:{% endcomment %}{{ match7.datadog_id }}
+          {% else %}NOT AVAILABLE
+          {% endif %}</td>
+      <td>{{ metric.description }}</td>
+      <td>{{ metric.how_to_use }}</td>
+    </tr>
+  </tbody>
+</table>
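The match1–match7 chain exists because CockroachDB metric names and Datadog integration metric names differ only by predictable transforms: swapping `-` or `_` for `.`, or appending `.count` or `.total`. The sketch below shows the idea on a single assumed name; it short-circuits after the first hit, whereas the include computes all seven candidates and then picks the first match in its `if`/`elsif` chain, which renders the same result. `metrics_datadog` is assumed to be the Datadog mapping data loaded earlier in the include.

```liquid
{% assign metric_name = "sql.new_conns" %}

{% comment %} Try the raw name first. {% endcomment %}
{% assign candidate = metric_name %}
{% assign match = metrics_datadog | where: "datadog_id", candidate | first %}

{% comment %} Then the transformed candidates, e.g. sql.new_conns -> sql.new_conns.count. {% endcomment %}
{% unless match %}
  {% assign candidate = metric_name | append: ".count" %}
  {% assign match = metrics_datadog | where: "datadog_id", candidate | first %}
{% endunless %}
{% unless match %}
  {% assign candidate = metric_name | replace: "_", "." %}
  {% assign match = metrics_datadog | where: "datadog_id", candidate | first %}
{% endunless %}
{% unless match %}
  {% assign candidate = metric_name | replace: "-", "_" | append: ".count" %}
  {% assign match = metrics_datadog | where: "datadog_id", candidate | first %}
{% endunless %}

{% if match %}{{ match.datadog_id }}{% else %}NOT AVAILABLE{% endif %}
```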
+ + {% endif %}{% comment %}essential_metrics_total > 0{% endcomment %} + + {% comment %} STEP 4e. Create SQL special categories {% endcomment %} + {% if category_name == "SQL" %} + {% assign layer = layers | where_exp: "l", "l.name == 'APPLICATION'" %} + {% assign category = layer[0].categories | where_exp: "c", "c.name == category_name" %} + {% assign essential_metrics = category[0].metrics | where: "essential", true %} + +## Table Statistics + + + + + + + + + + + + + {% for metric in essential_metrics %} + {% if metric.name contains "create_stats" %} + {% assign metric_link = metric.name | replace: "_", "-" | replace: ".", "-" %} + + + + + + + + + {% endif %} + {% endfor %} + + +
+<table>
+  <thead>
+    <tr>
+      <th>CockroachDB Metric Name</th>
+      <th>[Datadog Integration Metric Name]({{ datadog_link }}) (add `{{ datadog_prefix }}.` prefix)</th>
+      <th>Description</th>
+      <th>Usage</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>{% if metric.labeled_name %}metrics endpoint:<br>{{ metric.labeled_name }}{% endif %}</td>
+      <td>{{ metric.name }}</td>
+      <td>{{ metric.description }}</td>
+      <td>{{ metric.how_to_use }}</td>
+    </tr>
+  </tbody>
+</table>
+ +## Disaster Recovery + + + + + + + + + + + + + {% for metric in essential_metrics %} + {% if metric.name contains "backup" or metric.name contains "BACKUP" %} + + {% assign metric_link = metric.name | replace: "_", "-" | replace: ".", "-" %} + + + + + + + + + {% endif %} + {% endfor %} + + +
+<table>
+  <thead>
+    <tr>
+      <th>CockroachDB Metric Name</th>
+      <th>[Datadog Integration Metric Name]({{ datadog_link }}) (add `{{ datadog_prefix }}.` prefix)</th>
+      <th>Description</th>
+      <th>Usage</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>{% if metric.labeled_name %}metrics endpoint:<br>{{ metric.labeled_name }}{% endif %}</td>
+      <td>{{ metric.name }}</td>
+      <td>{{ metric.description }}</td>
+      <td>{{ metric.how_to_use }}</td>
+    </tr>
+  </tbody>
+</table>
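The two special sections above split the APPLICATION layer's SQL metrics across three headings: `create_stats` job metrics render under Table Statistics, backup metrics under Disaster Recovery, and everything else stays under SQL. Here is a compact sketch of that routing, reusing the include's own `contains` checks; `essential_metrics` is assumed to be the SQL category's essential list, and the example names come from the tables this file previously hard-coded.

```liquid
{% for metric in essential_metrics %}
  {% comment %} `contains` is case-sensitive, hence checking both "backup" and "BACKUP". {% endcomment %}
  {% if metric.name contains "create_stats" %}
    {% comment %} e.g. jobs.auto_create_stats.resume_failed {% endcomment %}
    {% assign section = "Table Statistics" %}
  {% elsif metric.name contains "backup" or metric.name contains "BACKUP" %}
    {% comment %} e.g. jobs.backup.currently_paused, schedules.BACKUP.failed {% endcomment %}
    {% assign section = "Disaster Recovery" %}
  {% else %}
    {% assign section = "SQL" %}
  {% endif %}
- `{{ metric.name }}` is rendered under the "{{ section }}" heading
{% endfor %}
```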
+ + {% endif %}{% comment %}if category_name == "SQL"{% endcomment %} + + {% endif %}{% comment %}if category_name != ""{% endcomment %} +{% endfor %}{% comment %}for category_name in category_names_array{% endcomment %} + +{% comment %} STEP 5. Add category for metrics that are not in metrics.yaml{% endcomment %} {% if include.deployment == 'self-hosted' %} + {% assign essential_metrics = site.data[version].metrics.available-metrics-not-in-metrics-list | where: "essential", true %} ## Expiration of license and certificates -|
CockroachDB Metric Name | [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) | Description
| Usage | -| ----------------------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | -| seconds.until.enterprise.license.expiry | seconds.until.enterprise.license.expiry | Seconds until enterprise license expiry (0 if no license present or running without enterprise features) | See Description. | -| security.certificate.expiration.ca | security.certificate_expiration.ca | Expiration for the CA certificate. 0 means no certificate or error | See Description. | -| security.certificate.expiration.client-ca | security.certificate_expiration.client_ca | Expiration for the client CA certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.ui | security.certificate_expiration.ui | Expiration for the UI certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.ui-ca | security.certificate_expiration.ui_ca | Expiration for the UI CA certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.node | security.certificate_expiration.node | Expiration for the node certificate. 0 means no certificate or error| See Description. | -| security.certificate.expiration.node-client | security.certificate_expiration.node_client | Expiration for the node's client certificate. 0 means no certificate or error| See Description. | -{% endif %} + + + + + + + + + + + + {% for metric in essential_metrics %} + + {% assign metric_link = metric.metric_id | replace: "_", "-" | replace: ".", "-" %} -## KV distributed - -|
CockroachDB Metric Name | {% if include.deployment == 'self-hosted' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) |{% elsif include.deployment == 'advanced' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) (add `crdb_dedicated.` prefix) |{% endif %} Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| liveness.heartbeatlatency | {% if include.deployment == 'self-hosted' %}liveness.heartbeatlatency-p90 |{% elsif include.deployment == 'advanced' %}liveness.heartbeatlatency |{% endif %} Node liveness heartbeat latency | If this metric exceeds 1 second, it is a sign of cluster instability. | -| liveness.livenodes | liveness.livenodes | Number of live nodes in the cluster (will be 0 if this node is not itself live) | This is a critical metric that tracks the live nodes in the cluster. | -| distsender.rpc.sent.nextreplicaerror | distsender.rpc.sent.nextreplicaerror | Number of replica-addressed RPCs sent due to per-replica errors | [RPC](architecture/overview.html#overview) errors do not necessarily indicate a problem. This metric tracks remote procedure calls that return a status value other than "success". A non-success status of an RPC should not be misconstrued as a network transport issue. It is database code logic executed on another cluster node. The non-success status is a result of an orderly execution of an RPC that reports a specific logical condition. | -| distsender.errors.notleaseholder | distsender.errors.notleaseholder | Number of NotLeaseHolderErrors encountered from replica-addressed RPCs | Errors of this type are normal during elastic cluster topology changes when leaseholders are actively rebalancing. They are automatically retried. However they may create occasional response time spikes. In that case, this metric may provide the explanation of the cause. | - -## KV replication - -|
CockroachDB Metric Name | {% if include.deployment == 'self-hosted' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) |{% elsif include.deployment == 'advanced' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) (add `crdb_dedicated.` prefix) |{% endif %} Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| leases.transfers.success | leases.transfers.success | Number of successful lease transfers | A high number of [lease](architecture/replication-layer.html#leases) transfers is not a negative or positive signal, rather it is a reflection of the elastic cluster activities. For example, this metric is high during cluster topology changes. A high value is often the reason for NotLeaseHolderErrors which are normal and expected during rebalancing. Observing this metric may provide a confirmation of the cause of such errors. | -| rebalancing_lease_transfers | rebalancing.lease.transfers | Counter of the number of [lease transfers]({% link {{ page.version.version }}/architecture/replication-layer.md %}#leases) that occur during replica rebalancing. These lease transfers are tracked by a component that looks for a [store-level]({% link {{ page.version.version }}/cockroach-start.md %}#store) load imbalance of either QPS (`rebalancing.queriespersecond`) or CPU usage (`rebalancing.cpunanospersecond`), depending on the value of the `kv.allocator.load_based_rebalancing.objective` [cluster setting]({% link {{ page.version.version }}/cluster-settings.md %}#setting-kv-allocator-load-based-rebalancing-objective). | Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. | -| rebalancing_range_rebalances | {% if include.deployment == 'self-hosted' %}rebalancing.range.rebalances | {% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Counter of the number of [load-based range rebalances]({% link {{ page.version.version }}/architecture/replication-layer.md %}#load-based-replica-rebalancing). This range movement is tracked by a component that looks for [store-level]({% link {{ page.version.version }}/cockroach-start.md %}#store) load imbalance of either QPS (`rebalancing.queriespersecond`) or CPU usage (`rebalancing.cpunanospersecond`), depending on the value of the `kv.allocator.load_based_rebalancing.objective` [cluster setting]({% link {{ page.version.version }}/cluster-settings.md %}#setting-kv-allocator-load-based-rebalancing-objective). | Used to identify when there has been more rebalancing activity triggered by imbalance between stores (of QPS or CPU). If this is high (when the count is rated), it indicates that more rebalancing activity is taking place due to load imbalance between stores. | -| rebalancing_replicas_queriespersecond | {% if include.deployment == 'self-hosted' %}rebalancing.replicas.queriespersecond | {% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Counter of the KV-level requests received per second by a given [store]({% link {{ page.version.version }}/cockroach-start.md %}#store). The store aggregates all of the CPU and QPS stats across all its replicas and then creates a histogram that maintains buckets that can be queried for, e.g., the P95 replica's QPS or CPU. | A high value of this metric could indicate that one of the store's replicas is part of a [hot range]({% link {{ page.version.version }}/understand-hotspots.md %}#hot-range). 
See also: `rebalancing_replicas_cpunanospersecond`. | -| rebalancing_replicas_cpunanospersecond | {% if include.deployment == 'self-hosted' %}rebalancing.replicas.cpunanospersecond | {% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Counter of the CPU nanoseconds of execution time per second by a given [store]({% link {{ page.version.version }}/cockroach-start.md %}#store). The store aggregates all of the CPU and QPS stats across all its replicas and then creates a histogram that maintains buckets that can be queried for, e.g., the P95 replica's QPS or CPU. | A high value of this metric could indicate that one of the store's replicas is part of a [hot range]({% link {{ page.version.version }}/understand-hotspots.md %}#hot-range). See also the non-histogram variant: `rebalancing.cpunanospersecond`. | -| rebalancing.queriespersecond | {% if include.deployment == 'self-hosted' %}rebalancing.queriespersecond |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of kv-level requests received per second by the store, considering the last 30 minutes, as used in rebalancing decisions. | This metric shows hotspots along the queries per second (QPS) dimension. It provides insights into the ongoing rebalancing activities. | -| rebalancing.cpunanospersecond | {% if include.deployment == 'self-hosted' %}rebalancing.cpunanospersecond |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Non-histogram variant of `rebalancing_replicas_cpunanospersecond`. | See usage of `rebalancing_replicas_cpunanospersecond`. | -| ranges | ranges | Number of ranges | This metric provides a measure of the scale of the data size. | -| replicas | {% if include.deployment == 'self-hosted' %}replicas.total |{% elsif include.deployment == 'advanced' %}replicas |{% endif %} Number of replicas | This metric provides an essential characterization of the data distribution across cluster nodes. | -| replicas.leaseholders | replicas.leaseholders | Number of lease holders | This metric provides an essential characterization of the data processing points across cluster nodes. | -| ranges.underreplicated | ranges.underreplicated | Number of ranges with fewer live replicas than the replication target | This metric is an indicator of [replication issues]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#replication-issues). It shows whether the cluster has data that is not conforming to resilience goals. The next step is to determine the corresponding database object, such as the table or index, of these under-replicated ranges and whether the under-replication is temporarily expected. Use the statement `SELECT table_name, index_name FROM [SHOW RANGES WITH INDEXES] WHERE range_id = {id of under-replicated range};`| -| ranges.unavailable | ranges.unavailable | Number of ranges with fewer live replicas than needed for quorum | This metric is an indicator of [replication issues]({% link {{ page.version.version }}/cluster-setup-troubleshooting.md %}#replication-issues). It shows whether the cluster is unhealthy and can impact workload. If an entire range is unavailable, then it will be unable to process queries. 
| -| queue.replicate.replacedecommissioningreplica.error | {% if include.deployment == 'self-hosted' %}queue.replicate.replacedecommissioningreplica.error.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of failed decommissioning replica replacements processed by the replicate queue | Refer to [Decommission the node]({% link {{ page.version.version }}/node-shutdown.md %}?filters=decommission#decommission-the-node). | -| range.splits | {% if include.deployment == 'self-hosted' %}range.splits.total |{% elsif include.deployment == 'advanced' %}range.splits |{% endif %} Number of range splits | This metric indicates how fast a workload is scaling up. Spikes can indicate resource [hotspots]({% link {{ page.version.version }}/understand-hotspots.md %}) since the [split heuristic is based on QPS]({% link {{ page.version.version }}/load-based-splitting.md %}#control-load-based-splitting-threshold). To understand whether hotspots are an issue and with which tables and indexes they are occurring, correlate this metric with other metrics such as CPU usage, such as `sys.cpu.combined.percent-normalized`, or use the [**Hot Ranges** page]({% link {{ page.version.version }}/ui-hot-ranges-page.md %}). | -| range.merges | {% if include.deployment == 'self-hosted' %}range.merges.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of range merges | This metric indicates how fast a workload is scaling down. Merges are Cockroach's [optimization for performance](architecture/distribution-layer.html#range-merges). This metric indicates that there have been deletes in the workload. | - -## SQL - -|
CockroachDB Metric Name | {% if include.deployment == 'self-hosted' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) |{% elsif include.deployment == 'advanced' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) (add `crdb_dedicated.` prefix) |{% endif %} Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| sql.conns | sql.conns | Number of active SQL connections | This metric shows the number of connections as well as the distribution, or balancing, of connections across cluster nodes. An imbalance can lead to nodes becoming overloaded. Review [Connection Pooling]({% link {{ page.version.version }}/connection-pooling.md %}). | -| sql.new_conns | {% if include.deployment == 'self-hosted' %}sql.new_conns.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of new connection attempts. | The rate of this metric shows how frequently new connections are being established. This can be useful in determining if a high rate of incoming new connections is causing additional load on the server due to a misconfigured application. | -| sql.txns.open | sql.txns.open | Number of currently open user SQL transactions | This metric should roughly correspond to the number of cores * 4. If this metric is consistently larger, scale out the cluster. | -| sql.statements.active | sql.statements.active | Number of currently active user SQL statements | This high-level metric reflects workload volume. | -| sql.failure.count | {% if include.deployment == 'self-hosted' %}sql.failure |{% elsif include.deployment == 'advanced' %}sql.failure.count |{% endif %} Number of statements resulting in a planning or runtime error | This metric is a high-level indicator of workload and application degradation with query failures. Use the [Insights page]({% link {{ page.version.version }}/ui-insights-page.md %}) to find failed executions with their error code to troubleshoot or use application-level logs, if instrumented, to determine the cause of error. | -| sql.full.scan.count | {% if include.deployment == 'self-hosted' %}sql.full.scan |{% elsif include.deployment == 'advanced' %}sql.full.scan.count |{% endif %} Number of full table or index scans | This metric is a high-level indicator of potentially suboptimal query plans in the workload that may require index tuning and maintenance. To identify the [statements with a full table scan]({% link {{ page.version.version }}/performance-recipes.md %}#statements-with-full-table-scans), use `SHOW FULL TABLE SCAN` or the [**SQL Activity Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}) with the corresponding metric time frame. The **Statements** page also includes [explain plans]({% link {{ page.version.version }}/ui-statements-page.md %}#explain-plans) and [index recommendations]({% link {{ page.version.version }}/ui-statements-page.md %}#insights). Not all full scans are necessarily bad especially over smaller tables. | -| sql.insert.count | sql.insert.count | Number of SQL INSERT statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. 
For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.update.count | sql.update.count | Number of SQL UPDATE statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.delete.count | sql.delete.count | Number of SQL DELETE statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.select.count | sql.select.count | Number of SQL SELECT statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.ddl.count | sql.ddl.count | Number of SQL DDL statements successfully executed | This high-level metric reflects workload volume. Monitor this metric to identify abnormal application behavior or patterns over time. 
If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. For example, on the [**Transactions** page]({% link {{ page.version.version }}/ui-transactions-page.md %}) and the [**Statements** page]({% link {{ page.version.version }}/ui-statements-page.md %}), sort on the Execution Count column. To find problematic sessions, on the [**Sessions** page]({% link {{ page.version.version }}/ui-sessions-page.md %}), sort on the Transaction Count column. Find the sessions with high transaction counts and trace back to a user or application. | -| sql.txn.begin.count | sql.txn.begin.count | Number of SQL transaction BEGIN statements successfully executed | This metric reflects workload volume by counting explicit [transactions]({% link {{ page.version.version }}/transactions.md %}). Use this metric to determine whether explicit transactions can be refactored as implicit transactions (individual statements). | -| sql.txn.commit.count | sql.txn.commit.count | Number of SQL transaction COMMIT statements successfully executed | This metric shows the number of [transactions]({% link {{ page.version.version }}/transactions.md %}) that completed successfully. This metric can be used as a proxy to measure the number of successful explicit transactions. | -| sql.txn.rollback.count | sql.txn.rollback.count | Number of SQL transaction ROLLBACK statements successfully executed | This metric shows the number of orderly transaction [rollbacks]({% link {{ page.version.version }}/rollback-transaction.md %}). A persistently high number of rollbacks may negatively impact the workload performance and needs to be investigated. | -| sql.txn.abort.count | sql.txn.abort.count | Number of SQL transaction abort errors | This high-level metric reflects workload performance. A persistently high number of SQL transaction abort errors may negatively impact the workload performance and needs to be investigated. | -| sql.service.latency-p90, sql.service.latency-p99 | sql.service.latency | Latency of SQL request execution | These high-level metrics reflect workload performance. Monitor these metrics to understand latency over time. If abnormal patterns emerge, apply the metric's time range to the [**SQL Activity** pages]({% link {{ page.version.version }}/monitoring-and-alerting.md %}#sql-activity-pages) to investigate interesting outliers or patterns. The [**Statements page**]({% link {{ page.version.version }}/ui-statements-page.md %}) has P90 Latency and P99 latency columns to enable correlation with this metric. | -| sql.txn.latency-p90, sql.txn.latency-p99 | sql.txn.latency | Latency of SQL transactions | These high-level metrics provide a latency histogram of all executed SQL transactions. These metrics provide an overview of the current SQL workload. | -| txnwaitqueue.deadlocks_total | {% if include.deployment == 'self-hosted' %}txnwaitqueue.deadlocks.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of deadlocks detected by the transaction wait queue | Alert on this metric if its value is greater than zero, especially if transaction throughput is lower than expected. Applications should be able to detect and recover from deadlock errors. 
However, transaction performance and throughput can be maximized if the application logic avoids deadlock conditions in the first place, for example, by keeping transactions as short as possible. | -| sql.distsql.contended_queries.count | {% if include.deployment == 'self-hosted' %}sql.distsql.contended.queries |{% elsif include.deployment == 'advanced' %} sql.distsql.contended.queries |{% endif %} Number of SQL queries that experienced contention | This metric is incremented whenever there is a non-trivial amount of contention experienced by a statement whether read-write or write-write conflicts. Monitor this metric to correlate possible workload performance issues to contention conflicts. | -| sql.conn.failures | sql.conn.failures.count | Number of SQL connection failures | This metric is incremented whenever a connection attempt fails for any reason, including timeouts. | -| sql.conn.latency-p90, sql.conn.latency-p99 | sql.conn.latency | Latency to establish and authenticate a SQL connection | These metrics characterize the database connection latency which can affect the application performance, for example, by having slow startup times. Connection failures are not recorded in these metrics.| -| txn.restarts.serializable | txn.restarts.serializable | Number of restarts due to a forwarded commit timestamp and isolation=SERIALIZABLE | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.writetooold | txn.restarts.writetooold | Number of restarts due to a concurrent writer committing first | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.writetoooldmulti | {% if include.deployment == 'self-hosted' %}txn.restarts.writetoooldmulti.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to multiple concurrent writers committing first | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. 
| -| txn.restarts.unknown | {% if include.deployment == 'self-hosted' %}txn.restarts.unknown.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to a unknown reasons | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.txnpush | {% if include.deployment == 'self-hosted' %}txn.restarts.txnpush.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to a transaction push failure | This metric is one measure of the impact of contention conflicts on workload performance. For guidance on contention conflicts, review [transaction contention best practices]({% link {{ page.version.version }}/performance-best-practices-overview.md %}#transaction-contention) and [performance tuning recipes]({% link {{ page.version.version }}/performance-recipes.md %}#transaction-contention). Tens of restarts per minute may be a high value, a signal of an elevated degree of contention in the workload, which should be investigated. | -| txn.restarts.txnaborted | {% if include.deployment == 'self-hosted' %}txn.restarts.txnaborted.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of restarts due to an abort by a concurrent transaction | The errors tracked by this metric are generally due to deadlocks. Deadlocks can often be prevented with a considered transaction design. Identify the conflicting transactions involved in the deadlocks, then, if possible, redesign the business logic implementation prone to deadlocks. | + + + + + + -## Table Statistics + {% endfor %}{% comment %}for metric in essential_metrics{% endcomment %} -|
CockroachDB Metric Name | {% if include.deployment == 'self-hosted' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) |{% elsif include.deployment == 'advanced' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) (add `crdb_dedicated.` prefix) |{% endif %} Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| jobs.auto_create_stats.resume_failed | {% if include.deployment == 'self-hosted' %}jobs.auto.create.stats.resume_failed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of auto_create_stats jobs which failed with a non-retryable error | This metric is a high-level indicator that automatically generated [table statistics]({% link {{ page.version.version }}/cost-based-optimizer.md %}#table-statistics) is failing. Failed statistic creation can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. | -| jobs.auto_create_stats.currently_running | {% if include.deployment == 'self-hosted' %}jobs.auto.create.stats.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of auto_create_stats jobs currently running | This metric tracks the number of active automatically generated statistics jobs that could also be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. | -| jobs.auto_create_stats.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.auto.create.stats.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of auto_create_stats jobs currently considered Paused | This metric is a high-level indicator that automatically generated statistics jobs are paused which can lead to the query optimizer running with stale statistics. Stale statistics can cause suboptimal query plans to be selected leading to poor query performance. | -| jobs.create_stats.currently_running | {% if include.deployment == 'self-hosted' %}jobs.create.stats.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of create_stats jobs currently running | This metric tracks the number of active create statistics jobs that may be consuming resources. Ensure that foreground SQL traffic is not impacted by correlating this metric with SQL latency and query volume metrics. | - -## Backup and Restore - -|
CockroachDB Metric Name | {% if include.deployment == 'self-hosted' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) |{% elsif include.deployment == 'advanced' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) (add `crdb_dedicated.` prefix) |{% endif %} Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| jobs.backup.currently_running | {% if include.deployment == 'self-hosted' %}jobs.backup.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of backup jobs currently running | See Description. | -| jobs.backup.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.backup.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of backup jobs currently considered Paused | Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a backup job in a paused state for an extended period of time. In functional areas, a paused job can hold resources or have concurrency impact or some other negative consequence. Paused backup may break the [recovery point objective (RPO)]({% link {{ page.version.version }}/backup.md %}#performance). | -| schedules.BACKUP.failed | {% if include.deployment == 'self-hosted' %}schedules.backup.failed |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of BACKUP jobs failed | Monitor this metric and investigate backup job failures. | -| schedules.BACKUP.last-completed-time | {% if include.deployment == 'self-hosted' %}schedules.backup.last_completed_time |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} The Unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric | Monitor this metric to ensure that backups are meeting the [recovery point objective (RPO)]({% link {{ page.version.version }}/disaster-recovery-overview.md %}). Each node exports the time that it last completed a backup on behalf of the schedule. If a node is restarted, it will report `0` until it completes a backup. If all nodes are restarted, `max()` is `0` until a node completes a backup.

To make use of this metric, first, from each node, take the maximum over a rolling window equal to or greater than the backup frequency, and then take the maximum of those values across nodes. For example with a backup frequency of 60 minutes, monitor `time() - max_across_nodes(max_over_time(schedules_BACKUP_last_completed_time, 60min))`. | - -## Changefeeds - -If [changefeeds]({% link {{ page.version.version }}/change-data-capture-overview.md %}) are created in a CockroachDB cluster, monitor these additional metrics in your custom dashboards: - -|
CockroachDB Metric Name | {% if include.deployment == 'self-hosted' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) |{% elsif include.deployment == 'advanced' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) (add `crdb_dedicated.` prefix) |{% endif %} Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| changefeed.running | changefeed.running | Number of currently running changefeeds, including sinkless | This metric tracks the total number of all running changefeeds. | -| jobs.changefeed.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.changefeed.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of changefeed jobs currently considered Paused | Monitor and alert on this metric to safeguard against an inadvertent operational error of leaving a changefeed job in a paused state for an extended period of time. Changefeed jobs should not be paused for a long time because the [protected timestamp prevents garbage collection]({% link {{ page.version.version }}/monitor-and-debug-changefeeds.md %}#protected-timestamp-and-garbage-collection-monitoring). | -| changefeed.failures | changefeed.failures | Total number of changefeed jobs which have failed | This metric tracks the permanent changefeed job failures that the jobs system will not try to restart. Any increase in this counter should be investigated. An alert on this metric is recommended. | -| changefeed.error_retries | changefeed.error.retries | Total retryable errors encountered by all changefeeds | This metric tracks transient changefeed errors. Alert on "too many" errors, such as 50 retries in 15 minutes. For example, during a rolling upgrade this counter will increase because the changefeed jobs will restart following node restarts. There is an exponential backoff, up to 10 minutes. But if there is no rolling upgrade in process or other cluster maintenance, and the error rate is high, investigate the changefeed job. -| changefeed.emitted_messages | changefeed.emitted.messages | Messages emitted by all feeds | This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the rate of changes being streamed from the CockroachDB cluster. | -| changefeed.emitted_bytes | {% if include.deployment == 'self-hosted' %}changefeed.emitted_bytes.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Bytes emitted by all feeds | This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the throughput bytes being streamed from the CockroachDB cluster. | -| changefeed.commit_latency | changefeed.commit.latency | The difference between the event MVCC timestamp and the time it was acknowledged by the downstream sink. If the sink batches events, then the difference between the oldest event in the batch and acknowledgement is recorded. Latency during backfill is excluded.| This metric provides a useful context when assessing the state of changefeeds. This metric characterizes the end-to-end lag between a committed change and that change applied at the destination. 
| -| jobs.changefeed.protected_age_sec | {% if include.deployment == 'self-hosted' %}jobs.changefeed.protected_age_sec |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} The age of the oldest PTS record protected by changefeed jobs | [Changefeeds use protected timestamps to protect the data from being garbage collected]({% link {{ page.version.version }}/monitor-and-debug-changefeeds.md %}#protected-timestamp-and-garbage-collection-monitoring). Ensure the protected timestamp age does not significantly exceed the [GC TTL zone configuration]({% link {{ page.version.version }}/configure-replication-zones.md %}#replication-zone-variables). Alert on this metric if the protected timestamp age is greater than 3 times the GC TTL. | - -## Row-Level TTL - -If [Row-Level TTL]({% link {{ page.version.version }}/row-level-ttl.md %}) is configured for any table in a CockroachDB cluster, monitor these additional metrics in your custom dashboards: - -|
CockroachDB Metric Name | {% if include.deployment == 'self-hosted' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb/?tab=host#metrics) (add `cockroachdb.` prefix) |{% elsif include.deployment == 'advanced' %} [Datadog Integration Metric Name](https://docs.datadoghq.com/integrations/cockroachdb_dedicated/#metrics) (add `crdb_dedicated.` prefix) |{% endif %} Description
| Usage | -| ----------------------------------------------------- | {% if include.deployment == 'self-hosted' %}------ |{% elsif include.deployment == 'advanced' %}---- |{% endif %} ------------------------------------------------------------ | ------------------------------------------------------------ | -| jobs.row_level_ttl.resume_completed | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.resume_completed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs which successfully resumed to completion | If Row Level TTL is enabled, this metric should be nonzero and correspond to the `ttl_cron` setting that was chosen. If this metric is zero, it means the job is not running | -| jobs.row_level_ttl.resume_failed | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.resume_failed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs which failed with a non-retryable error | This metric should remain at zero. Repeated errors means the Row Level TTL job is not deleting data. | -| jobs.row_level_ttl.rows_selected | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.rows_selected.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of rows selected for deletion by the row level TTL job. | Correlate this metric with the metric `jobs.row_level_ttl.rows_deleted` to ensure all the rows that should be deleted are actually getting deleted. | -| jobs.row_level_ttl.rows_deleted | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.rows_deleted.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of rows deleted by the row level TTL job. | Correlate this metric with the metric `jobs.row_level_ttl.rows_selected` to ensure all the rows that should be deleted are actually getting deleted. | -| jobs.row_level_ttl.currently_paused | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.currently_paused |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs currently considered Paused | Monitor this metric to ensure the Row Level TTL job does not remain paused inadvertently for an extended period. | -| jobs.row_level_ttl.currently_running | {% if include.deployment == 'self-hosted' %}jobs.row.level.ttl.currently_running |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of row_level_ttl jobs currently running | Monitor this metric to ensure there are not too many Row Level TTL jobs running at the same time. Generally, this metric should be in the low single digits. | -| schedules.scheduled-row-level-ttl-executor.failed | {% if include.deployment == 'self-hosted' %}schedules.scheduled.row.level.ttl.executor_failed.count |{% elsif include.deployment == 'advanced' %}NOT AVAILABLE |{% endif %} Number of scheduled-row-level-ttl-executor jobs failed | Monitor this metric to ensure the Row Level TTL job is running. If it is non-zero, it means the job could not be created. | -| jobs.row_level_ttl.span_total_duration | NOT AVAILABLE | Duration for processing a span during row level TTL. | See Description. | -| jobs.row_level_ttl.select_duration | NOT AVAILABLE | Duration for select requests during row level TTL. | See Description. | -| jobs.row_level_ttl.delete_duration | NOT AVAILABLE | Duration for delete requests during row level TTL. | See Description. 
| -| jobs.row_level_ttl.num_active_spans | NOT AVAILABLE | Number of active spans the TTL job is deleting from. | See Description. | -| jobs.row_level_ttl.total_rows | NOT AVAILABLE | Approximate number of rows on the TTL table. | See Description. | -| jobs.row_level_ttl.total_expired_rows | NOT AVAILABLE | Approximate number of rows that have expired the TTL on the TTL table. | See Description. | + +
+<table>
+  <thead>
+    <tr>
+      <th>CockroachDB Metric Name</th>
+      <th>[Datadog Integration Metric Name]({{ datadog_link }}) (add `{{ datadog_prefix }}.` prefix)</th>
+      <th>Description</th>
+      <th>Usage</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>{% if metric.labeled_name %}metrics endpoint:<br>{{ metric.labeled_name }}{% endif %}</td>
+      <td>{{ metric.metric_id }}</td>
+      <td>{{ metric.description }}</td>
+      <td>{{ metric.how_to_use }}</td>
+    </tr>
+  </tbody>
+</table>
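A minimal sketch of what the STEP 5 lookup resolves to for a single entry, assuming the include runs with `deployment='self-hosted'`, that `version` is the page-version key the include already derives, and that the entry defines the optional `labeled_name` field in the `available-metrics-not-in-metrics-list` data file referenced above.

```liquid
{% assign essential_metrics = site.data[version].metrics.available-metrics-not-in-metrics-list | where: "essential", true %}
{% assign metric = essential_metrics | where: "metric_id", "security.certificate.expiration.ca" | first %}

{% comment %} The fields the row template renders for this entry. {% endcomment %}
{{ metric.metric_id }}
{% if metric.labeled_name %}metrics endpoint: {{ metric.labeled_name }}{% endif %}
{{ metric.description }}
{{ metric.how_to_use }}
```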
+{% endif %}{% comment %}if include.deployment == 'self-hosted'{% endcomment %} ## See also @@ -200,5 +288,4 @@ If [Row-Level TTL]({% link {{ page.version.version }}/row-level-ttl.md %}) is co - [Visualize metrics in Grafana]({% link {{ page.version.version }}/monitor-cockroachdb-with-prometheus.md %}#step-5-visualize-metrics-in-grafana) - [Custom Chart Debug Page]({% link {{ page.version.version }}/ui-custom-chart-debug-page.md %}) - [Cluster API]({% link {{ page.version.version }}/cluster-api.md %}) -- [Essential Alerts]({% link {{ page.version.version }}/essential-alerts-{{ include.deployment}}.md %}) -- [CockroachDB Source Code - DB Console metrics to graphs mappings (in *.tsx files)](https://github.com/cockroachdb/cockroach/tree/master/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards) +- [Essential Alerts]({% link {{ page.version.version }}/essential-alerts-self-hosted.md %}) \ No newline at end of file diff --git a/src/current/cockroachcloud/metrics-essential.md b/src/current/cockroachcloud/metrics-essential.md index d9077a15bcf..40e14b740a4 100644 --- a/src/current/cockroachcloud/metrics-essential.md +++ b/src/current/cockroachcloud/metrics-essential.md @@ -7,14 +7,14 @@ toc: true These essential CockroachDB metrics let you monitor your CockroachDB {{ site.data.products.standard }} cluster. The metrics are available on graphs on the tabs listed in the **Metrics tabs** column. Where **Custom** is listed, the metric may be graphed in a [**Custom Metrics Chart**]({% link cockroachcloud/custom-metrics-chart-page.md %}). The **Usage** column explains why each metric is important to visualize and how to make both practical and actionable use of the metric in a production deployment. {% assign version = site.current_cloud_version | replace: ".", "" %} -{% assign types = site.data[version].metrics.metrics | map: "metric_type" | uniq %} +{% assign types = site.data[version].metrics.metrics-cloud | map: "metric_type" | uniq %} {% comment %} Fetch the list of all metric types {% endcomment %} {% for t in types %} {% comment %} Iterate through the types. {% endcomment %} {% unless t contains "Request Units" %} {% comment %} Request Units is only for Basic tier. {% endcomment %} ## {{ t }} - {% assign metrics = site.data[version].metrics.metrics | where: "metric_type", t | sort: "metric_id" | where_exp: "metrics", "metrics.deploy_standard == true"%} + {% assign metrics = site.data[version].metrics.metrics-cloud | where: "metric_type", t | sort: "metric_id" | where_exp: "metrics", "metrics.deploy_standard == true"%} {% comment %} Fetch all metrics for that metric_type. {% endcomment %} diff --git a/src/current/v25.3/essential-metrics-self-hosted.md b/src/current/v25.3/essential-metrics-self-hosted.md index d18970c0ce4..61251c330cd 100644 --- a/src/current/v25.3/essential-metrics-self-hosted.md +++ b/src/current/v25.3/essential-metrics-self-hosted.md @@ -1,8 +1,7 @@ --- title: Essential Metrics for CockroachDB Self-Hosted Deployments -summary: Learn about the recommended essential metrics for monitoring your CockroachDB {{ site.data.products.core }} cluster. +summary: Learn about the recommended essential metrics for monitoring your CockroachDB self-hosted cluster. toc: true -docs_area: manage --- -{% include {{ page.version.version }}/essential-metrics.md deployment='self-hosted' %} +{% include {{ page.version.version }}/essential-metrics.md deployment='self-hosted' %} \ No newline at end of file
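For the CockroachDB Cloud `metrics-essential.md` page touched above, here is a short sketch of how the renamed data file is addressed. It assumes Jekyll exposes the per-version `_data` directory under a key with the dots stripped from the version string, which is what the `replace` on `site.current_cloud_version` is for; the filter chain mirrors the one in that page's diff.

```liquid
{% comment %} Sketch only: filter the cloud metrics list the way metrics-essential.md does. {% endcomment %}
{% assign version = site.current_cloud_version | replace: ".", "" %}
{% assign cloud_metrics = site.data[version].metrics.metrics-cloud %}

{% comment %} Unique metric types drive the page's section headings. {% endcomment %}
{% assign types = cloud_metrics | map: "metric_type" | uniq %}

{% for t in types %}
  {% unless t contains "Request Units" %}
    {% assign metrics = cloud_metrics | where: "metric_type", t | sort: "metric_id" | where_exp: "m", "m.deploy_standard == true" %}
- {{ t }}: {{ metrics | size }} metrics shown for Standard
  {% endunless %}
{% endfor %}
```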