diff --git a/kafka/changelog.d/21687.added b/kafka/changelog.d/21687.added new file mode 100644 index 0000000000000..dcf48776d6664 --- /dev/null +++ b/kafka/changelog.d/21687.added @@ -0,0 +1 @@ +Add kafka_cluster_id tag to Kafka cluster metrics diff --git a/kafka/datadog_checks/kafka/data/metrics.yaml b/kafka/datadog_checks/kafka/data/metrics.yaml index 2271de20e183a..13f11cbca2b5c 100644 --- a/kafka/datadog_checks/kafka/data/metrics.yaml +++ b/kafka/datadog_checks/kafka/data/metrics.yaml @@ -338,6 +338,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.net.bytes_out.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec' @@ -345,6 +349,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.net.bytes_in.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec' @@ -352,6 +360,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.messages_in.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=BrokerTopicMetrics,name=BytesRejectedPerSec' @@ -359,6 +371,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.net.bytes_rejected.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Per Topic Broker Stats (only v0.8.x) @@ -372,6 +388,10 @@ jmx_metrics: alias: kafka.topic.messages_in.rate tags: topic: $1 + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Brokers: Per Topic Metrics (v0.9.0.x+) @@ -383,6 +403,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.topic.messages_in.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean_regex: 'kafka\.server:type=BrokerTopicMetrics,name=BytesOutPerSec,topic=.*' @@ -390,6 +414,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.topic.net.bytes_out.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean_regex: 'kafka\.server:type=BrokerTopicMetrics,name=BytesInPerSec,topic=.*' @@ -397,6 +425,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.topic.net.bytes_in.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean_regex: 'kafka\.server:type=BrokerTopicMetrics,name=BytesRejectedPerSec,topic=.*' @@ -404,6 +436,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.topic.net.bytes_rejected.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Request timings @@ -415,6 +451,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.request.fetch.failed.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec' @@ -422,6 +462,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.request.produce.failed.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean_regex: 'kafka.network:type=RequestMetrics,name=RequestsPerSec,request=Produce(?:,version=.*)?' @@ -429,6 +473,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.request.produce.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Produce' @@ -439,6 +487,10 @@ jmx_metrics: 99thPercentile: metric_type: gauge alias: kafka.request.produce.time.99percentile + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean_regex: 'kafka.network:type=RequestMetrics,name=RequestsPerSec,request=FetchConsumer(?:,version=.*)?' @@ -446,6 +498,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.request.fetch_consumer.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean_regex: 'kafka.network:type=RequestMetrics,name=RequestsPerSec,request=FetchFollower(?:,version=.*)?' @@ -453,6 +509,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.request.fetch_follower.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchConsumer' @@ -463,6 +523,10 @@ jmx_metrics: 99thPercentile: metric_type: gauge alias: kafka.request.fetch_consumer.time.99percentile + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchFollower' @@ -473,6 +537,10 @@ jmx_metrics: 99thPercentile: metric_type: gauge alias: kafka.request.fetch_follower.time.99percentile + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=UpdateMetadata' @@ -483,6 +551,10 @@ jmx_metrics: 99thPercentile: metric_type: gauge alias: kafka.request.update_metadata.time.99percentile + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Metadata' @@ -493,6 +565,10 @@ jmx_metrics: 99thPercentile: metric_type: gauge alias: kafka.request.metadata.time.99percentile + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Offsets' @@ -503,6 +579,10 @@ jmx_metrics: 99thPercentile: metric_type: gauge alias: kafka.request.offsets.time.99percentile + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=RequestChannel,name=RequestQueueSize' @@ -510,6 +590,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.request.channel.queue.size + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.network' bean: 'kafka.network:type=SocketServer,name=NetworkProcessorAvgIdlePercent' @@ -517,6 +601,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.net.processor.avg.idle.pct.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=KafkaRequestHandlerPool,name=RequestHandlerAvgIdlePercent' @@ -524,6 +612,10 @@ jmx_metrics: OneMinuteRate: metric_type: gauge alias: kafka.request.handler.avg.idle.pct.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Request Purgatory (only v0.8.2.x) @@ -535,6 +627,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.request.producer_request_purgatory.size + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=FetchRequestPurgatory,name=PurgatorySize' @@ -542,6 +638,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.request.fetch_request_purgatory.size + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Request Purgatory (v0.9.0.x onwards) @@ -553,6 +653,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.request.producer_request_purgatory.size + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Fetch' @@ -560,6 +664,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.request.fetch_request_purgatory.size + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Replication stats @@ -571,6 +679,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.replication.under_replicated_partitions + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=ReplicaManager,name=UnderMinIsrPartitionCount' @@ -578,6 +690,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.replication.under_min_isr_partition_count + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=ReplicaManager,name=IsrShrinksPerSec' @@ -585,6 +701,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.replication.isr_shrinks.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=ReplicaManager,name=IsrExpandsPerSec' @@ -592,6 +712,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.replication.isr_expands.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.controller' bean: 'kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs' @@ -599,6 +723,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.replication.leader_elections.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.controller' bean: 'kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec' @@ -606,6 +734,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.replication.unclean_leader_elections.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.controller' bean: 'kafka.controller:type=KafkaController,name=OfflinePartitionsCount' @@ -613,6 +745,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.replication.offline_partitions_count + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.controller' bean: 'kafka.controller:type=KafkaController,name=ActiveControllerCount' @@ -620,6 +756,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.replication.active_controller_count + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=ReplicaManager,name=PartitionCount' @@ -627,6 +767,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.replication.partition_count + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=ReplicaManager,name=LeaderCount' @@ -634,6 +778,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.replication.leader_count + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=ReplicaFetcherManager,name=MaxLag,clientId=Replica' @@ -641,6 +789,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.replication.max_lag + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Log flush stats @@ -652,6 +804,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.log.flush_rate.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Zookeeper stats @@ -663,6 +819,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.session.zookeeper.disconnect.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=SessionExpireListener,name=ZooKeeperExpiresPerSec' @@ -670,6 +830,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.session.zookeeper.expire.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=SessionExpireListener,name=ZooKeeperReadOnlyConnectsPerSec' @@ -677,6 +841,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.session.zookeeper.readonly.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=SessionExpireListener,name=ZooKeeperSyncConnectsPerSec' @@ -684,6 +852,10 @@ jmx_metrics: Count: metric_type: rate alias: kafka.session.zookeeper.sync.rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Session stats @@ -695,6 +867,10 @@ jmx_metrics: Value: metric_type: gauge alias: kafka.session.fetch.count + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value - include: domain: 'kafka.server' bean: 'kafka.server:type=FetchSessionCache,name=IncrementalFetchSessionEvictionsPerSec' @@ -702,6 +878,10 @@ jmx_metrics: Count: alias: kafka.session.fetch.eviction metric_type: rate + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value # # Listeners stats # @@ -714,3 +894,7 @@ jmx_metrics: alias: kafka.server.socket.connection_count tags: listener: $1 + dynamic_tags: + - tag_name: kafka_cluster_id + bean_name: kafka.server:type=KafkaServer,name=ClusterId + attribute: Value