Skip to content

Kafka add counters v1 uw2 #33503

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,9 @@ message MonitoringInfo {
SPANNER_TABLE_ID = 25 [(label_props) = { name: "SPANNER_TABLE_ID" }];
SPANNER_INSTANCE_ID = 26 [(label_props) = { name: "SPANNER_INSTANCE_ID" }];
SPANNER_QUERY_NAME = 27 [(label_props) = { name: "SPANNER_QUERY_NAME" }];
// Label which if has a "true" value indicates that the metric is intended
// to be aggregated per-worker.
PER_WORKER_METRIC = 28 [(label_props) = { name: "PER_WORKER_METRIC" }];
}

// A set of key and value labels which define the scope of the metric. For
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,13 @@ public MetricUpdates getUpdates() {
.setLabel(MonitoringInfoConstants.Labels.NAME, metricKey.metricName().getName())
.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, metricKey.stepName());
}

// Add any metricKey labels to the monitoringInfoLabels.
if (!metricName.getLabels().isEmpty()) {
for (Map.Entry<String, String> entry : metricName.getLabels().entrySet()) {
builder.setLabel(entry.getKey(), entry.getValue());
}
}
return builder;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo.MonitoringInfoLabels;
import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfoSpecs;
import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfoTypeUrns;
import org.apache.beam.sdk.metrics.MetricName;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
import org.checkerframework.checker.nullness.qual.Nullable;

/** This static class fetches MonitoringInfo related values from metrics.proto. */
public final class MonitoringInfoConstants {
Expand Down Expand Up @@ -110,6 +112,7 @@ public static final class Labels {
public static final String SPANNER_DATABASE_ID = "SPANNER_DATABASE_ID";
public static final String SPANNER_INSTANCE_ID = "SPANNER_INSTANCE_ID";
public static final String SPANNER_QUERY_NAME = "SPANNER_QUERY_NAME";
public static final String PER_WORKER_METRIC = "PER_WORKER_METRIC";

static {
// Validate that compile time constants match the values stored in the protos.
Expand Down Expand Up @@ -151,6 +154,7 @@ public static final class Labels {
SPANNER_INSTANCE_ID.equals(extractLabel(MonitoringInfoLabels.SPANNER_INSTANCE_ID)));
checkArgument(
SPANNER_QUERY_NAME.equals(extractLabel(MonitoringInfoLabels.SPANNER_QUERY_NAME)));
checkArgument(PER_WORKER_METRIC.equals(extractLabel(MonitoringInfoLabels.PER_WORKER_METRIC)));
}
}

Expand Down Expand Up @@ -210,4 +214,13 @@ static String extractUrn(MonitoringInfoSpecs.Enum value) {
private static String extractLabel(MonitoringInfo.MonitoringInfoLabels value) {
return value.getValueDescriptor().getOptions().getExtension(labelProps).getName();
}

public static boolean isPerWorkerMetric(MetricName metricName) {
@Nullable
String value = metricName.getLabels().get(MonitoringInfoConstants.Labels.PER_WORKER_METRIC);
if (value != null && value.equals("true")) {
return true;
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ public String getUrn() {
}

/** @return The labels associated with this MonitoringInfo. */
@Override
public Map<String, String> getLabels() {
return this.labels;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.beam.sdk.metrics.MetricName;
import org.apache.beam.sdk.util.HistogramData;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet;
import org.junit.Assert;
import org.junit.Test;
Expand Down Expand Up @@ -241,6 +242,43 @@ public void testMonitoringInfosArePopulatedForUserCounters() {
assertThat(actualMonitoringInfos, containsInAnyOrder(builder1.build(), builder2.build()));
}

@Test
public void testMonitoringInfosLabelsArePopulatedForMetricNamesWithLabels() {
MetricsContainerImpl testObject = new MetricsContainerImpl("step1");

CounterCell c1 =
testObject.getCounter(
MetricName.named("KafkaSink", "name1", ImmutableMap.of("PER_WORKER_METRIC", "true")));
CounterCell c2 = testObject.getCounter(MetricName.named("BigQuerySink", "name2"));

c1.inc(2L);
c2.inc(4L);

SimpleMonitoringInfoBuilder builder1 = new SimpleMonitoringInfoBuilder();
builder1
.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
.setLabel(MonitoringInfoConstants.Labels.NAMESPACE, "KafkaSink")
.setLabel(MonitoringInfoConstants.Labels.NAME, "name1")
.setLabel(MonitoringInfoConstants.Labels.PER_WORKER_METRIC, "true")
.setInt64SumValue(2)
.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, "step1");

SimpleMonitoringInfoBuilder builder2 = new SimpleMonitoringInfoBuilder();
builder2
.setUrn(MonitoringInfoConstants.Urns.USER_SUM_INT64)
.setLabel(MonitoringInfoConstants.Labels.NAMESPACE, "BigQuerySink")
.setLabel(MonitoringInfoConstants.Labels.NAME, "name2")
.setInt64SumValue(4)
.setLabel(MonitoringInfoConstants.Labels.PTRANSFORM, "step1");

ArrayList<MonitoringInfo> actualMonitoringInfos = new ArrayList<MonitoringInfo>();
for (MonitoringInfo mi : testObject.getMonitoringInfos()) {
actualMonitoringInfos.add(mi);
}

assertThat(actualMonitoringInfos, containsInAnyOrder(builder1.build(), builder2.build()));
}

@Test
public void testMonitoringInfosArePopulatedForUserDistributions() {
MetricsContainerImpl testObject = new MetricsContainerImpl("step1");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,15 @@
package org.apache.beam.runners.core.metrics;

import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.extractUrn;
import static org.apache.beam.runners.core.metrics.MonitoringInfoConstants.isPerWorkerMetric;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfoSpecs;
import org.apache.beam.sdk.metrics.MetricName;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ArrayListMultimap;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Multimap;
import org.hamcrest.Matchers;
Expand All @@ -47,4 +52,17 @@ public void testUniqueUrnsDefinedForAllSpecs() {
}
assertThat(urnToEnum.entries(), Matchers.empty());
}

@Test
public void testIsPerWorkerMetric() {
MetricName metricName =
MetricName.named("IO", "name1", ImmutableMap.of("PER_WORKER_METRIC", "true"));
assertTrue(isPerWorkerMetric(metricName));

metricName = MetricName.named("IO", "name1", ImmutableMap.of("PER_WORKER_METRIC", "false"));
assertFalse(isPerWorkerMetric(metricName));

metricName = MetricName.named("IO", "name1", ImmutableMap.of());
assertFalse(isPerWorkerMetric(metricName));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,6 @@ public Gauge getGauge(MetricName metricName) {
return getCurrentContainer().getGauge(metricName);
}

@Override
public Gauge getPerWorkerGauge(MetricName metricName) {
Gauge gauge = getCurrentContainer().getPerWorkerGauge(metricName);
return gauge;
}

@Override
public StringSet getStringSet(MetricName metricName) {
return getCurrentContainer().getStringSet(metricName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.beam.runners.core.metrics.DistributionData;
import org.apache.beam.runners.core.metrics.GaugeCell;
import org.apache.beam.runners.core.metrics.MetricsMap;
import org.apache.beam.runners.core.metrics.MonitoringInfoConstants;
import org.apache.beam.runners.core.metrics.StringSetCell;
import org.apache.beam.runners.core.metrics.StringSetData;
import org.apache.beam.sdk.metrics.BoundedTrie;
Expand Down Expand Up @@ -75,9 +76,6 @@ public class StreamingStepMetricsContainer implements MetricsContainer {

private MetricsMap<MetricName, GaugeCell> gauges = new MetricsMap<>(GaugeCell::new);

private final ConcurrentHashMap<MetricName, GaugeCell> perWorkerGauges =
new ConcurrentHashMap<>();

private MetricsMap<MetricName, StringSetCell> stringSets = new MetricsMap<>(StringSetCell::new);

private MetricsMap<MetricName, DeltaDistributionCell> distributions =
Expand Down Expand Up @@ -178,19 +176,6 @@ public Gauge getGauge(MetricName metricName) {
return gauges.get(metricName);
}

@Override
public Gauge getPerWorkerGauge(MetricName metricName) {
if (!enablePerWorkerMetrics) {
return MetricsContainer.super.getPerWorkerGauge(metricName);
}
Gauge val = perWorkerGauges.get(metricName);
if (val != null) {
return val;
}

return perWorkerGauges.computeIfAbsent(metricName, name -> new GaugeCell(metricName));
}

@Override
public StringSet getStringSet(MetricName metricName) {
return stringSets.get(metricName);
Expand Down Expand Up @@ -256,10 +241,15 @@ private FluentIterable<CounterUpdate> gaugeUpdates() {
@Override
public @Nullable CounterUpdate apply(
@Nonnull Map.Entry<MetricName, GaugeCell> entry) {
long value = entry.getValue().getCumulative().value();
org.joda.time.Instant timestamp = entry.getValue().getCumulative().timestamp();
return MetricsToCounterUpdateConverter.fromGauge(
MetricKey.create(stepName, entry.getKey()), value, timestamp);
if (!MonitoringInfoConstants.isPerWorkerMetric(entry.getKey())) {
long value = entry.getValue().getCumulative().value();
org.joda.time.Instant timestamp = entry.getValue().getCumulative().timestamp();
return MetricsToCounterUpdateConverter.fromGauge(
MetricKey.create(stepName, entry.getKey()), value, timestamp);
} else {
// add a test for this.
return null;
}
}
})
.filter(Predicates.notNull());
Expand Down Expand Up @@ -388,7 +378,8 @@ private void deleteStaleCounters(
@VisibleForTesting
Iterable<PerStepNamespaceMetrics> extractPerWorkerMetricUpdates() {
ConcurrentHashMap<MetricName, Long> counters = new ConcurrentHashMap<MetricName, Long>();
ConcurrentHashMap<MetricName, Long> gauges = new ConcurrentHashMap<MetricName, Long>();
ConcurrentHashMap<MetricName, Long> per_worker_gauges =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I don't think this (or counters) needs to be Concurrent. How about using ImmutableMapBuilder and then build when passing to convert?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, that should be done for all of them, will do that in the followup clean up CL.

new ConcurrentHashMap<MetricName, Long>();
ConcurrentHashMap<MetricName, LockFreeHistogram.Snapshot> histograms =
new ConcurrentHashMap<MetricName, LockFreeHistogram.Snapshot>();
HashSet<MetricName> currentZeroValuedCounters = new HashSet<MetricName>();
Expand All @@ -402,12 +393,18 @@ Iterable<PerStepNamespaceMetrics> extractPerWorkerMetricUpdates() {
counters.put(k, val);
});

perWorkerGauges.forEach(
gauges.forEach(
(k, v) -> {
Long val = v.getCumulative().value();
gauges.put(k, val);
v.reset();
// Check if metric name has the per worker label set.
// TODO(Naireen): Populate local map with perWorkerMetrics so we don't need to check each
// time we update the metrics.
if (MonitoringInfoConstants.isPerWorkerMetric(k)) {
Long val = v.getCumulative().value();
per_worker_gauges.put(k, val);
v.reset();
}
});

perWorkerHistograms.forEach(
(k, v) -> {
v.getSnapshotAndReset().ifPresent(snapshot -> histograms.put(k, snapshot));
Expand All @@ -416,7 +413,7 @@ Iterable<PerStepNamespaceMetrics> extractPerWorkerMetricUpdates() {
deleteStaleCounters(currentZeroValuedCounters, Instant.now(clock));

return MetricsToPerStepNamespaceMetricsConverter.convert(
stepName, counters, gauges, histograms, parsedPerWorkerMetricsCache);
stepName, counters, per_worker_gauges, histograms, parsedPerWorkerMetricsCache);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@
import com.google.api.services.dataflow.model.CounterStructuredName;
import com.google.api.services.dataflow.model.CounterStructuredNameAndMetadata;
import com.google.api.services.dataflow.model.CounterUpdate;
import com.google.api.services.dataflow.model.DataflowGaugeValue;
import com.google.api.services.dataflow.model.DataflowHistogramValue;
import com.google.api.services.dataflow.model.DistributionUpdate;
import com.google.api.services.dataflow.model.IntegerGauge;
import com.google.api.services.dataflow.model.Linear;
import com.google.api.services.dataflow.model.MetricValue;
import com.google.api.services.dataflow.model.PerStepNamespaceMetrics;
import com.google.api.services.dataflow.model.StringList;
import com.google.common.collect.ImmutableMap;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
Expand Down Expand Up @@ -88,6 +90,8 @@ public class StreamingStepMetricsContainerTest {

private MetricName name1 = MetricName.named("ns", "name1");
private MetricName name2 = MetricName.named("ns", "name2");
private MetricName name3 =
MetricName.named("ns", "name3", ImmutableMap.of("PER_WORKER_METRIC", "true"));

@Test
public void testDedupping() {
Expand Down Expand Up @@ -273,6 +277,16 @@ public void testGaugeUpdateExtraction() {
DateTimeUtils.setCurrentMillisSystem();
}

@Test
public void testNoPerWorkerGaugeUpdateExtraction() {
Gauge gauge = c1.getGauge(name3);
gauge.set(5);

// There is no update.
Iterable<CounterUpdate> updates = StreamingStepMetricsContainer.extractMetricUpdates(registry);
assertThat(updates, IsEmptyIterable.emptyIterable());
}

@Test
public void testStringSetUpdateExtraction() {
StringSet stringSet = c1.getStringSet(name1);
Expand Down Expand Up @@ -471,7 +485,7 @@ public void testExtractPerWorkerMetricUpdates_populatedMetrics() {
}

@Test
public void testExtractPerWorkerMetricUpdatesKafka_populatedMetrics() {
public void testExtractPerWorkerMetricUpdatesKafka_populatedHistogramMetrics() {
StreamingStepMetricsContainer.setEnablePerWorkerMetrics(true);

MetricName histogramMetricName = MetricName.named("KafkaSink", "histogram");
Expand Down Expand Up @@ -508,6 +522,48 @@ public void testExtractPerWorkerMetricUpdatesKafka_populatedMetrics() {
assertThat(updates, containsInAnyOrder(histograms));
}

@Test
public void testExtractPerWorkerMetricUpdatesKafka_populateGaugeMetrics() {
StreamingStepMetricsContainer.setEnablePerWorkerMetrics(true);

MetricName gaugeMetricName =
MetricName.named("KafkaSink", "gauge", ImmutableMap.of("PER_WORKER_METRIC", "true"));
c2.getGauge(gaugeMetricName).set(5L);

Iterable<PerStepNamespaceMetrics> updates =
StreamingStepMetricsContainer.extractPerWorkerMetricUpdates(registry);

DataflowGaugeValue gaugeValue = new DataflowGaugeValue();
gaugeValue.setValue(5L);

MetricValue expectedGauge =
new MetricValue()
.setMetric("gauge")
.setMetricLabels(new HashMap<>())
.setValueGauge64(gaugeValue);

PerStepNamespaceMetrics gauge =
new PerStepNamespaceMetrics()
.setOriginalStep("s2")
.setMetricsNamespace("KafkaSink")
.setMetricValues(Collections.singletonList(expectedGauge));

assertThat(updates, containsInAnyOrder(gauge));
}

@Test
public void testExtractPerWorkerMetricUpdatesKafka_gaugeMetricsDropped() {
StreamingStepMetricsContainer.setEnablePerWorkerMetrics(true);

MetricName gaugeMetricName =
MetricName.named("KafkaSink", "gauge", ImmutableMap.of("PER_WORKER_METRIC", "false"));
c2.getGauge(gaugeMetricName).set(5L);

Iterable<PerStepNamespaceMetrics> updates =
StreamingStepMetricsContainer.extractPerWorkerMetricUpdates(registry);
assertThat(updates, IsEmptyIterable.emptyIterable());
}

@Test
public void testExtractPerWorkerMetricUpdates_emptyMetrics() {
StreamingStepMetricsContainer.setEnablePerWorkerMetrics(true);
Expand Down
Loading
Loading