Skip to content

Commit 40a500e

Browse files
Can match phase shard APM metric with search request attributes
1 parent 2d180d2 commit 40a500e

File tree

4 files changed

+38
-11
lines changed

4 files changed

+38
-11
lines changed

server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchPhaseAPMMetrics.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,15 @@ public ShardSearchPhaseAPMMetrics(MeterRegistry meterRegistry) {
5757
}
5858

5959
@Override
60-
public void onCanMatchPhase(long tookInNanos) {
61-
recordPhaseLatency(canMatchPhaseMetric, tookInNanos);
60+
public void onCanMatchPhase(Map<String, Object> searchRequestAttributes, long tookInNanos) {
61+
canMatchPhaseMetric.record(TimeUnit.NANOSECONDS.toMillis(tookInNanos), searchRequestAttributes);
6262
}
6363

6464
@Override
6565
public void onDfsPhase(SearchContext searchContext, long tookInNanos) {
66-
recordPhaseLatency(dfsPhaseMetric, tookInNanos);
66+
SearchExecutionContext searchExecutionContext = searchContext.getSearchExecutionContext();
67+
Long timeRangeFilterFromMillis = searchExecutionContext.getTimeRangeFilterFromMillis();
68+
recordPhaseLatency(dfsPhaseMetric, tookInNanos, searchContext.request(), timeRangeFilterFromMillis);
6769
}
6870

6971
@Override
@@ -80,10 +82,6 @@ public void onFetchPhase(SearchContext searchContext, long tookInNanos) {
8082
recordPhaseLatency(fetchPhaseMetric, tookInNanos, searchContext.request(), timeRangeFilterFromMillis);
8183
}
8284

83-
private static void recordPhaseLatency(LongHistogram histogramMetric, long tookInNanos) {
84-
histogramMetric.record(TimeUnit.NANOSECONDS.toMillis(tookInNanos));
85-
}
86-
8785
private static void recordPhaseLatency(
8886
LongHistogram histogramMetric,
8987
long tookInNanos,

server/src/main/java/org/elasticsearch/index/shard/SearchOperationListener.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.elasticsearch.transport.TransportRequest;
1616

1717
import java.util.List;
18+
import java.util.Map;
1819

1920
/**
2021
* An listener for search, fetch and context events.
@@ -91,9 +92,10 @@ default void onFailedDfsPhase(SearchContext searchContext) {}
9192
* Executed after the can-match phase successfully finished.
9293
* Note: this is not invoked if the can match phase execution failed.
9394
*
95+
* @param searchRequestAttributes the attributes of the search request
9496
* @param tookInNanos the number of nanoseconds the can-match execution took
9597
*/
96-
default void onCanMatchPhase(long tookInNanos) {}
98+
default void onCanMatchPhase(Map<String, Object> searchRequestAttributes, long tookInNanos) {}
9799

98100
/**
99101
* Executed when a new reader context was created
@@ -246,10 +248,10 @@ public void onDfsPhase(SearchContext searchContext, long tookInNanos) {
246248
}
247249

248250
@Override
249-
public void onCanMatchPhase(long tookInNanos) {
251+
public void onCanMatchPhase(Map<String, Object> searchRequestAttributes, long tookInNanos) {
250252
for (SearchOperationListener listener : listeners) {
251253
try {
252-
listener.onCanMatchPhase(tookInNanos);
254+
listener.onCanMatchPhase(searchRequestAttributes, tookInNanos);
253255
} catch (Exception e) {
254256
logger.warn(() -> "onCanMatchPhase listener [" + listener + "] failed", e);
255257
}

server/src/main/java/org/elasticsearch/search/SearchService.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.elasticsearch.action.search.CanMatchNodeRequest;
2727
import org.elasticsearch.action.search.CanMatchNodeResponse;
2828
import org.elasticsearch.action.search.OnlinePrewarmingService;
29+
import org.elasticsearch.action.search.SearchRequestAttributesExtractor;
2930
import org.elasticsearch.action.search.SearchShardTask;
3031
import org.elasticsearch.action.search.SearchType;
3132
import org.elasticsearch.action.support.TransportActions;
@@ -1982,6 +1983,7 @@ public AliasFilter buildAliasFilter(ProjectState state, String index, Set<Resolv
19821983
public void canMatch(CanMatchNodeRequest request, ActionListener<CanMatchNodeResponse> listener) {
19831984
var shardLevelRequests = request.getShardLevelRequests();
19841985
final List<CanMatchNodeResponse.ResponseOrFailure> responses = new ArrayList<>(shardLevelRequests.size());
1986+
Map<String, Object> searchRequestAttributes = null;
19851987
for (var shardLevelRequest : shardLevelRequests) {
19861988
long shardCanMatchStartTimeInNanos = System.nanoTime();
19871989
ShardSearchRequest shardSearchRequest = request.createShardSearchRequest(shardLevelRequest);
@@ -1992,7 +1994,18 @@ public void canMatch(CanMatchNodeRequest request, ActionListener<CanMatchNodeRes
19921994
CanMatchContext canMatchContext = createCanMatchContext(shardSearchRequest);
19931995
CanMatchShardResponse canMatchShardResponse = canMatch(canMatchContext, true);
19941996
responses.add(new CanMatchNodeResponse.ResponseOrFailure(canMatchShardResponse));
1995-
indexShard.getSearchOperationListener().onCanMatchPhase(System.nanoTime() - shardCanMatchStartTimeInNanos);
1997+
1998+
if (searchRequestAttributes == null) {
1999+
// All of the shards in the request are for the same search, so the attributes will be the same for all shards.
2000+
searchRequestAttributes = SearchRequestAttributesExtractor.extractAttributes(
2001+
shardSearchRequest,
2002+
canMatchContext.getTimeRangeFilterFromMillis(),
2003+
shardSearchRequest.nowInMillis()
2004+
);
2005+
}
2006+
2007+
indexShard.getSearchOperationListener()
2008+
.onCanMatchPhase(searchRequestAttributes, System.nanoTime() - shardCanMatchStartTimeInNanos);
19962009
} catch (Exception e) {
19972010
responses.add(new CanMatchNodeResponse.ResponseOrFailure(e));
19982011
}

server/src/test/java/org/elasticsearch/search/TelemetryMetrics/ShardSearchPhaseAPMMetricsTests.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ public void testMetricsDfsQueryThenFetch() {
101101
);
102102
final List<Measurement> dfsMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(DFS_SEARCH_PHASE_METRIC);
103103
assertEquals(num_primaries, dfsMeasurements.size());
104+
assertAttributes(dfsMeasurements, false, false);
104105
final List<Measurement> queryMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(QUERY_SEARCH_PHASE_METRIC);
105106
assertEquals(num_primaries, queryMeasurements.size());
106107
final List<Measurement> fetchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(FETCH_SEARCH_PHASE_METRIC);
@@ -117,8 +118,10 @@ public void testMetricsDfsQueryThenFetchSystem() {
117118
);
118119
final List<Measurement> dfsMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(DFS_SEARCH_PHASE_METRIC);
119120
assertEquals(0, dfsMeasurements.size()); // DFS phase not done for index with single shard
121+
assertAttributes(dfsMeasurements, true, false);
120122
final List<Measurement> queryMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(QUERY_SEARCH_PHASE_METRIC);
121123
assertEquals(1, queryMeasurements.size());
124+
assertAttributes(queryMeasurements, true, false);
122125
final List<Measurement> fetchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(FETCH_SEARCH_PHASE_METRIC);
123126
assertEquals(1, fetchMeasurements.size());
124127
assertAttributes(fetchMeasurements, true, false);
@@ -368,6 +371,17 @@ public void testTimeRangeFilterAllResults() {
368371
});
369372
final List<Measurement> canMatchMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(CAN_MATCH_SEARCH_PHASE_METRIC);
370373
assertEquals(num_primaries, canMatchMeasurements.size());
374+
for (Measurement measurement : canMatchMeasurements) {
375+
Map<String, Object> attributes = measurement.attributes();
376+
assertEquals(5, attributes.size());
377+
assertEquals("user", attributes.get("target"));
378+
assertEquals("hits_only", attributes.get("query_type"));
379+
assertEquals("_score", attributes.get("sort"));
380+
assertEquals(false, attributes.get(SearchRequestAttributesExtractor.SYSTEM_THREAD_ATTRIBUTE_NAME));
381+
// the range query was rewritten to one without bounds: we do track the time range filter from value but we don't set
382+
// the time range filter field because no range query is executed at the shard level.
383+
assertEquals("older_than_14_days", attributes.get("time_range_filter_from"));
384+
}
371385
final List<Measurement> queryMeasurements = getTestTelemetryPlugin().getLongHistogramMeasurement(QUERY_SEARCH_PHASE_METRIC);
372386
// the two docs are at most spread across two shards, other shards are empty and get filtered out
373387
assertThat(queryMeasurements.size(), Matchers.lessThanOrEqualTo(2));

0 commit comments

Comments
 (0)