Skip to content

Commit 7986ec2

Browse files
author
Piotr Kołaczkowski
committed
CNDB-15508: Query planner metrics
This commit adds new metrics related to the operation of the SAI query planner. The metrics should help check whether the query planner makes proper decisions by correlating them with other metrics, e.g. the metrics of the actual query execution.

Per-query metrics (histograms):
- `RowsEstimated`: the estimated number of rows to be returned by the query
- `CostEstimated`: the abstract cost of query execution
- `InverseSelectivityEstimated`: the inverse of query selectivity, before applying the query LIMIT (1 means the query selects all rows, 10 means it selects every 10th row, etc.)
- `IndexReferencesInQuery`: the number of index references in the unoptimized query execution plan (the same index may be referenced multiple times and each reference counts separately)
- `IndexReferencesInPlan`: the number of index references in the optimized query execution plan (the same index may be referenced multiple times and each reference counts separately)

Per-table metrics (counters):
- `TotalRowsEstimated`: the sum of all row estimates from all completed queries
- `TotalCostEstimated`: the sum of all cost estimates from all completed queries
- `TotalQueriesCompletedInSelectivityGroup{N}`, where N in [0, 12): the number of completed queries with selectivity S such that 10^(-N-1) < S <= 10^(-N) for N < 11, or S <= 10^(-N) for N = 11

In other words, the higher the group N, the smaller the fraction of rows the query is estimated to return. The selectivity calculation does not include the final LIMIT of the query.
1 parent 95724c0 commit 7986ec2

File tree

7 files changed

+281
-56
lines changed

7 files changed

+281
-56
lines changed

src/java/org/apache/cassandra/index/sai/QueryContext.java

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.google.common.annotations.VisibleForTesting;
2626

2727
import org.apache.cassandra.config.DatabaseDescriptor;
28+
import org.apache.cassandra.index.sai.plan.Plan;
2829
import org.apache.cassandra.index.sai.utils.AbortedOperationException;
2930
import org.apache.cassandra.utils.MonotonicClock;
3031

@@ -66,9 +67,10 @@ public class QueryContext
6667

6768
private final LongAdder shadowedPrimaryKeyCount = new LongAdder();
6869

69-
// Determines the order of using indexes for filtering and sorting.
70-
// Null means the query execution order hasn't been decided yet.
71-
private FilterSortOrder filterSortOrder = null;
70+
private Plan originalPlan = null;
71+
72+
73+
private Plan optimizedPlan = null;
7274

7375
@VisibleForTesting
7476
public QueryContext()
@@ -145,11 +147,6 @@ public void addAnnGraphSearchLatency(long val)
145147
annGraphSearchLatency.add(val);
146148
}
147149

148-
public void setFilterSortOrder(FilterSortOrder filterSortOrder)
149-
{
150-
this.filterSortOrder = filterSortOrder;
151-
}
152-
153150
// getters
154151

155152
public long sstablesHit()
@@ -208,9 +205,14 @@ public long annGraphSearchLatency()
208205
return annGraphSearchLatency.longValue();
209206
}
210207

211-
public FilterSortOrder filterSortOrder()
208+
public Plan optimizedPlan()
212209
{
213-
return filterSortOrder;
210+
return optimizedPlan;
211+
}
212+
213+
public Plan originalPlan()
214+
{
215+
return originalPlan;
214216
}
215217

216218
public void checkpoint()
@@ -246,17 +248,14 @@ public void updateAnnRerankFloor(float observedFloor)
246248
annRerankFloor = max(annRerankFloor, observedFloor);
247249
}
248250

249-
/**
250-
* Determines the order of filtering and sorting operations.
251-
* Currently used only by vector search.
252-
*/
253-
public enum FilterSortOrder
251+
public void setOriginalPlan(Plan originalPlan)
254252
{
255-
/** First get the matching keys from the non-vector indexes, then use vector index to return the top K by similarity order */
256-
SEARCH_THEN_ORDER,
253+
this.originalPlan = originalPlan;
254+
}
257255

258-
/** First get the candidates in ANN order from the vector index, then fetch the rows and filter them until we find K matching the predicates */
259-
SCAN_THEN_FILTER
256+
public void setOptimizedPlan(Plan optimizedPlan)
257+
{
258+
this.optimizedPlan = optimizedPlan;
260259
}
261260

262261
public Snapshot snapshot()
@@ -290,7 +289,18 @@ public static class Snapshot
290289
public final long queryTimeouts;
291290
public final long annGraphSearchLatency;
292291
public final long shadowedPrimaryKeyCount;
293-
public final FilterSortOrder filterSortOrder;
292+
293+
public final boolean searchExecutedBeforeOrder;
294+
public final boolean filterExecutedAfterOrderedScan;
295+
296+
public final double rowsEstimated;
297+
public final double selectivityEstimated;
298+
public final double costEstimated;
299+
300+
public final int indexReferencesInQuery;
301+
public final int indexReferencesInPlan;
302+
303+
294304

295305
/**
296306
* Creates a snapshot of all the metrics in the given {@link QueryContext}.
@@ -315,7 +325,16 @@ private Snapshot(QueryContext context)
315325
queryTimeouts = context.queryTimeouts();
316326
annGraphSearchLatency = context.annGraphSearchLatency();
317327
shadowedPrimaryKeyCount = context.getShadowedPrimaryKeyCount();
318-
filterSortOrder = context.filterSortOrder();
328+
329+
costEstimated = context.optimizedPlan().fullCost();
330+
rowsEstimated = context.optimizedPlan().expectedRows();
331+
selectivityEstimated = context.optimizedPlan().selectivity();
332+
indexReferencesInQuery = context.originalPlan().referencedIndexCount();
333+
indexReferencesInPlan = context.optimizedPlan().referencedIndexCount();
334+
searchExecutedBeforeOrder = context.optimizedPlan().isSearchThenOrderHybrid();
335+
filterExecutedAfterOrderedScan = context.optimizedPlan().isOrderedScanThenFilterHybrid();
319336
}
320337
}
338+
339+
321340
}

src/java/org/apache/cassandra/index/sai/metrics/TableQueryMetrics.java

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ public void record(QueryContext context, ReadCommand command)
115115
{
116116
final long queryLatencyMicros = TimeUnit.NANOSECONDS.toMicros(snapshot.totalQueryTimeNs);
117117

118-
if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SEARCH_THEN_ORDER)
118+
if (snapshot.searchExecutedBeforeOrder)
119119
{
120120
Tracing.trace("Index query accessed memtable indexes, {}, and {}, selected {} before ranking, " +
121121
"post-filtered {} in {}, and took {} microseconds.",
@@ -189,11 +189,18 @@ public static class PerTable extends AbstractQueryMetrics
189189
{
190190
public static final String METRIC_TYPE = "TableQueryMetrics";
191191

192+
// Number of selectivity groups we track; queries with selectivity <= 10^(-11) all go to the last group
193+
private static final int SELECTIVITY_GROUPS_COUNT = 12;
194+
192195
public final Counter totalQueryTimeouts;
193196
public final Counter totalPartitionReads;
194197
public final Counter totalRowsFiltered;
195198
public final Counter totalQueriesCompleted;
196199

200+
public final Counter totalRowsEstimated;
201+
public final Counter totalCostEstimated;
202+
public final Counter[] totalQueriesCompletedInSelectivityGroup;
203+
197204
public final Counter sortThenFilterQueriesCompleted;
198205
public final Counter filterThenSortQueriesCompleted;
199206

@@ -211,6 +218,13 @@ public PerTable(TableMetadata table, QueryKind queryKind, Predicate<ReadCommand>
211218
totalQueriesCompleted = Metrics.counter(createMetricName("TotalQueriesCompleted"));
212219
totalQueryTimeouts = Metrics.counter(createMetricName("TotalQueryTimeouts"));
213220

221+
totalRowsEstimated = Metrics.counter(createMetricName("TotalRowsEstimated"));
222+
totalCostEstimated = Metrics.counter(createMetricName("TotalCostEstimated"));
223+
224+
totalQueriesCompletedInSelectivityGroup = new Counter[SELECTIVITY_GROUPS_COUNT];
225+
for (int i = 0; i < totalQueriesCompletedInSelectivityGroup.length; i++)
226+
totalQueriesCompletedInSelectivityGroup[i] = Metrics.counter(createMetricName("TotalQueriesCompletedInSelectivityGroup" + i));
227+
214228
sortThenFilterQueriesCompleted = Metrics.counter(createMetricName("SortThenFilterQueriesCompleted"));
215229
filterThenSortQueriesCompleted = Metrics.counter(createMetricName("FilterThenSortQueriesCompleted"));
216230
}
@@ -227,10 +241,16 @@ public void record(QueryContext.Snapshot snapshot)
227241
totalQueriesCompleted.inc();
228242
totalPartitionReads.inc(snapshot.partitionsRead);
229243
totalRowsFiltered.inc(snapshot.rowsFiltered);
244+
totalCostEstimated.inc(Math.round(snapshot.costEstimated));
245+
totalRowsEstimated.inc(Math.round(snapshot.rowsEstimated));
230246

231-
if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SCAN_THEN_FILTER)
247+
int selectivityGroup = (int)(Math.min(totalQueriesCompletedInSelectivityGroup.length - 1,
248+
Math.floor(-Math.log10(snapshot.selectivityEstimated))));
249+
totalQueriesCompletedInSelectivityGroup[selectivityGroup].inc();
250+
251+
if (snapshot.filterExecutedAfterOrderedScan)
232252
sortThenFilterQueriesCompleted.inc();
233-
else if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SEARCH_THEN_ORDER)
253+
if (snapshot.searchExecutedBeforeOrder)
234254
filterThenSortQueriesCompleted.inc();
235255
}
236256
}
@@ -276,6 +296,32 @@ public static class PerQuery extends AbstractQueryMetrics
276296
*/
277297
public final Timer annGraphSearchLatency;
278298

299+
/** Query execution cost as estimated by the planner */
300+
public final Histogram costEstimated;
301+
302+
/** Number of rows to be returned by the query, as estimated by the planner */
303+
public final Histogram rowsEstimated;
304+
305+
/**
306+
* Inverse selectivity of the query, before applying the LIMIT clause.
307+
* Denotes by how many rows on average the index iterator
308+
* is advanced to get the next matching key. High inverse selectivity means many rows
309+
* are skipped thanks to use of the index. An inverse selectivity of 1 means no rows are skipped.
310+
* We track inverse of selectivity, because histograms do not support fractional values,
311+
* and selectivity is <= 1.0.
312+
*/
313+
public final Histogram inverseSelectivityEstimated;
314+
315+
/**
316+
* Number of indexes referenced by the optimized query plan.
317+
* References to the same index from unrelated query clauses,
318+
* leading to separate index searches, are counted separately.
319+
*/
320+
public final Histogram indexReferencesInPlan;
321+
322+
/** Number of indexes referenced by the original query plan before optimization (as stated in the query text) */
323+
public final Histogram indexReferencesInQuery;
324+
279325
/**
280326
* @param table the table to measure metrics for
281327
* @param queryKind an identifier for the kind of query which metrics are being recorded for
@@ -304,6 +350,12 @@ public PerQuery(TableMetadata table, QueryKind queryKind, Predicate<ReadCommand>
304350

305351
// Key vector metrics that translate to performance
306352
annGraphSearchLatency = Metrics.timer(createMetricName("ANNGraphSearchLatency"));
353+
354+
costEstimated = Metrics.histogram(createMetricName("CostEstimated"), false);
355+
rowsEstimated = Metrics.histogram(createMetricName("RowsEstimated"), true);
356+
inverseSelectivityEstimated = Metrics.histogram(createMetricName("InverseSelectivityEstimated"), false);
357+
indexReferencesInPlan = Metrics.histogram(createMetricName("IndexReferencesInPlan"), true);
358+
indexReferencesInQuery = Metrics.histogram(createMetricName("IndexReferencesInQuery"), false);
307359
}
308360

309361
@Override
@@ -340,6 +392,13 @@ public void record(QueryContext.Snapshot snapshot)
340392
}
341393

342394
shadowedKeysScannedHistogram.update(snapshot.shadowedPrimaryKeyCount);
395+
396+
costEstimated.update(Math.round(snapshot.costEstimated));
397+
rowsEstimated.update(Math.round(snapshot.rowsEstimated));
398+
inverseSelectivityEstimated.update(Math.round(1.0 / snapshot.selectivityEstimated));
399+
400+
indexReferencesInQuery.update(snapshot.indexReferencesInQuery);
401+
indexReferencesInPlan.update(snapshot.indexReferencesInPlan);
343402
}
344403
}
345404
}

0 commit comments

Comments
 (0)