Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,13 @@ public enum CassandraRelevantProperties
*/
SAI_QUERY_KIND_PER_QUERY_METRICS_ENABLED("cassandra.sai.metrics.query_kind.per_query.enabled", "false"),

/**
* Whether to enable SAI query plan metrics such as the estimated cost, estimated number of rows,
* number of indexes used in the original and optimized query plan, etc.
* These metrics are counters and histograms.
*/
SAI_QUERY_PLAN_METRICS_ENABLED("cassandra.sai.metrics.query_plan.enabled", "true"),

/**
* Whether to enable SAI index metrics such as memtable flush metrics, compaction metrics, and disk usage metrics.
* These metrics include timers, histograms, counters, and gauges for index operations.
Expand Down
68 changes: 46 additions & 22 deletions src/java/org/apache/cassandra/index/sai/QueryContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

import com.google.common.annotations.VisibleForTesting;

import org.apache.cassandra.config.CassandraRelevantProperties;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.index.sai.plan.Plan;
import org.apache.cassandra.index.sai.utils.AbortedOperationException;
import org.apache.cassandra.utils.MonotonicClock;

Expand Down Expand Up @@ -86,9 +90,7 @@ public class QueryContext

private float annRerankFloor = 0.0f; // only called from single-threaded setup code

// Determines the order of using indexes for filtering and sorting.
// Null means the query execution order hasn't been decided yet.
private FilterSortOrder filterSortOrder = null;
private PlanInfo queryPlanInfo;

@VisibleForTesting
public QueryContext()
Expand Down Expand Up @@ -197,11 +199,6 @@ public void addAnnGraphSearchLatency(long val)
annGraphSearchLatency.add(val);
}

public void setFilterSortOrder(FilterSortOrder filterSortOrder)
{
this.filterSortOrder = filterSortOrder;
}

// getters

public long sstablesHit()
Expand Down Expand Up @@ -293,9 +290,10 @@ public long annGraphSearchLatency()
return annGraphSearchLatency.longValue();
}

public FilterSortOrder filterSortOrder()
@Nullable
public PlanInfo queryPlanInfo()
{
return filterSortOrder;
return queryPlanInfo;
}

public void checkpoint()
Expand All @@ -318,17 +316,10 @@ public void updateAnnRerankFloor(float observedFloor)
annRerankFloor = max(annRerankFloor, observedFloor);
}

/**
* Determines the order of filtering and sorting operations.
* Currently used only by vector search.
*/
public enum FilterSortOrder
public void recordQueryPlan(Plan.RowsIteration originalPlan, Plan.RowsIteration optimizedPlan)
{
/** First get the matching keys from the non-vector indexes, then use vector index to return the top K by similarity order */
SEARCH_THEN_ORDER,

/** First get the candidates in ANN order from the vector index, then fetch the rows and filter them until we find K matching the predicates */
SCAN_THEN_FILTER
if (CassandraRelevantProperties.SAI_QUERY_PLAN_METRICS_ENABLED.getBoolean())
this.queryPlanInfo = new PlanInfo(originalPlan, optimizedPlan);
}

public Snapshot snapshot()
Expand Down Expand Up @@ -365,7 +356,9 @@ public static class Snapshot
public final long triePostingsDecodes;
public final long queryTimeouts;
public final long annGraphSearchLatency;
public final FilterSortOrder filterSortOrder;

@Nullable
public final PlanInfo queryPlanInfo;

/**
* Creates a snapshot of all the metrics in the given {@link QueryContext}.
Expand Down Expand Up @@ -393,7 +386,38 @@ private Snapshot(QueryContext context)
triePostingsDecodes = context.triePostingsDecodes();
queryTimeouts = context.queryTimeouts();
annGraphSearchLatency = context.annGraphSearchLatency();
filterSortOrder = context.filterSortOrder();
queryPlanInfo = context.queryPlanInfo();
}
}

/**
* Captures relevant information about a query plan, both original and optimized.
*/
public static class PlanInfo
{
public final boolean searchExecutedBeforeOrder;
public final boolean filterExecutedAfterOrderedScan;

public final double rowsToReturnEstimated;
public final double rowsToFetchEstimated;
public final double keysToIterateEstimated;
public final double selectivityEstimated;
public final double costEstimated;

public final int indexReferencesInQuery;
public final int indexReferencesInPlan;

public PlanInfo(@Nonnull Plan.RowsIteration originalPlan, @Nonnull Plan.RowsIteration optimizedPlan)
{
this.costEstimated = optimizedPlan.fullCost();
this.rowsToReturnEstimated = optimizedPlan.expectedRows();
this.rowsToFetchEstimated = optimizedPlan.estimatedRowsToFetch();
this.keysToIterateEstimated = optimizedPlan.estimatedKeysToIterate();
this.selectivityEstimated = optimizedPlan.selectivity();
this.indexReferencesInQuery = originalPlan.referencedIndexCount();
this.indexReferencesInPlan = optimizedPlan.referencedIndexCount();
this.searchExecutedBeforeOrder = optimizedPlan.isSearchThenOrderHybrid();
this.filterExecutedAfterOrderedScan = optimizedPlan.isOrderedScanThenFilterHybrid();
Comment on lines +417 to +420

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indexReferencesInQuery and indexReferencesInPlan have associated histograms in TableQueryMetrics, but not counters. searchExecutedBeforeOrder and filterExecutedAfterOrderedScan have counters but not histograms. Is that intentional?

Copy link

@ekaterinadimitrova2 ekaterinadimitrova2 Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While it breaks the consistency, I am wondering whether it is this way because:

  • histograms for boolean values are not very useful
  • for the others the index reference counts are small integers
    Maybe we add some note to make clear the rationale? Wondering whether that would be helpful or it may become outdated doc in time

Copy link
Author

@pkolaczk pkolaczk Nov 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As for booleans - I think nothing technically stops us from having a boolean histogram (but we'd have to convert it to 0 and 1 as currently histograms are based on longs); but I feel it wouldn't be very useful in this case. We get 100% of the same information from the per-table counters.

As for the other histograms - well, it is hard to represent them accurately as per table counters. Like, I feel it doesn't make much sense to sum all index references from all queries... So we have counters in per table for things which sum events naturally (like rows returned - it is quite easy to understand what a total number of rows returned is), but per-query histograms for other properties of queries which are not "summable" in that way.

Note there are many other histograms in perQuery which don't have (a) corresponding PerTable counter(s), so I don't think it breaks consistency. Anyway, we really need to expose those histograms in CNDB somehow, but let's leave it for a follow-up ticket.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes perfect sense to me, thanks.

}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public void record(QueryContext context, ReadCommand command)
{
final long queryLatencyMicros = TimeUnit.NANOSECONDS.toMicros(snapshot.totalQueryTimeNs);

if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SEARCH_THEN_ORDER)
if (snapshot.queryPlanInfo != null && snapshot.queryPlanInfo.searchExecutedBeforeOrder)
{
Tracing.trace("Index query accessed memtable indexes, {}, and {}, selected {} before ranking, " +
"post-filtered {} in {}, and took {} microseconds.",
Expand Down Expand Up @@ -199,6 +199,11 @@ public static class PerTable extends AbstractQueryMetrics
public final Counter totalRowTombstonesFetched;
public final Counter totalQueriesCompleted;

public final Counter totalRowsToReturnEstimated;
public final Counter totalRowsToFetchEstimated;
public final Counter totalKeysToIterateEstimated;
public final Counter totalCostEstimated;

public final Counter sortThenFilterQueriesCompleted;
public final Counter filterThenSortQueriesCompleted;

Expand All @@ -220,6 +225,10 @@ public PerTable(TableMetadata table, QueryKind queryKind, Predicate<ReadCommand>
totalRowTombstonesFetched = Metrics.counter(createMetricName("TotalRowTombstonesFetched"));
totalQueriesCompleted = Metrics.counter(createMetricName("TotalQueriesCompleted"));
totalQueryTimeouts = Metrics.counter(createMetricName("TotalQueryTimeouts"));
totalRowsToReturnEstimated = Metrics.counter(createMetricName("TotalRowsToReturnEstimated"));
totalRowsToFetchEstimated = Metrics.counter(createMetricName("TotalRowsToFetchEstimated"));
totalKeysToIterateEstimated = Metrics.counter(createMetricName("TotalKeysToIterateEstimated"));
totalCostEstimated = Metrics.counter(createMetricName("TotalCostEstimated"));

sortThenFilterQueriesCompleted = Metrics.counter(createMetricName("SortThenFilterQueriesCompleted"));
filterThenSortQueriesCompleted = Metrics.counter(createMetricName("FilterThenSortQueriesCompleted"));
Expand All @@ -243,10 +252,19 @@ public void record(QueryContext.Snapshot snapshot)
totalRowsReturned.inc(snapshot.rowsReturned);
totalRowTombstonesFetched.inc(snapshot.rowTombstonesFetched);

if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SCAN_THEN_FILTER)
sortThenFilterQueriesCompleted.inc();
else if (snapshot.filterSortOrder == QueryContext.FilterSortOrder.SEARCH_THEN_ORDER)
filterThenSortQueriesCompleted.inc();
QueryContext.PlanInfo queryPlanInfo = snapshot.queryPlanInfo;
if (queryPlanInfo != null)
{
totalCostEstimated.inc(Math.round(queryPlanInfo.costEstimated));
totalRowsToReturnEstimated.inc(Math.round(queryPlanInfo.rowsToReturnEstimated));
totalRowsToFetchEstimated.inc(Math.round(queryPlanInfo.rowsToFetchEstimated));
totalKeysToIterateEstimated.inc(Math.round(queryPlanInfo.keysToIterateEstimated));

if (queryPlanInfo.filterExecutedAfterOrderedScan)
sortThenFilterQueriesCompleted.inc();
if (queryPlanInfo.searchExecutedBeforeOrder)
filterThenSortQueriesCompleted.inc();
}
}
}

Expand Down Expand Up @@ -293,6 +311,34 @@ public static class PerQuery extends AbstractQueryMetrics
*/
public final Timer annGraphSearchLatency;

/** Query execution cost as estimated by the planner */
public final Histogram costEstimated;

/** Number of rows to be returned from the query as estimated by the planner */
public final Histogram rowsToReturnEstimated;

/** Number of rows to be fetched by the query as estimated by the planner */
public final Histogram rowsToFetchEstimated;

/** Number of keys to be iterated by the query as estimated by the planner */
public final Histogram keysToIterateEstimated;

/**
* Negative decimal logarithm of selectivity of the query, before applying the LIMIT clause.
* We use logarithm because selectivity values can be very small (e.g. 10^-9).
*/
public final Histogram logSelectivityEstimated;

/**
* Number of indexes referenced by the optimized query plan.
* When the same index is referenced from unrelated query clauses,
* leading to separate index searches, each reference is counted separately.
*/
public final Histogram indexReferencesInPlan;

/** Number of indexes referenced by the original query plan before optimization (as stated in the query text) */
public final Histogram indexReferencesInQuery;

/**
* @param table the table to measure metrics for
* @param queryKind an identifier for the kind of query which metrics are being recorded for
Expand Down Expand Up @@ -323,6 +369,14 @@ public PerQuery(TableMetadata table, QueryKind queryKind, Predicate<ReadCommand>

// Key vector metrics that translate to performance
annGraphSearchLatency = Metrics.timer(createMetricName("ANNGraphSearchLatency"));

costEstimated = Metrics.histogram(createMetricName("CostEstimated"), false);
rowsToReturnEstimated = Metrics.histogram(createMetricName("RowsToReturnEstimated"), true);
rowsToFetchEstimated = Metrics.histogram(createMetricName("RowsToFetchEstimated"), true);
keysToIterateEstimated = Metrics.histogram(createMetricName("KeysToIterateEstimated"), true);
logSelectivityEstimated = Metrics.histogram(createMetricName("LogSelectivityEstimated"), true);
indexReferencesInPlan = Metrics.histogram(createMetricName("IndexReferencesInPlan"), true);
indexReferencesInQuery = Metrics.histogram(createMetricName("IndexReferencesInQuery"), false);
}

@Override
Expand Down Expand Up @@ -362,6 +416,19 @@ public void record(QueryContext.Snapshot snapshot)
{
annGraphSearchLatency.update(snapshot.annGraphSearchLatency, TimeUnit.NANOSECONDS);
}

QueryContext.PlanInfo queryPlanInfo = snapshot.queryPlanInfo;
if (queryPlanInfo != null)
{
costEstimated.update(Math.round(queryPlanInfo.costEstimated));
rowsToReturnEstimated.update(Math.round(queryPlanInfo.rowsToReturnEstimated));
rowsToFetchEstimated.update(Math.round(queryPlanInfo.rowsToFetchEstimated));
keysToIterateEstimated.update(Math.round(queryPlanInfo.keysToIterateEstimated));
double logSelectivity = -Math.log10(queryPlanInfo.selectivityEstimated);
logSelectivityEstimated.update((int) (Math.min(20, Math.floor(logSelectivity))));
Comment on lines +427 to +428
Copy link

@adelapena adelapena Nov 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a big deal, but Math.log10 is a relatively costly operation, and we'll call it seven times per query if per-kind metrics are enabled. Since this is the only read of QueryPlanInfo.selectivityEstimated, maybe we can put the logarithm of the selectivity in the plan info, as QueryPlanInfo.logSelectivityEstimated, so it's computed only once per query?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could do the same for the quicker calls to Math.round for costEstimated, rowsToReturnEstimated, rowsToFetchEstimated and keysToIterateEstimated? Those will be repeated 14 times per query with per-query-kind metrics, so we can save up to 5*13=65 calls per query:

public final long costEstimated;
public final long rowsToReturnEstimated;
public final long rowsToFetchEstimated;
public final long keysToIterateEstimated;
public final int logSelectivityEstimated;
...
public PlanInfo(@Nonnull Plan.RowsIteration originalPlan, @Nonnull Plan.RowsIteration optimizedPlan)
{
    this.costEstimated = Math.round(optimizedPlan.fullCost());
    this.rowsToReturnEstimated = Math.round(optimizedPlan.expectedRows());
    this.rowsToFetchEstimated = Math.round(optimizedPlan.estimatedRowsToFetch());
    this.keysToIterateEstimated = Math.round(optimizedPlan.estimatedKeysToIterate());
    this.logSelectivityEstimated = (Math.min(20, (int) Math.floor(-Math.log10(optimizedPlan.selectivity()))));

That was the intention of having a QueryContext.Snapshot class, reducing the amplification caused by having multiple consumers of these metrics (per-query-kind metrics and slow query logger).

indexReferencesInQuery.update(queryPlanInfo.indexReferencesInQuery);
indexReferencesInPlan.update(queryPlanInfo.indexReferencesInPlan);
}
}
}
}
Loading