datastax · pkolaczk · Apr 2, 2026 · Mar 30, 2026
diff --git a/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java b/src/java/org/apache/cassandra/config/CassandraRelevantProperties.java
@@ -416,6 +416,14 @@ public enum CassandraRelevantProperties
     /** Whether to optimize query plans */
     SAI_QUERY_OPTIMIZATION_LEVEL("cassandra.sai.query_optimization_level", "1"),
 
+    /**
+     * If disabled, the query optimizer runs an index search to estimate the number of matching keys.
+     * If enabled, the query optimizer instead uses the term statistics (histograms) kept in the
+     * metadata component of each SSTable index, if present.
+     * Using term statistics is significantly less costly but less precise.
+     */
+    SAI_QUERY_OPTIMIZATION_USE_TERM_STATISTICS("cassandra.sai.query_optimization.use_term_statistics", "false"),
+
     /** Controls the number of rows read in a single batch when fetching rows for a partition key */
     SAI_PARTITION_ROW_BATCH_SIZE("cassandra.sai.partition_row_batch_size", "100"),
 

diff --git a/src/java/org/apache/cassandra/index/sai/plan/QueryController.java b/src/java/org/apache/cassandra/index/sai/plan/QueryController.java
@@ -114,6 +114,8 @@
     @VisibleForTesting
     public static int QUERY_OPT_LEVEL = CassandraRelevantProperties.SAI_QUERY_OPTIMIZATION_LEVEL.getInt();
 
+    public static volatile boolean QUERY_OPT_USE_TERM_STATS = CassandraRelevantProperties.SAI_QUERY_OPTIMIZATION_USE_TERM_STATISTICS.getBoolean();
+
     private final ColumnFamilyStore cfs;
     private final ReadCommand command;
     private final Orderer orderer;
@@ -957,7 +959,7 @@
             case NOT_CONTAINS_KEY:
             case NOT_CONTAINS_VALUE:
             case RANGE:
-                return (indexFeatureSet.hasTermsHistogram())
+                return (indexFeatureSet.hasTermsHistogram() && QUERY_OPT_USE_TERM_STATS)
                        ? estimateMatchingRowCountUsingHistograms(predicate)
                        : estimateMatchingRowCountUsingIndex(predicate);
             default:

diff --git a/test/unit/org/apache/cassandra/index/sai/SAITester.java b/test/unit/org/apache/cassandra/index/sai/SAITester.java
@@ -60,7 +60,6 @@
 import com.datastax.driver.core.ResultSet;
 import com.datastax.driver.core.Session;
 import com.datastax.driver.core.exceptions.ReadFailureException;
-import org.apache.cassandra.config.CassandraRelevantProperties;
 import org.apache.cassandra.cql3.CQLTester;
 import org.apache.cassandra.cql3.ColumnIdentifier;
 import org.apache.cassandra.cql3.UntypedResultSet;
@@ -95,7 +94,6 @@
 import org.apache.cassandra.io.sstable.SSTable;
 import org.apache.cassandra.io.sstable.format.SSTableReader;
 import org.apache.cassandra.io.util.File;
-import org.apache.cassandra.net.MessagingService;
 import org.apache.cassandra.schema.ColumnMetadata;
 import org.apache.cassandra.schema.IndexMetadata;
 import org.apache.cassandra.schema.MockSchema;
@@ -234,7 +232,7 @@ public void resetQueryOptimizationLevel()
     {
         // Enable the optimizer by default. If there are any tests that need to disable it, they can do so explicitly.
         QueryController.QUERY_OPT_LEVEL = 1;
-
+        QueryController.QUERY_OPT_USE_TERM_STATS = true;
     }
 
     @Before

diff --git a/test/unit/org/apache/cassandra/index/sai/plan/SingleRestrictionEstimatedRowCountTest.java b/test/unit/org/apache/cassandra/index/sai/plan/SingleRestrictionEstimatedRowCountTest.java
@@ -36,7 +36,6 @@
 import org.apache.cassandra.index.sai.SAITester;
 import org.apache.cassandra.index.sai.SAIUtil;
 import org.apache.cassandra.index.sai.disk.format.Version;
-import org.assertj.core.api.Assertions;
 
 import static org.apache.cassandra.cql3.CQL3Type.Native.DECIMAL;
 import static org.apache.cassandra.cql3.CQL3Type.Native.INT;
@@ -90,30 +89,38 @@ public void testMemtablesSAI()
 
         for (Version version : versions)
         {
-            RowCountTest test = new RowCountTest(Operator.NEQ, 25);
-            test.doTest(version, INT, 95, 100);
-            test.doTest(version, DECIMAL, 95, version.onOrAfter(Version.EB) ? 99 : 100);
-            test.doTest(version, VARINT, 95, 99);
-
-            test = new RowCountTest(Operator.LT, 50);
-            test.doTest(version, INT, 40, 60);
-            test.doTest(version, DECIMAL, 40, 60);
-            test.doTest(version, VARINT, 40, 60);
-
-            test = new RowCountTest(Operator.LT, 150);
-            test.doTest(version, INT, 95, 100);
-            test.doTest(version, DECIMAL, 95, 100);
-            test.doTest(version, VARINT, 95, 100);
-
-            test = new RowCountTest(Operator.EQ, 31);
-            // For older on-disk formats we expect less accurate estimates due to lack of per-index stats and due to
-            // lazy search on the first shard only; in this scenario each shard iterator will report at least one row,
-            // even if none are matching. We could have run the search on all shards to get more accurate estimates,
-            // but search is expensive, so we accept less accurate estimates for older formats.
-            int maxExpectedRows = version.onOrAfter(Version.EB) ? 1 : TrieMemtable.SHARD_COUNT;
-            test.doTest(version, INT, 1, maxExpectedRows);
-            test.doTest(version, DECIMAL, 1, maxExpectedRows);
-            test.doTest(version, VARINT, 1, maxExpectedRows);
+            boolean[] useTermStatsTestedValues = new boolean[]{ false };
+            if (version.onOrAfter(Version.EB))
+                useTermStatsTestedValues = new boolean[]{ true, false };
+            for (boolean useTermStats : useTermStatsTestedValues)
+            {
+                QueryController.QUERY_OPT_USE_TERM_STATS = useTermStats;
+
+                RowCountTest test = new RowCountTest(Operator.NEQ, 25);
+                test.doTest(version, INT, 95, 99);
+                test.doTest(version, DECIMAL, 95, 99);
+                test.doTest(version, VARINT, 95, 99);
+
+                test = new RowCountTest(Operator.LT, 50);
+                test.doTest(version, INT, 40, 60);
+                test.doTest(version, DECIMAL, 40, 60);
+                test.doTest(version, VARINT, 40, 60);
+
+                test = new RowCountTest(Operator.LT, 150);
+                test.doTest(version, INT, 95, 100);
+                test.doTest(version, DECIMAL, 95, 100);
+                test.doTest(version, VARINT, 95, 100);
+
+                test = new RowCountTest(Operator.EQ, 31);
+                // Without term statistics (always the case for older on-disk formats, which lack per-index stats)
+                // we expect less accurate estimates due to lazy search on the first shard only; in this scenario
+                // each shard iterator will report at least one row, even if none are matching. We could have run the
+                // search on all shards to get more accurate estimates, but search is expensive, so we accept that.
+                int maxExpectedRows = useTermStats ? 1 : TrieMemtable.SHARD_COUNT;
+                test.doTest(version, INT, 1, maxExpectedRows);
+                test.doTest(version, DECIMAL, 1, maxExpectedRows);
+                test.doTest(version, VARINT, 1, maxExpectedRows);
+            }
         }
     }