Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,14 @@ public enum CassandraRelevantProperties
/** Whether to optimize query plans */
SAI_QUERY_OPTIMIZATION_LEVEL("cassandra.sai.query_optimization_level", "1"),

/**
* If disabled, the query optimizer runs an index search to estimate the number of matching keys.
* If enabled, the query optimizer instead uses the term statistics, if present, which are stored
* with the help of histograms in the metadata component of each SSTable index.
* Using term statistics is significantly less costly but less precise.
*/
SAI_QUERY_OPTIMIZATION_USE_TERM_STATISTICS("cassandra.sai.query_optimization.use_term_statistics", "false"),

/** Controls the number of rows read in a single batch when fetching rows for a partition key */
SAI_PARTITION_ROW_BATCH_SIZE("cassandra.sai.partition_row_batch_size", "100"),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@
@VisibleForTesting
public static int QUERY_OPT_LEVEL = CassandraRelevantProperties.SAI_QUERY_OPTIMIZATION_LEVEL.getInt();

public static volatile boolean QUERY_OPT_USE_TERM_STATS = CassandraRelevantProperties.SAI_QUERY_OPTIMIZATION_USE_TERM_STATISTICS.getBoolean();

Check warning on line 117 in src/java/org/apache/cassandra/index/sai/plan/QueryController.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Make QUERY_OPT_USE_TERM_STATS a static final constant or non-public and provide accessors if needed.

See more on https://sonarcloud.io/project/issues?id=cassandra-stargazer&issues=AZ1KH2IDhQMMTXOch07G&open=AZ1KH2IDhQMMTXOch07G&pullRequest=2290

Check warning on line 117 in src/java/org/apache/cassandra/index/sai/plan/QueryController.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Make this "public static QUERY_OPT_USE_TERM_STATS" field final

See more on https://sonarcloud.io/project/issues?id=cassandra-stargazer&issues=AZ1KH2IDhQMMTXOch07H&open=AZ1KH2IDhQMMTXOch07H&pullRequest=2290

Check warning on line 117 in src/java/org/apache/cassandra/index/sai/plan/QueryController.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Rename this field "QUERY_OPT_USE_TERM_STATS" to match the regular expression '^[a-z][a-zA-Z0-9]*$'.

See more on https://sonarcloud.io/project/issues?id=cassandra-stargazer&issues=AZ1KH2IDhQMMTXOch07I&open=AZ1KH2IDhQMMTXOch07I&pullRequest=2290

private final ColumnFamilyStore cfs;
private final ReadCommand command;
private final Orderer orderer;
Expand Down Expand Up @@ -957,7 +959,7 @@
case NOT_CONTAINS_KEY:
case NOT_CONTAINS_VALUE:
case RANGE:
return (indexFeatureSet.hasTermsHistogram())
return (indexFeatureSet.hasTermsHistogram() && QUERY_OPT_USE_TERM_STATS)
? estimateMatchingRowCountUsingHistograms(predicate)
: estimateMatchingRowCountUsingIndex(predicate);
default:
Expand Down
4 changes: 1 addition & 3 deletions test/unit/org/apache/cassandra/index/sai/SAITester.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.exceptions.ReadFailureException;
import org.apache.cassandra.config.CassandraRelevantProperties;
import org.apache.cassandra.cql3.CQLTester;
import org.apache.cassandra.cql3.ColumnIdentifier;
import org.apache.cassandra.cql3.UntypedResultSet;
Expand Down Expand Up @@ -95,7 +94,6 @@
import org.apache.cassandra.io.sstable.SSTable;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.io.util.File;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.IndexMetadata;
import org.apache.cassandra.schema.MockSchema;
Expand Down Expand Up @@ -234,7 +232,7 @@ public void resetQueryOptimizationLevel()
{
// Enable the optimizer by default. If there are any tests that need to disable it, they can do so explicitly.
QueryController.QUERY_OPT_LEVEL = 1;

QueryController.QUERY_OPT_USE_TERM_STATS = true;
}

@Before
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.apache.cassandra.index.sai.SAITester;
import org.apache.cassandra.index.sai.SAIUtil;
import org.apache.cassandra.index.sai.disk.format.Version;
import org.assertj.core.api.Assertions;

import static org.apache.cassandra.cql3.CQL3Type.Native.DECIMAL;
import static org.apache.cassandra.cql3.CQL3Type.Native.INT;
Expand Down Expand Up @@ -90,30 +89,38 @@ public void testMemtablesSAI()

for (Version version : versions)
{
RowCountTest test = new RowCountTest(Operator.NEQ, 25);
test.doTest(version, INT, 95, 100);
test.doTest(version, DECIMAL, 95, version.onOrAfter(Version.EB) ? 99 : 100);
test.doTest(version, VARINT, 95, 99);

test = new RowCountTest(Operator.LT, 50);
test.doTest(version, INT, 40, 60);
test.doTest(version, DECIMAL, 40, 60);
test.doTest(version, VARINT, 40, 60);

test = new RowCountTest(Operator.LT, 150);
test.doTest(version, INT, 95, 100);
test.doTest(version, DECIMAL, 95, 100);
test.doTest(version, VARINT, 95, 100);

test = new RowCountTest(Operator.EQ, 31);
// For older on-disk formats we expect less accurate estimates due to lack of per-index stats and due to
// lazy search on the first shard only; in this scenario each shard iterator will report at least one row,
// even if none are matching. We could have run the search on all shards to get more accurate estimates,
// but search is expensive, so we accept less accurate estimates for older formats.
int maxExpectedRows = version.onOrAfter(Version.EB) ? 1 : TrieMemtable.SHARD_COUNT;
test.doTest(version, INT, 1, maxExpectedRows);
test.doTest(version, DECIMAL, 1, maxExpectedRows);
test.doTest(version, VARINT, 1, maxExpectedRows);
boolean[] useTermStatsTestedValues = new boolean[]{ false };
if (version.onOrAfter(Version.EB))
useTermStatsTestedValues = new boolean[]{ true, false };
for (boolean useTermStats : useTermStatsTestedValues)
{
QueryController.QUERY_OPT_USE_TERM_STATS = useTermStats;

RowCountTest test = new RowCountTest(Operator.NEQ, 25);
test.doTest(version, INT, 95, 99);
test.doTest(version, DECIMAL, 95, 99);
test.doTest(version, VARINT, 95, 99);

test = new RowCountTest(Operator.LT, 50);
test.doTest(version, INT, 40, 60);
test.doTest(version, DECIMAL, 40, 60);
test.doTest(version, VARINT, 40, 60);

test = new RowCountTest(Operator.LT, 150);
test.doTest(version, INT, 95, 100);
test.doTest(version, DECIMAL, 95, 100);
test.doTest(version, VARINT, 95, 100);

test = new RowCountTest(Operator.EQ, 31);
// When term statistics are not used (always the case for on-disk formats older than EB, which are only
// tested with term statistics disabled) we expect less accurate estimates due to lack of per-index stats
// and due to lazy search on the first shard only; in this scenario each shard iterator will report at
// least one row, even if none are matching. We could have run the search on all shards to get more
// accurate estimates, but search is expensive, so we accept less accurate estimates in that case.
int maxExpectedRows = useTermStats ? 1 : TrieMemtable.SHARD_COUNT;
test.doTest(version, INT, 1, maxExpectedRows);
test.doTest(version, DECIMAL, 1, maxExpectedRows);
test.doTest(version, VARINT, 1, maxExpectedRows);
}
}
}

Expand Down