Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for PQS test oracle #33

Merged
merged 3 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-sql-jdbc-driver</artifactId>
<version>16.1.0</version>
<version>17.0.0</version>
</dependency>
</dependencies>
<reporting>
Expand Down
3 changes: 2 additions & 1 deletion src/sqlancer/ComparatorHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ public static void assumeResultSetsAreEqual(List<String> resultSet, List<String>
public static void assumeResultSetsAreEqual(List<String> resultSet, List<String> secondResultSet,
String originalQueryString, List<String> combinedString, SQLGlobalState<?, ?> state,
UnaryOperator<String> canonicalizationRule) {
// Overloaded version of assumeResultSetsAreEqual that takes a canonicalization function which is applied to
// Overloaded version of assumeResultSetsAreEqual that takes a canonicalization
// function which is applied to
// both result sets before their comparison.
List<String> canonicalizedResultSet = resultSet.stream().map(canonicalizationRule).collect(Collectors.toList());
List<String> canonicalizedSecondResultSet = secondResultSet.stream().map(canonicalizationRule)
Expand Down
7 changes: 7 additions & 0 deletions src/sqlancer/IgnoreMeException.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,11 @@ public class IgnoreMeException extends RuntimeException {

private static final long serialVersionUID = 1L;

public IgnoreMeException() {
super();
}

public IgnoreMeException(String message) {
super(message);
}
}
2 changes: 2 additions & 0 deletions src/sqlancer/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import sqlancer.common.query.Query;
import sqlancer.common.query.SQLancerResultSet;
import sqlancer.databend.DatabendProvider;
import sqlancer.datafusion.DataFusionProvider;
import sqlancer.doris.DorisProvider;
import sqlancer.duckdb.DuckDBProvider;
import sqlancer.h2.H2Provider;
Expand Down Expand Up @@ -734,6 +735,7 @@ private static void checkForIssue799(List<DatabaseProvider<?, ?, ?>> providers)
providers.add(new CnosDBProvider());
providers.add(new CockroachDBProvider());
providers.add(new DatabendProvider());
providers.add(new DataFusionProvider());
providers.add(new DorisProvider());
providers.add(new DuckDBProvider());
providers.add(new H2Provider());
Expand Down
2 changes: 1 addition & 1 deletion src/sqlancer/common/query/SQLancerResultSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

public class SQLancerResultSet implements Closeable {

ResultSet rs;
public ResultSet rs;
private Runnable runnableEpilogue;

public SQLancerResultSet(ResultSet rs) {
Expand Down
29 changes: 19 additions & 10 deletions src/sqlancer/datafusion/DataFusionErrors.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,34 +44,43 @@ public static void registerExpectedExecutionErrors(ExpectedErrors errors) {
errors.add("There is only support Literal types for field at idx:");
errors.add("nth_value not supported for n:");
errors.add("Invalid argument error: Nested comparison: List(");
errors.add("This feature is not implemented: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal");
errors.add(
"This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal");

/*
* Known bugs
*/
errors.add("to type Int"); // https://github.com/apache/datafusion/issues/11249
errors.add("bitwise"); // https://github.com/apache/datafusion/issues/11260
errors.add("Sort expressions cannot be empty for streaming merge."); // https://github.com/apache/datafusion/issues/11561
errors.add("compute_utf8_flag_op_scalar failed to cast literal value NULL for operation"); // https://github.com/apache/datafusion/issues/11623
errors.add("Schema error: No field named "); // https://github.com/apache/datafusion/issues/12006
errors.add("Internal error: PhysicalExpr Column references column"); // https://github.com/apache/datafusion/issues/12012
errors.add("APPROX_"); // https://github.com/apache/datafusion/issues/12058
errors.add("External error: task"); // https://github.com/apache/datafusion/issues/12057
errors.add("NTH_VALUE"); // https://github.com/apache/datafusion/issues/12073
errors.add("SUBSTR"); // https://github.com/apache/datafusion/issues/12129
errors.add("NATURAL JOIN"); // https://github.com/apache/datafusion/issues/14015

/*
* False positives
*/
errors.add("Cannot cast string"); // ifnull() is passed two non-compattable type and caused execution error
errors.add("Physical plan does not support logical expression AggregateFunction"); // False positive: when aggr
// is generated in where
// clause
// False positive: when aggr is generated in where clause
errors.add("Physical plan does not support logical expression AggregateFunction");
errors.add("Unsupported ArrowType Utf8View"); // Maybe bug in arrow flight
// jdbc driver

/*
* Not critical, investigate in the future
*/
errors.add("does not match with the projection expression");
errors.add("invalid operator for nested");
errors.add("Arrow error: Cast error: Can't cast value");
errors.add("Nth value indices are 1 based");
/*
* Example query that triggers this error: create table t1(v1 int, v2 bool); select v1, sum(1) over (partition
* by v1 order by v2 range between 0 preceding and 0 following) from t1;
*
* Current error message: Arrow error: Invalid argument error: Invalid arithmetic operation: Boolean - Boolean
*
* TODO: The error message could be more meaningful to indicate that RANGE frame is not supported for boolean
* ORDER BY columns
*/
errors.add("Invalid arithmetic operation");
}
}
17 changes: 11 additions & 6 deletions src/sqlancer/datafusion/DataFusionOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import sqlancer.datafusion.test.DataFusionNoCrashAggregate;
import sqlancer.datafusion.test.DataFusionNoCrashWindow;
import sqlancer.datafusion.test.DataFusionNoRECOracle;
import sqlancer.datafusion.test.DataFusionPQS;
import sqlancer.datafusion.test.DataFusionQueryPartitioningAggrTester;
import sqlancer.datafusion.test.DataFusionQueryPartitioningHavingTester;
import sqlancer.datafusion.test.DataFusionQueryPartitioningWhereTester;
Expand All @@ -26,13 +27,11 @@ public class DataFusionOptions implements DBMSSpecificOptions<DataFusionOracleFa

@Override
public List<DataFusionOracleFactory> getTestOracleFactory() {
return Arrays.asList(
// DataFusionOracleFactory.NO_CRASH_WINDOW,
// DataFusionOracleFactory.NO_CRASH_AGGREGATE,
DataFusionOracleFactory.NOREC, DataFusionOracleFactory.QUERY_PARTITIONING_WHERE
return Arrays.asList(DataFusionOracleFactory.PQS, DataFusionOracleFactory.NO_CRASH_WINDOW,
DataFusionOracleFactory.NO_CRASH_AGGREGATE, DataFusionOracleFactory.NOREC,
DataFusionOracleFactory.QUERY_PARTITIONING_WHERE);
// DataFusionOracleFactory.QUERY_PARTITIONING_AGGREGATE
// ,DataFusionOracleFactory.QUERY_PARTITIONING_HAVING
);
// DataFusionOracleFactory.QUERY_PARTITIONING_HAVING);
}

public enum DataFusionOracleFactory implements OracleFactory<DataFusionGlobalState> {
Expand All @@ -42,6 +41,12 @@ public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalStat
return new DataFusionNoRECOracle(globalState);
}
},
PQS {
@Override
public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalState) throws SQLException {
return new DataFusionPQS(globalState);
}
},
QUERY_PARTITIONING_WHERE {
@Override
public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalState) throws SQLException {
Expand Down
56 changes: 47 additions & 9 deletions src/sqlancer/datafusion/DataFusionProvider.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package sqlancer.datafusion;

import static sqlancer.datafusion.DataFusionUtil.DataFusionLogger.DataFusionLogType.DML;
import static sqlancer.datafusion.DataFusionUtil.dfAssert;
import static sqlancer.datafusion.DataFusionUtil.displayTables;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.stream.Collectors;

Expand All @@ -34,29 +34,52 @@ public DataFusionProvider() {
super(DataFusionGlobalState.class, DataFusionOptions.class);
}

// Basic tables generated are DataFusion memory tables (named t1, t2, ...)
// Equivalent table can be backed by different physical implementation
// which will be named like t1_stringview, t2_parquet, etc.
//
// e.g. t1 and t1_stringview are logically equivalent table, but backed by
// different physical representation
//
// This helps to do more metamorphic testing on tables, for example
// `select * from t1` and `select * from t1_stringview` should give same
// result
//
// Supported physical implementation for tables:
// 1. Memory table (t1)
// 2. Memory table use StringView for TEXT columns (t1_stringview)
// Note: It's possible only convert random TEXT columns to StringView
@Override
public void generateDatabase(DataFusionGlobalState globalState) throws Exception {
int tableCount = Randomly.fromOptions(1, 2, 3, 4, 5, 6, 7);
// Create base tables
// ============================

int tableCount = Randomly.fromOptions(1, 2, 3, 4);
for (int i = 0; i < tableCount; i++) {
SQLQueryAdapter queryCreateRandomTable = new DataFusionTableGenerator().getQuery(globalState);
SQLQueryAdapter queryCreateRandomTable = new DataFusionTableGenerator().getCreateStmt(globalState);
queryCreateRandomTable.execute(globalState);
globalState.updateSchema();
globalState.dfLogger.appendToLog(DML, queryCreateRandomTable.toString() + "\n");
globalState.dfLogger.appendToLog(DataFusionLogger.DataFusionLogType.DML,
queryCreateRandomTable.toString() + "\n");
}

// Now only `INSERT` DML is supported
// If more DMLs are added later, should use`StatementExecutor` instead
// (see DuckDB's implementation for reference)

// Generating rows in base tables (t1, t2, ... not include t1_stringview, etc.)
// ============================

globalState.updateSchema();
List<DataFusionTable> allTables = globalState.getSchema().getDatabaseTables();
List<String> allTablesName = allTables.stream().map(t -> t.getName()).collect(Collectors.toList());
if (allTablesName.isEmpty()) {
List<DataFusionTable> allBaseTables = globalState.getSchema().getDatabaseTables();
List<String> allBaseTablesName = allBaseTables.stream().map(DataFusionTable::getName)
.collect(Collectors.toList());
if (allBaseTablesName.isEmpty()) {
dfAssert(false, "Generate Database failed.");
}

// Randomly insert some data into existing tables
for (DataFusionTable table : allTables) {
for (DataFusionTable table : allBaseTables) {
int nInsertQuery = globalState.getRandomly().getInteger(0, globalState.getOptions().getMaxNumberInserts());

for (int i = 0; i < nInsertQuery; i++) {
Expand All @@ -69,9 +92,24 @@ public void generateDatabase(DataFusionGlobalState globalState) throws Exception
}

insertQuery.execute(globalState);
globalState.dfLogger.appendToLog(DML, insertQuery.toString() + "\n");
globalState.dfLogger.appendToLog(DataFusionLogger.DataFusionLogType.DML, insertQuery.toString() + "\n");
}
}

// Construct mutated tables like t1_stringview, etc.
// ============================
for (DataFusionTable table : allBaseTables) {
Optional<SQLQueryAdapter> queryCreateStringViewTable = new DataFusionTableGenerator()
.createStringViewTable(globalState, table);
if (queryCreateStringViewTable.isPresent()) {
queryCreateStringViewTable.get().execute(globalState);
globalState.dfLogger.appendToLog(DataFusionLogger.DataFusionLogType.DML,
queryCreateStringViewTable.get().toString() + "\n");
}
}
globalState.updateSchema();
List<DataFusionTable> allTables = globalState.getSchema().getDatabaseTables();
List<String> allTablesName = allTables.stream().map(DataFusionTable::getName).collect(Collectors.toList());

// TODO(datafusion) add `DataFUsionLogType.STATE` for this whole db state log
if (globalState.getDbmsSpecificOptions().showDebugInfo) {
Expand Down
91 changes: 90 additions & 1 deletion src/sqlancer/datafusion/DataFusionSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import sqlancer.Randomly;
Expand All @@ -32,6 +34,9 @@ public DataFusionSchema(List<DataFusionTable> databaseTables) {

// update existing tables in DB by query again
// (like `show tables;`)
//
// This function also setup table<->column reference pointers
// and equivalent tables(see `DataFusionTable.equivalentTables)
public static DataFusionSchema fromConnection(SQLConnection con, String databaseName) throws SQLException {
List<DataFusionTable> databaseTables = new ArrayList<>();
List<String> tableNames = getTableNames(con);
Expand All @@ -47,6 +52,24 @@ public static DataFusionSchema fromConnection(SQLConnection con, String database
databaseTables.add(t);
}

// Setup equivalent tables
// For example, now we have t1, t1_csv, t1_parquet, t2_csv, t2_parquet
// t1's equivalent tables: t1, t1_csv, t1_parquet
// t2_csv's equivalent tables: t2_csv, t2_parquet
// ...
//
// It can be assumed that:
// base table names are like t1, t2, ...
// equivalent tables are like t1_csv, t1_parquet, ...
for (DataFusionTable t : databaseTables) {
String baseTableName = t.getName().split("_")[0];
String patternString = "^" + baseTableName + "(_.*)?$"; // t1 or t1_*
Pattern pattern = Pattern.compile(patternString);

t.equivalentTables = databaseTables.stream().filter(table -> pattern.matcher(table.getName()).matches())
.map(DataFusionTable::getName).collect(Collectors.toList());
}

return new DataFusionSchema(databaseTables);
}

Expand Down Expand Up @@ -120,8 +143,10 @@ public static DataFusionDataType parseFromDataFusionCatalog(String typeString) {
return DataFusionDataType.BOOLEAN;
case "Utf8":
return DataFusionDataType.STRING;
case "Utf8View":
return DataFusionDataType.STRING;
default:
dfAssert(false, "Unreachable. All branches should be eovered");
dfAssert(false, "Uncovered branch typeString: " + typeString);
}

dfAssert(false, "Unreachable. All branches should be eovered");
Expand Down Expand Up @@ -169,25 +194,89 @@ public Node<DataFusionExpression> getRandomConstant(DataFusionGlobalState state)
public static class DataFusionColumn extends AbstractTableColumn<DataFusionTable, DataFusionDataType> {

private final boolean isNullable;
public Optional<String> alias;

public DataFusionColumn(String name, DataFusionDataType columnType, boolean isNullable) {
super(name, null, columnType);
this.isNullable = isNullable;
this.alias = Optional.empty();
}

public boolean isNullable() {
return isNullable;
}

public String getOrignalName() {
return getTable().getName() + "." + getName();
}

@Override
public String getFullQualifiedName() {
if (getTable() == null) {
return getName();
} else {
if (alias.isPresent()) {
return alias.get();
} else {
return getTable().getName() + "." + getName();
}
}
}
}

public static class DataFusionTable
extends AbstractRelationalTable<DataFusionColumn, TableIndex, DataFusionGlobalState> {
// There might exist multiple logically equivalent tables with
// different physical format.
// e.g. t1_csv, t1_parquet, ...
//
// When generating random query, it's possible to randomly pick one
// of them for stronger randomization.
public List<String> equivalentTables;

// Pick a random equivalent table name
// This can be used when generating differential queries
public Optional<String> currentEquivalentTableName;

// For example in query `select * from t1 as tt1, t1 as tt2`
// `tt1` is the alias for the first occurance of `t1`
public Optional<String> alias;

public DataFusionTable(String tableName, List<DataFusionColumn> columns, boolean isView) {
super(tableName, columns, Collections.emptyList(), isView);
}

public String getNotAliasedName() {
if (currentEquivalentTableName != null && currentEquivalentTableName.isPresent()) {
// In case setup is not done yet
return currentEquivalentTableName.get();
} else {
return super.getName();
}
}

// TODO(datafusion) Now implementation is hacky, should send a patch
// to core to support this
@Override
public String getName() {
// Before setup equivalent tables, we use the original table name
// Setup happens in `fromConnection()`
if (equivalentTables == null || currentEquivalentTableName == null) {
return super.getName();
}

if (alias.isPresent()) {
return alias.get();
} else {
return currentEquivalentTableName.get();
}
}

public void pickAnotherEquivalentTableName() {
dfAssert(!equivalentTables.isEmpty(), "equivalentTables should not be empty");
currentEquivalentTableName = Optional.of(Randomly.fromList(equivalentTables));
}

public static List<DataFusionColumn> getAllColumns(List<DataFusionTable> tables) {
return tables.stream().map(AbstractTable::getColumns).flatMap(List::stream).collect(Collectors.toList());
}
Expand Down
Loading
Loading