Skip to content

Commit 321c9d1

Browse files
authored
Add support for PQS test oracle (#33)
* StringView support * Support PQS test oracle * rm Cargo.lock
1 parent 45a9a96 commit 321c9d1

20 files changed

+813
-81
lines changed

pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@
376376
<dependency>
377377
<groupId>org.apache.arrow</groupId>
378378
<artifactId>flight-sql-jdbc-driver</artifactId>
379-
<version>16.1.0</version>
379+
<version>17.0.0</version>
380380
</dependency>
381381
</dependencies>
382382
<reporting>

src/sqlancer/ComparatorHelper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ public static void assumeResultSetsAreEqual(List<String> resultSet, List<String>
131131
public static void assumeResultSetsAreEqual(List<String> resultSet, List<String> secondResultSet,
132132
String originalQueryString, List<String> combinedString, SQLGlobalState<?, ?> state,
133133
UnaryOperator<String> canonicalizationRule) {
134-
// Overloaded version of assumeResultSetsAreEqual that takes a canonicalization function which is applied to
134+
// Overloaded version of assumeResultSetsAreEqual that takes a canonicalization
135+
// function which is applied to
135136
// both result sets before their comparison.
136137
List<String> canonicalizedResultSet = resultSet.stream().map(canonicalizationRule).collect(Collectors.toList());
137138
List<String> canonicalizedSecondResultSet = secondResultSet.stream().map(canonicalizationRule)

src/sqlancer/IgnoreMeException.java

+7
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,11 @@ public class IgnoreMeException extends RuntimeException {
44

55
private static final long serialVersionUID = 1L;
66

7+
public IgnoreMeException() {
8+
super();
9+
}
10+
11+
public IgnoreMeException(String message) {
12+
super(message);
13+
}
714
}

src/sqlancer/Main.java

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import sqlancer.common.query.Query;
3232
import sqlancer.common.query.SQLancerResultSet;
3333
import sqlancer.databend.DatabendProvider;
34+
import sqlancer.datafusion.DataFusionProvider;
3435
import sqlancer.doris.DorisProvider;
3536
import sqlancer.duckdb.DuckDBProvider;
3637
import sqlancer.h2.H2Provider;
@@ -734,6 +735,7 @@ private static void checkForIssue799(List<DatabaseProvider<?, ?, ?>> providers)
734735
providers.add(new CnosDBProvider());
735736
providers.add(new CockroachDBProvider());
736737
providers.add(new DatabendProvider());
738+
providers.add(new DataFusionProvider());
737739
providers.add(new DorisProvider());
738740
providers.add(new DuckDBProvider());
739741
providers.add(new H2Provider());

src/sqlancer/common/query/SQLancerResultSet.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
public class SQLancerResultSet implements Closeable {
88

9-
ResultSet rs;
9+
public ResultSet rs;
1010
private Runnable runnableEpilogue;
1111

1212
public SQLancerResultSet(ResultSet rs) {

src/sqlancer/datafusion/DataFusionErrors.java

+19-10
Original file line numberDiff line numberDiff line change
@@ -44,34 +44,43 @@ public static void registerExpectedExecutionErrors(ExpectedErrors errors) {
4444
errors.add("There is only support Literal types for field at idx:");
4545
errors.add("nth_value not supported for n:");
4646
errors.add("Invalid argument error: Nested comparison: List(");
47+
errors.add("This feature is not implemented: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal");
48+
errors.add(
49+
"This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal");
4750

4851
/*
4952
* Known bugs
5053
*/
51-
errors.add("to type Int"); // https://github.com/apache/datafusion/issues/11249
5254
errors.add("bitwise"); // https://github.com/apache/datafusion/issues/11260
5355
errors.add("Sort expressions cannot be empty for streaming merge."); // https://github.com/apache/datafusion/issues/11561
54-
errors.add("compute_utf8_flag_op_scalar failed to cast literal value NULL for operation"); // https://github.com/apache/datafusion/issues/11623
5556
errors.add("Schema error: No field named "); // https://github.com/apache/datafusion/issues/12006
56-
errors.add("Internal error: PhysicalExpr Column references column"); // https://github.com/apache/datafusion/issues/12012
57-
errors.add("APPROX_"); // https://github.com/apache/datafusion/issues/12058
58-
errors.add("External error: task"); // https://github.com/apache/datafusion/issues/12057
59-
errors.add("NTH_VALUE"); // https://github.com/apache/datafusion/issues/12073
60-
errors.add("SUBSTR"); // https://github.com/apache/datafusion/issues/12129
57+
errors.add("NATURAL JOIN"); // https://github.com/apache/datafusion/issues/14015
6158

6259
/*
6360
* False positives
6461
*/
6562
errors.add("Cannot cast string"); // ifnull() was passed two incompatible types, which caused an execution error
66-
errors.add("Physical plan does not support logical expression AggregateFunction"); // False positive: when aggr
67-
// is generated in where
68-
// clause
63+
// False positive: when aggr is generated in where clause
64+
errors.add("Physical plan does not support logical expression AggregateFunction");
65+
errors.add("Unsupported ArrowType Utf8View"); // Maybe bug in arrow flight
66+
// jdbc driver
6967

7068
/*
7169
* Not critical, investigate in the future
7270
*/
7371
errors.add("does not match with the projection expression");
7472
errors.add("invalid operator for nested");
7573
errors.add("Arrow error: Cast error: Can't cast value");
74+
errors.add("Nth value indices are 1 based");
75+
/*
76+
* Example query that triggers this error: create table t1(v1 int, v2 bool); select v1, sum(1) over (partition
77+
* by v1 order by v2 range between 0 preceding and 0 following) from t1;
78+
*
79+
* Current error message: Arrow error: Invalid argument error: Invalid arithmetic operation: Boolean - Boolean
80+
*
81+
* TODO: The error message could be more meaningful to indicate that RANGE frame is not supported for boolean
82+
* ORDER BY columns
83+
*/
84+
errors.add("Invalid arithmetic operation");
7685
}
7786
}

src/sqlancer/datafusion/DataFusionOptions.java

+11-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import sqlancer.datafusion.test.DataFusionNoCrashAggregate;
1616
import sqlancer.datafusion.test.DataFusionNoCrashWindow;
1717
import sqlancer.datafusion.test.DataFusionNoRECOracle;
18+
import sqlancer.datafusion.test.DataFusionPQS;
1819
import sqlancer.datafusion.test.DataFusionQueryPartitioningAggrTester;
1920
import sqlancer.datafusion.test.DataFusionQueryPartitioningHavingTester;
2021
import sqlancer.datafusion.test.DataFusionQueryPartitioningWhereTester;
@@ -26,13 +27,11 @@ public class DataFusionOptions implements DBMSSpecificOptions<DataFusionOracleFa
2627

2728
@Override
2829
public List<DataFusionOracleFactory> getTestOracleFactory() {
29-
return Arrays.asList(
30-
// DataFusionOracleFactory.NO_CRASH_WINDOW,
31-
// DataFusionOracleFactory.NO_CRASH_AGGREGATE,
32-
DataFusionOracleFactory.NOREC, DataFusionOracleFactory.QUERY_PARTITIONING_WHERE
30+
return Arrays.asList(DataFusionOracleFactory.PQS, DataFusionOracleFactory.NO_CRASH_WINDOW,
31+
DataFusionOracleFactory.NO_CRASH_AGGREGATE, DataFusionOracleFactory.NOREC,
32+
DataFusionOracleFactory.QUERY_PARTITIONING_WHERE);
3333
// DataFusionOracleFactory.QUERY_PARTITIONING_AGGREGATE
34-
// ,DataFusionOracleFactory.QUERY_PARTITIONING_HAVING
35-
);
34+
// DataFusionOracleFactory.QUERY_PARTITIONING_HAVING);
3635
}
3736

3837
public enum DataFusionOracleFactory implements OracleFactory<DataFusionGlobalState> {
@@ -42,6 +41,12 @@ public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalStat
4241
return new DataFusionNoRECOracle(globalState);
4342
}
4443
},
44+
PQS {
45+
@Override
46+
public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalState) throws SQLException {
47+
return new DataFusionPQS(globalState);
48+
}
49+
},
4550
QUERY_PARTITIONING_WHERE {
4651
@Override
4752
public TestOracle<DataFusionGlobalState> create(DataFusionGlobalState globalState) throws SQLException {

src/sqlancer/datafusion/DataFusionProvider.java

+47-9
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
package sqlancer.datafusion;
22

3-
import static sqlancer.datafusion.DataFusionUtil.DataFusionLogger.DataFusionLogType.DML;
43
import static sqlancer.datafusion.DataFusionUtil.dfAssert;
54
import static sqlancer.datafusion.DataFusionUtil.displayTables;
65

76
import java.sql.Connection;
87
import java.sql.DriverManager;
98
import java.sql.SQLException;
109
import java.util.List;
10+
import java.util.Optional;
1111
import java.util.Properties;
1212
import java.util.stream.Collectors;
1313

@@ -34,29 +34,52 @@ public DataFusionProvider() {
3434
super(DataFusionGlobalState.class, DataFusionOptions.class);
3535
}
3636

37+
// Basic tables generated are DataFusion memory tables (named t1, t2, ...)
38+
// Equivalent table can be backed by different physical implementation
39+
// which will be named like t1_stringview, t2_parquet, etc.
40+
//
41+
// e.g. t1 and t1_stringview are logically equivalent table, but backed by
42+
// different physical representation
43+
//
44+
// This helps to do more metamorphic testing on tables, for example
45+
// `select * from t1` and `select * from t1_stringview` should give same
46+
// result
47+
//
48+
// Supported physical implementation for tables:
49+
// 1. Memory table (t1)
50+
// 2. Memory table use StringView for TEXT columns (t1_stringview)
51+
// Note: It's possible only convert random TEXT columns to StringView
3752
@Override
3853
public void generateDatabase(DataFusionGlobalState globalState) throws Exception {
39-
int tableCount = Randomly.fromOptions(1, 2, 3, 4, 5, 6, 7);
54+
// Create base tables
55+
// ============================
56+
57+
int tableCount = Randomly.fromOptions(1, 2, 3, 4);
4058
for (int i = 0; i < tableCount; i++) {
41-
SQLQueryAdapter queryCreateRandomTable = new DataFusionTableGenerator().getQuery(globalState);
59+
SQLQueryAdapter queryCreateRandomTable = new DataFusionTableGenerator().getCreateStmt(globalState);
4260
queryCreateRandomTable.execute(globalState);
4361
globalState.updateSchema();
44-
globalState.dfLogger.appendToLog(DML, queryCreateRandomTable.toString() + "\n");
62+
globalState.dfLogger.appendToLog(DataFusionLogger.DataFusionLogType.DML,
63+
queryCreateRandomTable.toString() + "\n");
4564
}
4665

4766
// Now only `INSERT` DML is supported
4867
// If more DMLs are added later, we should use `StatementExecutor` instead
4968
// (see DuckDB's implementation for reference)
5069

70+
// Generate rows in base tables (t1, t2, ...; not including t1_stringview, etc.)
71+
// ============================
72+
5173
globalState.updateSchema();
52-
List<DataFusionTable> allTables = globalState.getSchema().getDatabaseTables();
53-
List<String> allTablesName = allTables.stream().map(t -> t.getName()).collect(Collectors.toList());
54-
if (allTablesName.isEmpty()) {
74+
List<DataFusionTable> allBaseTables = globalState.getSchema().getDatabaseTables();
75+
List<String> allBaseTablesName = allBaseTables.stream().map(DataFusionTable::getName)
76+
.collect(Collectors.toList());
77+
if (allBaseTablesName.isEmpty()) {
5578
dfAssert(false, "Generate Database failed.");
5679
}
5780

5881
// Randomly insert some data into existing tables
59-
for (DataFusionTable table : allTables) {
82+
for (DataFusionTable table : allBaseTables) {
6083
int nInsertQuery = globalState.getRandomly().getInteger(0, globalState.getOptions().getMaxNumberInserts());
6184

6285
for (int i = 0; i < nInsertQuery; i++) {
@@ -69,9 +92,24 @@ public void generateDatabase(DataFusionGlobalState globalState) throws Exception
6992
}
7093

7194
insertQuery.execute(globalState);
72-
globalState.dfLogger.appendToLog(DML, insertQuery.toString() + "\n");
95+
globalState.dfLogger.appendToLog(DataFusionLogger.DataFusionLogType.DML, insertQuery.toString() + "\n");
96+
}
97+
}
98+
99+
// Construct mutated tables like t1_stringview, etc.
100+
// ============================
101+
for (DataFusionTable table : allBaseTables) {
102+
Optional<SQLQueryAdapter> queryCreateStringViewTable = new DataFusionTableGenerator()
103+
.createStringViewTable(globalState, table);
104+
if (queryCreateStringViewTable.isPresent()) {
105+
queryCreateStringViewTable.get().execute(globalState);
106+
globalState.dfLogger.appendToLog(DataFusionLogger.DataFusionLogType.DML,
107+
queryCreateStringViewTable.get().toString() + "\n");
73108
}
74109
}
110+
globalState.updateSchema();
111+
List<DataFusionTable> allTables = globalState.getSchema().getDatabaseTables();
112+
List<String> allTablesName = allTables.stream().map(DataFusionTable::getName).collect(Collectors.toList());
75113

76114
// TODO(datafusion) add `DataFusionLogType.STATE` for this whole db state log
77115
if (globalState.getDbmsSpecificOptions().showDebugInfo) {

src/sqlancer/datafusion/DataFusionSchema.java

+90-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import java.util.Arrays;
1010
import java.util.Collections;
1111
import java.util.List;
12+
import java.util.Optional;
13+
import java.util.regex.Pattern;
1214
import java.util.stream.Collectors;
1315

1416
import sqlancer.Randomly;
@@ -32,6 +34,9 @@ public DataFusionSchema(List<DataFusionTable> databaseTables) {
3234

3335
// update existing tables in DB by query again
3436
// (like `show tables;`)
37+
//
38+
// This function also sets up table<->column reference pointers
39+
// and equivalent tables (see `DataFusionTable.equivalentTables`)
3540
public static DataFusionSchema fromConnection(SQLConnection con, String databaseName) throws SQLException {
3641
List<DataFusionTable> databaseTables = new ArrayList<>();
3742
List<String> tableNames = getTableNames(con);
@@ -47,6 +52,24 @@ public static DataFusionSchema fromConnection(SQLConnection con, String database
4752
databaseTables.add(t);
4853
}
4954

55+
// Setup equivalent tables
56+
// For example, now we have t1, t1_csv, t1_parquet, t2_csv, t2_parquet
57+
// t1's equivalent tables: t1, t1_csv, t1_parquet
58+
// t2_csv's equivalent tables: t2_csv, t2_parquet
59+
// ...
60+
//
61+
// It can be assumed that:
62+
// base table names are like t1, t2, ...
63+
// equivalent tables are like t1_csv, t1_parquet, ...
64+
for (DataFusionTable t : databaseTables) {
65+
String baseTableName = t.getName().split("_")[0];
66+
String patternString = "^" + baseTableName + "(_.*)?$"; // t1 or t1_*
67+
Pattern pattern = Pattern.compile(patternString);
68+
69+
t.equivalentTables = databaseTables.stream().filter(table -> pattern.matcher(table.getName()).matches())
70+
.map(DataFusionTable::getName).collect(Collectors.toList());
71+
}
72+
5073
return new DataFusionSchema(databaseTables);
5174
}
5275

@@ -120,8 +143,10 @@ public static DataFusionDataType parseFromDataFusionCatalog(String typeString) {
120143
return DataFusionDataType.BOOLEAN;
121144
case "Utf8":
122145
return DataFusionDataType.STRING;
146+
case "Utf8View":
147+
return DataFusionDataType.STRING;
123148
default:
124-
dfAssert(false, "Unreachable. All branches should be eovered");
149+
dfAssert(false, "Uncovered branch typeString: " + typeString);
125150
}
126151

127152
dfAssert(false, "Unreachable. All branches should be eovered");
@@ -169,25 +194,89 @@ public Node<DataFusionExpression> getRandomConstant(DataFusionGlobalState state)
169194
public static class DataFusionColumn extends AbstractTableColumn<DataFusionTable, DataFusionDataType> {
170195

171196
private final boolean isNullable;
197+
public Optional<String> alias;
172198

173199
public DataFusionColumn(String name, DataFusionDataType columnType, boolean isNullable) {
174200
super(name, null, columnType);
175201
this.isNullable = isNullable;
202+
this.alias = Optional.empty();
176203
}
177204

178205
public boolean isNullable() {
179206
return isNullable;
180207
}
181208

209+
public String getOrignalName() {
210+
return getTable().getName() + "." + getName();
211+
}
212+
213+
@Override
214+
public String getFullQualifiedName() {
215+
if (getTable() == null) {
216+
return getName();
217+
} else {
218+
if (alias.isPresent()) {
219+
return alias.get();
220+
} else {
221+
return getTable().getName() + "." + getName();
222+
}
223+
}
224+
}
182225
}
183226

184227
public static class DataFusionTable
185228
extends AbstractRelationalTable<DataFusionColumn, TableIndex, DataFusionGlobalState> {
229+
// There might exist multiple logically equivalent tables with
230+
// different physical format.
231+
// e.g. t1_csv, t1_parquet, ...
232+
//
233+
// When generating random query, it's possible to randomly pick one
234+
// of them for stronger randomization.
235+
public List<String> equivalentTables;
236+
237+
// Pick a random equivalent table name
238+
// This can be used when generating differential queries
239+
public Optional<String> currentEquivalentTableName;
240+
241+
// For example in query `select * from t1 as tt1, t1 as tt2`
242+
// `tt1` is the alias for the first occurance of `t1`
243+
public Optional<String> alias;
186244

187245
public DataFusionTable(String tableName, List<DataFusionColumn> columns, boolean isView) {
188246
super(tableName, columns, Collections.emptyList(), isView);
189247
}
190248

249+
public String getNotAliasedName() {
250+
if (currentEquivalentTableName != null && currentEquivalentTableName.isPresent()) {
251+
// In case setup is not done yet
252+
return currentEquivalentTableName.get();
253+
} else {
254+
return super.getName();
255+
}
256+
}
257+
258+
// TODO(datafusion) The current implementation is hacky; we should send a patch
259+
// to core to support this
260+
@Override
261+
public String getName() {
262+
// Before setup equivalent tables, we use the original table name
263+
// Setup happens in `fromConnection()`
264+
if (equivalentTables == null || currentEquivalentTableName == null) {
265+
return super.getName();
266+
}
267+
268+
if (alias.isPresent()) {
269+
return alias.get();
270+
} else {
271+
return currentEquivalentTableName.get();
272+
}
273+
}
274+
275+
public void pickAnotherEquivalentTableName() {
276+
dfAssert(!equivalentTables.isEmpty(), "equivalentTables should not be empty");
277+
currentEquivalentTableName = Optional.of(Randomly.fromList(equivalentTables));
278+
}
279+
191280
public static List<DataFusionColumn> getAllColumns(List<DataFusionTable> tables) {
192281
return tables.stream().map(AbstractTable::getColumns).flatMap(List::stream).collect(Collectors.toList());
193282
}

0 commit comments

Comments
 (0)