9
9
import java .util .Arrays ;
10
10
import java .util .Collections ;
11
11
import java .util .List ;
12
+ import java .util .Optional ;
13
+ import java .util .regex .Pattern ;
12
14
import java .util .stream .Collectors ;
13
15
14
16
import sqlancer .Randomly ;
@@ -32,6 +34,9 @@ public DataFusionSchema(List<DataFusionTable> databaseTables) {
32
34
33
35
// update existing tables in DB by query again
34
36
// (like `show tables;`)
37
+ //
38
+ // This function also sets up table<->column reference pointers
39
+ // and equivalent tables (see `DataFusionTable.equivalentTables`)
35
40
public static DataFusionSchema fromConnection (SQLConnection con , String databaseName ) throws SQLException {
36
41
List <DataFusionTable > databaseTables = new ArrayList <>();
37
42
List <String > tableNames = getTableNames (con );
@@ -47,6 +52,24 @@ public static DataFusionSchema fromConnection(SQLConnection con, String database
47
52
databaseTables .add (t );
48
53
}
49
54
55
+ // Setup equivalent tables
56
+ // For example, now we have t1, t1_csv, t1_parquet, t2_csv, t2_parquet
57
+ // t1's equivalent tables: t1, t1_csv, t1_parquet
58
+ // t2_csv's equivalent tables: t2_csv, t2_parquet
59
+ // ...
60
+ //
61
+ // It can be assumed that:
62
+ // base table names are like t1, t2, ...
63
+ // equivalent tables are like t1_csv, t1_parquet, ...
64
+ for (DataFusionTable t : databaseTables ) {
65
+ String baseTableName = t .getName ().split ("_" )[0 ];
66
+ String patternString = "^" + baseTableName + "(_.*)?$" ; // t1 or t1_*
67
+ Pattern pattern = Pattern .compile (patternString );
68
+
69
+ t .equivalentTables = databaseTables .stream ().filter (table -> pattern .matcher (table .getName ()).matches ())
70
+ .map (DataFusionTable ::getName ).collect (Collectors .toList ());
71
+ }
72
+
50
73
return new DataFusionSchema (databaseTables );
51
74
}
52
75
@@ -120,8 +143,10 @@ public static DataFusionDataType parseFromDataFusionCatalog(String typeString) {
120
143
return DataFusionDataType .BOOLEAN ;
121
144
case "Utf8" :
122
145
return DataFusionDataType .STRING ;
146
+ case "Utf8View" :
147
+ return DataFusionDataType .STRING ;
123
148
default :
124
- dfAssert (false , "Unreachable. All branches should be eovered" );
149
+ dfAssert (false , "Uncovered branch typeString: " + typeString );
125
150
}
126
151
127
152
dfAssert (false , "Unreachable. All branches should be eovered" );
@@ -169,25 +194,89 @@ public Node<DataFusionExpression> getRandomConstant(DataFusionGlobalState state)
169
194
public static class DataFusionColumn extends AbstractTableColumn <DataFusionTable , DataFusionDataType > {
170
195
171
196
private final boolean isNullable ;
197
+ public Optional <String > alias ;
172
198
173
199
public DataFusionColumn (String name , DataFusionDataType columnType , boolean isNullable ) {
174
200
super (name , null , columnType );
175
201
this .isNullable = isNullable ;
202
+ this .alias = Optional .empty ();
176
203
}
177
204
178
205
public boolean isNullable () {
179
206
return isNullable ;
180
207
}
181
208
209
+ public String getOrignalName () {
210
+ return getTable ().getName () + "." + getName ();
211
+ }
212
+
213
+ @ Override
214
+ public String getFullQualifiedName () {
215
+ if (getTable () == null ) {
216
+ return getName ();
217
+ } else {
218
+ if (alias .isPresent ()) {
219
+ return alias .get ();
220
+ } else {
221
+ return getTable ().getName () + "." + getName ();
222
+ }
223
+ }
224
+ }
182
225
}
183
226
184
227
public static class DataFusionTable
185
228
extends AbstractRelationalTable <DataFusionColumn , TableIndex , DataFusionGlobalState > {
229
+ // There might exist multiple logically equivalent tables with
230
+ // different physical format.
231
+ // e.g. t1_csv, t1_parquet, ...
232
+ //
233
+ // When generating random query, it's possible to randomly pick one
234
+ // of them for stronger randomization.
235
+ public List <String > equivalentTables ;
236
+
237
+ // Pick a random equivalent table name
238
+ // This can be used when generating differential queries
239
+ public Optional <String > currentEquivalentTableName ;
240
+
241
+ // For example in query `select * from t1 as tt1, t1 as tt2`
242
+ // `tt1` is the alias for the first occurrence of `t1`
243
+ public Optional <String > alias ;
186
244
187
245
/**
 * Creates a table with the given name and columns, passing an empty list as
 * the third superclass argument.
 *
 * The alias starts out as "no alias" so that getName(), which dereferences
 * it, never sees a null field. equivalentTables and
 * currentEquivalentTableName are intentionally left unset: getName() treats
 * null there as "setup has not happened yet".
 *
 * @param tableName name of the table
 * @param columns   columns belonging to the table
 * @param isView    whether the table is a view
 */
public DataFusionTable(String tableName, List<DataFusionColumn> columns, boolean isView) {
    super(tableName, columns, Collections.emptyList(), isView);
    this.alias = Optional.empty();
}
190
248
249
+ public String getNotAliasedName () {
250
+ if (currentEquivalentTableName != null && currentEquivalentTableName .isPresent ()) {
251
+ // In case setup is not done yet
252
+ return currentEquivalentTableName .get ();
253
+ } else {
254
+ return super .getName ();
255
+ }
256
+ }
257
+
258
+ // TODO(datafusion) Now implementation is hacky, should send a patch
259
+ // to core to support this
260
+ @ Override
261
+ public String getName () {
262
+ // Before setup equivalent tables, we use the original table name
263
+ // Setup happens in `fromConnection()`
264
+ if (equivalentTables == null || currentEquivalentTableName == null ) {
265
+ return super .getName ();
266
+ }
267
+
268
+ if (alias .isPresent ()) {
269
+ return alias .get ();
270
+ } else {
271
+ return currentEquivalentTableName .get ();
272
+ }
273
+ }
274
+
275
+ public void pickAnotherEquivalentTableName () {
276
+ dfAssert (!equivalentTables .isEmpty (), "equivalentTables should not be empty" );
277
+ currentEquivalentTableName = Optional .of (Randomly .fromList (equivalentTables ));
278
+ }
279
+
191
280
public static List <DataFusionColumn > getAllColumns (List <DataFusionTable > tables ) {
192
281
return tables .stream ().map (AbstractTable ::getColumns ).flatMap (List ::stream ).collect (Collectors .toList ());
193
282
}
0 commit comments