Skip to content

Commit 47b7bee

Browse files
authored
Expose configuration options on the session context (#87)
* Expose configuration options on the session context * Formatting fix
1 parent 4f56a9a commit 47b7bee

File tree

10 files changed

+862
-1
lines changed

10 files changed

+862
-1
lines changed

datafusion-java/src/main/java/org/apache/arrow/datafusion/AbstractProxy.java

+5
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ public final long getPointer() {
4747

4848
abstract void doClose(long pointer) throws Exception;
4949

50+
// Ensure native library is loaded before any proxy object is used
51+
static {
52+
JNILoader.load();
53+
}
54+
5055
@Override
5156
public final void close() throws Exception {
5257
if (closed.compareAndSet(false, true)) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package org.apache.arrow.datafusion;
2+
3+
/** Configures options related to query execution */
4+
@SuppressWarnings("UnusedReturnValue")
5+
public class ExecutionOptions {
6+
private final SessionConfig config;
7+
8+
ExecutionOptions(SessionConfig config) {
9+
this.config = config;
10+
}
11+
12+
/**
13+
* Get execution options related to reading Parquet data
14+
*
15+
* @return {@link ParquetOptions} instance for this config
16+
*/
17+
public ParquetOptions parquet() {
18+
return new ParquetOptions(config);
19+
}
20+
21+
/**
22+
* Get the batch size
23+
*
24+
* @return batch size
25+
*/
26+
public long batchSize() {
27+
return SessionConfig.getExecutionOptionsBatchSize(config.getPointer());
28+
}
29+
30+
/**
31+
* Set the size of batches to use when creating new data batches
32+
*
33+
* @param batchSize the batch size to set
34+
* @return the modified {@link ExecutionOptions} instance
35+
*/
36+
public ExecutionOptions withBatchSize(long batchSize) {
37+
SessionConfig.setExecutionOptionsBatchSize(config.getPointer(), batchSize);
38+
return this;
39+
}
40+
41+
/**
42+
* Get whether batch coalescing is enabled
43+
*
44+
* @return whether batch coalescing is enabled
45+
*/
46+
public boolean coalesceBatches() {
47+
return SessionConfig.getExecutionOptionsCoalesceBatches(config.getPointer());
48+
}
49+
50+
/**
51+
* Set whether to enable batch coalescing
52+
*
53+
* @param enabled whether to enable batch coalescing
54+
* @return the modified {@link ExecutionOptions} instance
55+
*/
56+
public ExecutionOptions withCoalesceBatches(boolean enabled) {
57+
SessionConfig.setExecutionOptionsCoalesceBatches(config.getPointer(), enabled);
58+
return this;
59+
}
60+
61+
/**
62+
* Get whether statistics collection is enabled
63+
*
64+
* @return whether statistics collection is enabled
65+
*/
66+
public boolean collectStatistics() {
67+
return SessionConfig.getExecutionOptionsCollectStatistics(config.getPointer());
68+
}
69+
70+
/**
71+
* Set whether to enable statistics collection
72+
*
73+
* @param enabled whether to enable statistics collection
74+
* @return the modified {@link ExecutionOptions} instance
75+
*/
76+
public ExecutionOptions withCollectStatistics(boolean enabled) {
77+
SessionConfig.setExecutionOptionsCollectStatistics(config.getPointer(), enabled);
78+
return this;
79+
}
80+
81+
/**
82+
* Get the target number of partitions
83+
*
84+
* @return number of partitions
85+
*/
86+
public long targetPartitions() {
87+
return SessionConfig.getExecutionOptionsTargetPartitions(config.getPointer());
88+
}
89+
90+
/**
91+
* Set the target number of partitions
92+
*
93+
* @param targetPartitions the number of partitions to set
94+
* @return the modified {@link ExecutionOptions} instance
95+
*/
96+
public ExecutionOptions withTargetPartitions(long targetPartitions) {
97+
SessionConfig.setExecutionOptionsTargetPartitions(config.getPointer(), targetPartitions);
98+
return this;
99+
}
100+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
package org.apache.arrow.datafusion;
2+
3+
import java.util.Optional;
4+
5+
/** Configures options specific to reading Parquet data */
6+
@SuppressWarnings("UnusedReturnValue")
7+
public class ParquetOptions {
8+
private final SessionConfig config;
9+
10+
ParquetOptions(SessionConfig config) {
11+
this.config = config;
12+
}
13+
14+
/**
15+
* Get whether parquet data page level metadata (Page Index) statistics are used
16+
*
17+
* @return whether using the page index is enabled
18+
*/
19+
public boolean enablePageIndex() {
20+
return SessionConfig.getParquetOptionsEnablePageIndex(config.getPointer());
21+
}
22+
23+
/**
24+
* Set whether to use parquet data page level metadata (Page Index) statistics to reduce the
25+
* number of rows decoded.
26+
*
27+
* @param enabled whether using the page index is enabled
28+
* @return the modified {@link ParquetOptions} instance
29+
*/
30+
public ParquetOptions withEnablePageIndex(boolean enabled) {
31+
SessionConfig.setParquetOptionsEnablePageIndex(config.getPointer(), enabled);
32+
return this;
33+
}
34+
35+
/**
36+
* Get whether pruning is enabled, meaning reading row groups will be skipped based on metadata
37+
*
38+
* @return whether pruning is enabled
39+
*/
40+
public boolean pruning() {
41+
return SessionConfig.getParquetOptionsPruning(config.getPointer());
42+
}
43+
44+
/**
45+
* Set whether pruning is enabled, meaning reading row groups will be skipped based on metadata
46+
*
47+
* @param enabled whether to enable pruning
48+
* @return the modified {@link ParquetOptions} instance
49+
*/
50+
public ParquetOptions withPruning(boolean enabled) {
51+
SessionConfig.setParquetOptionsPruning(config.getPointer(), enabled);
52+
return this;
53+
}
54+
55+
/**
56+
* Get whether file metadata is skipped, to avoid schema conflicts
57+
*
58+
* @return whether metadata is skipped
59+
*/
60+
public boolean skipMetadata() {
61+
return SessionConfig.getParquetOptionsSkipMetadata(config.getPointer());
62+
}
63+
64+
/**
65+
* Set whether file metadata is skipped, to avoid schema conflicts
66+
*
67+
* @param enabled whether to skip metadata
68+
* @return the modified {@link ParquetOptions} instance
69+
*/
70+
public ParquetOptions withSkipMetadata(boolean enabled) {
71+
SessionConfig.setParquetOptionsSkipMetadata(config.getPointer(), enabled);
72+
return this;
73+
}
74+
75+
/**
76+
* Get the metadata size hint
77+
*
78+
* @return metadata size hint value
79+
*/
80+
public Optional<Long> metadataSizeHint() {
81+
long sizeHint = SessionConfig.getParquetOptionsMetadataSizeHint(config.getPointer());
82+
return sizeHint < 0 ? Optional.empty() : Optional.of(sizeHint);
83+
}
84+
85+
/**
86+
* Set the metadata size hint, which is used to attempt to read the full metadata at once rather
87+
* than needing one read to get the metadata size and then a second read for the metadata itself.
88+
*
89+
* @param metadataSizeHint the metadata size hint
90+
* @return the modified {@link ParquetOptions} instance
91+
*/
92+
public ParquetOptions withMetadataSizeHint(Optional<Long> metadataSizeHint) {
93+
long value = -1L;
94+
if (metadataSizeHint.isPresent()) {
95+
value = metadataSizeHint.get();
96+
if (value < 0) {
97+
throw new RuntimeException("metadataSizeHint cannot be negative");
98+
}
99+
}
100+
SessionConfig.setParquetOptionsMetadataSizeHint(config.getPointer(), value);
101+
return this;
102+
}
103+
104+
/**
105+
* Get whether filter pushdown is enabled, so filters are applied during parquet decoding
106+
*
107+
* @return whether filter pushdown is enabled
108+
*/
109+
public boolean pushdownFilters() {
110+
return SessionConfig.getParquetOptionsPushdownFilters(config.getPointer());
111+
}
112+
113+
/**
114+
* Set whether filter pushdown is enabled, so filters are applied during parquet decoding
115+
*
116+
* @param enabled whether to pushdown filters
117+
* @return the modified {@link ParquetOptions} instance
118+
*/
119+
public ParquetOptions withPushdownFilters(boolean enabled) {
120+
SessionConfig.setParquetOptionsPushdownFilters(config.getPointer(), enabled);
121+
return this;
122+
}
123+
124+
/**
125+
* Get whether filter reordering is enabled to minimize evaluation cost
126+
*
127+
* @return whether filter reordering is enabled
128+
*/
129+
public boolean reorderFilters() {
130+
return SessionConfig.getParquetOptionsReorderFilters(config.getPointer());
131+
}
132+
133+
/**
134+
* Set whether filter reordering is enabled to minimize evaluation cost
135+
*
136+
* @param enabled whether to reorder filters
137+
* @return the modified {@link ParquetOptions} instance
138+
*/
139+
public ParquetOptions withReorderFilters(boolean enabled) {
140+
SessionConfig.setParquetOptionsReorderFilters(config.getPointer(), enabled);
141+
return this;
142+
}
143+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package org.apache.arrow.datafusion;
2+
3+
import java.util.function.Consumer;
4+
5+
/** Configuration for creating a {@link SessionContext} using {@link SessionContexts#withConfig} */
6+
public class SessionConfig extends AbstractProxy implements AutoCloseable {
7+
/** Create a new default {@link SessionConfig} */
8+
public SessionConfig() {
9+
super(create());
10+
}
11+
12+
/**
13+
* Get options related to query execution
14+
*
15+
* @return {@link ExecutionOptions} instance for this config
16+
*/
17+
public ExecutionOptions executionOptions() {
18+
return new ExecutionOptions(this);
19+
}
20+
21+
/**
22+
* Get options specific to parsing SQL queries
23+
*
24+
* @return {@link SqlParserOptions} instance for this config
25+
*/
26+
public SqlParserOptions sqlParserOptions() {
27+
return new SqlParserOptions(this);
28+
}
29+
30+
/**
31+
* Modify this session configuration and then return it, to simplify use in a try-with-resources
32+
* statement
33+
*
34+
* @param configurationCallback Callback used to update the configuration
35+
* @return This {@link SessionConfig} instance after being updated
36+
*/
37+
public SessionConfig withConfiguration(Consumer<SessionConfig> configurationCallback) {
38+
configurationCallback.accept(this);
39+
return this;
40+
}
41+
42+
@Override
43+
void doClose(long pointer) {
44+
destroy(pointer);
45+
}
46+
47+
private static native long create();
48+
49+
private static native void destroy(long pointer);
50+
51+
// ExecutionOptions native methods
52+
53+
static native long getExecutionOptionsBatchSize(long pointer);
54+
55+
static native void setExecutionOptionsBatchSize(long pointer, long batchSize);
56+
57+
static native boolean getExecutionOptionsCoalesceBatches(long pointer);
58+
59+
static native void setExecutionOptionsCoalesceBatches(long pointer, boolean enabled);
60+
61+
static native boolean getExecutionOptionsCollectStatistics(long pointer);
62+
63+
static native void setExecutionOptionsCollectStatistics(long pointer, boolean enabled);
64+
65+
static native long getExecutionOptionsTargetPartitions(long pointer);
66+
67+
static native void setExecutionOptionsTargetPartitions(long pointer, long batchSize);
68+
69+
// ParquetOptions native methods
70+
71+
static native boolean getParquetOptionsEnablePageIndex(long pointer);
72+
73+
static native void setParquetOptionsEnablePageIndex(long pointer, boolean enabled);
74+
75+
static native boolean getParquetOptionsPruning(long pointer);
76+
77+
static native void setParquetOptionsPruning(long pointer, boolean enabled);
78+
79+
static native boolean getParquetOptionsSkipMetadata(long pointer);
80+
81+
static native void setParquetOptionsSkipMetadata(long pointer, boolean enabled);
82+
83+
static native long getParquetOptionsMetadataSizeHint(long pointer);
84+
85+
static native void setParquetOptionsMetadataSizeHint(long pointer, long value);
86+
87+
static native boolean getParquetOptionsPushdownFilters(long pointer);
88+
89+
static native void setParquetOptionsPushdownFilters(long pointer, boolean enabled);
90+
91+
static native boolean getParquetOptionsReorderFilters(long pointer);
92+
93+
static native void setParquetOptionsReorderFilters(long pointer, boolean enabled);
94+
95+
// SqlParserOptions native methods
96+
97+
static native boolean getSqlParserOptionsParseFloatAsDecimal(long pointer);
98+
99+
static native void setSqlParserOptionsParseFloatAsDecimal(long pointer, boolean enabled);
100+
101+
static native boolean getSqlParserOptionsEnableIdentNormalization(long pointer);
102+
103+
static native void setSqlParserOptionsEnableIdentNormalization(long pointer, boolean enabled);
104+
105+
static native String getSqlParserOptionsDialect(long pointer);
106+
107+
static native void setSqlParserOptionsDialect(long pointer, String dialect);
108+
}

0 commit comments

Comments
 (0)