Skip to content

Commit 4d8d5b7

Browse files
authored
Merge pull request #750 from gknz/feature/jdbc-sql-table-sink
Add in-database JdbcTableSinkOperator for SQL-level table writes
2 parents 796f1fa + e6901a3 commit 4d8d5b7

14 files changed

Lines changed: 802 additions & 10 deletions

File tree

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,3 @@ pom.xml.*
3535
# Scala Plugin for VSCode
3636
.metals
3737
.bloop/
38-

wayang-platforms/wayang-generic-jdbc/src/main/java/org/apache/wayang/genericjdbc/mapping/Mappings.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ public class Mappings {
3030

3131
public static final Collection<Mapping> ALL = Arrays.asList(
3232
new FilterMapping(),
33-
new ProjectionMapping()
33+
new ProjectionMapping(),
34+
new TableSinkMapping()
3435
);
3536

3637
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.wayang.genericjdbc.mapping;
20+
21+
import org.apache.wayang.basic.operators.TableSink;
22+
import org.apache.wayang.core.mapping.Mapping;
23+
import org.apache.wayang.core.mapping.OperatorPattern;
24+
import org.apache.wayang.core.mapping.PlanTransformation;
25+
import org.apache.wayang.core.mapping.ReplacementSubplanFactory;
26+
import org.apache.wayang.core.mapping.SubplanPattern;
27+
import org.apache.wayang.genericjdbc.operators.GenericJdbcTableSinkOperator;
28+
import org.apache.wayang.genericjdbc.platform.GenericJdbcPlatform;
29+
30+
import java.util.Collection;
31+
import java.util.Collections;
32+
33+
/**
34+
* Mapping from {@link TableSink} to {@link GenericJdbcTableSinkOperator}.
35+
*/
36+
public class TableSinkMapping implements Mapping {
37+
38+
@Override
39+
public Collection<PlanTransformation> getTransformations() {
40+
return Collections.singleton(new PlanTransformation(
41+
this.createSubplanPattern(),
42+
this.createReplacementSubplanFactory(),
43+
GenericJdbcPlatform.getInstance()
44+
));
45+
}
46+
47+
private SubplanPattern createSubplanPattern() {
48+
final OperatorPattern<TableSink> operatorPattern = new OperatorPattern<>(
49+
"sink", new TableSink<>(null, null, null), false
50+
);
51+
return SubplanPattern.createSingleton(operatorPattern);
52+
}
53+
54+
private ReplacementSubplanFactory createReplacementSubplanFactory() {
55+
return new ReplacementSubplanFactory.OfSingleOperators<TableSink>(
56+
(matchedOperator, epoch) -> new GenericJdbcTableSinkOperator(matchedOperator).at(epoch)
57+
);
58+
}
59+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.wayang.genericjdbc.operators;
20+
21+
import org.apache.wayang.basic.operators.TableSink;
22+
import org.apache.wayang.basic.data.Record;
23+
import org.apache.wayang.jdbc.operators.JdbcTableSinkOperator;
24+
25+
/**
26+
* GenericJdbc implementation of the {@link JdbcTableSinkOperator}.
27+
*/
28+
public class GenericJdbcTableSinkOperator extends JdbcTableSinkOperator implements GenericJdbcExecutionOperator {
29+
30+
public GenericJdbcTableSinkOperator(String tableName, String[] columnNames) {
31+
super(tableName, columnNames);
32+
}
33+
34+
public GenericJdbcTableSinkOperator(TableSink<Record> that) {
35+
super(that);
36+
}
37+
}

wayang-platforms/wayang-jdbc-template/src/main/java/org/apache/wayang/jdbc/execution/JdbcExecutor.java

Lines changed: 90 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.apache.wayang.jdbc.operators.JdbcFilterOperator;
4545
import org.apache.wayang.jdbc.operators.JdbcJoinOperator;
4646
import org.apache.wayang.jdbc.operators.JdbcProjectionOperator;
47+
import org.apache.wayang.jdbc.operators.JdbcTableSinkOperator;
4748
import org.apache.wayang.jdbc.operators.JdbcTableSource;
4849
import org.apache.wayang.jdbc.platform.JdbcPlatformTemplate;
4950
import org.apache.logging.log4j.LogManager;
@@ -56,6 +57,7 @@
5657
import java.sql.ResultSet;
5758
import java.sql.ResultSetMetaData;
5859
import java.sql.SQLException;
60+
import java.sql.Statement;
5961
import java.util.ArrayList;
6062
import java.util.Collection;
6163
import java.util.Set;
@@ -82,14 +84,96 @@ public JdbcExecutor(final JdbcPlatformTemplate platform, final Job job) {
8284

8385
@Override
8486
public void execute(final ExecutionStage stage, final OptimizationContext optimizationContext, final ExecutionState executionState) {
85-
final Tuple2<String, SqlQueryChannel.Instance> pair = JdbcExecutor.createSqlQuery(stage, optimizationContext, this);
86-
final String query = pair.field0;
87-
final SqlQueryChannel.Instance queryChannel = pair.field1;
87+
// Check if this stage ends with a sink operator
88+
final Collection<?> termTasks = stage.getTerminalTasks();
89+
assert termTasks.size() == 1 : "Invalid JDBC stage: multiple terminal tasks are not currently supported.";
90+
final ExecutionTask termTask = (ExecutionTask) termTasks.toArray()[0];
8891

89-
queryChannel.setSqlQuery(query);
92+
if (termTask.getOperator() instanceof JdbcTableSinkOperator) {
93+
// If it is a sink stage: compose and execute SQL directly within the database
94+
JdbcExecutor.executeSinkStage(stage, optimizationContext, this);
95+
} else {
96+
//If it is normal stage: compose SQL and store in channel for downstream consumption
97+
final Tuple2<String, SqlQueryChannel.Instance> pair = JdbcExecutor.createSqlQuery(stage, optimizationContext, this);
98+
final String query = pair.field0;
99+
final SqlQueryChannel.Instance queryChannel = pair.field1;
100+
queryChannel.setSqlQuery(query);
101+
executionState.register(queryChannel);
102+
}
103+
}
104+
105+
/**
106+
* Handles execution stages that end with a {@link JdbcTableSinkOperator}.
107+
* Composes a SQL query from the stage's operators and executes it directly
108+
* on the database connection, keeping all data within the database.
109+
*
110+
* @param stage the execution stage ending with a sink
111+
* @param optimizationContext provides optimization information
112+
* @param jdbcExecutor the executor with the database connection
113+
*/
114+
private static void executeSinkStage(final ExecutionStage stage,
115+
final OptimizationContext optimizationContext,
116+
final JdbcExecutor jdbcExecutor) {
117+
final Collection<?> startTasks = stage.getStartTasks();
118+
final Collection<?> termTasks = stage.getTerminalTasks();
119+
120+
assert startTasks.size() == 1 : "Invalid JDBC stage: multiple sources are not currently supported";
121+
final ExecutionTask startTask = (ExecutionTask) startTasks.toArray()[0];
122+
assert termTasks.size() == 1 : "Invalid JDBC stage: multiple terminal tasks are not currently supported.";
123+
final ExecutionTask termTask = (ExecutionTask) termTasks.toArray()[0];
124+
assert startTask.getOperator() instanceof TableSource
125+
: "Invalid JDBC stage: Start task has to be a TableSource";
126+
assert termTask.getOperator() instanceof JdbcTableSinkOperator
127+
: "Invalid JDBC stage: Terminal task has to be a JdbcTableSinkOperator";
90128

91-
// Return the tipChannelInstance.
92-
executionState.register(queryChannel);
129+
// Extract operators from the stage
130+
final JdbcTableSource tableOp = (JdbcTableSource) startTask.getOperator();
131+
final JdbcTableSinkOperator sinkOp = (JdbcTableSinkOperator) termTask.getOperator();
132+
final Collection<JdbcFilterOperator> filterTasks = new ArrayList<>(4);
133+
JdbcProjectionOperator projectionTask = null;
134+
final Collection<JdbcJoinOperator<?>> joinTasks = new ArrayList<>();
135+
136+
// Walk through intermediate operators, stopping at the sink
137+
ExecutionTask nextTask = JdbcExecutor.findJdbcExecutionOperatorTaskInStage(startTask, stage);
138+
while (nextTask != null && !(nextTask.getOperator() instanceof JdbcTableSinkOperator)) {
139+
if (nextTask.getOperator() instanceof final JdbcFilterOperator filterOperator) {
140+
filterTasks.add(filterOperator);
141+
} else if (nextTask.getOperator() instanceof JdbcProjectionOperator projectionOperator) {
142+
assert projectionTask == null;
143+
projectionTask = projectionOperator;
144+
} else if (nextTask.getOperator() instanceof JdbcJoinOperator joinOperator) {
145+
joinTasks.add(joinOperator);
146+
} else {
147+
throw new WayangException(String.format("Unsupported JDBC execution task %s", nextTask.toString()));
148+
}
149+
nextTask = JdbcExecutor.findJdbcExecutionOperatorTaskInStage(nextTask, stage);
150+
}
151+
152+
// Compose the SELECT query
153+
final StringBuilder selectQuery = createSqlString(jdbcExecutor, tableOp, filterTasks, projectionTask, joinTasks);
154+
155+
// Remove trailing semicolon from SELECT
156+
String selectSql = selectQuery.toString();
157+
if (selectSql.endsWith(";")) {
158+
selectSql = selectSql.substring(0, selectSql.length() - 1);
159+
}
160+
161+
// Get the sink's SQL clause
162+
final String sinkClause = sinkOp.createSqlClause(jdbcExecutor.connection, jdbcExecutor.functionCompiler);
163+
164+
// Execute on the database
165+
try (Statement stmt = jdbcExecutor.connection.createStatement()) {
166+
// Handle overwrite: drop existing table first
167+
if ("overwrite".equals(sinkOp.getMode())) {
168+
stmt.execute("DROP TABLE IF EXISTS " + sinkOp.getTableName());
169+
}
170+
// Execute the composed query: CREATE TABLE x AS SELECT ... or INSERT INTO x SELECT ...
171+
final String fullSql = sinkClause + " " + selectSql + sinkOp.createSqlSuffix();
172+
stmt.execute(fullSql);
173+
jdbcExecutor.logger.info("Executed SQL sink: {}", fullSql);
174+
} catch (SQLException e) {
175+
throw new WayangException("Failed to execute SQL sink on table: " + sinkOp.getTableName(), e);
176+
}
93177
}
94178

95179
/**
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.wayang.jdbc.operators;
20+
21+
import org.apache.wayang.basic.data.Record;
22+
import org.apache.wayang.basic.operators.TableSink;
23+
import org.apache.wayang.core.api.Configuration;
24+
import org.apache.wayang.core.optimizer.costs.LoadProfileEstimator;
25+
import org.apache.wayang.core.platform.ChannelDescriptor;
26+
import org.apache.wayang.jdbc.compiler.FunctionCompiler;
27+
28+
import java.sql.Connection;
29+
import java.util.Collections;
30+
import java.util.List;
31+
import java.util.Optional;
32+
33+
/**
34+
* Abstract JDBC-based implementation of {@link TableSink} that operates within
35+
* the {@link org.apache.wayang.jdbc.channels.SqlQueryChannel} ecosystem.
36+
* Instead of pulling data into Java/Spark memory and inserting via JDBC,
37+
* this operator wraps the composed SQL query in a CREATE TABLE AS SELECT
38+
* or INSERT INTO ... SELECT statement, keeping all data within the database.
39+
*/
40+
public abstract class JdbcTableSinkOperator extends TableSink<Record> implements JdbcExecutionOperator {
41+
42+
public JdbcTableSinkOperator(String tableName, String[] columnNames) {
43+
super(null, null, tableName, columnNames);
44+
}
45+
46+
public JdbcTableSinkOperator(TableSink<Record> that) {
47+
super(that);
48+
}
49+
50+
@Override
51+
public String createSqlClause(Connection connection, FunctionCompiler compiler) {
52+
String mode = this.getMode();
53+
if ("overwrite".equals(mode)) {
54+
return "CREATE TABLE " + this.getTableName() + " AS";
55+
}
56+
return "INSERT INTO " + this.getTableName();
57+
}
58+
59+
/**
60+
* Returns a SQL suffix appended after the composed SELECT query.
61+
* Default is empty, which works for most databases (PostgreSQL, SQLite, MySQL).
62+
* Subclasses can potentiallyoverride for dialect-specific syntax (e.g., HSQLDB that we used for the tests requires
63+
* parenthesized subquery form: {@code CREATE TABLE x AS (SELECT ...)}).
64+
*/
65+
public String createSqlSuffix() {
66+
return "";
67+
}
68+
69+
@Override
70+
public List<ChannelDescriptor> getSupportedInputChannels(int index) {
71+
return Collections.singletonList(this.getPlatform().getSqlQueryChannelDescriptor());
72+
}
73+
74+
@Override
75+
public List<ChannelDescriptor> getSupportedOutputChannels(int index) {
76+
throw new UnsupportedOperationException("This operator has no outputs.");
77+
}
78+
79+
@Override
80+
public String getLoadProfileEstimatorConfigurationKey() {
81+
return String.format("wayang.%s.tablesink.load", this.getPlatform().getPlatformId());
82+
}
83+
84+
@Override
85+
public Optional<LoadProfileEstimator> createLoadProfileEstimator(Configuration configuration) {
86+
return JdbcExecutionOperator.super.createLoadProfileEstimator(configuration);
87+
}
88+
}

0 commit comments

Comments
 (0)