-
Notifications
You must be signed in to change notification settings - Fork 2k
[Feature][Clickhouse] Support multi-table source read #7529
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
c6427c8
c732ca5
3cf3d2c
6a667c6
63b6d7d
75a5a19
5bb68cc
aa515fc
6ef832a
7f32a8a
5fa9fbc
7ef2baa
813f763
471a473
9b4076e
d126d28
0f67c8e
c173cbe
cd7e8a1
4a3e791
818d215
6e40d89
56ed9da
cdd1300
7e6dc65
1a6f302
bfc6826
9623a6a
78c48a0
719df6a
198ef0f
af1b2b2
6d63449
8e4b2a0
4ec9c4f
ac979ef
4c12b00
db635ef
f22f187
402fabc
a27cda1
8117937
11c3411
6c006b2
c8343b8
34be2f4
d26fe3a
ae14b4a
732d0e8
7e25dbc
250f678
4b21681
369cf4b
982a5b7
0c94f95
78d55b3
d7cf336
dd3be4b
563b453
42ccd5c
26d5d89
d6f7f11
9216ce1
c32397d
a28b009
d3c3d5e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -95,6 +95,49 @@ sink { | |
} | ||
``` | ||
|
||
### Multi Table read | ||
|
||
> Read several tables in one job by listing them in `table_list`; each entry gives a `table_path` and the `sql` used to read it. | ||
|
||
```bash | ||
|
||
|
||
env { | ||
parallelism = 1 | ||
job.mode = "BATCH" | ||
} | ||
|
||
source { | ||
Clickhouse { | ||
host = "clickhouse:8123" | ||
database = "default" | ||
username = "default" | ||
password = "" | ||
table_list = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add to |
||
{ | ||
table_path = "t1" | ||
sql = "select * from t1 where 1=1 " | ||
|
||
}, | ||
{ | ||
table_path = "t2" | ||
sql = "select * from t2" | ||
} | ||
] | ||
} | ||
} | ||
|
||
sink { | ||
Clickhouse { | ||
host = "clickhouse:8123" | ||
database = "default" | ||
table = "t3" | ||
username = "default" | ||
password = "" | ||
} | ||
} | ||
``` | ||
|
||
### Tips | ||
|
||
> 1.[SeaTunnel Deployment Document](../../start-v2/locally/deployment.md). | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.seatunnel.connectors.seatunnel.clickhouse.config; | ||
|
||
import org.apache.seatunnel.api.table.catalog.CatalogTable; | ||
|
||
import lombok.Data; | ||
|
||
import java.io.Serializable; | ||
|
||
@Data | ||
public class ClickhouseCatalogConfig implements Serializable { | ||
|
||
private String sql; | ||
private CatalogTable catalogTable; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,70 +17,193 @@ | |
|
||
package org.apache.seatunnel.connectors.seatunnel.clickhouse.source; | ||
|
||
import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; | ||
import org.apache.seatunnel.api.configuration.ReadonlyConfig; | ||
import org.apache.seatunnel.api.source.Boundedness; | ||
import org.apache.seatunnel.api.source.SeaTunnelSource; | ||
import org.apache.seatunnel.api.source.SourceReader; | ||
import org.apache.seatunnel.api.source.SourceSplitEnumerator; | ||
import org.apache.seatunnel.api.source.SupportColumnProjection; | ||
import org.apache.seatunnel.api.source.SupportParallelism; | ||
import org.apache.seatunnel.api.table.catalog.CatalogTable; | ||
import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; | ||
import org.apache.seatunnel.api.table.catalog.PhysicalColumn; | ||
import org.apache.seatunnel.api.table.catalog.TableIdentifier; | ||
import org.apache.seatunnel.api.table.catalog.TablePath; | ||
import org.apache.seatunnel.api.table.catalog.TableSchema; | ||
import org.apache.seatunnel.api.table.type.SeaTunnelDataType; | ||
import org.apache.seatunnel.api.table.type.SeaTunnelRow; | ||
import org.apache.seatunnel.api.table.type.SeaTunnelRowType; | ||
import org.apache.seatunnel.common.constants.PluginType; | ||
import org.apache.seatunnel.common.utils.ExceptionUtils; | ||
import org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseCatalogConfig; | ||
import org.apache.seatunnel.connectors.seatunnel.clickhouse.exception.ClickhouseConnectorException; | ||
import org.apache.seatunnel.connectors.seatunnel.clickhouse.state.ClickhouseSourceState; | ||
import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.ClickhouseUtil; | ||
import org.apache.seatunnel.connectors.seatunnel.clickhouse.util.TypeConvertUtil; | ||
|
||
import org.apache.commons.collections4.map.HashedMap; | ||
|
||
import com.clickhouse.client.ClickHouseClient; | ||
import com.clickhouse.client.ClickHouseException; | ||
import com.clickhouse.client.ClickHouseFormat; | ||
import com.clickhouse.client.ClickHouseNode; | ||
import com.clickhouse.client.ClickHouseResponse; | ||
|
||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.concurrent.ThreadLocalRandom; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseBaseOptions.CLICKHOUSE_CONFIG; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseBaseOptions.DATABASE; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseBaseOptions.HOST; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseBaseOptions.PASSWORD; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseBaseOptions.SERVER_TIME_ZONE; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseBaseOptions.USERNAME; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseSourceOptions.SQL; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseSourceOptions.TABLE_LIST; | ||
import static org.apache.seatunnel.connectors.seatunnel.clickhouse.config.ClickhouseSourceOptions.TABLE_PATH; | ||
|
||
public class ClickhouseSource | ||
implements SeaTunnelSource<SeaTunnelRow, ClickhouseSourceSplit, ClickhouseSourceState>, | ||
SupportParallelism, | ||
SupportColumnProjection { | ||
|
||
private List<ClickHouseNode> servers; | ||
private CatalogTable catalogTable; | ||
private String sql; | ||
private HashedMap<TablePath, ClickhouseCatalogConfig> tableClickhouseCatalogConfigMap = | ||
new HashedMap<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not use |
||
|
||
/**
 * Creates a source from already-built server nodes, a catalog table and a query, storing each
 * argument in the field of the same name.
 *
 * @param servers Clickhouse nodes kept in {@code this.servers}
 * @param catalogTable catalog table kept in {@code this.catalogTable}
 * @param sql query text kept in {@code this.sql}
 */
public ClickhouseSource(List<ClickHouseNode> servers, CatalogTable catalogTable, String sql) { | ||
this.servers = servers; | ||
this.catalogTable = catalogTable; | ||
this.sql = sql; | ||
} | ||
private static final String defaultTablePath = "default"; | ||
|
||
/** Returns the fixed plugin name of this source, {@code "Clickhouse"}. */
@Override | ||
public String getPluginName() { | ||
return "Clickhouse"; | ||
} | ||
|
||
public ClickhouseSource(ReadonlyConfig readonlyConfig) { | ||
Map<String, String> customConfig = | ||
readonlyConfig.getOptional(CLICKHOUSE_CONFIG).orElse(null); | ||
servers = | ||
ClickhouseUtil.createNodes( | ||
readonlyConfig.get(HOST), | ||
readonlyConfig.get(DATABASE), | ||
readonlyConfig.get(SERVER_TIME_ZONE), | ||
readonlyConfig.get(USERNAME), | ||
readonlyConfig.get(PASSWORD), | ||
customConfig); | ||
ClickHouseNode currentServer = | ||
servers.get(ThreadLocalRandom.current().nextInt(servers.size())); | ||
String sql = readonlyConfig.getOptional(SQL).orElse(null); | ||
|
||
if (readonlyConfig.getOptional(TABLE_LIST).isPresent()) { | ||
readonlyConfig.get(TABLE_LIST).stream() | ||
.map(ReadonlyConfig::fromMap) | ||
.forEach( | ||
conf -> { | ||
String confSql = conf.getOptional(SQL).get(); | ||
SeaTunnelRowType clickhouseRowType = | ||
getClickhouseRowType(currentServer, confSql); | ||
TablePath tablePath = | ||
TablePath.of(conf.getOptional(TABLE_PATH).orElse("")); | ||
CatalogTable catalogTable = | ||
createCatalogTable(clickhouseRowType, tablePath); | ||
|
||
ClickhouseCatalogConfig clickhouseCatalogConfig = | ||
new ClickhouseCatalogConfig(); | ||
clickhouseCatalogConfig.setSql(confSql); | ||
clickhouseCatalogConfig.setCatalogTable(catalogTable); | ||
tableClickhouseCatalogConfigMap.put( | ||
tablePath, clickhouseCatalogConfig); | ||
}); | ||
} else { | ||
SeaTunnelRowType clickhouseRowType = getClickhouseRowType(currentServer, sql); | ||
CatalogTable catalogTable = | ||
CatalogTableUtil.getCatalogTable(defaultTablePath, clickhouseRowType); | ||
|
||
ClickhouseCatalogConfig clickhouseCatalogConfig = new ClickhouseCatalogConfig(); | ||
clickhouseCatalogConfig.setCatalogTable(catalogTable); | ||
clickhouseCatalogConfig.setSql(sql); | ||
tableClickhouseCatalogConfigMap.put( | ||
TablePath.of(defaultTablePath), clickhouseCatalogConfig); | ||
} | ||
} | ||
|
||
private CatalogTable createCatalogTable(SeaTunnelRowType rowType, TablePath tablePath) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
TableSchema.Builder schemaBuilder = TableSchema.builder(); | ||
for (int i = 0; i < rowType.getTotalFields(); i++) { | ||
schemaBuilder.column( | ||
PhysicalColumn.of( | ||
rowType.getFieldName(i), rowType.getFieldType(i), 0, true, null, null)); | ||
} | ||
return CatalogTable.of( | ||
TableIdentifier.of("", tablePath), | ||
schemaBuilder.build(), | ||
Collections.emptyMap(), | ||
Collections.emptyList(), | ||
null); | ||
} | ||
|
||
public SeaTunnelRowType getClickhouseRowType(ClickHouseNode currentServer, String sql) { | ||
try (ClickHouseClient client = ClickHouseClient.newInstance(currentServer.getProtocol()); | ||
ClickHouseResponse response = | ||
client.connect(currentServer) | ||
.format(ClickHouseFormat.RowBinaryWithNamesAndTypes) | ||
.query(String.format("SELECT * FROM (%s) s LIMIT 1", sql)) | ||
.executeAndWait()) { | ||
|
||
int columnSize = response.getColumns().size(); | ||
String[] fieldNames = new String[columnSize]; | ||
SeaTunnelDataType<?>[] seaTunnelDataTypes = new SeaTunnelDataType[columnSize]; | ||
|
||
for (int i = 0; i < columnSize; i++) { | ||
fieldNames[i] = response.getColumns().get(i).getColumnName(); | ||
seaTunnelDataTypes[i] = TypeConvertUtil.convert(response.getColumns().get(i)); | ||
} | ||
|
||
return new SeaTunnelRowType(fieldNames, seaTunnelDataTypes); | ||
} catch (ClickHouseException e) { | ||
throw new ClickhouseConnectorException( | ||
SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, | ||
String.format( | ||
"PluginName: %s, PluginType: %s, Message: %s", | ||
getPluginName(), PluginType.SOURCE, ExceptionUtils.getMessage(e))); | ||
} | ||
} | ||
|
||
/** Always reports {@link Boundedness#BOUNDED}. */
@Override | ||
public Boundedness getBoundedness() { | ||
return Boundedness.BOUNDED; | ||
} | ||
|
||
@Override | ||
public List<CatalogTable> getProducedCatalogTables() { | ||
return Collections.singletonList(catalogTable); | ||
return tableClickhouseCatalogConfigMap.entrySet().stream() | ||
.map(conf -> conf.getValue().getCatalogTable()) | ||
.collect(Collectors.toList()); | ||
} | ||
|
||
@Override | ||
public SourceReader<SeaTunnelRow, ClickhouseSourceSplit> createReader( | ||
SourceReader.Context readerContext) throws Exception { | ||
return new ClickhouseSourceReader( | ||
servers, readerContext, this.catalogTable.getSeaTunnelRowType(), sql); | ||
return new ClickhouseSourceReader(servers, readerContext); | ||
} | ||
|
||
@Override | ||
public SourceSplitEnumerator<ClickhouseSourceSplit, ClickhouseSourceState> createEnumerator( | ||
SourceSplitEnumerator.Context<ClickhouseSourceSplit> enumeratorContext) | ||
throws Exception { | ||
return new ClickhouseSourceSplitEnumerator(enumeratorContext); | ||
return new ClickhouseSourceSplitEnumerator( | ||
enumeratorContext, tableClickhouseCatalogConfigMap); | ||
} | ||
|
||
@Override | ||
public SourceSplitEnumerator<ClickhouseSourceSplit, ClickhouseSourceState> restoreEnumerator( | ||
SourceSplitEnumerator.Context<ClickhouseSourceSplit> enumeratorContext, | ||
ClickhouseSourceState checkpointState) | ||
throws Exception { | ||
return new ClickhouseSourceSplitEnumerator(enumeratorContext); | ||
return new ClickhouseSourceSplitEnumerator( | ||
enumeratorContext, tableClickhouseCatalogConfigMap, checkpointState); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you update zh doc too? Thanks.