Commit 2657626

[Feature][Paimon] Customize the hadoop user (#8888)

1 parent 5c4d42c commit 2657626

File tree

7 files changed: +97 -0 lines changed


Diff for: docs/en/connector-v2/sink/Paimon.md (+43)

@@ -246,6 +246,49 @@ sink {
}
```

+### Single table(Specify hadoop HA config with hadoop_user_name)
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "STREAMING"
+  checkpoint.interval = 5000
+}
+
+source {
+  Mysql-CDC {
+    base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+    username = "root"
+    password = "******"
+    table-names = ["seatunnel.role"]
+  }
+}
+
+transform {
+}
+
+sink {
+  Paimon {
+    catalog_name="seatunnel_test"
+    warehouse="hdfs:///tmp/seatunnel/paimon/hadoop-sink/"
+    database="seatunnel"
+    table="role"
+    paimon.hadoop.conf = {
+      hadoop_user_name = "hdfs"
+      fs.defaultFS = "hdfs://nameservice1"
+      dfs.nameservices = "nameservice1"
+      dfs.ha.namenodes.nameservice1 = "nn1,nn2"
+      dfs.namenode.rpc-address.nameservice1.nn1 = "hadoop03:8020"
+      dfs.namenode.rpc-address.nameservice1.nn2 = "hadoop04:8020"
+      dfs.client.failover.proxy.provider.nameservice1 = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+      dfs.client.use.datanode.hostname = "true"
+      security.kerberos.login.principal = "your-kerberos-principal"
+      security.kerberos.login.keytab = "your-kerberos-keytab-path"
+    }
+  }
+}
+```
+
### Single table(Hive catalog)

```hocon

Diff for: docs/en/connector-v2/source/Paimon.md (+1)

@@ -152,6 +152,7 @@ source {
    table="st_test"
    query = "select * from st_test where pk_id is not null and pk_id < 3"
    paimon.hadoop.conf = {
+      hadoop_user_name = "hdfs"
      fs.defaultFS = "hdfs://nameservice1"
      dfs.nameservices = "nameservice1"
      dfs.ha.namenodes.nameservice1 = "nn1,nn2"

Diff for: docs/zh/connector-v2/sink/Paimon.md (+43)

@@ -244,6 +244,49 @@ sink {
}
```

+### 单表(指定hadoop HA配置和指定hadoop用户名)
+
+```hocon
+env {
+  parallelism = 1
+  job.mode = "STREAMING"
+  checkpoint.interval = 5000
+}
+
+source {
+  Mysql-CDC {
+    base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+    username = "root"
+    password = "******"
+    table-names = ["seatunnel.role"]
+  }
+}
+
+transform {
+}
+
+sink {
+  Paimon {
+    catalog_name="seatunnel_test"
+    warehouse="hdfs:///tmp/seatunnel/paimon/hadoop-sink/"
+    database="seatunnel"
+    table="role"
+    paimon.hadoop.conf = {
+      hadoop_user_name = "hdfs"
+      fs.defaultFS = "hdfs://nameservice1"
+      dfs.nameservices = "nameservice1"
+      dfs.ha.namenodes.nameservice1 = "nn1,nn2"
+      dfs.namenode.rpc-address.nameservice1.nn1 = "hadoop03:8020"
+      dfs.namenode.rpc-address.nameservice1.nn2 = "hadoop04:8020"
+      dfs.client.failover.proxy.provider.nameservice1 = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
+      dfs.client.use.datanode.hostname = "true"
+      security.kerberos.login.principal = "your-kerberos-principal"
+      security.kerberos.login.keytab = "your-kerberos-keytab-path"
+    }
+  }
+}
+```
+
### 单表(使用Hive catalog)

```hocon

Diff for: docs/zh/connector-v2/source/Paimon.md (+1)

@@ -154,6 +154,7 @@ source {
    table="st_test"
    query = "select * from st_test where pk_id is not null and pk_id < 3"
    paimon.hadoop.conf = {
+      hadoop_user_name = "hdfs"
      fs.defaultFS = "hdfs://nameservice1"
      dfs.nameservices = "nameservice1"
      dfs.ha.namenodes.nameservice1 = "nn1,nn2"

Diff for: seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalogLoader.java (+7)

@@ -25,6 +25,7 @@

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.catalog.CatalogFactory;
@@ -50,6 +51,8 @@ public class PaimonCatalogLoader implements Serializable {

    private static final String HDFS_IMPL_KEY = "fs.hdfs.impl";

+    private static final String HADOOP_USER_NAME = "hadoop_user_name";
+
    private String warehouse;
    private PaimonCatalogEnum catalogType;
    private String catalogUri;
@@ -72,6 +75,10 @@ public Catalog loadCatalog() {
        if (warehouse.startsWith(HDFS_PREFIX)) {
            checkConfiguration(paimonHadoopConfiguration, HDFS_DEF_FS_NAME);
            paimonHadoopConfiguration.set(HDFS_IMPL_KEY, HDFS_IMPL);
+            String username = paimonHadoopConfiguration.get(HADOOP_USER_NAME);
+            if (StringUtils.isNotBlank(username)) {
+                UserGroupInformation.setLoginUser(UserGroupInformation.createRemoteUser(username));
+            }
        } else if (warehouse.startsWith(S3A_PREFIX)) {
            optionsMap.putAll(paimonHadoopConfiguration.getPropsWithPrefix(StringUtils.EMPTY));
        }
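
For context on the hunk above: when the warehouse is on HDFS, `loadCatalog()` now reads `hadoop_user_name` from the merged `paimon.hadoop.conf` options and, if it is non-blank, replaces the JVM's Hadoop login user before the Paimon catalog is created. Below is a minimal standalone sketch of that mechanism, assuming Hadoop's simple (non-Kerberos) authentication; the class name, filesystem URI, and path are placeholders, not part of this commit.

```java
// Hedged sketch (not SeaTunnel source): illustrates what createRemoteUser/setLoginUser does.
// Under Hadoop's "simple" authentication, the login user determines which username the HDFS
// client sends over RPC, so files created afterwards are owned by that user rather than the
// OS user running the process. The URI and path below are placeholders.
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

public class HadoopUserNameSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://nameservice1"); // placeholder HA nameservice

        // Same pattern as the new branch in PaimonCatalogLoader#loadCatalog.
        UserGroupInformation.setLoginUser(UserGroupInformation.createRemoteUser("hdfs"));

        try (FileSystem fs = FileSystem.get(conf)) {
            // Created as user "hdfs", mirroring what a Paimon warehouse directory would need.
            fs.mkdirs(new Path("/tmp/seatunnel/paimon/hadoop-sink/"));
        }
    }
}
```

`UserGroupInformation.setLoginUser` is static and process-wide, so the override applies to every HDFS call made afterwards in that JVM; the `isNotBlank` guard in the hunk above keeps the default login user whenever `hadoop_user_name` is not configured.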

Diff for: seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/fake_sink_paimon_truncate_with_hdfs_case1.conf (+1)

@@ -68,6 +68,7 @@ sink {
    database = "seatunnel_namespace11"
    table = "st_test"
    paimon.hadoop.conf = {
+      hadoop_user_name = "hdfs"
      fs.defaultFS = "hdfs://nameservice1"
      dfs.nameservices = "nameservice1"
      dfs.ha.namenodes.nameservice1 = "nn1,nn2"

Diff for: seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/fake_sink_paimon_truncate_with_hdfs_case2.conf (+1)

@@ -53,6 +53,7 @@ sink {
    table = "st_test"
    data_save_mode=DROP_DATA
    paimon.hadoop.conf = {
+      hadoop_user_name = "hdfs"
      fs.defaultFS = "hdfs://nameservice1"
      dfs.nameservices = "nameservice1"
      dfs.ha.namenodes.nameservice1 = "nn1,nn2"

0 commit comments
