Commit 5c6a060

add paimon hadoop remote storage
1 parent e9da6d6 commit 5c6a060

File tree

1 file changed: +97 −41 lines

website/docs/quickstart/flink.md (97 additions & 41 deletions)
@@ -37,12 +37,55 @@ cd fluss-quickstart-flink
 
 ```yaml
 services:
+  zookeeper:
+    restart: always
+    image: zookeeper:3.9.2
+  namenode:
+    image: apache/hadoop:3.3.6
+    hostname: namenode
+    user: root
+    command: [ "hdfs", "namenode" ]
+    ports:
+      - 9870:9870
+      - 8020:8020
+    environment:
+      ENSURE_NAMENODE_DIR: "/tmp/hadoop/dfs/name"
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    healthcheck:
+      test: [ "CMD-SHELL", "hdfs dfs -ls /" ]
+      interval: 10s
+      timeout: 10s
+      retries: 20
+
+  datanode:
+    image: apache/hadoop:3.3.6
+    user: root
+    command: [ "hdfs", "datanode" ]
+    environment:
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    depends_on:
+      - namenode
+
   #begin Fluss cluster
   coordinator-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
-    command: coordinatorServer
     depends_on:
-      - zookeeper
+      namenode:
+        condition: service_healthy
+      zookeeper:
+        condition: service_started
+      datanode:
+        condition: service_started
     environment:
       - |
         FLUSS_PROPERTIES=
@@ -51,9 +94,10 @@ services:
         remote.data.dir: /tmp/fluss/remote-data
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
+        datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
     volumes:
-      - shared-tmpfs:/tmp/paimon
+      - ./lib:/tmp/lib
+    entrypoint: [ "sh", "-c", "cp -v /tmp/lib/*.jar /opt/fluss/plugins/iceberg/ && exec /docker-entrypoint.sh coordinatorServer" ]
   tablet-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
     command: tabletServer
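The custom entrypoint above copies any jars mounted at `/tmp/lib` into the server's plugin directory before handing off to the stock entrypoint, so filesystem support can be added without rebuilding the image. A minimal sketch of preparing that host directory (the source path is a placeholder; use whichever filesystem plugin jars your Fluss version requires):

```shell
# Create the host-side directory that the compose file mounts at /tmp/lib
mkdir -p lib

# Placeholder path: copy the Hadoop/HDFS filesystem plugin jars shipped
# with your Fluss distribution; exact jar names depend on the version
cp /path/to/fluss-hadoop-plugin-jars/*.jar lib/
```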
@@ -66,15 +110,11 @@ services:
         bind.listeners: FLUSS://tablet-server:9123
         data.dir: /tmp/fluss/data
         remote.data.dir: /tmp/fluss/remote-data
-        kv.snapshot.interval: 0s
+        kv.snapshot.interval: 30s
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
-    volumes:
-      - shared-tmpfs:/tmp/paimon
-  zookeeper:
-    restart: always
-    image: zookeeper:3.9.2
+        datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
+
   #end
   #begin Flink cluster
   jobmanager:
@@ -86,8 +126,6 @@ services:
       - |
         FLINK_PROPERTIES=
         jobmanager.rpc.address: jobmanager
-    volumes:
-      - shared-tmpfs:/tmp/paimon
   taskmanager:
     image: fluss/quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
     depends_on:
@@ -100,16 +138,7 @@ services:
         taskmanager.numberOfTaskSlots: 10
         taskmanager.memory.process.size: 2048m
         taskmanager.memory.framework.off-heap.size: 256m
-    volumes:
-      - shared-tmpfs:/tmp/paimon
   #end
-
-volumes:
-  shared-tmpfs:
-    driver: local
-    driver_opts:
-      type: "tmpfs"
-      device: "tmpfs"
 ```
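With the shared tmpfs volume gone, HDFS is the only storage the services share, so the cluster is only usable once the NameNode reports healthy. A quick check, sketched under the assumption that the file above is saved as `docker-compose.yml` in the working directory:

```shell
# Start all services; coordinator-server waits on the namenode healthcheck
docker compose up -d

# Confirm HDFS is up and a datanode has registered
docker compose exec namenode hdfs dfsadmin -report

# The /fluss-data and /fluss-lake paths are created lazily, so an empty
# root listing is normal until Fluss has written snapshots or lake data
docker compose exec namenode hdfs dfs -ls /
```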
 
 The Docker Compose environment consists of the following containers:
@@ -346,6 +375,33 @@ The following SQL query should return an empty result.
 SELECT * FROM fluss_customer WHERE `cust_key` = 1;
 ```
 
+## Fluss Remote Storage
+
+Finally, you can use the following command to view the Fluss kv snapshots stored in Fluss remote storage:
+```shell
+docker compose exec namenode hdfs dfs -ls -R /fluss-data/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
+```
+
+**Sample Output:**
+```shell
+hdfs://namenode:8020/fluss-data
+└── kv
+    └── fluss
+        ├── enriched_orders-3
+        │   └── 0
+        │       ├── shared
+        │       │   ├── 71fca534-ecca-489b-a19a-bd0538c9f9e9
+        │       │   ├── b06ef3a3-2873-470e-961f-da25582136a1
+        │       │   └── b93bad5c-00fb-4e62-8217-71b010621479
+        │       └── snap-2
+        │           ├── _METADATA
+        │           ├── 08d39726-f847-4401-8f31-4e905f2ba3f6
+        │           ├── b6a7bc2c-b5c3-4eeb-a523-b2b6fff159f3
+        │           └── e6278555-d71f-431f-954e-71bf066dd29f
+        ├── fluss_customer-1
+        ... # Remaining entries omitted for brevity
+```
+
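Note that everything after the first pipe runs on the host, not in the container: `awk '{print $8}'` keeps only the path column of the `-ls -R` output, `grep -v '^$'` drops blank lines, and `tree --fromfile .` renders the resulting path list as a tree. `tree --fromfile` requires tree 1.8 or newer installed locally; without it, the raw listing works as a fallback:

```shell
# Fallback when tree is not installed on the host: the raw recursive listing
docker compose exec namenode hdfs dfs -ls -R /fluss-data/
```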
 ## Integrate with Paimon
 ### Start the Lakehouse Tiering Service
 To integrate with [Apache Paimon](https://paimon.apache.org/), you need to start the `Lakehouse Tiering Service`.
@@ -473,30 +529,30 @@ The result looks like:
 ```
 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real-time.
 
-Finally, you can use the following command to view the files stored in Paimon:
+### Storage
+
+Finally, you can use the following command to view the files stored in the Paimon warehouse on HDFS:
 ```shell
-docker compose exec taskmanager tree /tmp/paimon/fluss.db
+docker compose exec namenode hdfs dfs -ls -R /fluss-lake/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
 ```
 
 **Sample Output:**
 ```shell
-/tmp/paimon/fluss.db
-└── datalake_enriched_orders
-    ├── bucket-0
-    │   ├── changelog-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-0.orc
-    │   └── data-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-1.orc
-    ├── manifest
-    │   ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-0
-    │   ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-1
-    │   ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-0
-    │   ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-1
-    │   └── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-2
-    ├── schema
-    │   └── schema-0
-    └── snapshot
-        ├── EARLIEST
-        ├── LATEST
-        └── snapshot-1
+hdfs://namenode:8020/fluss-lake
+├── default.db
+└── fluss.db
+    └── datalake_enriched_orders
+        ├── bucket-0
+        │   └── data-02acf76d-c4cc-4bc1-9292-e64a77dfcc72-0.parquet
+        ├── manifest
+        │   ├── manifest-df5b6833-7e92-4ec9-a196-51d6fd60b1d1-0
+        │   ├── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-0
+        │   └── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-1
+        ├── schema
+        │   └── schema-0
+        └── snapshot
+            ├── LATEST
+            └── snapshot-1
 ```
 The files adhere to Paimon's standard format, enabling seamless querying with other engines such as [StarRocks](https://docs.starrocks.io/docs/data_source/catalog/paimon_catalog/).
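As a final sanity check, the warehouse can be inspected directly from HDFS: Paimon's `snapshot/LATEST` is a hint file holding the newest snapshot id, and the `snapshot-N` files are plain JSON metadata. A sketch using the table path from the sample output above:

```shell
# Print the id of the newest snapshot (a small hint file)
docker compose exec namenode hdfs dfs -cat /fluss-lake/fluss.db/datalake_enriched_orders/snapshot/LATEST

# Print the JSON snapshot metadata (schema id, manifest list, commit kind)
docker compose exec namenode hdfs dfs -cat /fluss-lake/fluss.db/datalake_enriched_orders/snapshot/snapshot-1
```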