@@ -37,23 +37,61 @@ cd fluss-quickstart-paimon

 ```yaml
 services:
+  # begin Hadoop cluster
+  namenode:
+    image: apache/hadoop:3.3.6
+    hostname: namenode
+    user: root
+    command: ["hdfs", "namenode"]
+    ports:
+      - 9870:9870
+      - 8020:8020
+    environment:
+      ENSURE_NAMENODE_DIR: "/tmp/hadoop/dfs/name"
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    healthcheck:
+      test: ["CMD-SHELL", "hdfs dfs -test -d / && exit 0 || exit 1"]
+      interval: 15s
+      timeout: 10s
+      retries: 20
+
+  datanode:
+    image: apache/hadoop:3.3.6
+    user: root
+    command: ["hdfs", "datanode"]
+    environment:
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    depends_on:
+      - namenode
+  # end
   # begin Fluss cluster
   coordinator-server:
     image: apache/fluss:$FLUSS_DOCKER_VERSION$
     command: coordinatorServer
     depends_on:
-      - zookeeper
+      namenode:
+        condition: service_healthy
+      zookeeper:
+        condition: service_started
     environment:
       - |
         FLUSS_PROPERTIES=
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://coordinator-server:9123
-        remote.data.dir: /tmp/fluss/remote-data
+        remote.data.dir: hdfs://namenode:8020/fluss-data
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
-    volumes:
-      - shared-tmpfs:/tmp/paimon
+        datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
   tablet-server:
     image: apache/fluss:$FLUSS_DOCKER_VERSION$
     command: tabletServer
@@ -65,13 +103,11 @@ services:
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://tablet-server:9123
         data.dir: /tmp/fluss/data
-        remote.data.dir: /tmp/fluss/remote-data
-        kv.snapshot.interval: 0s
+        remote.data.dir: hdfs://namenode:8020/fluss-data
+        kv.snapshot.interval: 30s
         datalake.format: paimon
         datalake.paimon.metastore: filesystem
-        datalake.paimon.warehouse: /tmp/paimon
-    volumes:
-      - shared-tmpfs:/tmp/paimon
+        datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
   zookeeper:
     restart: always
     image: zookeeper:3.9.2
@@ -86,8 +122,6 @@ services:
       - |
         FLINK_PROPERTIES=
         jobmanager.rpc.address: jobmanager
-    volumes:
-      - shared-tmpfs:/tmp/paimon
   taskmanager:
     image: apache/fluss-quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
     depends_on:
@@ -100,16 +134,7 @@ services:
         taskmanager.numberOfTaskSlots: 10
         taskmanager.memory.process.size: 2048m
         taskmanager.memory.framework.off-heap.size: 256m
-    volumes:
-      - shared-tmpfs:/tmp/paimon
   # end
-
-volumes:
-  shared-tmpfs:
-    driver: local
-    driver_opts:
-      type: "tmpfs"
-      device: "tmpfs"
 ```

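+Because the Fluss services now wait on the NameNode healthcheck, the first startup can take noticeably longer than the previous tmpfs-based setup. As a quick sanity check (a sketch, assuming you started the stack with `docker compose up -d` from this directory), you can confirm that HDFS is up and the DataNode has registered:
+
+```shell
+# Show the namenode container status, including its health state.
+docker compose ps namenode
+
+# Summarize the HDFS cluster; it should report one live datanode.
+docker compose exec namenode hdfs dfsadmin -report
+```
+
+You can also browse the NameNode web UI at http://localhost:9870, which the compose file publishes via the `9870:9870` port mapping.
+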
 The Docker Compose environment consists of the following containers:
@@ -183,22 +208,60 @@ services:
   zookeeper:
     restart: always
     image: zookeeper:3.9.2
+
+  namenode:
+    image: apache/hadoop:3.3.6
+    hostname: namenode
+    user: root
+    command: ["hdfs", "namenode"]
+    ports:
+      - 9870:9870
+      - 8020:8020
+    environment:
+      ENSURE_NAMENODE_DIR: "/tmp/hadoop/dfs/name"
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    healthcheck:
+      test: ["CMD-SHELL", "hdfs dfs -test -d / && exit 0 || exit 1"]
+      interval: 15s
+      timeout: 10s
+      retries: 20
+
+  datanode:
+    image: apache/hadoop:3.3.6
+    user: root
+    command: ["hdfs", "datanode"]
+    environment:
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    depends_on:
+      - namenode

   coordinator-server:
     image: apache/fluss:$FLUSS_DOCKER_VERSION$
     depends_on:
-      - zookeeper
+      namenode:
+        condition: service_healthy
+      zookeeper:
+        condition: service_started
     environment:
       - |
         FLUSS_PROPERTIES=
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://coordinator-server:9123
-        remote.data.dir: /tmp/fluss/remote-data
+        remote.data.dir: hdfs://namenode:8020/fluss-data
         datalake.format: iceberg
         datalake.iceberg.type: hadoop
-        datalake.iceberg.warehouse: /tmp/iceberg
+        datalake.iceberg.warehouse: hdfs://namenode:8020/fluss-lake
     volumes:
-      - shared-tmpfs:/tmp/iceberg
       - ./lib:/tmp/lib
     entrypoint: ["sh", "-c", "cp -v /tmp/lib/*.jar /opt/fluss/plugins/iceberg/ && exec /docker-entrypoint.sh coordinatorServer"]

@@ -213,13 +276,11 @@ services:
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://tablet-server:9123
         data.dir: /tmp/fluss/data
-        remote.data.dir: /tmp/fluss/remote-data
-        kv.snapshot.interval: 0s
+        remote.data.dir: hdfs://namenode:8020/fluss-data
+        kv.snapshot.interval: 30s
         datalake.format: iceberg
         datalake.iceberg.type: hadoop
-        datalake.iceberg.warehouse: /tmp/iceberg
-    volumes:
-      - shared-tmpfs:/tmp/iceberg
+        datalake.iceberg.warehouse: hdfs://namenode:8020/fluss-lake

   jobmanager:
     image: apache/fluss-quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
@@ -230,8 +291,6 @@ services:
       - |
         FLINK_PROPERTIES=
         jobmanager.rpc.address: jobmanager
-    volumes:
-      - shared-tmpfs:/tmp/iceberg

   taskmanager:
     image: apache/fluss-quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
@@ -245,15 +304,6 @@ services:
         taskmanager.numberOfTaskSlots: 10
         taskmanager.memory.process.size: 2048m
         taskmanager.memory.framework.off-heap.size: 256m
-    volumes:
-      - shared-tmpfs:/tmp/iceberg
-
-volumes:
-  shared-tmpfs:
-    driver: local
-    driver_opts:
-      type: "tmpfs"
-      device: "tmpfs"
 ```

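+The coordinator-server entrypoint copies the Iceberg jars from `./lib` into the Fluss plugins directory before starting. As a quick sanity check (a sketch, assuming you started the stack with `docker compose up -d` from this directory), you can confirm the jars were copied and that HDFS is up:
+
+```shell
+# List the Iceberg plugin jars copied in by the entrypoint.
+docker compose exec coordinator-server ls /opt/fluss/plugins/iceberg/
+
+# Summarize the HDFS cluster; it should report one live datanode.
+docker compose exec namenode hdfs dfsadmin -report
+```
+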
 The Docker Compose environment consists of the following containers:
@@ -636,34 +686,6 @@ The result looks like:
 ```
 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real-time.

-Finally, you can use the following command to view the files stored in Paimon:
-```shell
-docker compose exec taskmanager tree /tmp/paimon/fluss.db
-```
-
-**Sample Output:**
-```shell
-/tmp/paimon/fluss.db
-└── datalake_enriched_orders
-    ├── bucket-0
-    │   ├── changelog-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-0.orc
-    │   └── data-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-1.orc
-    ├── manifest
-    │   ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-0
-    │   ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-1
-    │   ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-0
-    │   ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-1
-    │   └── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-2
-    ├── schema
-    │   └── schema-0
-    └── snapshot
-        ├── EARLIEST
-        ├── LATEST
-        └── snapshot-1
-```
-
-The files adhere to Paimon's standard format, enabling seamless querying with other engines such as [Spark](https://paimon.apache.org/docs/1.3/spark/quick-start/) and [Trino](https://paimon.apache.org/docs/1.3/ecosystem/trino/).
-
 </TabItem>

 <TabItem value="iceberg" label="Iceberg">
@@ -730,22 +752,89 @@ SELECT sum(total_price) as sum_price FROM datalake_enriched_orders;

 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real-time.

-Finally, you can use the following command to view the files stored in Iceberg:
+</TabItem>
+</Tabs>
+
+### Remote Storage
+
+You can use the following command to view the Fluss KV snapshots stored in Fluss remote storage:
+```shell
+docker compose exec namenode hdfs dfs -ls -R /fluss-data/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
+```
+
+**Sample Output:**
+```shell
+hdfs://namenode:8020/fluss-data
+└── kv
+    └── fluss
+        ├── enriched_orders-3
+        │   └── 0
+        │       ├── shared
+        │       │   ├── 71fca534-ecca-489b-a19a-bd0538c9f9e9
+        │       │   ├── b06ef3a3-2873-470e-961f-da25582136a1
+        │       │   └── b93bad5c-00fb-4e62-8217-71b010621479
+        │       └── snap-2
+        │           ├── _METADATA
+        │           ├── 08d39726-f847-4401-8f31-4e905f2ba3f6
+        │           ├── b6a7bc2c-b5c3-4eeb-a523-b2b6fff159f3
+        │           └── e6278555-d71f-431f-954e-71bf066dd29f
+        ├── fluss_customer-1
+        ... # Remaining entries omitted for brevity
+```
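+
+The `snap-N` directories appear because `kv.snapshot.interval` is set to `30s`, so a listing taken right after startup may still be empty. To see how much space the snapshots occupy (a sketch using standard HDFS shell commands):
+
+```shell
+# Per-table disk usage of the KV snapshot data, in human-readable units.
+docker compose exec namenode hdfs dfs -du -h /fluss-data/kv/fluss
+```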
+
+### Lake Storage
+
+<Tabs groupId="lake-tabs">
+<TabItem value="paimon" label="Paimon" default>
+
+Finally, you can use the following command to view the files stored in the Paimon warehouse on HDFS:
 ```shell
-docker compose exec taskmanager tree /tmp/iceberg/fluss
+docker compose exec namenode hdfs dfs -ls -R /fluss-lake/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
 ```

 **Sample Output:**
 ```shell
-/tmp/iceberg/fluss
-└── datalake_enriched_orders
-    ├── data
-    │   └── 00000-0-abc123.parquet
-    └── metadata
-        ├── snap-1234567890123456789-1-abc123.avro
-        └── v1.metadata.json
-```
-The files adhere to Iceberg's standard format, enabling seamless querying with other engines such as [Spark](https://iceberg.apache.org/docs/latest/spark-queries/) and [Trino](https://trino.io/docs/current/connector/iceberg.html).
+hdfs://namenode:8020/fluss-lake
+├── default.db
+└── fluss.db
+    └── datalake_enriched_orders
+        ├── bucket-0
+        │   └── data-02acf76d-c4cc-4bc1-9292-e64a77dfcc72-0.parquet
+        ├── manifest
+        │   ├── manifest-df5b6833-7e92-4ec9-a196-51d6fd60b1d1-0
+        │   ├── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-0
+        │   └── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-1
+        ├── schema
+        │   └── schema-0
+        └── snapshot
+            ├── LATEST
+            └── snapshot-1
+```
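+
+The layout follows Paimon's standard table format. For example, `snapshot/LATEST` is a hint file pointing at the newest snapshot; a quick way to inspect it (a sketch, assuming the table path from the listing above):
+
+```shell
+# Print the latest Paimon snapshot hint for the table.
+docker compose exec namenode hdfs dfs -cat /fluss-lake/fluss.db/datalake_enriched_orders/snapshot/LATEST
+```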
+
+</TabItem>
+
+<TabItem value="iceberg" label="Iceberg">
+
+Finally, you can use the following command to view the files stored in the Iceberg warehouse on HDFS:
+```shell
+docker compose exec namenode hdfs dfs -ls -R /fluss-lake/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
+```
+
+**Sample Output:**
+```shell
+hdfs://namenode:8020/fluss-lake
+└── fluss
+    └── datalake_enriched_orders
+        ├── data
+        │   └── __bucket=0
+        │       └── 00000-0-3ff95845-47af-456f-83e0-8411576cfffe-00001.parquet
+        └── metadata
+            ├── 528ae521-d683-4c5e-8dd7-779a83dd9c6f-m0.avro
+            ├── snap-3496049107217731071-1-528ae521-d683-4c5e-8dd7-779a83dd9c6f.avro
+            ├── v1.metadata.json
+            ├── v2.metadata.json
+            └── version-hint.text
+```
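+
+The layout follows Iceberg's standard Hadoop-catalog format, where `version-hint.text` holds the number of the current metadata file. A quick way to inspect it (a sketch, assuming the table path from the listing above):
+
+```shell
+# Print the current Iceberg metadata version for the table.
+docker compose exec namenode hdfs dfs -cat /fluss-lake/fluss/datalake_enriched_orders/metadata/version-hint.text
+```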

 </TabItem>
 </Tabs>