@@ -37,12 +37,55 @@ cd fluss-quickstart-flink
3737
3838``` yaml
3939services :
40+ zookeeper :
41+ restart : always
42+ image : zookeeper:3.9.2
43+ namenode :
44+ image : apache/hadoop:3.3.6
45+ hostname : namenode
46+ user : root
47+ command : [ "hdfs", "namenode" ]
48+ ports :
49+ - 9870:9870
50+ - 8020:8020
51+ environment :
52+ ENSURE_NAMENODE_DIR : " /tmp/hadoop/dfs/name"
53+ CORE-SITE.XML_fs.defaultFS : hdfs://namenode:8020
54+ CORE-SITE.XML_hadoop.tmp.dir : /hadoop/tmp
55+ HDFS-SITE.XML_dfs.namenode.rpc-address : namenode:8020
56+ HDFS-SITE.XML_dfs.replication : 1
57+ HDFS-SITE.XML_dfs.permissions.enabled : false
58+ HDFS-SITE.XML_dfs.datanode.address : datanode:9866
59+ healthcheck :
60+ test : ["CMD", "hdfs", "dfs", "-ls", "/"]
61+ interval : 10s
62+ timeout : 10s
63+ retries : 20
64+
65+ datanode :
66+ image : apache/hadoop:3.3.6
67+ user : root
68+ command : [ "hdfs", "datanode" ]
69+ environment :
70+ CORE-SITE.XML_fs.defaultFS : hdfs://namenode:8020
71+ CORE-SITE.XML_hadoop.tmp.dir : /hadoop/tmp
72+ HDFS-SITE.XML_dfs.namenode.rpc-address : namenode:8020
73+ HDFS-SITE.XML_dfs.replication : 1
74+ HDFS-SITE.XML_dfs.permissions.enabled : false
75+ HDFS-SITE.XML_dfs.datanode.address : datanode:9866
76+ depends_on :
77+ - namenode
78+
4079 # begin Fluss cluster
4180 coordinator-server :
4281 image : fluss/fluss:$FLUSS_DOCKER_VERSION$
43- command : coordinatorServer
4482 depends_on :
45- - zookeeper
83+ namenode :
84+ condition : service_healthy
85+ zookeeper :
86+ condition : service_started
87+ datanode :
88+ condition : service_started
4689 environment :
4790 - |
4891 FLUSS_PROPERTIES=
@@ -51,9 +94,10 @@ services:
5194 remote.data.dir: /tmp/fluss/remote-data
5295 datalake.format: paimon
5396 datalake.paimon.metastore: filesystem
54- datalake.paimon.warehouse: /tmp/paimon
97+ datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
5598 volumes :
56- - shared-tmpfs:/tmp/paimon
99+ - ./lib:/tmp/lib
100+ entrypoint : [ "sh", "-c", "cp -v /tmp/lib/*.jar /opt/fluss/plugins/iceberg/ && exec /docker-entrypoint.sh coordinatorServer" ]
57101 tablet-server :
58102 image : fluss/fluss:$FLUSS_DOCKER_VERSION$
59103 command : tabletServer
@@ -66,15 +110,11 @@ services:
66110 bind.listeners: FLUSS://tablet-server:9123
67111 data.dir: /tmp/fluss/data
68112 remote.data.dir: /tmp/fluss/remote-data
69- kv.snapshot.interval: 0s
113+ kv.snapshot.interval: 30s
70114 datalake.format: paimon
71115 datalake.paimon.metastore: filesystem
72- datalake.paimon.warehouse: /tmp/paimon
73- volumes :
74- - shared-tmpfs:/tmp/paimon
75- zookeeper :
76- restart : always
77- image : zookeeper:3.9.2
116+ datalake.paimon.warehouse: hdfs://namenode:8020/fluss-lake
117+
78118 # end
79119 # begin Flink cluster
80120 jobmanager :
@@ -86,8 +126,6 @@ services:
86126 - |
87127 FLINK_PROPERTIES=
88128 jobmanager.rpc.address: jobmanager
89- volumes :
90- - shared-tmpfs:/tmp/paimon
91129 taskmanager :
92130 image : fluss/quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
93131 depends_on :
@@ -100,16 +138,7 @@ services:
100138 taskmanager.numberOfTaskSlots: 10
101139 taskmanager.memory.process.size: 2048m
102140 taskmanager.memory.framework.off-heap.size: 256m
103- volumes :
104- - shared-tmpfs:/tmp/paimon
105141 # end
106-
107- volumes :
108- shared-tmpfs :
109- driver : local
110- driver_opts :
111- type : " tmpfs"
112- device : " tmpfs"
113142```
114143
115144The Docker Compose environment consists of the following containers:
@@ -346,6 +375,33 @@ The following SQL query should return an empty result.
346375SELECT * FROM fluss_customer WHERE `cust_key` = 1;
347376```
348377
378+ ## Fluss Remote Storage
379+
380+ Finally, you can use the following command to view the Fluss KV snapshots stored in Fluss remote storage:
381+ ``` shell
382+ docker compose exec namenode hdfs dfs -ls -R /fluss-data/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
383+ ```
384+
385+ **Sample Output:**
386+ ``` shell
387+ hdfs://namenode:8020/fluss-data
388+ └── kv
389+ └── fluss
390+ ├── enriched_orders-3
391+ │ └── 0
392+ │ ├── shared
393+ │ │ ├── 71fca534-ecca-489b-a19a-bd0538c9f9e9
394+ │ │ ├── b06ef3a3-2873-470e-961f-da25582136a1
395+ │ │ └── b93bad5c-00fb-4e62-8217-71b010621479
396+ │ └── snap-2
397+ │ ├── _METADATA
398+ │ ├── 08d39726-f847-4401-8f31-4e905f2ba3f6
399+ │ ├── b6a7bc2c-b5c3-4eeb-a523-b2b6fff159f3
400+ │ └── e6278555-d71f-431f-954e-71bf066dd29f
401+ ├── fluss_customer-1
402+ ... # Remaining entries omitted for brevity
403+ ```
404+
349405## Integrate with Paimon
350406### Start the Lakehouse Tiering Service
351407To integrate with [Apache Paimon](https://paimon.apache.org/), you need to start the `Lakehouse Tiering Service`.
@@ -473,30 +529,30 @@ The result looks like:
473529```
474530You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real-time.
475531
476- Finally, you can use the following command to view the files stored in Paimon:
532+ ### Storage
533+
534+ Finally, you can use the following command to view the files stored in Paimon Hadoop warehouse:
477535``` shell
478- docker compose exec taskmanager tree /tmp/paimon/ fluss.db
536+ docker compose exec namenode hdfs dfs -ls -R /fluss-lake/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
479537```
480538
481539**Sample Output:**
482540``` shell
483- /tmp/paimon/fluss.db
484- └── datalake_enriched_orders
485- ├── bucket-0
486- │ ├── changelog-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-0.orc
487- │ └── data-aef1810f-85b2-4eba-8eb8-9b136dec5bdb-1.orc
488- ├── manifest
489- │ ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-0
490- │ ├── manifest-aaa007e1-81a2-40b3-ba1f-9df4528bc402-1
491- │ ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-0
492- │ ├── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-1
493- │ └── manifest-list-ceb77e1f-7d17-4160-9e1f-f334918c6e0d-2
494- ├── schema
495- │ └── schema-0
496- └── snapshot
497- ├── EARLIEST
498- ├── LATEST
499- └── snapshot-1
541+ hdfs://namenode:8020/fluss-lake
542+ ├── default.db
543+ └── fluss.db
544+ └── datalake_enriched_orders
545+ ├── bucket-0
546+ │ └── data-02acf76d-c4cc-4bc1-9292-e64a77dfcc72-0.parquet
547+ ├── manifest
548+ │ ├── manifest-df5b6833-7e92-4ec9-a196-51d6fd60b1d1-0
549+ │ ├── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-0
550+ │ └── manifest-list-b683c5a2-4072-4c7a-8586-2c853de8d964-1
551+ ├── schema
552+ │ └── schema-0
553+ └── snapshot
554+ ├── LATEST
555+ └── snapshot-1
500556```
501557The files adhere to Paimon's standard format, enabling seamless querying with other engines such as [StarRocks](https://docs.starrocks.io/docs/data_source/catalog/paimon_catalog/).
502558
0 commit comments