@@ -61,23 +61,55 @@ services:
     restart: always
     image: zookeeper:3.9.2

+  namenode:
+    image: apache/hadoop:3.3.6
+    hostname: namenode
+    user: root
+    command: ["hdfs", "namenode"]
+    ports:
+      - 9870:9870
+      - 8020:8020
+    environment:
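+      # Note: the apache/hadoop image maps env vars of the form FILE_key into the
+      # matching config file, e.g. CORE-SITE.XML_fs.defaultFS becomes fs.defaultFS
+      # in core-site.xml (see the image docs for the exact convention).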
+      ENSURE_NAMENODE_DIR: "/tmp/hadoop/dfs/name"
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+
+  datanode:
+    image: apache/hadoop:3.3.6
+    user: root
+    command: ["hdfs", "datanode"]
+    environment:
+      CORE-SITE.XML_fs.defaultFS: hdfs://namenode:8020
+      CORE-SITE.XML_hadoop.tmp.dir: /hadoop/tmp
+      HDFS-SITE.XML_dfs.namenode.rpc-address: namenode:8020
+      HDFS-SITE.XML_dfs.replication: 1
+      HDFS-SITE.XML_dfs.permissions.enabled: false
+      HDFS-SITE.XML_dfs.datanode.address: datanode:9866
+    depends_on:
+      - namenode
+
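+  # Tip: once the stack is up, `docker compose exec namenode hdfs dfsadmin -report`
+  # should show one live datanode before the Fluss servers begin writing to HDFS.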
   coordinator-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
     depends_on:
       - zookeeper
+      - namenode
+      - datanode
     environment:
       - |
         FLUSS_PROPERTIES=
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://coordinator-server:9123
-        remote.data.dir: /tmp/fluss/remote-data
+        remote.data.dir: hdfs://namenode:8020/fluss-data
         datalake.format: iceberg
         datalake.iceberg.type: hadoop
-        datalake.iceberg.warehouse: /tmp/iceberg
+        datalake.iceberg.warehouse: hdfs://namenode:8020/fluss-lake
     volumes:
-      - shared-tmpfs:/tmp/iceberg
       - ./lib:/tmp/lib
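+    # copy the Iceberg connector jars mounted under ./lib into Fluss's plugin
+    # directory, then hand off to the regular entrypoint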
-    entrypoint: ["sh", "-c", "cp -v /tmp/lib/*.jar /opt/fluss/plugins/iceberg/ && exec /docker-entrypoint.sh coordinatorServer"]
+    entrypoint: [ "sh", "-c", "cp -v /tmp/lib/*.jar /opt/fluss/plugins/iceberg/ && exec /docker-entrypoint.sh coordinatorServer" ]

   tablet-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
@@ -90,13 +122,11 @@ services:
         zookeeper.address: zookeeper:2181
         bind.listeners: FLUSS://tablet-server:9123
         data.dir: /tmp/fluss/data
-        remote.data.dir: /tmp/fluss/remote-data
-        kv.snapshot.interval: 0s
+        remote.data.dir: hdfs://namenode:8020/fluss-data
+        kv.snapshot.interval: 10s
         datalake.format: iceberg
         datalake.iceberg.type: hadoop
-        datalake.iceberg.warehouse: /tmp/iceberg
-    volumes:
-      - shared-tmpfs:/tmp/iceberg
+        datalake.iceberg.warehouse: hdfs://namenode:8020/fluss-lake

   jobmanager:
     image: fluss/quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
@@ -107,8 +137,6 @@ services:
       - |
         FLINK_PROPERTIES=
         jobmanager.rpc.address: jobmanager
-    volumes:
-      - shared-tmpfs:/tmp/iceberg

   taskmanager:
     image: fluss/quickstart-flink:1.20-$FLUSS_DOCKER_VERSION$
@@ -122,15 +150,6 @@ services:
         taskmanager.numberOfTaskSlots: 10
         taskmanager.memory.process.size: 2048m
         taskmanager.memory.framework.off-heap.size: 256m
-    volumes:
-      - shared-tmpfs:/tmp/iceberg
-
-volumes:
-  shared-tmpfs:
-    driver: local
-    driver_opts:
-      type: "tmpfs"
-      device: "tmpfs"
 ```

 The Docker Compose environment consists of the following containers:
@@ -367,6 +386,60 @@ The following SQL query should return an empty result.
 SELECT * FROM fluss_customer WHERE `cust_key` = 1;
 ```

+### Storage
+
+Finally, you can use the following command to view the Fluss kv snapshots stored in Fluss's remote storage:
+```shell
+docker compose exec namenode hdfs dfs -ls -R /fluss-data/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
+```
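+The pipeline recursively lists everything under `/fluss-data`, keeps only the path column of the `hdfs dfs -ls` output, drops blank lines, and renders the paths as a tree.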
+
+**Sample Output:**
+```shell
+hdfs://namenode:8020/fluss-data
+└── kv
+    └── fluss
+        ├── enriched_orders-3
+        │   └── 0
+        │       ├── shared
+        │       │   ├── 0836f202-bdcd-498b-a94a-0520beb3d7ea
+        │       │   ├── afefc29f-d8d3-4cdb-a496-a6c271ddfac0
+        │       │   └── b67bd402-2ad4-4305-bd36-4fadf08a5200
+        │       └── snap-2
+        │           ├── _METADATA
+        │           ├── 02f02528-af03-4c88-980c-ec9f878d5476
+        │           ├── 7b21a889-ab06-4b74-98a5-36b542a67d0d
+        │           └── d7b699d9-6547-49fc-b579-de84cc37a167
+        ├── fluss_customer-1
+        │   └── 0
+        │       ├── shared
+        │       │   └── e537da08-ad8c-478d-9b80-505616e481b9
+        │       └── snap-0
+        │           ├── _METADATA
+        │           ├── 34a2dea2-8079-483f-b6d8-003a6e01bd3f
+        │           ├── b196e58d-7df2-4c95-b4fb-b1c3ebb4c622
+        │           └── f6ff8a01-d7dc-451d-a0e7-716392808405
+        ├── fluss_nation-2
+        │   └── 0
+        │       ├── shared
+        │       │   └── fd7c1369-fa5d-4396-9610-216f0affb213
+        │       └── snap-0
+        │           ├── _METADATA
+        │           ├── 0808acec-65d4-451c-b8fd-2225f045ad6d
+        │           ├── 181f67c8-0620-4e9a-9367-2a1c774b4abd
+        │           └── 5690e21f-3322-486c-93f5-5669d595cf34
+        └── fluss_order-0
+            └── 0
+                ├── shared
+                │   ├── 29affd1e-ca99-4cc5-a855-61d3c1b2c9a5
+                │   ├── 546a6113-0a63-4a17-964c-d8e37c32acc0
+                │   └── 6d9b6ddb-a2c1-4746-aedd-3217a9f51686
+                └── snap-2
+                    ├── _METADATA
+                    ├── 3fb7b4e9-9d66-44b6-b846-dba77514a1c7
+                    ├── 450997ac-8323-4708-a4ac-6bb2e71834a7
+                    └── f2b4b230-41e9-41dd-9227-337e324460a6
+```
+
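+As a quick follow-up, you can check how much space each table's snapshots occupy. This is a minimal sketch using the standard `hdfs dfs -du` command against the layout shown above:
+```shell
+docker compose exec namenode hdfs dfs -du -h /fluss-data/kv/fluss
+```
+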
 ## Integrate with Iceberg
 ### Start the Lakehouse Tiering Service
 To integrate with [Apache Iceberg](https://iceberg.apache.org/), you need to start the `Lakehouse Tiering Service`.
@@ -378,7 +451,7 @@ docker compose exec jobmanager \
     --fluss.bootstrap.servers coordinator-server:9123 \
     --datalake.format iceberg \
     --datalake.iceberg.type hadoop \
-    --datalake.iceberg.warehouse /tmp/iceberg
+    --datalake.iceberg.warehouse hdfs://namenode:8020/fluss-lake
 ```
 You should see a Flink job that tiers data from Fluss to Iceberg running in the [Flink Web UI](http://localhost:8083/).
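+
+You can also confirm the job from the terminal with the Flink CLI, which ships in the standard Flink images (`-r` lists running jobs):
+```shell
+docker compose exec jobmanager flink list -r
+```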
@@ -501,20 +574,27 @@ SELECT sum(total_price) as sum_price FROM datalake_enriched_orders;

 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss.

-Finally, you can use the following command to view the files stored in Iceberg:
+### Storage
+
+Finally, you can use the following command to view the files stored in the Iceberg Hadoop warehouse:
 ```shell
-docker compose exec taskmanager tree /tmp/iceberg/fluss
+docker compose exec namenode hdfs dfs -ls -R /fluss-lake/ | awk '{print $8}' | grep -v '^$' | tree --fromfile .
 ```

 **Sample Output:**
 ```shell
-/tmp/iceberg/fluss
-└── datalake_enriched_orders
-    ├── data
-    │   └── 00000-0-abc123.parquet
-    └── metadata
-        ├── snap-1234567890123456789-1-abc123.avro
-        └── v1.metadata.json
+hdfs://namenode:8020/fluss-lake
+└── fluss
+    └── datalake_enriched_orders
+        ├── data
+        │   └── __bucket=0
+        │       └── 00000-0-3ff95845-47af-456f-83e0-8411576cfffe-00001.parquet
+        └── metadata
+            ├── 528ae521-d683-4c5e-8dd7-779a83dd9c6f-m0.avro
+            ├── snap-3496049107217731071-1-528ae521-d683-4c5e-8dd7-779a83dd9c6f.avro
+            ├── v1.metadata.json
+            ├── v2.metadata.json
+            └── version-hint.text
 ```
 The files adhere to Iceberg's standard format, enabling seamless querying with other engines such as [Spark](https://iceberg.apache.org/docs/latest/spark-queries/) and [Trino](https://trino.io/docs/current/connector/iceberg.html).

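+Since the warehouse is a plain Iceberg Hadoop catalog on HDFS, an external engine only needs to point a Hadoop catalog at the same path. The following is an illustrative sketch rather than part of the quickstart: the catalog name `fluss_lake` and the runtime jar version are assumptions, and the engine must be able to resolve `namenode:8020`.
+```shell
+spark-sql \
+  --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.0 \
+  --conf spark.sql.catalog.fluss_lake=org.apache.iceberg.spark.SparkCatalog \
+  --conf spark.sql.catalog.fluss_lake.type=hadoop \
+  --conf spark.sql.catalog.fluss_lake.warehouse=hdfs://namenode:8020/fluss-lake \
+  -e 'SELECT count(*) FROM fluss_lake.fluss.datalake_enriched_orders;'
+```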