Commit 258fb81

MehulBatra and luoyuxia authored

[build] Update docker image dependencies for flink/fluss iceberg quickstart (#1813)

---------
Co-authored-by: luoyuxia <[email protected]>
1 parent 6003c42 commit 258fb81

File tree

3 files changed: +86 −38 lines

docker/quickstart-flink/prepare_build.sh

Lines changed: 40 additions & 13 deletions
@@ -109,6 +109,7 @@ check_prerequisites() {
     local required_dirs=(
         "$PROJECT_ROOT/fluss-flink/fluss-flink-1.20/target"
         "$PROJECT_ROOT/fluss-lake/fluss-lake-paimon/target"
+        "$PROJECT_ROOT/fluss-lake/fluss-lake-iceberg/target"
         "$PROJECT_ROOT/fluss-flink/fluss-flink-tiering/target"
     )

@@ -140,6 +141,7 @@ main() {
     log_info "Copying Fluss connector JARs..."
     copy_jar "$PROJECT_ROOT/fluss-flink/fluss-flink-1.20/target/fluss-flink-1.20-*.jar" "./lib" "fluss-flink-1.20 connector"
     copy_jar "$PROJECT_ROOT/fluss-lake/fluss-lake-paimon/target/fluss-lake-paimon-*.jar" "./lib" "fluss-lake-paimon connector"
+    copy_jar "$PROJECT_ROOT/fluss-lake/fluss-lake-iceberg/target/fluss-lake-iceberg-*.jar" "./lib" "fluss-lake-iceberg connector"

     # Download external dependencies
     log_info "Downloading external dependencies..."
@@ -151,12 +153,12 @@ main() {
         "" \
         "flink-faker-0.5.3"

-    # Download flink-shaded-hadoop-2-uber for Hadoop integration
+    # Download Hadoop for HDFS/local filesystem support
     download_jar \
         "https://repo1.maven.org/maven2/io/trino/hadoop/hadoop-apache/3.3.5-2/hadoop-apache-3.3.5-2.jar" \
         "./lib/hadoop-apache-3.3.5-2.jar" \
         "508255883b984483a45ca48d5af6365d4f013bb8" \
-        "hadoop-apache-3.3.5-2.jar"
+        "hadoop-apache-3.3.5-2"

     # Download paimon-flink connector
     download_jar \
@@ -165,48 +167,61 @@ main() {
         "b9f8762c6e575f6786f1d156a18d51682ffc975c" \
         "paimon-flink-1.20-1.2.0"

+    # Iceberg Support
+    log_info "Downloading Iceberg connector JARs..."
+
+    # Download iceberg-flink-runtime for Flink 1.20 (version 1.9.1)
+    download_jar \
+        "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-flink-runtime-1.20/1.9.1/iceberg-flink-runtime-1.20-1.9.1.jar" \
+        "./lib/iceberg-flink-runtime-1.20-1.9.1.jar" \
+        "" \
+        "iceberg-flink-runtime-1.20-1.9.1"
+
     # Prepare lake tiering JAR
     log_info "Preparing lake tiering JAR..."
     copy_jar "$PROJECT_ROOT/fluss-flink/fluss-flink-tiering/target/fluss-flink-tiering-*.jar" "./opt" "fluss-flink-tiering"

     # Final verification
     verify_jars

     # Show summary
     show_summary
 }

 # Verify that all required JAR files are present
 verify_jars() {
     log_info "Verifying all required JAR files are present..."

     local missing_jars=()
     local lib_jars=(
         "fluss-flink-1.20-*.jar"
         "fluss-lake-paimon-*.jar"
+        "fluss-lake-iceberg-*.jar"
         "flink-faker-0.5.3.jar"
         "hadoop-apache-3.3.5-2.jar"
         "paimon-flink-1.20-1.2.0.jar"
+        "iceberg-flink-runtime-1.20-1.9.1.jar"
     )

     local opt_jars=(
         "fluss-flink-tiering-*.jar"
     )

     # Check lib directory
     for jar_pattern in "${lib_jars[@]}"; do
         if ! ls ./lib/$jar_pattern >/dev/null 2>&1; then
             missing_jars+=("lib/$jar_pattern")
         fi
     done

     # Check opt directory
     for jar_pattern in "${opt_jars[@]}"; do
         if ! ls ./opt/$jar_pattern >/dev/null 2>&1; then
             missing_jars+=("opt/$jar_pattern")
         fi
     done

     # Report results
     if [ ${#missing_jars[@]} -eq 0 ]; then
         log_success "All required JAR files are present!"
@@ -224,11 +239,23 @@ show_summary() {
     log_success "JAR files preparation completed!"
     echo ""
     log_info "📦 Generated JAR files:"
-    echo "Lib directory:"
-    ls -la ./lib/ 2>/dev/null || echo "  (empty)"
-    echo "Opt directory:"
-    ls -la ./opt/ 2>/dev/null || echo "  (empty)"
+    echo ""
+    echo "Lib directory (Flink connectors):"
+    ls -lh ./lib/ | tail -n +2 | awk '{printf "  %-50s %8s\n", $9, $5}'
+    echo ""
+    echo "Opt directory (Tiering service):"
+    ls -lh ./opt/ | tail -n +2 | awk '{printf "  %-50s %8s\n", $9, $5}'
+    echo ""
+    log_info "📋 Included Components:"
+    echo "  ✓ Fluss Flink 1.20 connector"
+    echo "  ✓ Fluss Lake Paimon connector"
+    echo "  ✓ Fluss Lake Iceberg connector"
+    echo "  ✓ Iceberg Flink runtime 1.20 (v1.9.1)"
+    echo "  ✓ Paimon Flink 1.20 (v1.2.0)"
+    echo "  ✓ Hadoop Apache (v3.3.5-2)"
+    echo "  ✓ Flink Faker (v0.5.3)"
+    echo "  ✓ Fluss Tiering service"
 }

 # Run main function
-main "$@"
+main "$@"

fluss-lake/fluss-lake-iceberg/pom.xml

Lines changed: 19 additions & 0 deletions
@@ -318,12 +318,31 @@
                         <include>*:*</include>
                     </includes>
                 </artifactSet>
+                <relocations>
+                    <!-- Shade Iceberg to Fluss namespace for complete isolation -->
+                    <relocation>
+                        <pattern>org.apache.iceberg</pattern>
+                        <shadedPattern>org.apache.fluss.lake.iceberg.shaded.org.apache.iceberg</shadedPattern>
+                    </relocation>
+                    <!-- Shade Jackson to Fluss namespace -->
+                    <relocation>
+                        <pattern>com.fasterxml.jackson</pattern>
+                        <shadedPattern>org.apache.fluss.lake.iceberg.shaded.com.fasterxml.jackson</shadedPattern>
+                    </relocation>
+                    <!-- Shade Parquet to Fluss namespace -->
+                    <relocation>
+                        <pattern>org.apache.parquet</pattern>
+                        <shadedPattern>org.apache.fluss.lake.iceberg.shaded.org.apache.parquet</shadedPattern>
+                    </relocation>
+                </relocations>
                 <filters>
                     <filter>
                         <artifact>*</artifact>
                         <excludes>
                             <exclude>LICENSE</exclude>
                             <exclude>NOTICE</exclude>
+                            <!-- Exclude multi-release JAR versions that cause shading issues -->
+                            <exclude>META-INF/versions/**</exclude>
                         </excludes>
                     </filter>
                 </filters>
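After rebuilding the module, the relocations can be spot-checked by listing the shaded JAR: Iceberg, Jackson, and Parquet classes should now sit under `org/apache/fluss/lake/iceberg/shaded/` rather than at their original packages. A quick sketch of such a check (the exact artifact file name depends on the project version):

```shell
# Resolve the shaded artifact produced by the module build.
JAR=$(ls fluss-lake/fluss-lake-iceberg/target/fluss-lake-iceberg-*.jar)

# Should print 0: no unshaded Iceberg/Jackson/Parquet classes left behind.
jar tf "$JAR" | grep -c -E '^(org/apache/iceberg|com/fasterxml/jackson|org/apache/parquet)/' || true

# Should print a large count: classes relocated under the Fluss namespace.
jar tf "$JAR" | grep -c '^org/apache/fluss/lake/iceberg/shaded/'
```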

website/docs/quickstart/flink-iceberg.md

Lines changed: 27 additions & 25 deletions
@@ -63,7 +63,6 @@ services:

   coordinator-server:
     image: fluss/fluss:$FLUSS_DOCKER_VERSION$
-    command: coordinatorServer
     depends_on:
       - zookeeper
     environment:
@@ -401,8 +400,7 @@ CREATE TABLE datalake_enriched_orders (
     `cust_phone` STRING,
     `cust_acctbal` DECIMAL(15, 2),
     `cust_mktsegment` STRING,
-    `nation_name` STRING,
-    PRIMARY KEY (`order_key`) NOT ENFORCED
+    `nation_name` STRING
 ) WITH (
     'table.datalake.enabled' = 'true',
     'table.datalake.freshness' = '30s'
@@ -429,11 +427,14 @@ SELECT o.order_key,
        c.acctbal,
        c.mktsegment,
        n.name
-FROM fluss_order o
-    LEFT JOIN fluss_customer FOR SYSTEM_TIME AS OF `o`.`ptime` AS `c`
-        ON o.cust_key = c.cust_key
-    LEFT JOIN fluss_nation FOR SYSTEM_TIME AS OF `o`.`ptime` AS `n`
-        ON c.nation_key = n.nation_key;
+FROM (
+    SELECT *, PROCTIME() as ptime
+    FROM `default_catalog`.`default_database`.source_order
+) o
+LEFT JOIN fluss_customer FOR SYSTEM_TIME AS OF o.ptime AS c
+    ON o.cust_key = c.cust_key
+LEFT JOIN fluss_nation FOR SYSTEM_TIME AS OF o.ptime AS n
+    ON c.nation_key = n.nation_key;
 ```

 ### Real-Time Analytics on Fluss datalake-enabled Tables
@@ -459,11 +460,12 @@ SELECT snapshot_id, operation FROM datalake_enriched_orders$lake$snapshots;

 **Sample Output:**
 ```shell
-+-------------+--------------------+
-| snapshot_id | operation          |
-+-------------+--------------------+
-|           1 | append             |
-+-------------+--------------------+
++---------------------+-----------+
+|         snapshot_id | operation |
++---------------------+-----------+
+| 7792523713868625335 | append    |
+| 7960217942125627573 | append    |
++---------------------+-----------+
 ```
 **Note:** Make sure to wait for the configured `datalake.freshness` (~30s) to complete before querying the snapshots, otherwise the result will be empty.

@@ -474,11 +476,11 @@ SELECT sum(total_price) as sum_price FROM datalake_enriched_orders$lake;
 ```
 **Sample Output:**
 ```shell
-+------------+
-| sum_price  |
-+------------+
-| 1669519.92 |
-+------------+
++-----------+
+| sum_price |
++-----------+
+| 432880.93 |
++-----------+
 ```

 To achieve results with sub-second data freshness, you can query the table directly, which seamlessly unifies data from both Fluss and Iceberg:
@@ -490,23 +492,23 @@ SELECT sum(total_price) as sum_price FROM datalake_enriched_orders;

 **Sample Output:**
 ```shell
-+------------+
-| sum_price  |
-+------------+
-| 1777908.36 |
-+------------+
++-----------+
+| sum_price |
++-----------+
+| 558660.03 |
++-----------+
 ```

 You can execute the real-time analytics query multiple times, and the results will vary with each run as new data is continuously written to Fluss in real-time.

 Finally, you can use the following command to view the files stored in Iceberg:
 ```shell
-docker compose exec taskmanager tree /tmp/iceberg/fluss.db
+docker compose exec taskmanager tree /tmp/iceberg/fluss
 ```

 **Sample Output:**
 ```shell
-/tmp/iceberg/fluss.db
+/tmp/iceberg/fluss
 └── datalake_enriched_orders
     ├── data
     │   └── 00000-0-abc123.parquet

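Since the point of the commit is that the quickstart image now ships the Iceberg JARs, one quick sanity check is to list the Flink lib directory inside a running container. A sketch, assuming the quickstart image keeps the standard /opt/flink/lib layout:

```shell
# Confirm the Iceberg connector JARs landed on the Flink classpath
# (assumes the quickstart image uses the standard /opt/flink/lib layout).
docker compose exec taskmanager ls /opt/flink/lib | grep -E 'iceberg|fluss-lake'
```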