Skip to content

Commit be6fc3b

Browse files
authored
[build] Introduce flink-quickstart docker file (#1759)
* [build] Introduce quickstart docker * add checksum * address comments * address comments * update to use hadoop3 to shared by iceberg
1 parent 665e4e8 commit be6fc3b

File tree

7 files changed

+413
-0
lines changed

7 files changed

+413
-0
lines changed

docker/quickstart-flink/Dockerfile

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Quickstart image: Flink 1.20 with Fluss connectors and a pre-configured
# SQL client. Build context is prepared by prepare_build.sh (lib/, opt/, sql/).

# Use Flink 1.20.0 as base image
FROM flink:1.20.0-scala_2.12-java17

# Switch to root user for installation and setup
USER root

# Install necessary packages
# (apt list files are removed afterwards to keep the image small)
RUN apt-get update && \
apt-get install -y tree && \
rm -rf /var/lib/apt/lists/*

# Copy sql-client script to the container
COPY bin/* /opt/sql-client/

# Set working directory and environment
# SQL_CLIENT_HOME is read by the sql-client launcher script at runtime
WORKDIR /opt/sql-client
ENV SQL_CLIENT_HOME=/opt/sql-client

# Copy Fluss connector JARs and SQL files
# Copy JARs to both sql-client lib and Flink lib directories
COPY lib/* /opt/sql-client/lib/
COPY sql/* /opt/sql-client/sql/
COPY lib/* /opt/flink/lib/
COPY opt/* /opt/flink/opt/

# Modify docker-entrypoint.sh to allow Flink to run as root user
# (the base image normally drops privileges before exec'ing Flink).
# This is needed for the quickstart environment
RUN sed -i 's/exec $(drop_privs_cmd)/exec/g' /docker-entrypoint.sh

# Make sql-client script executable
# (exec-form RUN avoids an extra shell and any word-splitting on the path)
RUN ["chmod", "+x", "/opt/sql-client/sql-client"]

docker/quickstart-flink/README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Fluss Quickstart Flink Docker
2+
3+
This directory contains the Docker setup for Fluss Quickstart with Flink integration.
4+
5+
## Overview
6+
7+
The Fluss Quickstart Flink Docker image provides a complete environment for running Flink with Fluss, powered by Paimon lake storage.
8+
9+
## Prerequisites
10+
11+
Before building the Docker image, ensure you have:
12+
13+
1. The Fluss project built locally: check out the code version you want to use for the Docker image, then run `./mvnw clean package -DskipTests` from the project root.
   The resulting local build is used for the Docker image.
15+
2. Docker installed and running
16+
3. Internet access for retrieving dependencies
17+
18+
## Build Process
19+
20+
The build process consists of two main steps:
21+
22+
### Step 1: Prepare Build Files
23+
24+
First, you need to prepare the required JAR files and dependencies:
25+
26+
```bash
27+
# Make the script executable
28+
chmod +x prepare_build.sh
29+
30+
# Run the preparation script
31+
./prepare_build.sh
32+
```
33+
34+
### Step 2: Build Docker Image
35+
36+
After the preparation is complete, build the Docker image:
37+
38+
```bash
39+
# Build the Docker image
40+
docker build -t fluss/quickstart-flink:1.20-latest .
41+
```
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Launch the Flink SQL client with the Fluss initialization script pre-loaded.
# FLINK_HOME is provided by the Flink base image; SQL_CLIENT_HOME is set via
# ENV in the quickstart Dockerfile. Fail fast with a clear message if either
# is missing, quote all expansions, and exec so signals reach the client.
: "${FLINK_HOME:?FLINK_HOME must be set}"
: "${SQL_CLIENT_HOME:?SQL_CLIENT_HOME must be set}"

exec "${FLINK_HOME}/bin/sql-client.sh" -i "${SQL_CLIENT_HOME}/sql/sql-client.sql"
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
#!/bin/bash
2+
3+
# Licensed to the Apache Software Foundation (ASF) under one or more
4+
# contributor license agreements. See the NOTICE file distributed with
5+
# this work for additional information regarding copyright ownership.
6+
# The ASF licenses this file to You under the Apache License, Version 2.0
7+
# (the "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
# Strict mode: abort on unhandled errors (-e), on unset variables (-u),
# and make pipelines fail if any stage fails (pipefail).
set -euo pipefail

# Configuration
# SCRIPT_DIR: directory containing this script; PROJECT_ROOT: repo root
# (two levels up, since this lives under docker/quickstart-flink/).
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
23+
24+
# Logging functions
25+
# Print an informational message, prefixed with an info emoji.
log_info() {
    printf 'ℹ️ %s\n' "$1"
}
28+
29+
# Print a success message to stdout, unmodified.
log_success() {
    printf '%s\n' "$1"
}
32+
33+
# Print an error message to stderr so it is not captured by stdout pipes.
log_error() {
    printf '%s\n' "$1" >&2
}
36+
37+
# Utility function to copy JAR files with version numbers
38+
# Copy exactly one JAR matching a glob pattern into a destination directory.
# Arguments:
#   $1 - glob pattern for the source JAR (e.g. ".../fluss-flink-1.20-*.jar")
#   $2 - destination directory (created if missing)
#   $3 - human-readable description for log messages
# Returns: 0 on success; 1 if zero or multiple files match the pattern.
copy_jar() {
    local src_pattern="$1"
    local dest_dir="$2"
    local description="$3"

    log_info "Copying $description..."

    # Expand the glob into an array (intentionally unquoted).
    local matches=($src_pattern)
    local count=${#matches[@]}

    # No files matched.
    # NOTE: without `shopt -s nullglob`, an unmatched glob expands to the
    # literal pattern itself (count == 1), so we must also verify that the
    # single "match" actually exists on disk — otherwise the no-match case
    # would fall through and `cp` would fail with a confusing error.
    if (( count == 0 )) || [[ ! -e "${matches[0]}" ]]; then
        log_error "No matching JAR files found: $src_pattern"
        log_error "Please build the Fluss project first: mvn clean package"
        return 1
    fi

    # Multiple files matched — ambiguous, refuse to guess.
    if (( count > 1 )); then
        log_error "Multiple matching JAR files found:"
        printf " %s\n" "${matches[@]}"
        return 1
    fi

    # Exactly one file matched → copy it with original file name
    mkdir -p "$dest_dir"
    cp "${matches[0]}" "$dest_dir/"
    log_success "Copied: $(basename "${matches[0]}")"
}
68+
69+
# Utility function to download and verify JAR
70+
# Download a JAR and optionally verify its checksum.
# Arguments:
#   $1 - source URL
#   $2 - destination file path
#   $3 - expected checksum, or "" to skip verification
#        (compared against `shasum` default output, i.e. SHA-1)
#   $4 - human-readable description for log messages
# Returns: 0 on success; 1 on download failure, empty file, or mismatch.
download_jar() {
    local url="$1"
    local dest_file="$2"
    local expected_hash="$3"
    local description="$4"

    log_info "Downloading $description..."

    # Download the file
    if ! wget -O "$dest_file" "$url"; then
        log_error "Failed to download $description from $url"
        return 1
    fi

    # Reject empty downloads (e.g. truncated transfers or error pages)
    if [ ! -s "$dest_file" ]; then
        log_error "Downloaded file is empty: $dest_file"
        return 1
    fi

    # Verify checksum if provided.
    # Declaration and assignment are split so a failing `shasum` is not
    # masked by `local` (which would otherwise always return 0).
    if [ -n "$expected_hash" ]; then
        local actual_hash
        actual_hash=$(shasum "$dest_file" | awk '{print $1}') || return 1
        if [ "$expected_hash" != "$actual_hash" ]; then
            log_error "Checksum mismatch for $description"
            log_error "Expected: $expected_hash"
            log_error "Actual: $actual_hash"
            return 1
        fi
        log_success "Checksum verified for $description"
    else
        log_success "Downloaded $description"
    fi
}
104+
105+
# Check if required directories exist
106+
# Verify that the locally built Fluss artifacts exist before preparing the
# Docker build context; exit with an actionable message otherwise.
check_prerequisites() {
    log_info "Checking prerequisites..."

    local module target
    for module in \
        "fluss-flink/fluss-flink-1.20" \
        "fluss-lake/fluss-lake-paimon" \
        "fluss-flink/fluss-flink-tiering"; do
        target="$PROJECT_ROOT/$module/target"
        if [[ ! -d "$target" ]]; then
            log_error "Required directory not found: $target"
            log_error "Please build the Fluss project first: mvn clean package"
            exit 1
        fi
    done

    log_success "All prerequisites met"
}
125+
126+
# Main execution
127+
main() {
128+
log_info "Preparing JAR files for Fluss Quickstart Flink Docker..."
129+
log_info "Project root: $PROJECT_ROOT"
130+
131+
# Check prerequisites
132+
check_prerequisites
133+
134+
# Clean and create directories
135+
log_info "Setting up directories..."
136+
rm -rf lib opt
137+
mkdir -p lib opt
138+
139+
# Copy Fluss connector JARs
140+
log_info "Copying Fluss connector JARs..."
141+
copy_jar "$PROJECT_ROOT/fluss-flink/fluss-flink-1.20/target/fluss-flink-1.20-*.jar" "./lib" "fluss-flink-1.20 connector"
142+
copy_jar "$PROJECT_ROOT/fluss-lake/fluss-lake-paimon/target/fluss-lake-paimon-*.jar" "./lib" "fluss-lake-paimon connector"
143+
144+
# Download external dependencies
145+
log_info "Downloading external dependencies..."
146+
147+
# Download flink-faker for data generation
148+
download_jar \
149+
"https://github.com/knaufk/flink-faker/releases/download/v0.5.3/flink-faker-0.5.3.jar" \
150+
"./lib/flink-faker-0.5.3.jar" \
151+
"" \
152+
"flink-faker-0.5.3"
153+
154+
# Download flink-shaded-hadoop-2-uber for Hadoop integration
155+
download_jar \
156+
"https://repo1.maven.org/maven2/io/trino/hadoop/hadoop-apache/3.3.5-2/hadoop-apache-3.3.5-2.jar" \
157+
"./lib/hadoop-apache-3.3.5-2.jar" \
158+
"508255883b984483a45ca48d5af6365d4f013bb8" \
159+
"hadoop-apache-3.3.5-2.jar"
160+
161+
# Download paimon-flink connector
162+
download_jar \
163+
"https://repo1.maven.org/maven2/org/apache/paimon/paimon-flink-1.20/1.2.0/paimon-flink-1.20-1.2.0.jar" \
164+
"./lib/paimon-flink-1.20-1.2.0.jar" \
165+
"b9f8762c6e575f6786f1d156a18d51682ffc975c" \
166+
"paimon-flink-1.20-1.2.0"
167+
168+
# Prepare lake tiering JAR
169+
log_info "Preparing lake tiering JAR..."
170+
copy_jar "$PROJECT_ROOT/fluss-flink/fluss-flink-tiering/target/fluss-flink-tiering-*.jar" "./opt" "fluss-flink-tiering"
171+
172+
# Final verification
173+
verify_jars
174+
175+
# Show summary
176+
show_summary
177+
}
178+
179+
# Verify that all required JAR files are present
180+
# Confirm that every JAR the Docker build expects is present under ./lib and
# ./opt (patterns allow for version suffixes); exit 1 listing anything missing.
verify_jars() {
    log_info "Verifying all required JAR files are present..."

    local missing_jars=()
    local pattern jar

    # Expected artifacts per target directory.
    local lib_jars=(
        "fluss-flink-1.20-*.jar"
        "fluss-lake-paimon-*.jar"
        "flink-faker-0.5.3.jar"
        "hadoop-apache-3.3.5-2.jar"
        "paimon-flink-1.20-1.2.0.jar"
    )
    local opt_jars=(
        "fluss-flink-tiering-*.jar"
    )

    # Collect any patterns that match nothing ($pattern is intentionally
    # unquoted so the shell expands the glob for `ls`).
    for pattern in "${lib_jars[@]}"; do
        ls ./lib/$pattern >/dev/null 2>&1 || missing_jars+=("lib/$pattern")
    done
    for pattern in "${opt_jars[@]}"; do
        ls ./opt/$pattern >/dev/null 2>&1 || missing_jars+=("opt/$pattern")
    done

    # Report results
    if [ ${#missing_jars[@]} -ne 0 ]; then
        log_error "Missing required JAR files:"
        for jar in "${missing_jars[@]}"; do
            log_error " - $jar"
        done
        exit 1
    fi

    log_success "All required JAR files are present!"
}
221+
222+
# Summary function
223+
# Report completion and list the contents of the prepared directories.
show_summary() {
    log_success "JAR files preparation completed!"
    echo ""
    log_info "📦 Generated JAR files:"
    _show_dir_listing "Lib directory:" ./lib/
    _show_dir_listing "Opt directory:" ./opt/
}

# Print a heading followed by a directory listing, or a placeholder when the
# directory does not exist.
_show_dir_listing() {
    echo "$1"
    ls -la "$2" 2>/dev/null || echo " (empty)"
}
232+
233+
# Run main function
234+
main "$@"

0 commit comments

Comments
 (0)