Skip to content

Commit 4d16cc1

Browse files
committed
feat: automate the pipeline initializations
1 parent b1e7927 commit 4d16cc1

File tree

4 files changed

+54
-5
lines changed

4 files changed

+54
-5
lines changed

docker-compose.yaml

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,41 +28,51 @@ services:
2828
networks:
2929
stock-net:
3030
ipv4_address: 172.28.1.2
31+
volumes:
32+
- ./scripts/init-kafka.sh:/init-kafka.sh
33+
# entrypoint: ["/bin/bash", "init-kafka.sh"]
34+
restart: always
3135

3236
cassandra:
3337
image: cassandra:latest
3438
ports:
3539
- "9042:9042"
3640
volumes:
3741
- ./init-cassandra:/init-cassandra
42+
- ./scripts/init-cassandra-schema.sh:/init-cassandra-schema.sh
3843
environment:
3944
- CASSANDRA_START_RPC=true
4045
networks:
4146
stock-net:
4247
ipv4_address: 172.28.1.3
48+
# entrypoint: ["/bin/bash", "init-cassandra-schema.sh"]
49+
restart: always
4350

4451
spark:
4552
image: bitnami/spark:latest
4653
volumes:
4754
- ./spark:/opt/bitnami/spark/jobs
55+
- ./scripts/submit-spark-job.sh:/opt/bitnami/spark/submit-spark-job.sh
4856
ports:
4957
- "8080:8080"
5058
depends_on:
5159
- kafka
5260
networks:
5361
stock-net:
5462
ipv4_address: 172.28.1.4
55-
63+
entrypoint: ["sh", "-c", "./submit-spark-job.sh"]
64+
restart: always
65+
5666
kafka_producer:
5767
build:
58-
context: ./kafka-producer
59-
dockerfile: kafka_producer.dockerfile
68+
context: ./kafka-producer
69+
dockerfile: kafka_producer.dockerfile
6070
depends_on:
6171
- kafka
6272
networks:
6373
stock-net:
64-
ipv4_address: 172.28.1.5
65-
74+
ipv4_address: 172.28.1.8
75+
restart: always
6676
networks:
6777
stock-net:
6878
driver: bridge

scripts/init-cassandra-schema.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
2+
set -e
3+
4+
# Function to check if Cassandra is up and running
5+
#######################################
# Poll Cassandra with a trivial CQL query until it answers or we give up.
# Globals:
#   CASSANDRA_MAX_ATTEMPTS   (read) probes before giving up, default 30
#   CASSANDRA_RETRY_INTERVAL (read) seconds between probes, default 10
# Arguments:
#   Optional extra cqlsh arguments (e.g. -u USER -p PASS, host, port);
#   with no arguments the probe is a plain `cqlsh -e "describe keyspaces"`.
# Outputs:
#   Progress messages on stdout, the give-up message on stderr.
# Returns:
#   0 once the probe succeeds, 1 if every attempt failed.
#######################################
check_cassandra() {
  local -r max_attempts="${CASSANDRA_MAX_ATTEMPTS:-30}"
  local -r retry_interval="${CASSANDRA_RETRY_INTERVAL:-10}"
  local attempt=1

  # The probe's own output is discarded on purpose: only its exit status
  # matters here, and connection errors are expected while the node boots.
  until cqlsh "$@" -e "describe keyspaces" &>/dev/null; do
    # Bound the wait so a permanently broken setup (cqlsh missing, bad
    # credentials, node never starting) fails loudly instead of hanging.
    if (( attempt >= max_attempts )); then
      echo "Cassandra did not become ready after ${max_attempts} attempts." >&2
      return 1
    fi
    echo "Waiting for Cassandra to be up..."
    attempt=$(( attempt + 1 ))
    sleep "$retry_interval"
  done
}
11+
12+
# Announce the start of the schema bootstrap.
printf '%s\n' "Setting up Cassandra schema..."

# Hold here until the readiness check reports that Cassandra is reachable.
check_cassandra

# Run the CQL file mounted at /init-cassandra, authenticating as
# cassandra/cassandra.
cqlsh -u cassandra -p cassandra -f /init-cassandra/init.cql

printf '%s\n' "Cassandra schema setup complete."

scripts/init-kafka.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
# Start the Kafka broker in the background, wait until it answers client
# requests, make sure the topic exists, then stay attached to the broker so
# this script lives (and exits) together with it.
#
# Optional environment:
#   KAFKA_BOOTSTRAP_SERVER  broker address to probe   (default: localhost:9092)
#   KAFKA_TOPIC             topic to create           (default: stocks)
#   KAFKA_MAX_ATTEMPTS      readiness probes to try   (default: 30)
#   KAFKA_RETRY_INTERVAL    seconds between probes    (default: 5)
set -euo pipefail

readonly bootstrap_server="${KAFKA_BOOTSTRAP_SERVER:-localhost:9092}"
readonly topic="${KAFKA_TOPIC:-stocks}"
readonly max_attempts="${KAFKA_MAX_ATTEMPTS:-30}"
readonly retry_interval="${KAFKA_RETRY_INTERVAL:-5}"

/opt/bitnami/scripts/kafka/entrypoint.sh /opt/bitnami/scripts/kafka/run.sh &
kafka_pid=$!

echo "Waiting for Kafka to be up..."
attempt=1
# Probe the broker instead of sleeping a fixed time: startup duration varies,
# and creating the topic too early would abort the script under `set -e`.
until kafka-topics.sh --list --bootstrap-server "$bootstrap_server" \
  >/dev/null 2>&1; do
  # Stop probing if the broker process itself has already died.
  if ! kill -0 "$kafka_pid" 2>/dev/null; then
    echo "Kafka exited before becoming ready." >&2
    exit 1
  fi
  if (( attempt >= max_attempts )); then
    echo "Kafka did not become ready after ${max_attempts} attempts." >&2
    kill "$kafka_pid" 2>/dev/null || true
    exit 1
  fi
  attempt=$(( attempt + 1 ))
  sleep "$retry_interval"
done

# --if-not-exists keeps this step idempotent: on a restart the topic is
# already present, and a plain --create would fail and kill the script.
kafka-topics.sh --create --if-not-exists \
  --topic "$topic" \
  --bootstrap-server "$bootstrap_server" \
  --partitions 1 \
  --replication-factor 1

# Wait on the broker specifically so its exit status becomes ours.
wait "$kafka_pid"

scripts/submit-spark-job.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/bin/bash
# Submit /opt/bitnami/spark/jobs/spark_job.py through spark-submit, passing
# "stocks" as the job's single argument and requesting the Kafka SQL and
# Cassandra connector packages.

# NOTE(review): /path/to/krb5.conf looks like an unfilled placeholder --
# confirm whether these two extraJavaOptions settings are actually needed.
submit_args=(
  --conf "spark.jars.ivy=/opt/bitnami/spark/.ivy2"
  --conf "spark.executor.extraJavaOptions=-Djava.security.krb5.conf=/path/to/krb5.conf"
  --conf "spark.driver.extraJavaOptions=-Djava.security.krb5.conf=/path/to/krb5.conf"
  --packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.1,com.datastax.spark:spark-cassandra-connector_2.12:3.0.0"
  /opt/bitnami/spark/jobs/spark_job.py
  stocks
)

spark-submit "${submit_args[@]}"

0 commit comments

Comments
 (0)