Skip to content

Commit eecc1ad

Browse files
authored
Merge pull request #23 from SciCatProject/kafka-ci
Add Kafka connection test. (Part of integration test)
2 parents 7400c4b + fadb2ee commit eecc1ad

7 files changed

+95
-18
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949

5050
intergration-tests:
5151
name: Integration Tests
52-
needs: tests
52+
needs: [tests, formatting]
5353
uses: ./.github/workflows/integration.yml
5454
with:
5555
python-version: '${{needs.formatting.outputs.min_python}}'

.github/workflows/integration.yml

+4
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,8 @@ jobs:
2424
python-version: ${{ inputs.python-version }}
2525
- run: python -m pip install --upgrade pip
2626
- run: python -m pip install -r requirements/ci.txt
27+
- run: python -m pip install -e .
2728
- run: docker-compose version
29+
- run: docker-compose -f tests/docker-compose-file-writer.yml up -d
30+
- run: scicat_ingestor -c resources/config.sample.json --verbose
31+
- run: docker-compose -f tests/docker-compose-file-writer.yml down

config.20240405.json

+1-3
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22
"kafka": {
33
"topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"],
44
"group_id": "GROUP_ID",
5-
"bootstrap_servers": [
6-
"HOST:9092"
7-
],
5+
"bootstrap_servers": ["localhost:9093"],
86
"enable_auto_commit": true,
97
"auto_offset_reset": "earliest"
108
},

resources/config.sample.json

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
{
22
"kafka": {
3-
"topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"],
3+
"topics": ["KAFKA_TOPIC_1", "KAFKA_TOPIC_2"],
44
"group_id": "GROUP_ID",
5-
"bootstrap_servers": [
6-
"HOST:9092"
7-
],
5+
"bootstrap_servers": ["localhost:9093"],
86
"individual_message_commit": false,
97
"enable_auto_commit": true,
108
"auto_offset_reset": "earliest"

src/scicat_ingestor.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@
77
from scicat_logging import build_logger
88

99

10-
def quit(logger: logging.Logger) -> None:
10+
def quit(logger: logging.Logger, unexpected: bool = True) -> None:
1111
"""Log the message and exit the program."""
1212
import sys
1313

1414
logger.info("Exiting ingestor")
15-
sys.exit()
15+
sys.exit(1 if unexpected else 0)
1616

1717

1818
def main() -> None:

src/scicat_kafka.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -13,37 +13,43 @@ def collect_consumer_options(options: kafkaOptions) -> dict:
1313

1414
# Build logger and formatter
1515
config_dict = {
16-
key.replace('_', '.'): value
16+
key.replace("_", "."): value
1717
for key, value in asdict(options).items()
18-
if key not in ('topics', 'individual_message_commit')
18+
if key not in ("topics", "individual_message_commit")
1919
}
20-
config_dict['enable.auto.commit'] = (
20+
config_dict["enable.auto.commit"] = (
2121
not options.individual_message_commit
2222
) and options.enable_auto_commit
23+
if isinstance(bootstrap_servers := options.bootstrap_servers, list):
24+
# Convert the list to a comma-separated string
25+
config_dict["bootstrap.servers"] = ",".join(bootstrap_servers)
26+
else:
27+
config_dict["bootstrap.servers"] = bootstrap_servers
28+
2329
return config_dict
2430

2531

2632
def collect_kafka_topics(options: kafkaOptions) -> list[str]:
2733
"""Return the Kafka topics as a list."""
2834
if isinstance(options.topics, str):
29-
return options.topics.split(',')
35+
return options.topics.split(",")
3036
elif isinstance(options.topics, list):
3137
return options.topics
3238
else:
33-
raise TypeError('The topics must be a list or a comma-separated string.')
39+
raise TypeError("The topics must be a list or a comma-separated string.")
3440

3541

3642
def build_consumer(kafka_options: kafkaOptions, logger: logging.Logger) -> Consumer:
3743
"""Build a Kafka consumer and configure it according to the ``options``."""
3844
consumer_options = collect_consumer_options(kafka_options)
39-
logger.info('Connecting to Kafka with the following parameters:')
45+
logger.info("Connecting to Kafka with the following parameters:")
4046
logger.info(consumer_options)
4147
consumer = Consumer(consumer_options)
4248
if not validate_consumer(consumer, logger):
4349
return None
4450

4551
kafka_topics = collect_kafka_topics(kafka_options)
46-
logger.info(f'Subscribing to the following Kafka topics: {kafka_topics}')
52+
logger.info(f"Subscribing to the following Kafka topics: {kafka_topics}")
4753
consumer.subscribe(kafka_topics)
4854
return Consumer(consumer_options)
4955

@@ -58,5 +64,5 @@ def validate_consumer(consumer: Consumer, logger: logging.Logger) -> bool:
5864
)
5965
return False
6066
else:
61-
logger.info('Kafka consumer successfully instantiated')
67+
logger.info("Kafka consumer successfully instantiated")
6268
return True

tests/docker-compose-file-writer.yml

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
version: "3.5"
2+
3+
services:
4+
# Kafka and file-writer services are copied from
5+
# https://gitlab.esss.lu.se/ecdc/ess-dmsc/kafka-to-nexus/-/blob/main/integration-tests/docker-compose.yml
6+
# Currently github-ci fails to run the original docker-compose.yml file in the ecdc repository
7+
# so we copied and modified the file here.
8+
kafka:
9+
container_name: file-writer-kafka
10+
hostname: file-writer-kafka
11+
image: confluentinc/cp-kafka:7.4.3
12+
deploy:
13+
resources:
14+
limits:
15+
memory: 600M
16+
restart: always
17+
depends_on:
18+
- zookeeper
19+
ports:
20+
- "9093:9093"
21+
networks:
22+
- frontend
23+
environment:
24+
KAFKA_ZOOKEEPER_CONNECT: file-writer-zookeeper:2181
25+
KAFKA_BROKER_ID: 0
26+
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
27+
KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
28+
KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
29+
KAFKA_MESSAGE_MAX_BYTES: 300000000
30+
KAFKA_SOCKET_REQUEST_MAX_BYTES: 300000000
31+
KAFKA_REPLICA_FETCH_MAX_BYTES: 300000000
32+
KAFKA_LOG_RETENTION_MS: -1 # keep data forever, required for tests involving fake "historical" data
33+
## listeners
34+
KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:9093
35+
KAFKA_ADVERTISED_LISTENERS: INSIDE://file-writer-kafka:9092,OUTSIDE://localhost:9093
36+
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
37+
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
38+
healthcheck:
39+
test: ["CMD", "kafka-topics", "--bootstrap-server", "localhost:9092", "--list"]
40+
interval: 5s
41+
timeout: 5s
42+
retries: 5
43+
start_period: 10s
44+
45+
zookeeper:
46+
container_name: file-writer-zookeeper
47+
hostname: file-writer-zookeeper
48+
image: confluentinc/cp-zookeeper:7.4.3
49+
deploy:
50+
resources:
51+
limits:
52+
memory: 200M
53+
restart: always
54+
environment:
55+
ZOOKEEPER_CLIENT_PORT: 2181
56+
ZOOKEEPER_TICK_TIME: 2000
57+
networks:
58+
- frontend
59+
60+
filewriter:
61+
container_name: file-writer-file-writer
62+
image: registry.esss.lu.se/ecdc/ess-dmsc/docker-centos7-build-node:latest
63+
depends_on:
64+
kafka:
65+
condition: service_healthy
66+
tty: true
67+
networks:
68+
- frontend
69+
70+
networks:
71+
frontend:

0 commit comments

Comments (0)