Skip to content

Commit 2f1cbee

Browse files
authored
Merge pull request #9 from SciCatProject/configuration
Refactor configuration parsing option and add configuration structure using dataclass.
2 parents cfbf9b0 + 59dc09f commit 2f1cbee

11 files changed

+336
-9
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,6 @@ __pycache__/
3333
*.cif
3434
*.rcif
3535
*.ort
36+
37+
# User configuration
38+
config.*.json

README.md

+11
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,14 @@
55
## About
66

77
A daemon that creates a raw dataset using scicat interface whenever a new file is written by a file-writer.
8+
9+
## Configuration
10+
11+
You can use a json file to configure options.
12+
There is a template, ``resources/config.sample.json``, that you can copy to make your own configuration file.
13+
14+
```bash
15+
cp resources/config.sample.json config.20240405.json
16+
```
17+
18+
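Then ``scicat_ingestor`` will use the configuration file automatically, because ``config.20240405.json`` is the default configuration file name. Use the ``-c`` option to select a different file.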

config.20240405.json

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"kafka": {
3+
"topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"],
4+
"group_id": "GROUP_ID",
5+
"bootstrap_servers": [
6+
"HOST:9092"
7+
],
8+
"enable_auto_commit": true,
9+
"auto_offset_reset": "earliest"
10+
},
11+
"user_office": {
12+
"host": "https://useroffice.host",
13+
"username": "USERNAME",
14+
"password": "PASSWORD"
15+
},
16+
"scicat": {
17+
"host": "https://scicat.host",
18+
"username": "USERNAME",
19+
"password": "PASSWORD"
20+
},
21+
"dataset": {
22+
"instrument_id" : "",
23+
"instrument" : "INSTRUMENT_NAME",
24+
"default_proposal_id" : "714781",
25+
"ownable" : {
26+
"ownerGroup": "ess",
27+
"accessGroups": ["ymir","swap"]
28+
}
29+
},
30+
"options": {
31+
"config_file" : "config.json",
32+
"verbose" : false,
33+
"file_log" : false,
34+
"log_file_suffix" : ".scicat_ingestor_log",
35+
"file_log_timestamp" : false,
36+
"log_level" : "INFO",
37+
"system_log" : false,
38+
"system_log_facility" : "mail",
39+
"log_prefix" : " SFI: ",
40+
"check_by_job_id" : true,
41+
"pyscicat": null
42+
}
43+
}

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ dynamic = ["version"]
4141
"Source" = "https://github.com/ScicatProject/scicat-filewriter-ingest"
4242

4343
[project.scripts]
44-
scicat-filewriter-ingest = "scicat_filewriter_ingest:main"
44+
scicat_ingestor = "scicat_ingestor:main"
4545

4646
[tool.setuptools_scm]
4747

resources/config.sample.json

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"kafka": {
3+
"topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"],
4+
"group_id": "GROUP_ID",
5+
"bootstrap_servers": [
6+
"HOST:9092"
7+
],
8+
"enable_auto_commit": true,
9+
"auto_offset_reset": "earliest"
10+
},
11+
"user_office": {
12+
"host": "https://useroffice.host",
13+
"username": "USERNAME",
14+
"password": "PASSWORD"
15+
},
16+
"scicat": {
17+
"host": "https://scicat.host",
18+
"username": "USERNAME",
19+
"password": "PASSWORD"
20+
},
21+
"dataset": {
22+
"instrument_id" : "",
23+
"instrument" : "INSTRUMENT_NAME",
24+
"default_proposal_id" : "714781",
25+
"ownable" : {
26+
"ownerGroup": "ess",
27+
"accessGroups": ["ymir","swap"]
28+
}
29+
},
30+
"options": {
31+
"config_file" : "config.json",
32+
"verbose" : false,
33+
"file_log" : false,
34+
"log_file_suffix" : ".scicat_ingestor_log",
35+
"file_log_timestamp" : false,
36+
"log_level" : "INFO",
37+
"system_log" : false,
38+
"system_log_facility" : "mail",
39+
"log_prefix" : " SFI: ",
40+
"check_by_job_id" : true,
41+
"pyscicat": null
42+
}
43+
}

src/scicat_configuration.py

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
3+
import argparse
4+
from dataclasses import dataclass
5+
from typing import Mapping, Optional
6+
7+
8+
def build_main_arg_parser() -> argparse.ArgumentParser:
    """Build the command line argument parser of the scicat ingestor.

    All options are registered in the ``Scicat Ingestor Options`` group and
    every option is stored under an explicit ``dest`` name.

    Returns
    -------
    :
        The parser, ready for ``parse_args``.
    """
    parser = argparse.ArgumentParser()

    group = parser.add_argument_group('Scicat Ingestor Options')

    group.add_argument(
        '-c',
        '--cf',
        '--config',
        '--config-file',
        default='config.20240405.json',
        dest='config_file',
        help='Configuration file name. Default: config.20240405.json',
        type=str,
    )
    group.add_argument(
        '-v',
        '--verbose',
        dest='verbose',
        help='Provide logging on stdout',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--file-log',
        dest='file_log',
        help='Provide logging on file',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--log-file-suffix',
        dest='log_file_suffix',
        help='Suffix of the log file name',
        default='.scicat_ingestor_log',
    )
    group.add_argument(
        '--file-log-timestamp',
        dest='file_log_timestamp',
        # NOTE(review): this help text used to be a copy of the --system-log
        # one. The wording below is inferred from the option name; confirm.
        help='Add a timestamp to the log file name',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--system-log',
        dest='system_log',
        help='Provide logging on the system log',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--system-log-facility',
        dest='system_log_facility',
        help='Facility for system log',
        default='mail',
    )
    group.add_argument(
        '--log-prefix',
        dest='log_prefix',
        help='Prefix for log messages',
        default=' SFI: ',
    )
    group.add_argument(
        '--log-level', dest='log_level', help='Logging level', default='INFO', type=str
    )
    group.add_argument(
        '--check-by-job-id',
        dest='check_by_job_id',
        help='Check the status of a job by job_id',
        action='store_true',
        default=True,
    )
    # ``--check-by-job-id`` is a ``store_true`` flag that already defaults to
    # ``True``, so on its own it can never turn the check off.  The negative
    # flag writes to the same ``dest`` and makes the option controllable.
    group.add_argument(
        '--no-check-by-job-id',
        dest='check_by_job_id',
        help='Do not check the status of a job by job_id',
        action='store_false',
    )
    group.add_argument(
        '--pyscicat',
        dest='pyscicat',
        help='Location where a specific version of pyscicat is available',
        default=None,
        type=str,
    )
    return parser
88+
89+
90+
@dataclass
class RunOptions:
    """Run options of the ingestor, one field per command line option.

    The field order is part of the interface because the generated
    ``__init__`` accepts positional arguments.
    """

    # Configuration file name.
    config_file: str
    # Logging on stdout.
    verbose: bool
    # Logging on file.
    file_log: bool
    # Suffix of the log file name.
    log_file_suffix: str
    # Timestamp switch for the file log (exact effect not visible here).
    file_log_timestamp: bool
    # Logging on the system log.
    system_log: bool
    # Facility for the system log.
    system_log_facility: str
    # Prefix for log messages.
    log_prefix: str
    # Logging level.
    log_level: str
    # Check the status of a job by job_id.
    check_by_job_id: bool
    # Location where a specific version of pyscicat is available, if any.
    pyscicat: Optional[str] = None
103+
104+
105+
@dataclass
class ScicatConfig:
    """Configuration of the ingestor: the file content plus the run options."""

    original_dict: Mapping
    """Original configuration dictionary in the json file."""
    run_options: RunOptions
    """Run options, i.e. the ``options`` of the file merged with command line arguments."""
111+
112+
113+
def build_scicat_config(input_args: argparse.Namespace) -> ScicatConfig:
    """Merge configuration from the configuration file and input arguments.

    The file named by ``input_args.config_file`` is parsed as JSON when it
    exists; otherwise an empty configuration is used.  The run options start
    as a deep copy of the ``options`` section of the file and are then
    overwritten by every attribute of ``input_args`` that is not ``None``.

    Parameters
    ----------
    input_args:
        Parsed command line arguments.

    Returns
    -------
    :
        The read-only file content together with the merged run options.

    Raises
    ------
    json.JSONDecodeError
        If the configuration file is not valid JSON.
    TypeError
        If the merged options do not match the fields of ``RunOptions``.
    """
    import copy
    import json
    import pathlib
    from types import MappingProxyType

    # Read configuration file
    config_dict: dict = {}
    if input_args.config_file:
        config_file_path = pathlib.Path(input_args.config_file)
        if config_file_path.is_file():
            # JSON text is UTF-8; do not depend on the locale encoding.
            config_dict = json.loads(config_file_path.read_text(encoding='utf-8'))

    # Overwrite deep-copied options with command line arguments
    # NOTE(review): parser defaults that are not ``None`` also overwrite the
    # values of the file - confirm that this precedence is intended.
    run_option_dict: dict = copy.deepcopy(config_dict.setdefault('options', {}))
    run_option_dict.update(
        (arg_name, arg_value)
        for arg_name, arg_value in vars(input_args).items()
        if arg_value is not None
    )

    # Protect original configuration by making it read-only.
    # Only JSON objects can be wrapped: ``MappingProxyType`` raises
    # ``TypeError`` for scalars and lists, so those are left as they are.
    for key, value in config_dict.items():
        if isinstance(value, dict):
            config_dict[key] = MappingProxyType(value)

    # Wrap configuration in a dataclass
    return ScicatConfig(
        original_dict=MappingProxyType(config_dict),
        run_options=RunOptions(**run_option_dict),
    )

src/scicat_filewriter_ingest.py

-7
This file was deleted.

src/scicat_ingestor.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
3+
from scicat_configuration import build_main_arg_parser, build_scicat_config
4+
5+
6+
def main() -> None:
    """Parse the command line, build the configuration and print it."""
    parsed_args = build_main_arg_parser().parse_args()
    print(build_scicat_config(parsed_args))

tests/minimum_test.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
def test_package() -> None:
2-
import scicat_filewriter_ingest # noqa: F401
2+
import scicat_ingestor # noqa: F401

tests/test_scicat_configuration.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
3+
import argparse
4+
5+
import pytest
6+
7+
from scicat_configuration import ScicatConfig
8+
9+
10+
@pytest.fixture
def main_arg_parser() -> argparse.ArgumentParser:
    """Return the main argument parser."""
    from scicat_configuration import build_main_arg_parser

    return build_main_arg_parser()
16+
17+
18+
def test_scicat_arg_parser_configuration_matches(
    main_arg_parser: argparse.ArgumentParser,
) -> None:
    """Test that the parser and the ``options`` of the sample file share all keys."""
    import json
    import pathlib

    scicat_namespace = main_arg_parser.parse_args(
        ['-c', 'resources/config.sample.json']
    )

    # The file name must come back from the parser unchanged
    assert scicat_namespace.config_file == 'resources/config.sample.json'
    sample_path = pathlib.Path(scicat_namespace.config_file)
    arg_keys = set(vars(scicat_namespace))

    # Load the ``options`` section of the sample file
    assert sample_path.exists()
    file_content: dict = json.loads(sample_path.read_text())
    option_keys = set(file_content.get('options', dict()))

    # Neither side may have a key that the other one lacks
    assert arg_keys == option_keys
44+
45+
46+
def test_build_scicat_config_default(main_arg_parser: argparse.ArgumentParser) -> None:
    """Test if the configuration can be built from default arguments."""
    from scicat_configuration import build_scicat_config

    # Pass an explicit empty list: ``parse_args()`` without an argument reads
    # ``sys.argv[1:]``, which holds the command line given to pytest itself.
    scicat_namespace = main_arg_parser.parse_args([])
    scicat_config = build_scicat_config(scicat_namespace)
    assert scicat_config.run_options.config_file == 'config.20240405.json'
53+
54+
55+
@pytest.fixture
def scicat_config(main_arg_parser: argparse.ArgumentParser) -> ScicatConfig:
    """Build a configuration from the sample file with ``--verbose`` given."""
    from scicat_configuration import build_scicat_config

    cli_args = ['-c', 'resources/config.sample.json', '--verbose']
    return build_scicat_config(main_arg_parser.parse_args(cli_args))
63+
64+
65+
def test_build_scicat_config(scicat_config: ScicatConfig) -> None:
    """Test that command line values reach the run options, not the file content."""
    file_options = scicat_config.original_dict['options']
    merged_options = scicat_config.run_options

    # File content keeps its own value, the run options carry the argument
    assert file_options['config_file'] == 'config.json'
    assert merged_options.config_file == 'resources/config.sample.json'
    assert not file_options['verbose']
    assert merged_options.verbose
71+
72+
73+
def test_scicat_config_original_dict_read_only(scicat_config: ScicatConfig) -> None:
    """Test that the original dictionary and each of its sections are read-only."""
    from types import MappingProxyType

    file_content = scicat_config.original_dict
    assert isinstance(file_content, MappingProxyType)
    assert all(
        isinstance(section, MappingProxyType) for section in file_content.values()
    )

tox.ini

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ deps = -r requirements/test.txt
77
setenv =
88
JUPYTER_PLATFORM_DIRS = 1
99
commands = pytest {posargs}
10+
scicat_ingestor --help # Minimal test of the script
1011

1112
[testenv:nightly]
1213
deps = -r requirements/nightly.txt

0 commit comments

Comments
 (0)