Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor configuration parsing option and add configuration structure using dataclass. #9

Merged
merged 7 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ __pycache__/
*.cif
*.rcif
*.ort

# User configuration
config.*.json
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,14 @@
## About

A daemon that creates a raw dataset using scicat interface whenever a new file is written by a file-writer.

## Configuration

You can use a JSON file to configure options.
There is a template, ``resources/config.sample.json``, that you can copy to make your own configuration file.

```bash
cp resources/config.sample.json config.20240405.json
```

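Then ``scicat_ingestor`` will automatically use this file, because ``config.20240405.json`` is its default configuration file name. Use ``-c``/``--config-file`` to point it to a different file.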
43 changes: 43 additions & 0 deletions config.20240405.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"kafka": {
"topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"],
"group_id": "GROUP_ID",
"bootstrap_servers": [
"HOST:9092"
],
"enable_auto_commit": true,
"auto_offset_reset": "earliest"
},
"user_office": {
"host": "https://useroffice.host",
"username": "USERNAME",
"password": "PASSWORD"
},
"scicat": {
"host": "https://scicat.host",
"username": "USERNAME",
"password": "PASSWORD"
},
"dataset": {
"instrument_id" : "",
"instrument" : "INSTRUMENT_NAME",
"default_proposal_id" : "714781",
"ownable" : {
"ownerGroup": "ess",
"accessGroups": ["ymir","swap"]
}
},
"options": {
"config_file" : "config.json",
"verbose" : false,
"file_log" : false,
"log_file_suffix" : ".scicat_ingestor_log",
"file_log_timestamp" : false,
"log_level" : "INFO",
"system_log" : false,
"system_log_facility" : "mail",
"log_prefix" : " SFI: ",
"check_by_job_id" : true,
"pyscicat": null
}
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dynamic = ["version"]
"Source" = "https://github.com/ScicatProject/scicat-filewriter-ingest"

[project.scripts]
scicat-filewriter-ingest = "scicat_filewriter_ingest:main"
scicat_ingestor = "scicat_ingestor:main"

[tool.setuptools_scm]

Expand Down
43 changes: 43 additions & 0 deletions resources/config.sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"kafka": {
"topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"],
"group_id": "GROUP_ID",
"bootstrap_servers": [
"HOST:9092"
],
"enable_auto_commit": true,
"auto_offset_reset": "earliest"
},
"user_office": {
"host": "https://useroffice.host",
"username": "USERNAME",
"password": "PASSWORD"
},
"scicat": {
"host": "https://scicat.host",
"username": "USERNAME",
"password": "PASSWORD"
},
"dataset": {
"instrument_id" : "",
"instrument" : "INSTRUMENT_NAME",
"default_proposal_id" : "714781",
"ownable" : {
"ownerGroup": "ess",
"accessGroups": ["ymir","swap"]
}
},
"options": {
"config_file" : "config.json",
"verbose" : false,
"file_log" : false,
"log_file_suffix" : ".scicat_ingestor_log",
"file_log_timestamp" : false,
"log_level" : "INFO",
"system_log" : false,
"system_log_facility" : "mail",
"log_prefix" : " SFI: ",
"check_by_job_id" : true,
"pyscicat": null
}
}
143 changes: 143 additions & 0 deletions src/scicat_configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
import argparse
from dataclasses import dataclass
from typing import Mapping, Optional


def build_main_arg_parser() -> argparse.ArgumentParser:
    """Build the command line argument parser of the ingestor.

    All options are registered in a single argument group named
    'Scicat Ingestor Options'. The ``dest`` of every option is used later
    as a keyword when the run options are built from the parsed namespace,
    so the set of ``dest`` names must stay in sync with the run option fields.

    Returns
    -------
    argparse.ArgumentParser
        A parser whose namespace contains ``config_file``, ``verbose``,
        ``file_log``, ``log_file_suffix``, ``file_log_timestamp``,
        ``system_log``, ``system_log_facility``, ``log_prefix``,
        ``log_level``, ``check_by_job_id`` and ``pyscicat``.
    """
    parser = argparse.ArgumentParser()

    group = parser.add_argument_group('Scicat Ingestor Options')

    group.add_argument(
        '-c',
        '--cf',
        '--config',
        '--config-file',
        default='config.20240405.json',
        dest='config_file',
        help='Configuration file name. Default: config.20240405.json',
        type=str,
    )
    group.add_argument(
        '-v',
        '--verbose',
        dest='verbose',
        help='Provide logging on stdout',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--file-log',
        dest='file_log',
        help='Provide logging on file',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--log-file-suffix',
        dest='log_file_suffix',
        help='Suffix of the log file name',
        default='.scicat_ingestor_log',
    )
    group.add_argument(
        '--file-log-timestamp',
        dest='file_log_timestamp',
        # The help text used to be a copy of the ``--system-log`` one.
        help='Enable timestamp for the file log',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--system-log',
        dest='system_log',
        help='Provide logging on the system log',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--system-log-facility',
        dest='system_log_facility',
        help='Facility for system log',
        default='mail',
    )
    group.add_argument(
        '--log-prefix',
        dest='log_prefix',
        help='Prefix for log messages',
        default=' SFI: ',
    )
    group.add_argument(
        '--log-level', dest='log_level', help='Logging level', default='INFO', type=str
    )
    # NOTE(review): ``store_true`` combined with ``default=True`` means this
    # value is always True, whether or not the flag is given. Confirm whether
    # a way to disable it from the command line is wanted.
    group.add_argument(
        '--check-by-job-id',
        dest='check_by_job_id',
        help='Check the status of a job by job_id',
        action='store_true',
        default=True,
    )
    group.add_argument(
        '--pyscicat',
        dest='pyscicat',
        help='Location where a specific version of pyscicat is available',
        default=None,
        type=str,
    )
    return parser


@dataclass
class RunOptions:
    """Run options of the ingestor, one field per command line option.

    The field names mirror the ``dest`` names of the main argument parser
    and the keys of the ``options`` section of the configuration file.
    """

    # Name of the configuration file.
    config_file: str

    # Logging on stdout.
    verbose: bool

    # Logging on file.
    file_log: bool
    log_file_suffix: str
    file_log_timestamp: bool

    # Logging on the system log.
    system_log: bool
    system_log_facility: str

    # Common logging settings.
    log_prefix: str
    log_level: str

    # Check the status of a job by job_id.
    check_by_job_id: bool

    # Location where a specific version of pyscicat is available, if any.
    pyscicat: Optional[str] = None


@dataclass
class ScicatConfig:
    """Configuration of the ingestor: file contents plus merged run options."""

    # Original configuration dictionary in the json file.
    original_dict: Mapping

    # Run options: the ``options`` section of the configuration file
    # merged with the command line arguments.
    run_options: RunOptions


def build_scicat_config(input_args: argparse.Namespace) -> ScicatConfig:
    """Merge configuration from the configuration file and input arguments.

    The json file named by ``input_args.config_file`` is read if it exists;
    otherwise an empty configuration is used. A deep copy of its ``options``
    section is then updated with every attribute of ``input_args`` whose
    value is not ``None``, and the result is used to build the run options.

    Parameters
    ----------
    input_args:
        Namespace of parsed command line arguments. Its attribute names are
        used as keyword arguments of ``RunOptions``.

    Returns
    -------
    ScicatConfig
        ``original_dict`` is a read-only view of the configuration file
        contents (the top level and each top-level section that is a json
        object are wrapped in ``MappingProxyType``; deeper levels are not).
        ``run_options`` holds the merged options.

    Raises
    ------
    TypeError
        If the merged options do not match the fields of ``RunOptions``.
    """
    import copy
    import json
    import pathlib
    from types import MappingProxyType

    # Read configuration file
    if (
        input_args.config_file
        and (config_file_path := pathlib.Path(input_args.config_file)).is_file()
    ):
        config_dict = json.loads(config_file_path.read_text())
    else:
        config_dict = dict()

    # Overwrite deep-copied options with command line arguments.
    # NOTE(review): only ``None`` values are skipped, so any other value
    # in the namespace (including parser defaults) replaces the file's value.
    run_option_dict: dict = copy.deepcopy(config_dict.setdefault('options', dict()))
    for arg_name, arg_value in vars(input_args).items():
        if arg_value is not None:
            run_option_dict[arg_name] = arg_value

    # Protect original configuration by making it read-only.
    # Only json objects can be wrapped: ``MappingProxyType`` raises
    # ``TypeError`` for scalars and lists, so those are kept as they are.
    read_only_sections = {
        key: MappingProxyType(value) if isinstance(value, dict) else value
        for key, value in config_dict.items()
    }

    # Wrap configuration in a dataclass
    return ScicatConfig(
        original_dict=MappingProxyType(read_only_sections),
        run_options=RunOptions(**run_option_dict),
    )
7 changes: 0 additions & 7 deletions src/scicat_filewriter_ingest.py

This file was deleted.

11 changes: 11 additions & 0 deletions src/scicat_ingestor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
from scicat_configuration import build_main_arg_parser, build_scicat_config


def main() -> None:
    """Parse the command line, build the configuration and print it."""
    parsed_args = build_main_arg_parser().parse_args()
    print(build_scicat_config(parsed_args))
2 changes: 1 addition & 1 deletion tests/minimum_test.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
def test_package() -> None:
    """Smoke test: check that the package module can be imported."""
    # NOTE(review): this hunk is reported as one addition and one deletion,
    # so the first import below is presumably the removed line - confirm.
    import scicat_filewriter_ingest # noqa: F401
    import scicat_ingestor # noqa: F401
79 changes: 79 additions & 0 deletions tests/test_scicat_configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
import argparse

import pytest

from scicat_configuration import ScicatConfig


@pytest.fixture
def main_arg_parser() -> argparse.ArgumentParser:
    """Return the main argument parser of the ingestor."""
    from scicat_configuration import build_main_arg_parser

    parser = build_main_arg_parser()
    return parser


def test_scicat_arg_parser_configuration_matches(
    main_arg_parser: argparse.ArgumentParser,
) -> None:
    """Test that the sample file's options and the parser have the same keys."""
    import json
    import pathlib

    parsed = main_arg_parser.parse_args(['-c', 'resources/config.sample.json'])

    # The parser must report the configuration file it was given.
    assert parsed.config_file == 'resources/config.sample.json'
    sample_path = pathlib.Path(parsed.config_file)
    arg_options: dict = vars(parsed)

    # Load the ``options`` section of the sample configuration file.
    assert sample_path.exists()
    sample_config: dict = json.loads(sample_path.read_text())
    file_options: dict = sample_config.get('options', dict())

    # Every key known to either side must be known to both.
    for name in set(arg_options) | set(file_options):
        assert name in arg_options
        assert name in file_options


def test_build_scicat_config_default(main_arg_parser: argparse.ArgumentParser) -> None:
    """Test if the configuration can be built from default arguments."""
    from scicat_configuration import build_scicat_config

    # Pass an explicit empty list: a bare ``parse_args()`` would parse
    # ``sys.argv``, i.e. the arguments given to pytest itself.
    scicat_namespace = main_arg_parser.parse_args([])
    scicat_config = build_scicat_config(scicat_namespace)
    assert scicat_config.run_options.config_file == 'config.20240405.json'


@pytest.fixture
def scicat_config(main_arg_parser: argparse.ArgumentParser) -> ScicatConfig:
    """Return a configuration built from the sample file with ``--verbose``."""
    from scicat_configuration import build_scicat_config

    cli_args = ['-c', 'resources/config.sample.json', '--verbose']
    return build_scicat_config(main_arg_parser.parse_args(cli_args))


def test_build_scicat_config(scicat_config: ScicatConfig) -> None:
    """Test that command line arguments override the file, which stays intact."""
    run_options = scicat_config.run_options

    # ``config_file``: the file keeps its own value, the run options take
    # the one given on the command line.
    assert scicat_config.original_dict['options']['config_file'] == 'config.json'
    assert run_options.config_file == 'resources/config.sample.json'

    # ``verbose``: false in the file, switched on by ``--verbose``.
    assert not scicat_config.original_dict['options']['verbose']
    assert run_options.verbose


def test_scicat_config_original_dict_read_only(scicat_config: ScicatConfig) -> None:
    """Test that the original dictionary and its sections are read-only views."""
    from types import MappingProxyType

    top_level = scicat_config.original_dict
    assert isinstance(top_level, MappingProxyType)
    for section in top_level.values():
        assert isinstance(section, MappingProxyType)
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ deps = -r requirements/test.txt
setenv =
JUPYTER_PLATFORM_DIRS = 1
commands = pytest {posargs}
scicat_ingestor --help # Minimal test of the script

[testenv:nightly]
deps = -r requirements/nightly.txt
Expand Down