diff --git a/.gitignore b/.gitignore index 74ee902..d896434 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,6 @@ __pycache__/ *.cif *.rcif *.ort + +# User configuration +config.*.json diff --git a/README.md b/README.md index 8c43ec0..0318437 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,14 @@ ## About A daemon that creates a raw dataset using scicat interface whenever a new file is written by a file-writer. + +## Configuration + +You can use a json file to configure options. +There is a template, ``resources/config.sample.json`` you can copy/paste to make your own configuration file. + +```bash +cp resources/config.sample.json config.20240405.json +``` + +Then ``scicat_ingestor`` will automatically use the configuration file. diff --git a/config.20240405.json b/config.20240405.json new file mode 100644 index 0000000..aec802d --- /dev/null +++ b/config.20240405.json @@ -0,0 +1,43 @@ +{ + "kafka": { + "topics": ["KAFKA_TOPIC_1","KAFKA_TOPIC_2"], + "group_id": "GROUP_ID", + "bootstrap_servers": [ + "HOST:9092" + ], + "enable_auto_commit": true, + "auto_offset_reset": "earliest" + }, + "user_office": { + "host": "https://useroffice.host", + "username": "USERNAME", + "password": "PASSWORD" + }, + "scicat": { + "host": "https://scicat.host", + "username": "USERNAME", + "password": "PASSWORD" + }, + "dataset": { + "instrument_id" : "", + "instrument" : "INSTRUMENT_NAME", + "default_proposal_id" : "714781", + "ownable" : { + "ownerGroup": "ess", + "accessGroups": ["ymir","swap"] + } + }, + "options": { + "config_file" : "config.json", + "verbose" : false, + "file_log" : false, + "log_file_suffix" : ".scicat_ingestor_log", + "file_log_timestamp" : false, + "log_level" : "INFO", + "system_log" : false, + "system_log_facility" : "mail", + "log_prefix" : " SFI: ", + "check_by_job_id" : true, + "pyscicat": null + } +} diff --git a/pyproject.toml b/pyproject.toml index 9478972..acc4d5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dynamic = 
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
import argparse
from dataclasses import dataclass
from typing import Mapping, Optional


def build_main_arg_parser() -> argparse.ArgumentParser:
    """Build the command line parser of the scicat ingestor.

    Each option is stored under a ``dest`` name that is also used as a key of
    the ``options`` section of the json configuration file and as a field of
    :class:`RunOptions`, so the three can be merged by
    :func:`build_scicat_config`.

    Returns
    -------
    argparse.ArgumentParser
        Parser with all options registered in the
        ``Scicat Ingestor Options`` group.
    """
    parser = argparse.ArgumentParser()

    group = parser.add_argument_group('Scicat Ingestor Options')

    group.add_argument(
        '-c',
        '--cf',
        '--config',
        '--config-file',
        default='config.20240405.json',
        dest='config_file',
        help='Configuration file name. Default: config.20240405.json',
        type=str,
    )
    group.add_argument(
        '-v',
        '--verbose',
        dest='verbose',
        help='Provide logging on stdout',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--file-log',
        dest='file_log',
        help='Provide logging on file',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--log-file-suffix',
        dest='log_file_suffix',
        help='Suffix of the log file name',
        default='.scicat_ingestor_log',
    )
    group.add_argument(
        '--file-log-timestamp',
        dest='file_log_timestamp',
        # The help text used to be a copy of the ``--system-log`` one.
        help='Add a timestamp to the file log',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--system-log',
        dest='system_log',
        help='Provide logging on the system log',
        action='store_true',
        default=False,
    )
    group.add_argument(
        '--system-log-facility',
        dest='system_log_facility',
        help='Facility for system log',
        default='mail',
    )
    group.add_argument(
        '--log-prefix',
        dest='log_prefix',
        help='Prefix for log messages',
        default=' SFI: ',
    )
    group.add_argument(
        '--log-level', dest='log_level', help='Logging level', default='INFO', type=str
    )
    group.add_argument(
        '--check-by-job-id',
        dest='check_by_job_id',
        help='Check the status of a job by job_id',
        action='store_true',
        default=True,
    )
    # ``--check-by-job-id`` defaults to True, so on its own it could never be
    # turned off. The negative flag shares the same ``dest`` and therefore
    # does not add a new key to the parsed namespace.
    group.add_argument(
        '--no-check-by-job-id',
        dest='check_by_job_id',
        help='Do not check the status of a job by job_id',
        action='store_false',
        default=True,
    )
    group.add_argument(
        '--pyscicat',
        dest='pyscicat',
        help='Location where a specific version of pyscicat is available',
        default=None,
        type=str,
    )
    return parser


@dataclass
class RunOptions:
    """Run options of the ingestor.

    Field names match the ``dest`` names of :func:`build_main_arg_parser`.
    """

    config_file: str
    verbose: bool
    file_log: bool
    log_file_suffix: str
    file_log_timestamp: bool
    system_log: bool
    system_log_facility: str
    log_prefix: str
    log_level: str
    check_by_job_id: bool
    pyscicat: Optional[str] = None


@dataclass
class ScicatConfig:
    """Configuration read from the json file plus the merged run options."""

    original_dict: Mapping
    """Original configuration dictionary in the json file."""
    run_options: RunOptions
    """Merged configuration dictionary with command line arguments."""


def build_scicat_config(input_args: argparse.Namespace) -> ScicatConfig:
    """Merge configuration from the configuration file and input arguments.

    For every run option the value is taken, in order of precedence, from:

    1. the command line, if the value differs from the parser default,
    2. the ``options`` section of the configuration file,
    3. the parser default.

    ``config_file`` is the exception: it always holds the command line value.

    A namespace cannot tell an omitted flag from a flag explicitly set to its
    default value, so an option set in the file cannot be reset to the parser
    default from the command line.

    Parameters
    ----------
    input_args:
        Namespace produced by the parser of :func:`build_main_arg_parser`.
        If ``config_file`` is empty or is not an existing file, an empty
        configuration is used.

    Returns
    -------
    ScicatConfig
        ``original_dict`` is a read-only view of the file content (top-level
        sections that are dictionaries are read-only views as well) and
        ``run_options`` holds the merged options.

    Raises
    ------
    TypeError
        If the merged options miss a required :class:`RunOptions` field or
        contain a key that is not a field of it.
    """
    import copy
    import json
    import pathlib
    from types import MappingProxyType

    # Read configuration file
    if (
        input_args.config_file
        and (config_file_path := pathlib.Path(input_args.config_file)).is_file()
    ):
        # Be explicit about the encoding instead of relying on the locale.
        config_dict = json.loads(config_file_path.read_text(encoding='utf-8'))
    else:
        config_dict = dict()

    # Merge into a deep copy so the options from the file stay untouched.
    run_option_dict: dict = copy.deepcopy(config_dict.setdefault('options', dict()))
    # Every parser option except ``pyscicat`` has a non-None default, so
    # "is not None" alone would let defaults overwrite the file. Compare
    # against the parser defaults to find what the user actually passed.
    default_source = build_main_arg_parser()
    for arg_name, arg_value in vars(input_args).items():
        if arg_value is None:
            continue
        given_on_command_line = arg_value != default_source.get_default(arg_name)
        missing_in_file = run_option_dict.get(arg_name) is None
        if given_on_command_line or missing_in_file or arg_name == 'config_file':
            run_option_dict[arg_name] = arg_value

    # Protect original configuration by making it read-only.
    # Only mappings can be wrapped; other top-level values are kept as they are.
    read_only_sections = {
        key: MappingProxyType(value) if isinstance(value, dict) else value
        for key, value in config_dict.items()
    }

    # Wrap configuration in a dataclass
    return ScicatConfig(
        original_dict=MappingProxyType(read_only_sections),
        run_options=RunOptions(**run_option_dict),
    )
# ---------------------------------------------------------------------------
# src/scicat_ingestor.py
# ---------------------------------------------------------------------------
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
from scicat_configuration import build_main_arg_parser, build_scicat_config


def main() -> None:
    """Main entry point of the app.

    Parses the command line, builds the configuration from it and prints
    the result.
    """
    arg_parser = build_main_arg_parser()
    arg_namespace = arg_parser.parse_args()
    config = build_scicat_config(arg_namespace)
    print(config)


# ---------------------------------------------------------------------------
# tests/minimum_test.py
# ---------------------------------------------------------------------------
def test_package() -> None:
    import scicat_ingestor  # noqa: F401


# ---------------------------------------------------------------------------
# tests/test_scicat_configuration.py
# ---------------------------------------------------------------------------
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
import argparse

import pytest

from scicat_configuration import ScicatConfig


@pytest.fixture
def main_arg_parser() -> argparse.ArgumentParser:
    """Return the main argument parser."""
    from scicat_configuration import build_main_arg_parser

    return build_main_arg_parser()


def test_scicat_arg_parser_configuration_matches(
    main_arg_parser: argparse.ArgumentParser,
) -> None:
    """Test if options in the configuration file matches the argument parser."""
    import json
    import pathlib

    scicat_namespace = main_arg_parser.parse_args(
        ['-c', 'resources/config.sample.json']
    )

    # Check if the configuration file is the same
    assert scicat_namespace.config_file == 'resources/config.sample.json'
    config_path = pathlib.Path(scicat_namespace.config_file)
    config_from_args: dict = vars(scicat_namespace)

    # Parse the configuration file
    assert config_path.exists()
    config_from_file: dict = json.loads(config_path.read_text())
    main_options: dict = config_from_file.get('options', dict())

    # Check if all keys matches
    all_keys = set(config_from_args.keys()).union(main_options.keys())
    for key in all_keys:
        assert key in config_from_args
        assert key in main_options


def test_build_scicat_config_default(main_arg_parser: argparse.ArgumentParser) -> None:
    """Test if the configuration can be built from default arguments."""
    from scicat_configuration import build_scicat_config

    # Pass an explicit empty list: ``parse_args()`` without arguments parses
    # ``sys.argv``, i.e. the arguments pytest itself was started with, and
    # exits with an error on the first one it does not recognize.
    scicat_namespace = main_arg_parser.parse_args([])
    scicat_config = build_scicat_config(scicat_namespace)
    assert scicat_config.run_options.config_file == 'config.20240405.json'


@pytest.fixture
def scicat_config(main_arg_parser: argparse.ArgumentParser) -> ScicatConfig:
    """Return the configuration built from the sample file and ``--verbose``."""
    from scicat_configuration import build_scicat_config

    scicat_namespace = main_arg_parser.parse_args(
        ['-c', 'resources/config.sample.json', '--verbose']
    )
    return build_scicat_config(scicat_namespace)


def test_build_scicat_config(scicat_config: ScicatConfig) -> None:
    """Test if the configuration can be built from arguments."""
    assert scicat_config.original_dict['options']['config_file'] == 'config.json'
    assert scicat_config.run_options.config_file == 'resources/config.sample.json'
    assert not scicat_config.original_dict['options']['verbose']
    assert scicat_config.run_options.verbose


def test_scicat_config_original_dict_read_only(scicat_config: ScicatConfig) -> None:
    """Test if the original dictionary is read-only."""
    from types import MappingProxyType

    assert isinstance(scicat_config.original_dict, MappingProxyType)
    for sub_option in scicat_config.original_dict.values():
        assert isinstance(sub_option, MappingProxyType)
requirements/test.txt setenv = JUPYTER_PLATFORM_DIRS = 1 commands = pytest {posargs} + scicat_ingestor --help # Minimal test of the script [testenv:nightly] deps = -r requirements/nightly.txt