2
2
# Copyright (c) 2024 ScicatProject contributors (https://github.com/ScicatProject)
3
3
import argparse
4
4
from collections .abc import Mapping
5
- from dataclasses import dataclass
5
+ from dataclasses import asdict , dataclass
6
+ from types import MappingProxyType
7
+ from typing import Any
8
+
9
+
10
+ def _load_config (config_file : Any ) -> dict :
11
+ """Load configuration from the configuration file path."""
12
+ import json
13
+ import pathlib
14
+
15
+ if (
16
+ isinstance (config_file , str | pathlib .Path )
17
+ and (config_file_path := pathlib .Path (config_file )).is_file ()
18
+ ):
19
+ return json .loads (config_file_path .read_text ())
20
+ return {}
21
+
22
+
23
def _merge_run_options(config_dict: dict, input_args_dict: dict) -> dict:
    """Merge configuration from the configuration file and input arguments.

    The ``"options"`` section of ``config_dict`` is deep-copied and every
    entry of ``input_args_dict`` whose value is not ``None`` is written
    on top of the copy.

    If ``config_dict`` has no ``"options"`` section,
    an empty one is inserted into ``config_dict``.
    """
    from copy import deepcopy

    # Work on a deep copy so the section stored in ``config_dict`` stays untouched
    merged: dict = deepcopy(config_dict.setdefault("options", {}))
    # ``None`` means "not given on the command line": keep the file value
    merged.update(
        {name: value for name, value in input_args_dict.items() if value is not None}
    )
    return merged
34
+
35
+
36
def _freeze_dict_items(d: dict) -> MappingProxyType:
    """Freeze the dictionary to make it read-only.

    A new read-only mapping is built from the items of ``d``.
    Values that are dictionaries are frozen recursively, so nested
    dictionaries at any depth are read-only as well.
    Values of any other type (including lists) are stored as they are.

    The result is a snapshot of the dictionary structure:
    adding or removing keys of ``d`` (or of its nested dictionaries)
    afterwards does not change the returned mapping.
    """
    return MappingProxyType(
        {
            key: _freeze_dict_items(value) if isinstance(value, dict) else value
            for key, value in d.items()
        }
    )
44
+
45
+
46
def _recursive_deepcopy(obj: Any) -> Any:
    """Recursively copy nested mappings into plain dictionaries.

    Every ``Mapping`` (``dict``, ``MappingProxyType``, ...) is rebuilt as
    a new ``dict`` at every nesting level.
    Anything that is not a mapping is returned as it is,
    i.e. such values are shared with the input and not copied.
    """
    # ``MappingProxyType`` is registered as a ``Mapping``, so a single check
    # covers the top level and the nested values consistently.
    if not isinstance(obj, Mapping):
        return obj

    return {key: _recursive_deepcopy(value) for key, value in obj.items()}
6
57
7
58
8
59
def build_main_arg_parser () -> argparse .ArgumentParser :
@@ -96,7 +147,6 @@ def build_main_arg_parser() -> argparse.ArgumentParser:
96
147
97
148
def build_background_ingestor_arg_parser () -> argparse .ArgumentParser :
98
149
parser = build_main_arg_parser ()
99
-
100
150
group = parser .add_argument_group ('Scicat Background Ingestor Options' )
101
151
102
152
group .add_argument (
@@ -180,7 +230,7 @@ class kafkaOptions:
180
230
181
231
182
232
@dataclass
183
- class ScicatConfig :
233
+ class IngesterConfig :
184
234
original_dict : Mapping
185
235
"""Original configuration dictionary in the json file."""
186
236
run_options : RunOptions
@@ -192,50 +242,79 @@ class ScicatConfig:
192
242
193
243
def to_dict(self) -> dict:
    """Return the configuration as a dictionary."""
    # ``asdict`` does not support ``MappingProxyType``,
    # so the original dictionary is converted to plain dictionaries first.
    plain_original = _recursive_deepcopy(self.original_dict)
    snapshot = IngesterConfig(
        plain_original,
        self.run_options,
        self.kafka_options,
        self.graylog_options,
    )
    return asdict(snapshot)
206
- return asdict (copied )
207
256
208
257
209
- def build_scicat_config (input_args : argparse .Namespace ) -> ScicatConfig :
258
def build_scicat_ingester_config(input_args: argparse.Namespace) -> IngesterConfig:
    """Merge configuration from the configuration file and input arguments."""
    file_config = _load_config(input_args.config_file)
    # Input arguments that are not ``None`` overwrite the options from the file
    merged_run_options = _merge_run_options(file_config, vars(input_args))

    # Freeze before the ``setdefault`` calls below, so that sections they insert
    # into ``file_config`` are not part of the read-only original dictionary.
    frozen_original = _freeze_dict_items(file_config)
    run_options = RunOptions(**merged_run_options)
    kafka_options = kafkaOptions(**file_config.setdefault("kafka", {}))
    graylog_options = GraylogOptions(**file_config.setdefault("graylog", {}))

    # Wrap configuration in a dataclass
    return IngesterConfig(
        original_dict=frozen_original,
        run_options=run_options,
        kafka_options=kafka_options,
        graylog_options=graylog_options,
    )
270
+
271
+
272
@dataclass
class SingleRunOptions:
    """Options that describe a single run: one nexus file and its message file."""

    nexus_file: str
    """Full path of the nexus file that should be ingested."""
    done_writing_message_file: str
    """Full path of the done writing message file that matches the ``nexus_file``."""
224
278
225
- # Overwrite deep-copied options with command line arguments
226
- run_option_dict : dict = copy .deepcopy (config_dict .setdefault ("options" , {}))
227
- for arg_name , arg_value in vars (input_args ).items ():
228
- if arg_value is not None :
229
- run_option_dict [arg_name ] = arg_value
230
279
231
- # Protect original configuration by making it read-only
232
- for key , value in config_dict .items ():
233
- config_dict [key ] = MappingProxyType (value )
280
@dataclass
class BackgroundIngestorConfig(IngesterConfig):
    single_run_options: SingleRunOptions
    """Single run configuration options for background ingestor."""

    def to_dict(self) -> dict:
        """Return the configuration as a dictionary."""
        # ``asdict`` does not support ``MappingProxyType``,
        # so the original dictionary is converted to plain dictionaries first.
        plain_original = _recursive_deepcopy(self.original_dict)
        snapshot = BackgroundIngestorConfig(
            plain_original,
            self.run_options,
            self.kafka_options,
            self.graylog_options,
            self.single_run_options,
        )
        return asdict(snapshot)
299
+
300
+
301
def build_scicat_background_ingester_config(
    input_args: argparse.Namespace,
) -> BackgroundIngestorConfig:
    """Merge configuration from the configuration file and input arguments.

    ``nexus_file`` and ``done_writing_message_file`` are taken out of the
    input arguments and wrapped in ``SingleRunOptions``;
    the remaining arguments are merged with the options of the
    configuration file. ``input_args`` itself is left unchanged.

    Raises ``KeyError`` if ``input_args`` has no ``nexus_file`` or
    ``done_writing_message_file`` attribute.
    """
    config_dict = _load_config(input_args.config_file)
    # ``vars`` returns the ``__dict__`` of the namespace itself.
    # Pop from a shallow copy so the attributes are not removed from ``input_args``.
    input_args_dict = dict(vars(input_args))
    single_run_option_dict = {
        "nexus_file": input_args_dict.pop("nexus_file"),
        "done_writing_message_file": input_args_dict.pop("done_writing_message_file"),
    }
    run_option_dict = _merge_run_options(config_dict, input_args_dict)

    # Wrap configuration in a dataclass
    return BackgroundIngestorConfig(
        original_dict=_freeze_dict_items(config_dict),
        run_options=RunOptions(**run_option_dict),
        kafka_options=kafkaOptions(**config_dict.setdefault("kafka", {})),
        single_run_options=SingleRunOptions(**single_run_option_dict),
        graylog_options=GraylogOptions(**config_dict.setdefault("graylog", {})),
    )
0 commit comments