11import contextlib
22import logging
33import re
4+ from concurrent .futures import ThreadPoolExecutor , as_completed
45from pathlib import Path
56
67import yaml
78
8- log = logging . getLogger ( __name__ )
9+ from nf_core . utils import read_module_name
910
10- _PROCESS_NAME_RE = re . compile ( r"^\s*process\s+(\w+)\s*\{" , re . MULTILINE )
11+ log = logging . getLogger ( __name__ )
1112
1213PLATFORMS : dict [str , list [str ]] = {
1314 "docker_amd64" : ["docker" , "linux/amd64" , "name" ],
2021 "conda_lock_files_arm64" : ["conda" , "linux/arm64" , "lock_file" ],
2122}
2223
24+ _CONFIG_LINE_RE = re .compile (r"withName:\s*'(\w+)'\s*\{\s*(?:container|conda)\s*=\s*'([^']+)'" )
25+
26+
27+ def _container_key (platform : str ) -> str :
28+ return "conda" if platform .startswith ("conda_lock_" ) else "container"
29+
30+
31+ def _parse_config_file (config_path : Path ) -> dict [str , str ]:
32+ """Return {module_name: container} from an existing platform config file."""
33+ result : dict [str , str ] = {}
34+ with contextlib .suppress (OSError ):
35+ for line in config_path .read_text ().splitlines ():
36+ m = _CONFIG_LINE_RE .search (line )
37+ if m :
38+ result [m .group (1 )] = m .group (2 )
39+ return result
40+
41+
42+ def _write_platform_config (config_path : Path , entries : dict [str , str ], key : str ) -> bool :
43+ """Write *entries* to *config_path*, or delete it when empty.
44+
45+ Skips the write when content is unchanged. Returns True if the file now exists.
46+ """
47+ if not entries :
48+ config_path .unlink (missing_ok = True )
49+ return False
50+ new_content = "" .join (
51+ f"process {{ withName: '{ name } ' {{ { key } = '{ container } ' }} }}\n " for name , container in sorted (entries .items ())
52+ )
53+ if not config_path .exists () or config_path .read_text () != new_content :
54+ config_path .write_text (new_content )
55+ return True
56+
57+
58+ def _process_meta (meta_path : Path ) -> tuple [str , dict [str , str ]] | None :
59+ """Read one meta.yml + sibling main.nf and return (module_name, {platform: container}).
60+
61+ Returns None when the file should be skipped.
62+ """
63+ try :
64+ raw = meta_path .read_bytes ()
65+ except OSError as e :
66+ log .debug (f"Could not read { meta_path } : { e } " )
67+ return None
68+
69+ # TODO: remove this early-exit once containers are present in the majority of modules
70+ if b"containers:" not in raw :
71+ return None
72+
73+ meta = yaml .safe_load (raw )
74+
75+ module_name = read_module_name (meta_path .parent / "main.nf" )
76+ if not module_name :
77+ log .debug (f"No process definition found next to { meta_path } , skipping" )
78+ return None
79+
80+ platform_containers : dict [str , str ] = {}
81+ for platform_name , (runtime , arch , protocol ) in PLATFORMS .items ():
82+ with contextlib .suppress (KeyError , TypeError ):
83+ platform_containers [platform_name ] = meta ["containers" ][runtime ][arch ][protocol ]
84+
85+ return module_name , platform_containers
86+
2387
2488class ContainerConfigs :
2589 """Generates the container configuration files for a pipeline.
@@ -28,78 +92,82 @@ class ContainerConfigs:
2892 workflow_directory (Path): The directory containing the workflow files.
2993 """
3094
31- def __init__ (
32- self ,
33- workflow_directory : Path = Path (),
34- ):
95+ def __init__ (self , workflow_directory : Path = Path ()) -> None :
3596 self .workflow_directory = workflow_directory
3697
37- def generate_container_configs (
38- self , new_module_path : Path | None = None , new_module_name : str | None = None
39- ) -> set [str ]:
98+ def update_module_container_config (self , module_path : Path ) -> None :
99+ """Targeted update for a single module.
100+
101+ Reads the current config files, splices in (or removes) the entry for
102+ the module at *module_path*, and writes back only what changed.
40103 """
41- Generate the container configuration files for a pipeline by scanning
42- all ``main.nf`` files under the ``modules/`` directory and reading the
43- accompanying ``meta.yml`` for container information.
104+ result = _process_meta (module_path / "meta.yml" )
105+
106+ if result is None :
107+ module_name = read_module_name (module_path / "main.nf" )
108+ if not module_name :
109+ log .debug (f"Could not determine process name for { module_path } , skipping" )
110+ return
111+ platform_containers : dict [str , str ] = {}
112+ else :
113+ module_name , platform_containers = result
114+
115+ conf_dir = self .workflow_directory / "conf"
116+ for platform in PLATFORMS :
117+ config_path = conf_dir / f"containers_{ platform } .config"
118+ entries = _parse_config_file (config_path )
119+ if platform in platform_containers :
120+ entries [module_name ] = platform_containers [platform ]
121+ _write_platform_config (config_path , entries , _container_key (platform ))
122+
123+ def generate_container_configs (self ) -> set [str ]:
124+ """Full scan of all ``meta.yml`` files under ``modules/``.
125+
126+ Used by lint and bulk operations (update, remove, patch) where multiple
127+ modules may have changed.
44128
45129 Returns:
46130 set[str]: Names of config files written (e.g. ``{'containers_docker_amd64.config'}``).
47131 """
132+ modules_dir = self .workflow_directory / "modules"
133+ if not modules_dir .is_dir ():
134+ log .debug (f"No modules directory found at { modules_dir } , skipping" )
135+ return set ()
136+
48137 containers : dict [str , dict [str , str ]] = {platform : {} for platform in PLATFORMS }
49138
50- for meta_path in (self .workflow_directory / "modules" ).rglob ("meta.yml" ):
51- try :
52- content = meta_path .read_text ()
53- except OSError as e :
54- log .debug (f"Could not read { meta_path } : { e } " )
55- continue
56-
57- # TODO: remove this early-exit once containers are present in the majority of modules
58- if "containers:" not in content :
59- continue
60-
61- meta = yaml .safe_load (content )
62-
63- main_nf = meta_path .parent / "main.nf"
64- try :
65- match = _PROCESS_NAME_RE .search (main_nf .read_text ())
66- except OSError :
67- log .debug (f"No main.nf next to { meta_path } , skipping" )
68- continue
69- if not match :
70- log .debug (f"No process definition found in { main_nf } , skipping" )
71- continue
72- process_name = match .group (1 )
73-
74- for platform_name , (runtime , arch , protocol ) in PLATFORMS .items ():
75- with contextlib .suppress (KeyError , TypeError ):
76- containers [platform_name ][process_name ] = meta ["containers" ][runtime ][arch ][protocol ]
139+ with ThreadPoolExecutor () as pool :
140+ futures = {pool .submit (_process_meta , p ): p for p in modules_dir .rglob ("meta.yml" )}
141+ for future in as_completed (futures ):
142+ result = future .result ()
143+ if result is None :
144+ continue
145+ module_name , platform_containers = result
146+ for platform_name , container in platform_containers .items ():
147+ containers [platform_name ][module_name ] = container
77148
78149 log .info ("Generated container configs for the pipeline." )
79150
80- # remove all generated config files, to handle removed modules
81- for platform in PLATFORMS :
82- (self .workflow_directory / "conf" / f"containers_{ platform } .config" ).unlink (missing_ok = True )
83- # write config files
84151 written : set [str ] = set ()
85152 for platform , module_containers in containers .items ():
86- if not module_containers :
87- continue
88- container_key = "conda" if platform .startswith ("conda_lock_" ) else "container"
89- lines = [
90- f"process {{ withName: '{ module_name } ' {{ { container_key } = '{ container } ' }} }}\n "
91- for module_name , container in sorted (module_containers .items ())
92- ]
93153 config_path = self .workflow_directory / "conf" / f"containers_{ platform } .config"
94- config_path . write_text ( "" . join ( lines ))
95- written .add (config_path .name )
154+ if _write_platform_config ( config_path , module_containers , _container_key ( platform )):
155+ written .add (config_path .name )
96156 return written
97157
98158
99- def try_generate_container_configs (
100- directory : Path , new_module_path : Path | None = None , new_module_name : str | None = None
101- ) -> None :
159+ def try_generate_container_configs (directory : Path , module_path : Path | None = None ) -> None :
160+ """Regenerate container configs for *directory*.
161+
162+ If *module_path* is given, only that module's entries are updated (fast
163+ path for single-module installs). Otherwise a full scan of ``modules/``
164+ is performed.
165+ """
102166 try :
103- ContainerConfigs (directory ).generate_container_configs (new_module_path , new_module_name )
167+ configs = ContainerConfigs (directory )
168+ if module_path is not None :
169+ configs .update_module_container_config (module_path )
170+ else :
171+ configs .generate_container_configs ()
104172 except UserWarning as e :
105173 log .warning (f"Could not regenerate container configuration files: { e } " )
0 commit comments