Skip to content

Commit 930ab67

Browse files
committed
Add word boundaries to the input, output and topic linting regexes
Fixes nf-core#3842
1 parent 9c306ee commit 930ab67

File tree

2 files changed

+127
-4
lines changed

2 files changed

+127
-4
lines changed

nf_core/components/nfcore_component.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ def get_inputs_from_main_nf(self) -> None:
207207
for line in input_data.split("\n"):
208208
channel_elements: Any = []
209209
line = line.split("//")[0] # remove any trailing comments
210-
regex = r"\b(val|path)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
210+
regex = r"\b(val|path)\b\s*(\(([^)]+)\)|\s*([^)\s,]+))"
211211
matches = re.finditer(regex, line)
212212
for _, match in enumerate(matches, start=1):
213213
input_val = None
@@ -226,6 +226,7 @@ def get_inputs_from_main_nf(self) -> None:
226226
elif len(channel_elements) > 1:
227227
inputs.append(channel_elements)
228228
log.debug(f"Found {len(inputs)} inputs in {self.main_nf}")
229+
log.debug(f"Inputs: {inputs}")
229230
self.inputs = inputs
230231
elif self.component_type == "subworkflows":
231232
# get input values from main.nf after "take:"
@@ -252,8 +253,9 @@ def get_outputs_from_main_nf(self):
252253
log.debug(f"Could not find any outputs in {self.main_nf}")
253254
return outputs
254255
output_data = data.split("output:")[1].split("when:")[0]
256+
log.debug(f"Found output_data: {output_data}")
255257
regex_emit = r"emit:\s*([^)\s,]+)"
256-
regex_elements = r"\b(val|path|env|stdout|eval)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
258+
regex_elements = r"\b(val|path|env|stdout|eval)\b\s*(\(([^)]+)\)|\s*([^)\s,]+))"
257259
for line in output_data.split("\n"):
258260
match_emit = re.search(regex_emit, line)
259261
matches_elements = re.finditer(regex_elements, line)
@@ -278,6 +280,7 @@ def get_outputs_from_main_nf(self):
278280
elif len(channel_elements) > 1:
279281
outputs[match_emit.group(1)].append(channel_elements)
280282
log.debug(f"Found {len(list(outputs.keys()))} outputs in {self.main_nf}")
283+
log.debug(f"Outputs: {outputs}")
281284
self.outputs = outputs
282285
elif self.component_type == "subworkflows":
283286
outputs = []
@@ -306,8 +309,9 @@ def get_topics_from_main_nf(self) -> None:
306309
self.topics = topics
307310
return
308311
output_data = data.split("output:")[1].split("when:")[0]
312+
log.debug(f"Output data: {output_data}")
309313
regex_topic = r"topic:\s*([^)\s,]+)"
310-
regex_elements = r"\b(val|path|env|stdout|eval)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
314+
regex_elements = r"\b(val|path|env|stdout|eval)\b\s*(\(([^)]+)\)|\s*([^)\s,]+))"
311315
for line in output_data.split("\n"):
312316
match_topic = re.search(regex_topic, line)
313317
matches_elements = re.finditer(regex_elements, line)
@@ -331,4 +335,5 @@ def get_topics_from_main_nf(self) -> None:
331335
elif len(channel_elements) > 1:
332336
topics[match_topic.group(1)].append(channel_elements)
333337
log.debug(f"Found {len(list(topics.keys()))} topics in {self.main_nf}")
338+
log.debug(f"Topics: {topics}")
334339
self.topics = topics

tests/modules/lint/test_main_nf.py

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
import nf_core.modules.lint
4-
import nf_core.modules.patch
4+
from nf_core.components.nfcore_component import NFCoreComponent
55
from nf_core.modules.lint.main_nf import check_container_link_line, check_process_labels
66

77
from ...test_modules import TestModules
@@ -154,3 +154,121 @@ def test_topics_and_emits_version_check(self):
154154
f"Linting warned with {[x.__dict__ for x in module_lint.warned]}, expected 1 warning"
155155
)
156156
assert len(module_lint.passed) > 0
157+
158+
159+
def test_get_inputs_no_partial_keyword_match(tmp_path):
    """Check input parsing when variable names embed the `val`/`path` keywords.

    The fixture process declares two plain inputs (`val(meta)`, `path(reads)`)
    and one tuple whose variables, `evaluate` and `pathogen`, contain the
    qualifier keywords as substrings. After `get_inputs_from_main_nf()` runs,
    `component.inputs` must hold exactly those three entries, with the tuple
    kept together as a two-element list.

    Args:
        tmp_path: pytest-provided temporary directory that receives `main.nf`.
    """
    main_nf_content = """
process TEST_PROCESS {
input:
val(meta)
path(reads)
tuple val(evaluate), path(pathogen)

output:
path("*.txt"), emit: results

script:
"echo test"
}
"""
    main_nf_path = tmp_path / "main.nf"
    main_nf_path.write_text(main_nf_content)

    component = NFCoreComponent(
        component_name="test",
        repo_url=None,
        component_dir=tmp_path,
        repo_type="modules",
        base_dir=tmp_path,
        component_type="modules",
        remote_component=False,
    )

    component.get_inputs_from_main_nf()

    # Three inputs are expected: meta, reads, and the (evaluate, pathogen) tuple.
    # The failure message already shows the parsed value, so no debug print is needed.
    assert len(component.inputs) == 3, f"Expected 3 inputs, got {len(component.inputs)}: {component.inputs}"
    assert {"meta": {}} in component.inputs
    assert {"reads": {}} in component.inputs
    # The tuple must be captured as one list holding both of its elements.
    tuple_input = [{"evaluate": {}}, {"pathogen": {}}]
    assert tuple_input in component.inputs
199+
200+
201+
def test_get_outputs_no_partial_keyword_match(tmp_path):
    """Check output parsing when variable names embed the `val`/`path` keywords.

    The fixture process emits three channels; two of them wrap variables
    (`evaluate_result`, `pathogen_data`) whose names contain a qualifier
    keyword as a substring. Every `emit:` name must appear as a key of
    `component.outputs` after `get_outputs_from_main_nf()` runs.

    Args:
        tmp_path: pytest-provided temporary directory that receives `main.nf`.
    """
    nf_source = """
process TEST_PROCESS {
input:
val(meta)

output:
path("*.txt"), emit: results
val(evaluate_result), emit: evaluation
path(pathogen_data), emit: pathogens

script:
"echo test"
}
"""
    (tmp_path / "main.nf").write_text(nf_source)

    component = NFCoreComponent(
        component_name="test",
        repo_url=None,
        component_dir=tmp_path,
        repo_type="modules",
        base_dir=tmp_path,
        component_type="modules",
        remote_component=False,
    )
    component.get_outputs_from_main_nf()

    # One entry per emit name: results, evaluation and pathogens.
    found = component.outputs
    assert len(found) == 3, f"Expected 3 outputs, got {len(found)}: {found}"
    for emit_name in ("results", "evaluation", "pathogens"):
        assert emit_name in found
238+
239+
240+
def test_get_topics_no_partial_keyword_match(tmp_path):
    """Check topic parsing when a variable name embeds the `val` keyword.

    The fixture process publishes two topic channels; the second wraps
    `evaluate_result`, whose name contains `val` as a substring. Both
    `topic:` names must appear as keys of `component.topics` after
    `get_topics_from_main_nf()` runs.

    Args:
        tmp_path: pytest-provided temporary directory that receives `main.nf`.
    """
    nf_source = """
process TEST_PROCESS {
input:
val(meta)

output:
path("*.txt"), topic: results
val(evaluate_result), topic: evaluation

script:
"echo test"
}
"""
    (tmp_path / "main.nf").write_text(nf_source)

    component = NFCoreComponent(
        component_name="test",
        repo_url=None,
        component_dir=tmp_path,
        repo_type="modules",
        base_dir=tmp_path,
        component_type="modules",
        remote_component=False,
    )
    component.get_topics_from_main_nf()

    # One entry per topic name: results and evaluation.
    found = component.topics
    assert len(found) == 2, f"Expected 2 topics, got {len(found)}: {found}"
    for topic_name in ("results", "evaluation"):
        assert topic_name in found

0 commit comments

Comments
 (0)