Skip to content

Commit 589ba12

Browse files
Copilotgrst
andauthored
Warn on malformed variable substitution syntax in dvc.yaml (#177)
* Initial plan * Add warning for malformed variable substitution syntax in dvc.yaml When deps/outs entries in dvc.yaml contain '${' but don't have a properly closed '}', emit a warning to help users identify syntax errors early rather than silently ignoring the parameter. Fixes the issue where read_params would not fail or warn on syntax errors like '${ test_param ]', only causing confusing errors later when the parameter is accessed. * Update CHANGELOG.md with entry for malformed variable substitution warning * Detect additional malformed variable substitution patterns in dvc.yaml Add detection for common mistakes beyond just missing closing brace: - $( param ) - wrong brackets (shell syntax confusion) - $[ param ] - wrong brackets - ${{ param }} - double braces (GitHub Actions syntax confusion) Malformed patterns are now checked before the valid pattern to ensure double-brace cases (which partially match the valid regex) are caught. --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Gregor Sturm <gregor.sturm@boehringer-ingelheim.com>
1 parent 05bad22 commit 589ba12

3 files changed

Lines changed: 142 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning][].
1212

1313
### Additions
1414

15+
- `read_params`/`get_config` now warns when `deps`/`outs` sections in `dvc.yaml` contain malformed variable substitution syntax (e.g. `${ param ]` instead of `${ param }`) ([#151](https://github.com/Boehringer-Ingelheim/dso/issues/151))
1516
- Watermarks in PDF and SVG are now natively implemented in these formats as text, rather than including an image overlay ([#170](https://github.com/Boehringer-Ingelheim/dso/pull/170))
1617
- JSON output support for `dso get-config` ([#157](https://github.com/Boehringer-Ingelheim/dso/issues/157))
1718
- Improve shell autocompletion by adding `click.Path()` type to path arguments in CLI commands ([#173](https://github.com/Boehringer-Ingelheim/dso/pull/173))

src/dso/_get_config.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,26 @@ def get_config(stage: str, *, all: bool = False, skip_compile: bool = False) ->
107107
else:
108108
keep_params = set(params)
109109
dvc_param_pat = re.compile(r"\$\{\s*(.*?)\s*\}")
110-
for dep in dvc_stage_config.get("deps", []):
111-
if match := dvc_param_pat.findall(dep):
112-
keep_params.update(match)
113-
for out in dvc_stage_config.get("outs", []):
114-
if match := dvc_param_pat.findall(out):
115-
keep_params.update(match)
110+
# Patterns to detect common malformed variable substitution attempts
111+
_malformed_patterns = [
112+
# ${{ ... }} double braces (e.g. GitHub Actions syntax confusion)
113+
re.compile(r"\$\{\{"),
114+
# ${ without matching } (e.g. "${ param ]", "${ param )", "${ param")
115+
re.compile(r"\$\{[^}]*$"),
116+
# $( ... ) instead of ${ ... }
117+
re.compile(r"\$\("),
118+
# $[ ... ] instead of ${ ... }
119+
re.compile(r"\$\["),
120+
]
121+
for section in ("deps", "outs"):
122+
for entry in dvc_stage_config.get(section, []) or []:
123+
if any(pat.search(entry) for pat in _malformed_patterns):
124+
log.warning(
125+
f"Possible syntax error in `{section}` section of `dvc.yaml`: `{entry}`. "
126+
"Variable substitution uses the syntax `${ <param_name> }`."
127+
)
128+
elif match := dvc_param_pat.findall(entry):
129+
keep_params.update(match)
116130

117131
log.info(
118132
f"Only including the following parameters which are listed in `dvc.yaml`: [green]{', '.join(keep_params)}"

tests/test_get_config.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,127 @@ def test_get_config_matrix(dso_project):
181181
assert list(config) == ["A"]
182182

183183

184+
def test_get_config_malformed_variable_substitution(dso_project, capfd):
185+
"""Test that get_config warns when dvc.yaml contains malformed variable substitution syntax"""
186+
chdir(dso_project)
187+
stage = dso_project / "mystage"
188+
stage.mkdir()
189+
(stage / "params.in.yaml").touch()
190+
191+
(dso_project / "params.in.yaml").write_text(
192+
dedent(
193+
"""\
194+
test_param: "output/test.txt"
195+
other_param: "something"
196+
"""
197+
)
198+
)
199+
200+
(stage / "dvc.yaml").write_text(
201+
dedent(
202+
"""\
203+
stages:
204+
mystage01:
205+
params:
206+
- other_param
207+
outs:
208+
- ${ test_param ]
209+
cmd: "echo Hello World!"
210+
"""
211+
)
212+
)
213+
214+
config = get_config("mystage")
215+
# The malformed entry should not be parsed as a parameter
216+
assert config == {"other_param": "something"}
217+
# Check that a warning was emitted
218+
captured = capfd.readouterr()
219+
assert "Possible syntax error" in captured.err or "Possible syntax error" in captured.out
220+
221+
222+
def test_get_config_malformed_variable_substitution_deps(dso_project, capfd):
223+
"""Test that get_config warns when deps section has malformed variable substitution"""
224+
chdir(dso_project)
225+
stage = dso_project / "mystage"
226+
stage.mkdir()
227+
(stage / "params.in.yaml").touch()
228+
229+
(dso_project / "params.in.yaml").write_text(
230+
dedent(
231+
"""\
232+
test_param: "input/test.txt"
233+
other_param: "something"
234+
"""
235+
)
236+
)
237+
238+
(stage / "dvc.yaml").write_text(
239+
dedent(
240+
"""\
241+
stages:
242+
mystage01:
243+
params:
244+
- other_param
245+
deps:
246+
- ${ test_param ]
247+
cmd: "echo Hello World!"
248+
"""
249+
)
250+
)
251+
252+
config = get_config("mystage")
253+
assert config == {"other_param": "something"}
254+
captured = capfd.readouterr()
255+
assert "Possible syntax error" in captured.err or "Possible syntax error" in captured.out
256+
257+
258+
@pytest.mark.parametrize(
259+
"malformed_entry",
260+
[
261+
"${ test_param ]", # ] instead of }
262+
"${ test_param )", # ) instead of }
263+
"${ test_param", # no closing brace at all
264+
"$( test_param )", # wrong brackets $()
265+
"$[ test_param ]", # wrong brackets $[]
266+
"${{ test_param }}", # double braces (GitHub Actions syntax)
267+
],
268+
)
269+
def test_get_config_malformed_variable_patterns(dso_project, capfd, malformed_entry):
270+
"""Test that get_config warns for various common malformed variable substitution patterns"""
271+
chdir(dso_project)
272+
stage = dso_project / "mystage"
273+
stage.mkdir()
274+
(stage / "params.in.yaml").touch()
275+
276+
(dso_project / "params.in.yaml").write_text(
277+
dedent(
278+
"""\
279+
test_param: "output/test.txt"
280+
other_param: "something"
281+
"""
282+
)
283+
)
284+
285+
(stage / "dvc.yaml").write_text(
286+
dedent(
287+
f"""\
288+
stages:
289+
mystage01:
290+
params:
291+
- other_param
292+
outs:
293+
- {malformed_entry}
294+
cmd: "echo Hello World!"
295+
"""
296+
)
297+
)
298+
299+
config = get_config("mystage")
300+
assert config == {"other_param": "something"}
301+
captured = capfd.readouterr()
302+
assert "Possible syntax error" in captured.err or "Possible syntax error" in captured.out
303+
304+
184305
def test_get_config_path_relative_to_root_dir(quarto_stage):
185306
chdir(quarto_stage)
186307
config1 = get_config("quarto_stage")

0 commit comments

Comments
 (0)