Skip to content

Commit 9db1a32

Browse files
Raise if duplicate keys exist (#22)
1 parent 5898683 commit 9db1a32

File tree

7 files changed

+162
-29
lines changed

7 files changed

+162
-29
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# default owners
2-
* @albrja @collijk @hussain-jafari @patricktnast @rmudambi @stevebachmeier
2+
* @albrja @hussain-jafari @patricktnast @rmudambi @stevebachmeier

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
**3.1.0 - 03/18/2025**
2+
3+
- Raise an error if YAML contains duplicate keys within the same level
4+
15
**3.0.0 - 02/18/2025**
26

37
- Better handle dunder-style keys

src/layered_config_tree/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@
1414
DuplicatedConfigurationError,
1515
ImproperAccessError,
1616
)
17-
from layered_config_tree.main import ConfigNode, LayeredConfigTree
17+
from layered_config_tree.main import ConfigNode, LayeredConfigTree, load_yaml

src/layered_config_tree/main.py

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,14 @@
3333
from pathlib import Path
3434
from typing import Any
3535

36-
import yaml
37-
3836
from layered_config_tree import (
3937
ConfigurationError,
4038
ConfigurationKeyError,
4139
DuplicatedConfigurationError,
4240
ImproperAccessError,
4341
)
4442
from layered_config_tree.types import InputData
43+
from layered_config_tree.utilities import load_yaml
4544

4645

4746
class ConfigNode:
@@ -461,30 +460,14 @@ def _coerce(
461460
"""Coerces data into dictionary format."""
462461
if isinstance(data, dict):
463462
return data, source
464-
elif (isinstance(data, str) and data.endswith((".yaml", ".yml"))) or isinstance(
465-
data, Path
466-
):
467-
source = source if source else str(data)
468-
with open(data) as f:
469-
data_file = f.read()
470-
coerced_data = yaml.full_load(data_file)
471-
if not isinstance(coerced_data, dict):
472-
raise ValueError(
473-
f"Loaded yaml file {coerced_data} should be a dictionary but is type {type(coerced_data)}"
474-
)
475-
return coerced_data, source
476-
elif isinstance(data, str):
477-
data = yaml.full_load(data)
478-
if not isinstance(data, dict):
479-
raise ValueError(
480-
f"Loaded yaml file {data} should be a dictionary but is type {type(data)}"
481-
)
482-
return data, source
483463
elif isinstance(data, LayeredConfigTree):
484464
return data.to_dict(), source
465+
elif isinstance(data, (str, Path)):
466+
source = source if source else str(data)
467+
return load_yaml(data), source
485468
else:
486469
raise ConfigurationError(
487-
f"LayeredConfigTree can only update from dictionaries, strings, paths and LayeredConfigTrees. "
470+
f"LayeredConfigTree can only update from dictionaries, strings, paths, and LayeredConfigTrees. "
488471
f"You passed in {type(data)}",
489472
value_name=None,
490473
)
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
=========
3+
Utilities
4+
=========
5+
6+
This module contains utility functions and classes for the layered_config_tree
7+
package.
8+
9+
"""
10+
11+
from collections.abc import Hashable
12+
from pathlib import Path
13+
from typing import Any
14+
15+
import yaml
16+
17+
from layered_config_tree import DuplicatedConfigurationError
18+
19+
20+
def load_yaml(data: str | Path) -> dict[str, Any]:
21+
"""Loads a YAML filepath or string into a dictionary.
22+
23+
Parameters
24+
----------
25+
data
26+
The YAML content to load. This can be a file path to a YAML file or a string
27+
containing YAML-formatted text.
28+
29+
Raises
30+
------
31+
ValueError
32+
If the loaded YAML content is not a dictionary.
33+
34+
Returns
35+
-------
36+
A dictionary representation of the loaded YAML content.
37+
38+
Notes
39+
-----
40+
If `data` is a Path object or a string that ends with ".yaml" or ".yml", it is
41+
treated as a filepath and this function loads the file. Otherwise, `data` is a
42+
string that does _not_ end in ".yaml" or ".yml" and it is treated as YAML-formatted
43+
text which is loaded directly into a dictionary.
44+
"""
45+
46+
if (isinstance(data, str) and data.endswith((".yaml", ".yml"))) or isinstance(data, Path):
47+
# 'data' is a filepath to a yaml file (rather than a yaml string)
48+
with open(data) as f:
49+
data = f.read()
50+
data_dict: dict[str, Any] = yaml.load(data, Loader=SafeLoader)
51+
52+
if not isinstance(data_dict, dict):
53+
raise ValueError(
54+
f"Loaded yaml file {data_dict} should be a dictionary but is type {type(data_dict)}"
55+
)
56+
57+
return data_dict
58+
59+
60+
class SafeLoader(yaml.SafeLoader):
61+
"""A yaml.SafeLoader that restricts duplicate keys."""
62+
63+
def construct_mapping(
64+
self, node: yaml.MappingNode, deep: bool = False
65+
) -> dict[Hashable, Any]:
66+
"""Constructs the standard mapping after checking for duplicates.
67+
68+
Raises
69+
------
70+
DuplicateKeysInYAMLError
71+
If duplicate keys within the same level are detected in the YAML file
72+
being loaded.
73+
74+
Notes
75+
-----
76+
A key is considered a duplicate only if it is the same as another key
77+
_at the same level in the YAML_.
78+
79+
This raises upon the _first_ duplicate key found; other duplicates may exist
80+
(in which case a new error will be raised upon re-loading of the YAML file
81+
once the duplicate is resolved).
82+
"""
83+
mapping = []
84+
for key_node, _value_node in node.value:
85+
key = self.construct_object(key_node, deep=deep) # type: ignore[no-untyped-call]
86+
if key in mapping:
87+
raise DuplicatedConfigurationError(
88+
f"Duplicate key detected at same level of YAML: {key}. Resolve duplicates and try again.",
89+
name=f"duplicated_{key}",
90+
layer=None,
91+
source=None,
92+
value=None,
93+
)
94+
mapping.append(key)
95+
return super().construct_mapping(node, deep)

tests/test_basic_functionality.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
DuplicatedConfigurationError,
1515
ImproperAccessError,
1616
LayeredConfigTree,
17+
load_yaml,
1718
)
1819

1920

@@ -500,8 +501,7 @@ def test_to_dict_dict() -> None:
500501

501502
def test_to_dict_yaml(test_spec: Path) -> None:
502503
lct = LayeredConfigTree(str(test_spec))
503-
with test_spec.open() as f:
504-
yaml_config = yaml.full_load(f)
504+
yaml_config = load_yaml(test_spec)
505505
assert yaml_config == lct.to_dict()
506506

507507

tests/test_ingestion.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
import re
12
from pathlib import Path
23

3-
from layered_config_tree import LayeredConfigTree
4+
import pytest
5+
6+
from layered_config_tree import DuplicatedConfigurationError, LayeredConfigTree
47

58
TEST_YAML_ONE = """
69
test_section:
@@ -11,6 +14,26 @@
1114
test_key2: test_value4
1215
"""
1316

17+
TEST_YAML_DUPLICATE_KEYS = """
18+
cats:
19+
simba:
20+
size: tiny
21+
features:
22+
- cute
23+
- playful
24+
color: brown
25+
garfield:
26+
size: chonky
27+
traits:
28+
- lazy
29+
- grumpy
30+
- loves lasagna
31+
color: orange
32+
size: thick # first duplicate; we raise here
33+
color: brown # second duplicate; no raise
34+
35+
"""
36+
1437

1538
def test_load_yaml_string() -> None:
1639
lct = LayeredConfigTree()
@@ -21,13 +44,41 @@ def test_load_yaml_string() -> None:
2144
assert lct.test_section2.test_key == "test_value3"
2245

2346

24-
def test_load_yaml_file(tmp_path: Path) -> None:
47+
@pytest.mark.parametrize("path_type", [str, Path])
48+
def test_load_yaml_file(tmp_path: Path, path_type: type[str | Path]) -> None:
2549
tmp_file = tmp_path / "test_file.yaml"
2650
tmp_file.write_text(TEST_YAML_ONE)
2751

2852
lct = LayeredConfigTree()
29-
lct.update(str(tmp_file))
53+
filepath_to_test = str(tmp_file) if path_type is str else tmp_file
54+
lct.update(filepath_to_test)
3055

3156
assert lct.test_section.test_key == "test_value"
3257
assert lct.test_section.test_key2 == "test_value2"
3358
assert lct.test_section2.test_key == "test_value3"
59+
60+
61+
@pytest.mark.parametrize("duplicates", [True, False])
62+
@pytest.mark.parametrize("load_from_file", [True, False])
63+
def test_load_yaml_duplicates_raise(
64+
duplicates: bool, load_from_file: bool, tmp_path: Path
65+
) -> None:
66+
test_str: str = TEST_YAML_DUPLICATE_KEYS if duplicates else TEST_YAML_ONE
67+
if load_from_file:
68+
tmp_file = tmp_path / "test_duplicate_keys.yaml"
69+
tmp_file.write_text(test_str)
70+
test_yaml = tmp_file if load_from_file else test_str
71+
72+
lct = LayeredConfigTree()
73+
74+
if duplicates:
75+
with pytest.raises(
76+
DuplicatedConfigurationError,
77+
match=re.escape(
78+
"Duplicate key detected at same level of YAML: size. Resolve duplicates and try again."
79+
),
80+
):
81+
lct.update(test_yaml)
82+
else:
83+
# Only duplicate keys on the same level are problematic!
84+
lct.update(test_yaml)

0 commit comments

Comments
 (0)