Skip to content

Commit 2ad26ef

Browse files
Feat: Add 'replace_pattern' entry for DictComparator and several related save capabilities (#53)
1 parent 04e4ae0 commit 2ad26ef

File tree

4 files changed

+219
-33
lines changed

4 files changed

+219
-33
lines changed

dir_content_diff/base_comparators.py

+85-4
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@
22
import configparser
33
import filecmp
44
import json
5+
import re
56
from abc import ABC
67
from abc import abstractmethod
78
from xml.etree import ElementTree
89

910
import dictdiffer
11+
import jsonpath_ng
1012
import yaml
13+
from dicttoxml import dicttoxml
1114
from diff_pdf_visually import pdf_similar
1215

1316
from dir_content_diff.util import diff_msg_formatter
@@ -282,6 +285,14 @@ class DictComparator(BaseComparator):
282285
"add": "Added the value(s) '{value}' in the '{key}' key.",
283286
"change": "Changed the value of '{key}' from {value[0]} to {value[1]}.",
284287
"remove": "Removed the value(s) '{value}' from '{key}' key.",
288+
"missing_ref_entry": (
289+
"The path '{key}' is missing in the reference dictionary, please fix the "
290+
"'replace_pattern' argument."
291+
),
292+
"missing_comp_entry": (
293+
"The path '{key}' is missing in the compared dictionary, please fix the "
294+
"'replace_pattern' argument."
295+
),
285296
}
286297

287298
def __init__(self, *args, **kwargs):
@@ -318,6 +329,43 @@ def _format_change_value(value):
318329
value[num] = str(i)
319330
return value
320331

332+
def format_data(self, data, ref=None, replace_pattern=None, **kwargs):
333+
"""Format the loaded data."""
334+
# pylint: disable=too-many-nested-blocks
335+
self.current_state["format_errors"] = errors = []
336+
337+
if replace_pattern is not None:
338+
for pat, paths in replace_pattern.items():
339+
pattern = pat[0]
340+
new_value = pat[1]
341+
count = pat[2] if len(pat) > 2 else 0
342+
flags = pat[3] if len(pat) > 3 else 0
343+
for raw_path in paths:
344+
path = jsonpath_ng.parse(raw_path)
345+
if ref is not None and len(path.find(ref)) == 0:
346+
errors.append(
347+
(
348+
"missing_ref_entry",
349+
raw_path,
350+
None,
351+
)
352+
)
353+
elif len(path.find(data)) == 0:
354+
errors.append(
355+
(
356+
"missing_comp_entry",
357+
raw_path,
358+
None,
359+
)
360+
)
361+
else:
362+
for i in path.find(data):
363+
if isinstance(i.value, str):
364+
i.full_path.update(
365+
data, re.sub(pattern, new_value, i.value, count, flags)
366+
)
367+
return data
368+
321369
def diff(self, ref, comp, *args, **kwargs):
322370
"""Compare 2 dictionaries.
323371
@@ -332,13 +380,16 @@ def diff(self, ref, comp, *args, **kwargs):
332380
path_limit (list[str]): List of path limit tuples or :class:`dictdiffer.utils.PathLimit`
333381
object to limit the diff recursion depth.
334382
"""
383+
errors = self.current_state.get("format_errors", [])
384+
335385
if len(args) > 5:
336386
dot_notation = args[5]
337387
args = args[:5] + args[6:]
338388
else:
339389
dot_notation = kwargs.pop("dot_notation", False)
340390
kwargs["dot_notation"] = dot_notation
341-
return list(dictdiffer.diff(ref, comp, *args, **kwargs))
391+
errors.extend(list(dictdiffer.diff(ref, comp, *args, **kwargs)))
392+
return errors
342393

343394
def format_diff(self, difference):
344395
"""Format one element difference."""
@@ -361,6 +412,11 @@ def load(self, path):
361412
data = json.load(file)
362413
return data
363414

415+
def save(self, data, path):
416+
"""Save formatted data into a JSON file."""
417+
with open(path, "w", encoding="utf-8") as file:
418+
json.dump(data, file)
419+
364420

365421
class YamlComparator(DictComparator):
366422
"""Comparator for YAML files.
@@ -374,6 +430,11 @@ def load(self, path):
374430
data = yaml.full_load(file)
375431
return data
376432

433+
def save(self, data, path):
434+
"""Save formatted data into a YAML file."""
435+
with open(path, "w", encoding="utf-8") as file:
436+
yaml.dump(data, file)
437+
377438

378439
class XmlComparator(DictComparator):
379440
"""Comparator for XML files.
@@ -407,9 +468,14 @@ def load(self, path): # pylint: disable=arguments-differ
407468
data = self.xmltodict(file.read())
408469
return data
409470

471+
def save(self, data, path):
472+
"""Save formatted data into an XML file."""
473+
with open(path, "w", encoding="utf-8") as file:
474+
file.write(dicttoxml(data["root"]).decode())
475+
410476
@staticmethod
411477
def _cast_from_attribute(text, attr):
412-
"""Converts XML text into a Python data format based on the tag attribute."""
478+
"""Convert XML text into a Python data format based on the tag attribute."""
413479
if "type" not in attr:
414480
return text
415481
value_type = attr.get("type", "").lower()
@@ -453,7 +519,7 @@ def add_to_output(obj, child):
453519

454520
@staticmethod
455521
def xmltodict(obj):
456-
"""Converts an XML string into a Python object based on each tag's attribute."""
522+
"""Convert an XML string into a Python object based on each tag's attribute."""
457523
root = ElementTree.fromstring(obj)
458524
output = {}
459525

@@ -473,11 +539,16 @@ class IniComparator(DictComparator):
473539
"""
474540

475541
def load(self, path, **kwargs): # pylint: disable=arguments-differ
476-
"""Open a XML file."""
542+
"""Open an INI file."""
477543
data = configparser.ConfigParser(**kwargs)
478544
data.read(path)
479545
return self.configparser_to_dict(data)
480546

547+
def save(self, data, path):
548+
"""Save formatted data into an INI file."""
549+
with open(path, "w", encoding="utf-8") as file:
550+
self.dict_to_configparser(data).write(file)
551+
481552
@staticmethod
482553
def configparser_to_dict(config):
483554
"""Transform a ConfigParser object into a dict."""
@@ -494,6 +565,16 @@ def configparser_to_dict(config):
494565
dict_config[section][option] = val
495566
return dict_config
496567

568+
@staticmethod
569+
def dict_to_configparser(data, **kwargs):
570+
"""Transform a dict object into a ConfigParser."""
571+
config = configparser.ConfigParser(**kwargs)
572+
for k, v in data.items():
573+
config.add_section(k)
574+
for opt, val in v.items():
575+
config[k][opt] = json.dumps(val)
576+
return config
577+
497578

498579
class PdfComparator(BaseComparator):
499580
"""Comparator for PDF files."""

setup.py

+2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66

77
reqs = [
88
"dictdiffer>=0.8",
9+
"dicttoxml>=1.7.12",
910
"diff_pdf_visually>=1.7",
11+
"jsonpath-ng>=1.5",
1012
"PyYaml>=6",
1113
]
1214

tests/test_base.py

+110-14
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# pylint: disable=unused-argument
55
# pylint: disable=use-implicit-booleaness-not-comparison
66
import configparser
7+
import copy
78
import json
89
import re
910

@@ -409,6 +410,83 @@ def report(
409410
assert kwargs_msg in no_report_diff_default
410411
assert no_report_diff_default.replace(kwargs_msg, "") == diff
411412

413+
class TestJsonComparator:
414+
"""Test the JSON comparator."""
415+
416+
def test_format_data(self):
417+
"""Test data formatting."""
418+
data = {
419+
"a": 1,
420+
"b": {
421+
"c": "a string",
422+
},
423+
"d": [
424+
{"d1": "the d1 string"},
425+
{"d2": "the d2 string"},
426+
],
427+
"e": {
428+
"nested_e": {
429+
"nested_e_a": "the nested_e_a string",
430+
"nested_e_b": "the nested_e_b string",
431+
}
432+
},
433+
}
434+
initial_data = copy.deepcopy(data)
435+
436+
expected_data = {
437+
"a": 1,
438+
"b": {
439+
"c": "a NEW VALUE",
440+
},
441+
"d": [
442+
{"d1": "the d1 NEW VALUE"},
443+
{"d2": "the d2 NEW VALUE"},
444+
],
445+
"e": {
446+
"nested_e": {
447+
"nested_e_a": "the nested_e_a NEW VALUE",
448+
"nested_e_b": "the nested_e_b NEW VALUE",
449+
}
450+
},
451+
}
452+
453+
patterns = {
454+
("string", "NEW VALUE"): [
455+
"b.c",
456+
"d[*].*",
457+
"e.*.*",
458+
]
459+
}
460+
461+
comparator = dir_content_diff.JsonComparator()
462+
comparator.format_data(data)
463+
assert data == initial_data
464+
465+
data = copy.deepcopy(initial_data)
466+
comparator = dir_content_diff.JsonComparator()
467+
comparator.format_data(data, replace_pattern=patterns)
468+
assert data == expected_data
469+
470+
# Missing key in ref
471+
comparator = dir_content_diff.JsonComparator()
472+
data = copy.deepcopy(initial_data)
473+
ref = {"a": 1}
474+
comparator.format_data(data, ref, replace_pattern=patterns)
475+
assert data == initial_data
476+
assert comparator.current_state["format_errors"] == [
477+
("missing_ref_entry", i, None) for i in patterns[("string", "NEW VALUE")]
478+
]
479+
480+
# Missing key in data
481+
comparator = dir_content_diff.JsonComparator()
482+
ref = copy.deepcopy(initial_data)
483+
data = {"a": 1}
484+
comparator.format_data(data, ref, replace_pattern=patterns)
485+
assert data == {"a": 1}
486+
assert comparator.current_state["format_errors"] == [
487+
("missing_comp_entry", i, None) for i in patterns[("string", "NEW VALUE")]
488+
]
489+
412490
class TestXmlComparator:
413491
"""Test the XML comparator."""
414492

@@ -655,22 +733,16 @@ def test_assert_equal_trees(self, ref_tree, res_tree_equal):
655733

656734
def test_assert_equal_trees_export(self, ref_tree, res_tree_equal):
657735
"""Test that the formatted files are properly exported."""
658-
659-
class JsonComparator(dir_content_diff.base_comparators.JsonComparator):
660-
"""Compare data from two JSON files."""
661-
662-
def save(self, data, path):
663-
"""Save formatted data into a file."""
664-
with open(path, "w", encoding="utf-8") as file:
665-
json.dump(data, file)
666-
667-
comparators = dir_content_diff.get_comparators()
668-
comparators[".json"] = JsonComparator()
669736
assert_equal_trees(
670-
ref_tree, res_tree_equal, export_formatted_files=True, comparators=comparators
737+
ref_tree,
738+
res_tree_equal,
739+
export_formatted_files=True,
671740
)
672-
assert list(res_tree_equal.with_name(res_tree_equal.name + "_FORMATTED").iterdir()) == [
673-
res_tree_equal.with_name(res_tree_equal.name + "_FORMATTED") / "file.json"
741+
assert sorted(res_tree_equal.with_name(res_tree_equal.name + "_FORMATTED").iterdir()) == [
742+
(res_tree_equal.with_name(res_tree_equal.name + "_FORMATTED") / "file").with_suffix(
743+
suffix
744+
)
745+
for suffix in [".ini", ".json", ".xml", ".yaml"]
674746
]
675747

676748
def test_diff_empty(self, empty_ref_tree, empty_res_tree):
@@ -706,6 +778,30 @@ def test_specific_args(self, ref_tree, res_tree_equal):
706778

707779
assert res == {}
708780

781+
def test_replace_pattern(self, ref_tree, res_tree_equal):
782+
"""Test the 'replace_pattern' formatting argument."""
783+
specific_args = {
784+
"file.yaml": {"args": [None, None, None, False, 0, False]},
785+
"file.json": {
786+
"format_data_kwargs": {
787+
"replace_pattern": {(".*val.*", "NEW_VAL"): ["*.[*]"]},
788+
},
789+
},
790+
}
791+
res = compare_trees(
792+
ref_tree, res_tree_equal, specific_args=specific_args, export_formatted_files=True
793+
)
794+
795+
pat = (
796+
r"""The files '\S*/ref/file\.json' and '\S*/res/file\.json' are different:\n"""
797+
r"""Kwargs used for formatting data: """
798+
r"""{'replace_pattern': {\('\.\*val\.\*', 'NEW_VAL'\): \['\*\.\[\*\]'\]}}\n"""
799+
r"""Changed the value of '\[nested_list\]\[2\]' from 'str_val' to 'NEW_VAL'\.\n"""
800+
r"""Changed the value of '\[simple_list\]\[2\]' from 'str_val' to 'NEW_VAL'\."""
801+
)
802+
803+
assert re.match(pat, res["file.json"]) is not None
804+
709805
def test_specific_comparator(self, ref_tree, res_tree_equal):
710806
"""Test specific args."""
711807
specific_args = {

0 commit comments

Comments
 (0)