Skip to content

Commit 40a64ff

Browse files
Feat: Add new specific comparators (#60)
1 parent d7f7d9f commit 40a64ff

25 files changed

+843
-67
lines changed
File renamed without changes.

.github/workflows/publish-sdist.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- uses: actions/checkout@v4
14-
- name: Set up Python 3.8
14+
- name: Set up Python 3.10
1515
uses: actions/setup-python@v5
1616
with:
17-
python-version: 3.8
17+
# Quoted on purpose: an unquoted 3.10 is a YAML float and is read as 3.1.
python-version: "3.10"
1818
- name: Build a wheel and a source tarball
1919
run: |
2020
# Requirement specifiers are quoted so the shell does not treat ">=..." as an output redirection.
pip install "setuptools>=42" build "setuptools_scm[toml]>=3.4"

.github/workflows/run-tox.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ jobs:
1313
runs-on: ubuntu-latest
1414
strategy:
1515
matrix:
16-
python-version: ["3.8", "3.9", "3.10", "3.11"]
16+
python-version: ["3.9", "3.10", "3.11", "3.12"]
1717
min_versions: ["min_versions", "latest_versions"]
1818
exclude:
1919
- min_versions: "min_versions"
2020
include:
21-
- python-version: "3.8"
21+
- python-version: "3.9"
2222
min_versions: "min_versions"
2323

2424
steps:

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
default_language_version:
2-
python: python3.8
2+
python: python3.10
33
repos:
44
- repo: https://github.com/pre-commit/pre-commit-hooks
55
rev: v4.4.0

CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ When you wish to contribute to the code base, please consider the following guid
5959
or
6060

6161
```shell
62-
tox -e py38 -e lint -e docs -e check-packaging
62+
tox -e py310 -e lint -e docs -e check-packaging
6363
```
6464

6565
* Commit your changes using a descriptive commit message.

MANIFEST.in

+2
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
global-exclude *.py[co] .DS_Store
2+
3+
include dir_content_diff/comparators/dependencies.json

dir_content_diff/__init__.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
"""
55
import copy
66
import importlib.metadata
7-
import logging
87
import re
98
from pathlib import Path
109

@@ -14,13 +13,12 @@
1413
from dir_content_diff.base_comparators import PdfComparator
1514
from dir_content_diff.base_comparators import XmlComparator
1615
from dir_content_diff.base_comparators import YamlComparator
16+
from dir_content_diff.util import LOGGER
1717
from dir_content_diff.util import diff_msg_formatter
1818
from dir_content_diff.util import format_ext
1919

2020
__version__ = importlib.metadata.version("dir-content-diff")
2121

22-
L = logging.getLogger(__name__)
23-
2422

2523
_DEFAULT_COMPARATORS = {
2624
None: DefaultComparator(),
@@ -125,7 +123,7 @@ def compare_files(ref_file, comp_file, comparator, *args, return_raw_diffs=False
125123
differences if they are different.
126124
"""
127125
# Get the compared file
128-
L.debug("Compare: %s and %s", ref_file, comp_file)
126+
LOGGER.debug("Compare: %s and %s", ref_file, comp_file)
129127

130128
try:
131129
return comparator(ref_file, comp_file, *args, return_raw_diffs=return_raw_diffs, **kwargs)
@@ -173,7 +171,7 @@ def export_formatted_file(file, formatted_file, comparator, **kwargs):
173171
"""
174172
if hasattr(comparator, "save_capability") and comparator.save_capability:
175173
# pylint: disable=protected-access
176-
L.debug("Format: %s into %s", file, formatted_file)
174+
LOGGER.debug("Format: %s into %s", file, formatted_file)
177175
data = comparator.load(
178176
file,
179177
**kwargs.get(
@@ -204,7 +202,9 @@ def export_formatted_file(file, formatted_file, comparator, **kwargs):
204202
),
205203
)
206204
else:
207-
L.info("Skip formatting for '%s' because the comparator has no saving capability.", file)
205+
LOGGER.info(
206+
"Skip formatting for '%s' because the comparator has no saving capability.", file
207+
)
208208

209209

210210
def compare_trees(
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Sub-package of dir-content-diff for specific comparators."""
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"pandas": [
3+
"pandas>=1.4",
4+
"pyarrow>=11",
5+
"tables>=3.7"
6+
],
7+
"morphio": ["morphio>=3.3.6", "morph_tool>=2.9"],
8+
"voxcell": ["voxcell>=3.1.1"]
9+
}
+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""Extension module to process morphology files with MorphIO and morph-tool."""
2+
from dir_content_diff import register_comparator
3+
from dir_content_diff.base_comparators import BaseComparator
4+
from dir_content_diff.util import import_error_message
5+
6+
try:
7+
from morph_tool import diff
8+
from morphio.mut import Morphology
9+
except ImportError: # pragma: no cover
10+
import_error_message(__name__)
11+
12+
13+
class MorphologyComparator(BaseComparator):
    """Comparator for morphology files, backed by MorphIO and morph-tool."""

    def load(self, path, **kwargs):
        """Build a :class:`morphio.mut.Morphology` from ``path``.

        Args:
            path: The location of the morphology file.
            **kwargs: Forwarded to the ``Morphology`` constructor.
        """
        morphology = Morphology(path, **kwargs)
        return morphology

    def diff(self, ref, comp, *args, **kwargs):
        """Compare two morphologies with :func:`morph_tool.diff`.

        Args:
            ref: The reference morphology, given as first argument to :func:`morph_tool.diff`.
            comp: The compared morphology, given as second argument to :func:`morph_tool.diff`.
            *args: See :func:`morph_tool.diff` for details.
            **kwargs: See :func:`morph_tool.diff` for details.

        Returns:
            bool or list: ``False`` if the result of :func:`morph_tool.diff` is falsy, else a
            one-element list holding the ``info`` attribute of that result.
        """
        result = diff(ref, comp, *args, **kwargs)
        return [result.info] if result else False
37+
38+
39+
def register(force=False):
    """Register :class:`MorphologyComparator` for the ``.asc``, ``.h5`` and ``.swc`` extensions.

    Args:
        force: Forwarded as the ``force`` keyword of each ``register_comparator`` call.
    """
    for extension in (".asc", ".h5", ".swc"):
        # Each extension gets its own comparator instance.
        register_comparator(extension, MorphologyComparator(), force=force)

dir_content_diff/pandas.py renamed to dir_content_diff/comparators/pandas.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Extension module to process files with Pandas."""
2-
try:
3-
import pandas as pd
4-
except ImportError as exception: # pragma: no cover
5-
raise ImportError("Could not import pandas package, please install it.") from exception
6-
72
from dir_content_diff import register_comparator
83
from dir_content_diff.base_comparators import BaseComparator
4+
from dir_content_diff.util import import_error_message
5+
6+
try:
7+
import pandas as pd
8+
except ImportError: # pragma: no cover
9+
import_error_message(__name__)
910

1011

1112
class DataframeComparator(BaseComparator):
+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
"""Extension module to process files with Voxcell."""
2+
from dir_content_diff import register_comparator
3+
from dir_content_diff.base_comparators import BaseComparator
4+
from dir_content_diff.comparators.pandas import DataframeComparator
5+
from dir_content_diff.util import import_error_message
6+
7+
try:
8+
import numpy as np
9+
from voxcell import CellCollection
10+
from voxcell import VoxelData
11+
except ImportError: # pragma: no cover
12+
import_error_message(__name__)
13+
14+
15+
class NrrdComparator(BaseComparator):
    """Comparator for NRRD files."""

    def load(self, path, **kwargs):
        """Load a NRRD file with :meth:`voxcell.VoxelData.load_nrrd`."""
        return VoxelData.load_nrrd(str(path), **kwargs)

    def save(self, data, path, **kwargs):
        """Save data to a NRRD file using the ``save_nrrd`` method of ``data``."""
        return data.save_nrrd(str(path), **kwargs)

    def format_diff(self, difference, **kwargs):
        """Format one element difference given as a ``(key, value)`` pair."""
        k, v = difference
        return f"\n{k}: {v}"

    def sort(self, differences, **kwargs):
        """Do not sort the entries to keep voxel dimensions as first entry."""
        return differences

    def diff(self, ref, comp, *args, precision=None, **kwargs):
        """Compare data loaded from two NRRD files.

        Note: NRRD files can contain their creation date, so their hashes depend on this
        creation date, even if the actual data are the same. This comparator only compares the
        ``voxel_dimensions`` and ``raw`` attributes of the loaded objects.

        Args:
            ref: The reference data (must expose ``voxel_dimensions`` and ``raw``).
            comp: The compared data (must expose ``voxel_dimensions`` and ``raw``).
            *args: Forwarded to the numpy assertion function.
            precision (int): The desired precision, default is exact precision.
            **kwargs: Forwarded to the numpy assertion function.

        Returns:
            bool or dict: ``False`` if both attributes are considered as equal, else a dict
            mapping ``"Voxel dimensions"`` and/or ``"Internal raw data"`` to the message of the
            failed numpy assertion.
        """

        def assert_same(ref_array, comp_array):
            """Raise an ``AssertionError`` if the two arrays differ at the requested precision."""
            if precision is not None:
                np.testing.assert_array_almost_equal(
                    ref_array, comp_array, *args, decimal=precision, **kwargs
                )
            else:
                np.testing.assert_array_equal(ref_array, comp_array, *args, **kwargs)

        errors = {}
        # The voxel dimensions are checked first so they stay the first reported entry.
        for label, attribute in (
            ("Voxel dimensions", "voxel_dimensions"),
            ("Internal raw data", "raw"),
        ):
            try:
                assert_same(getattr(ref, attribute), getattr(comp, attribute))
            except AssertionError as exception:
                errors[label] = exception.args[0]

        return errors or False

    def report(self, ref_file, comp_file, formatted_differences, diff_args, diff_kwargs, **kwargs):
        """Create a report from the formatted differences.

        The ``precision`` entry is always given to the parent ``report`` (``None`` when it was
        not provided). The mapping passed by the caller is left untouched.
        """
        # pylint: disable=arguments-differ
        # Work on a copy so the default is not leaked into the caller's mapping.
        diff_kwargs = dict(diff_kwargs)
        diff_kwargs.setdefault("precision", None)
        return super().report(
            ref_file,
            comp_file,
            formatted_differences,
            diff_args,
            diff_kwargs,
            **kwargs,
        )
92+
93+
94+
class Mvd3Comparator(DataframeComparator):
    """Comparator for MVD3 files.

    Note: MVD3 files can contain their creation date, so their hashes depend on this creation
    date, even if the data are the same.

    This comparator inherits from
    :class:`dir_content_diff.comparators.pandas.DataframeComparator`, read the doc of this
    comparator for details on args and kwargs.
    """

    def load(self, path, **kwargs):
        """Load a MVD3 file and return the result of ``as_dataframe()`` on the collection."""
        cells = CellCollection.load_mvd3(path, **kwargs)
        return cells.as_dataframe()

    def save(self, data, path, **kwargs):
        """Build a ``CellCollection`` from ``data`` and save it as a MVD3 file."""
        cells = CellCollection.from_dataframe(data)
        return cells.save_mvd3(path, **kwargs)
111+
112+
113+
class CellCollectionComparator(DataframeComparator):
    """Comparator for any type of CellCollection file.

    This comparator inherits from
    :class:`dir_content_diff.comparators.pandas.DataframeComparator`, read the doc of this
    comparator for details on args and kwargs.
    """

    def load(self, path, **kwargs):
        """Load a CellCollection file and return the result of ``as_dataframe()`` on it."""
        cells = CellCollection.load(path, **kwargs)
        return cells.as_dataframe()

    def save(self, data, path, **kwargs):
        """Build a ``CellCollection`` from ``data`` and save it to ``path``."""
        cells = CellCollection.from_dataframe(data)
        return cells.save(path, **kwargs)
127+
128+
129+
def register(force=False):
    """Register Voxcell extensions.

    Args:
        force (bool): Forwarded as the ``force`` keyword of each ``register_comparator`` call,
            like the ``register`` function of the morphio comparators does.
    """
    register_comparator(".nrrd", NrrdComparator(), force=force)
    register_comparator(".mvd3", Mvd3Comparator(), force=force)

dir_content_diff/util.py

+50
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
"""Some utils used by the ``dir-content-diff`` package."""
2+
import importlib.resources
3+
import json
4+
import logging
25
import re
6+
from pathlib import Path
37

8+
LOGGER = logging.getLogger("dir-content-diff")
49
_ext_pattern = re.compile(r"\.?(.*)")
510

611

@@ -103,3 +108,48 @@ def format_kwargs(kwargs, name):
103108
f"{kwargs_used}"
104109
f"{reason_used}"
105110
)
111+
112+
113+
def _retrieve_dependencies():
    """Get the comparator dependencies.

    Returns:
        The object parsed from the ``comparators/dependencies.json`` file of the package.
    """
    try:
        # Package is installed or the cwd is the root of the project
        root_dir = importlib.resources.files("dir_content_diff")  # pylint: disable=no-member
    except ModuleNotFoundError:  # pragma: no cover
        # Package is not importable by name: this module lives directly inside the package
        # directory, so its parent directory is already the package root.
        root_dir = Path(__file__).parent
    deps_file = root_dir / "comparators" / "dependencies.json"
    # Explicit encoding so the result does not depend on the locale of the platform.
    with deps_file.open(encoding="utf-8") as f:
        return json.load(f)
125+
126+
127+
COMPARATOR_DEPENDENCIES = _retrieve_dependencies()
128+
129+
130+
def import_error_message(name):
    """Log a warning listing the dependencies required by a comparator module.

    Args:
        name (str): The dotted name of the module; only the part after the last dot is used
            as key in ``COMPARATOR_DEPENDENCIES``.

    Raises:
        KeyError: If the module has no entry in ``COMPARATOR_DEPENDENCIES``.
    """
    name = name.rpartition(".")[2]
    try:
        dependencies = COMPARATOR_DEPENDENCIES[name]
    except KeyError as exception:
        raise KeyError(
            f"The module {name} has no registered dependency, please add dependencies in the "
            "dependencies.json file"
        ) from exception

    # Enumerate the requirements as "a, b and c" or as a single "a".
    several = len(dependencies) > 1
    req_plural = "s are" if several else " is"
    if several:
        requirements = f"{', '.join(dependencies[:-1])} and {dependencies[-1]}"
    else:
        requirements = str(dependencies[0])

    LOGGER.warning(
        f"Loading the {name} module without the required dependencies installed "
        f"(requirement{req_plural} the following: {requirements}). "
        "Will crash at runtime if the related functionalities are used. "
        f"These dependencies can be installed with 'pip install dir-content-diff[{name}]'."
    )

docs/source/api_ref.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ This page presents the complete API documentation.
88

99
dir_content_diff
1010
dir_content_diff.base_comparators
11+
dir_content_diff.comparators.morphio
12+
dir_content_diff.comparators.pandas
13+
dir_content_diff.comparators.voxcell
1114
dir_content_diff.util
12-
dir_content_diff.pandas
1315
dir_content_diff.pytest_plugin

pyproject.toml

+4-1
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ build-backend = "setuptools.build_meta"
1010
[tool.black]
1111
line-length = 100
1212
target-version = [
13-
"py38",
1413
"py39",
1514
"py310",
1615
"py311",
16+
"py312",
1717
]
1818

1919
[tool.pydocstyle]
@@ -33,3 +33,6 @@ force_single_line = true
3333
testpaths = [
3434
"tests",
3535
]
36+
markers = [
37+
"comparators_missing_deps: marks tests for missing dependencies",
38+
]

0 commit comments

Comments
 (0)