Skip to content

Commit 3c7263c

Browse files
authored
Xr202601 identical fix, take 2 (#197)
* Fixes to dataset equivalence testing on xarray loads. * Added towncrier fragment. * Use own concept of 'dataset equivalence'. * Simplify xr.Dataset equivalence test. * Reconfigure docs to ref SciTools.
1 parent 388e978 commit 3c7263c

File tree

4 files changed

+50
-10
lines changed

4 files changed

+50
-10
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed xarray load tests for new behaviour of xarray.Dataset.identical.

docs/conf.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
# -- Project information -----------------------------------------------------
2020

2121
project = "ncdata"
22-
copyright = "2023, pp-mo"
23-
author = "pp-mo"
22+
copyright = "2023, SciTools"
23+
author = "SciTools"
2424

2525
# The complete version, including alpha/beta/rc tags
2626
version_parts = [str(part) for part in version_tuple]
@@ -149,7 +149,7 @@
149149
html_context = {
150150
# Possibly needed for pydata_theme?
151151
"github_repo": "ncdata",
152-
"github_user": "pp-mo",
152+
"github_user": "SciTools",
153153
"github_version": "main",
154154
"doc_path": "docs",
155155
# Default light/dark mode.

pyproject.toml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ build-backend = "setuptools.build_meta"
1111
[project]
1212
name = "ncdata"
1313
authors = [
14-
{name = "Patrick Peglar", email = "patrick.peglar@metoffice.gov.uk"},
14+
{name = "Iris Contributors", email = "scitools.pub@gmail.com"}
1515
]
1616
description = "Abstract NetCDF data objects, providing fast data transfer between analysis packages."
1717
requires-python = ">=3.10"
@@ -50,10 +50,10 @@ dependencies = ["numpy", "dask", "netCDF4"]
5050

5151

5252
[project.urls]
53-
Code = "https://github.com/pp-mo/ncdata"
54-
Discussions = "https://github.com/pp-mo/ncdata/discussions"
53+
Code = "https://github.com/SciTools/ncdata"
54+
Discussions = "https://github.com/SciTools/ncdata/discussions"
5555
Documentation = "https://ncdata.readthedocs.io"
56-
Issues = "https://github.com/pp-mo/ncdata/issues"
56+
Issues = "https://github.com/SciTools/ncdata/issues"
5757

5858
[tool.setuptools]
5959
license-files = ["LICENSE"]
@@ -88,7 +88,7 @@ package = "ncdata"
8888
package_dir = "lib"
8989
directory = "docs/changelog_fragments"
9090
filename = "docs/change_log.rst"
91-
issue_format = "`ISSUE#{issue} <https://github.com/pp-mo/ncdata/pull/{issue}>`_"
91+
issue_format = "`ISSUE#{issue} <https://github.com/SciTools/ncdata/pull/{issue}>`_"
9292
underlines = ["~", "^", "*", "+"]
9393

9494
[[tool.towncrier.type]]

tests/integration/test_xarray_load_and_save_equivalence.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
(2) check equivalence of files : xarray -> file VS xarray->ncdata->file
77
"""
88

9+
import numpy as np
910
import pytest
1011
import xarray
1112

@@ -38,6 +39,45 @@ def use_xarraylock():
3839
yield
3940

4041

42+
def check_load_equivalence(ds1: xarray.Dataset, ds2: xarray.Dataset):
    """
    Check that datasets differ only in "expected" ways.

    The key differences are due to coordinates remaining lazy in loading via ncdata, but
    having real data in a "normal" load.  This also affects which coords have indexes,
    but we are not checking that here anyway.

    What *is* compared: the dataset attributes, the dataset dims, the names (and
    order) of ``.variables``, and for every variable its attributes, dims, dtype
    and data values.

    Raises
    ------
    AssertionError
        On the first difference found.
    """

    def check_attrs_equivalent(attrs1, attrs2):
        # Because dict-eq does not work when values can be arrays (!)
        names1, names2 = set(attrs1.keys()), set(attrs2.keys())
        assert names1 == names2, (
            f"attribute names differ: {sorted(names1)} != {sorted(names2)}"
        )
        for name in attrs1:
            value1, value2 = attrs1[name], attrs2[name]
            # N.B. array_equal also requires matching shapes, whereas a plain
            # "==" broadcasts, so that e.g. [1, 1] would wrongly match 1.
            assert np.array_equal(value1, value2), (
                f"attribute {name!r} differs: {value1!r} != {value2!r}"
            )

    def check_vars_equivalent(v1, v2):
        check_attrs_equivalent(v1.attrs, v2.attrs)
        assert v1.dims == v2.dims
        assert v1.dtype == v2.dtype
        # Numeric compare may need to allow for NaNs : floats *and datetimes*
        # (cannot set the kwarg when not applicable)
        equal_nan = v1.dtype.kind in "fM"
        result = np.array_equal(v1.data, v2.data, equal_nan=equal_nan)
        # The comparison result may itself be lazy : if so, realise it.
        if hasattr(result, "compute"):
            result = result.compute()
        assert result, "variable data values differ"

    check_attrs_equivalent(ds1.attrs, ds2.attrs)
    assert ds1.dims == ds2.dims
    assert list(ds1.variables) == list(ds2.variables)
    for varname in ds1.variables:
        check_vars_equivalent(ds1.variables[varname], ds2.variables[varname])
79+
80+
4181
def test_load_direct_vs_viancdata(standard_testcase, use_xarraylock, tmp_path):
4282
source_filepath = standard_testcase.filepath
4383
ncdata = from_nc4(source_filepath)
@@ -52,8 +92,7 @@ def test_load_direct_vs_viancdata(standard_testcase, use_xarraylock, tmp_path):
5292
# Load same, via ncdata
5393
xr_ncdata_ds = to_xarray(ncdata)
5494

55-
# Treat as OK if it passes xarray comparison
56-
assert xr_ds.identical(xr_ncdata_ds)
95+
check_load_equivalence(xr_ds, xr_ncdata_ds)
5796

5897

5998
def test_save_direct_vs_viancdata(standard_testcase, tmp_path):

0 commit comments

Comments
 (0)