|
1 | 1 | from abc import ABCMeta, abstractmethod |
2 | 2 | from datetime import datetime, date |
3 | | -from unittest import mock, skip |
| 3 | +from unittest import mock, skip, skipIf |
4 | 4 |
|
5 | 5 | import numpy as np |
6 | 6 | from dateutil.tz import tzlocal |
|
13 | 13 | from hdmf.validate.errors import (DtypeError, MissingError, ExpectedArrayError, MissingDataType, |
14 | 14 | IncorrectQuantityError, IllegalLinkError, ShapeError) |
15 | 15 | from hdmf.backends.hdf5 import HDF5IO |
| 16 | +from hdmf.utils import ZARR_INSTALLED |
16 | 17 |
|
17 | 18 | CORE_NAMESPACE = 'test_core' |
18 | 19 |
|
@@ -1605,3 +1606,53 @@ def test_scalar_instead_of_array(self): |
1605 | 1606 | # Should be ExpectedArrayError, not ShapeError |
1606 | 1607 | self.assertIsInstance(result[0], ExpectedArrayError) |
1607 | 1608 | self.assertNotIsInstance(result[0], ShapeError) |
| 1609 | + |
| 1610 | + |
class TestObjectDtypeArrays(TestCase):
    """Validator tests for datasets backed by object-dtype arrays.

    Each test wraps a ``zarr`` array created with ``dtype=object`` and a
    ``numcodecs.VLenUTF8`` object codec in a ``DatasetBuilder`` and validates
    it against a spec whose ``data`` dataset is declared as 1-D ``text``.
    """

    def set_up_spec(self):
        """Register the ``Bar`` group spec and populate ``self.namespace`` and ``self.vmap``."""
        data_spec = DatasetSpec('an example dataset', 'text', name='data', shape=(None,))
        attr_spec = AttributeSpec('attr1', 'an example string attribute', 'text')
        bar_spec = GroupSpec('A test group specification with a data type',
                             data_type_def='Bar',
                             datasets=[data_spec],
                             attributes=[attr_spec])

        catalog = SpecCatalog()
        catalog.register_spec(bar_spec, 'test.yaml')
        self.namespace = SpecNamespace('a test namespace',
                                       CORE_NAMESPACE,
                                       [{'source': 'test.yaml'}],
                                       version='0.1.0',
                                       catalog=catalog)
        self.vmap = ValidatorMap(self.namespace)

    def _validate_bar_holding(self, data):
        """Return the validation results for a ``Bar`` builder whose ``data`` dataset wraps *data*.

        Requires ``set_up_spec`` to have been called so that ``self.vmap`` exists.
        """
        dataset = DatasetBuilder('data', data)
        group = GroupBuilder('my_bar',
                             attributes={'data_type': 'Bar', 'attr1': 'a string attribute'},
                             datasets=[dataset])
        return self.vmap.validate(group)

    @skipIf(not ZARR_INSTALLED, "Zarr is not installed")
    def test_non_empty_object_dtype_array(self):
        """A populated object-dtype zarr.Array of strings validates against a text dataset spec."""
        # Imported lazily: zarr and numcodecs are optional, and this test is skipped without zarr.
        import numcodecs
        import zarr

        self.set_up_spec()

        # Zarr stores variable-length strings as an object-dtype array (HDF5 instead relies on
        # vlen metadata), so the validator has to work the string type out from the contents.
        strings = ['string1', 'string2', 'string3']
        populated = zarr.array(strings, dtype=object, object_codec=numcodecs.VLenUTF8())

        errors = self._validate_bar_holding(populated)

        # No errors expected: an object array holding strings should be detected as 'utf'.
        self.assertEqual(len(errors), 0)

    @skipIf(not ZARR_INSTALLED, "Zarr is not installed")
    def test_empty_object_dtype_array(self):
        """A zero-length object-dtype zarr.Array validates against a text dataset spec."""
        # Imported lazily: zarr and numcodecs are optional, and this test is skipped without zarr.
        import numcodecs
        import zarr

        self.set_up_spec()

        # Zero-length object-dtype array: there are no elements to inspect for a type.
        unpopulated = zarr.array([], dtype=object, object_codec=numcodecs.VLenUTF8())

        errors = self._validate_bar_holding(unpopulated)

        # No errors expected: an empty object array should default to the 'utf' type.
        self.assertEqual(len(errors), 0)
0 commit comments