diff --git a/src/roman_datamodels/datamodels/__init__.py b/src/roman_datamodels/datamodels/__init__.py index 3c5417b75..d4730c355 100644 --- a/src/roman_datamodels/datamodels/__init__.py +++ b/src/roman_datamodels/datamodels/__init__.py @@ -2,5 +2,8 @@ from ._datamodels import * # noqa: F403 # rename rdm_open to open to match the current roman_datamodels API -from ._utils import FilenameMismatchWarning # noqa: F401 +from ._utils import ( + FilenameMismatchWarning, # noqa: F401 + create_synchronized_table, # noqa: F401 +) from ._utils import rdm_open as open # noqa: F401 diff --git a/src/roman_datamodels/datamodels/_core.py b/src/roman_datamodels/datamodels/_core.py index eabe7d78b..88b12bb37 100644 --- a/src/roman_datamodels/datamodels/_core.py +++ b/src/roman_datamodels/datamodels/_core.py @@ -284,7 +284,10 @@ def save(self, path, dir_path=None, *args, all_array_compression="lz4", all_arra output_path, *args, all_array_compression=all_array_compression, all_array_storage=all_array_storage, **kwargs ) elif ext == ".parquet" and hasattr(self, "to_parquet"): - self.to_parquet(output_path) + to_parquet_kwargs = {} + if "ivoa_compliant" in kwargs: + to_parquet_kwargs["ivoa_compliant"] = kwargs["ivoa_compliant"] + self.to_parquet(output_path, **to_parquet_kwargs) else: raise ValueError(f"unknown filetype {ext}") diff --git a/src/roman_datamodels/datamodels/_datamodels.py b/src/roman_datamodels/datamodels/_datamodels.py index b160a734f..eae511c37 100644 --- a/src/roman_datamodels/datamodels/_datamodels.py +++ b/src/roman_datamodels/datamodels/_datamodels.py @@ -16,13 +16,17 @@ from collections import abc from typing import TYPE_CHECKING -import astropy.table.meta import numpy as np from astropy import time as _time from astropy.modeling import models from ._core import DataModel -from ._utils import node_update, temporary_update_filedate, temporary_update_filename +from ._utils import ( + create_synchronized_table, + node_update, + temporary_update_filedate, + 
temporary_update_filename, +) if TYPE_CHECKING: from typing import Any @@ -84,16 +88,28 @@ class _ParquetMixin: __slots__ = () - def to_parquet(self, filepath): + def to_parquet(self, filepath, ivoa_compliant: bool = False): """ - Save catalog in parquet format. + Save the catalog to a Parquet file preserving metadata. - Defers import of parquet to minimize import overhead for all other models. + Parameters + ---------- + filepath : str or Path + Path to the output Parquet file. + ivoa_compliant : bool, optional + If True, ensures units and metadata are formatted according to IVOA standards. + + Notes + ----- + - Validates the catalog before writing, as Parquet does not provide schema validation. + - Metadata is flattened and merged with table-level metadata for compatibility. + - Imports Parquet dependencies only when needed to minimize overhead. + - Optionally, column units and types can be synchronized for IVOA compliance. """ from roman_datamodels._stnode import DNode # parquet does not provide validation so validate first with asdf - self.validate() + self.validate() # type: ignore[attr-defined] global DTYPE_MAP import pyarrow as pa @@ -117,13 +133,14 @@ def to_parquet(self, filepath): } ) - with temporary_update_filename(self, pathlib.Path(filepath).name), temporary_update_filedate(self, _time.Time.now()): + with temporary_update_filename(self, pathlib.Path(filepath).name), temporary_update_filedate(self, _time.Time.now()): # type: ignore[arg-type] # Construct flat metadata dict - flat_meta = self.to_flat_dict() + flat_meta = self.to_flat_dict() # type: ignore[attr-defined] + # select only meta items flat_meta = {k: str(v) for (k, v) in flat_meta.items() if k.startswith("roman.meta")} # Extract table metadata - source_cat = self.source_catalog + source_cat = self.source_catalog # type: ignore[attr-defined] scmeta = source_cat.meta # Wrap it as a DNode so it can be flattened dn_scmeta = DNode(scmeta) @@ -136,14 +153,18 @@ def to_parquet(self, filepath): keys = 
list(source_cat.columns.keys()) arrs = [np.array(source_cat[key]) for key in keys] units = [str(source_cat[key].unit) for key in keys] + descriptions = [getattr(source_cat[key], "description", "") for key in keys] dtypes = [DTYPE_MAP[np.array(source_cat[key]).dtype.name] for key in keys] - fields = [ - pa.field(key, type=dtype, metadata={"unit": unit}) for (key, dtype, unit) in zip(keys, dtypes, units, strict=False) - ] - extra_astropy_metadata = astropy.table.meta.get_yaml_from_table(source_cat) - flat_meta["table_meta_yaml"] = "\n".join(extra_astropy_metadata) - schema = pa.schema(fields, metadata=flat_meta) - table = pa.Table.from_arrays(arrs, schema=schema) + table = create_synchronized_table( + arrs, + keys, + units, + dtypes=dtypes, + global_meta=flat_meta, + ivoa_compliant=ivoa_compliant, + descriptions=descriptions, + table_meta=scmeta, + ) pq.write_table(table, filepath, compression=None) diff --git a/src/roman_datamodels/datamodels/_utils.py b/src/roman_datamodels/datamodels/_utils.py index e1a3f382d..b03b5727a 100644 --- a/src/roman_datamodels/datamodels/_utils.py +++ b/src/roman_datamodels/datamodels/_utils.py @@ -23,7 +23,15 @@ from roman_datamodels._stnode import DNode, LNode -__all__ = ["FilenameMismatchWarning", "node_update", "rdm_open", "temporary_update_filedate", "temporary_update_filename"] +__all__ = [ + "FilenameMismatchWarning", + "create_synchronized_table", + "node_update", + "parse_units_to_ivoa", + "rdm_open", + "temporary_update_filedate", + "temporary_update_filename", +] class FilenameMismatchWarning(UserWarning): @@ -302,3 +310,130 @@ def rdm_open(init, memmap=False, **kwargs): if not isinstance(init, asdf.AsdfFile): asdf_file.close() raise TypeError(f"Unknown datamodel type: {model_type}, please use asdf.open for non-roman_datamodels files") + + +def parse_units_to_ivoa(unit_strings: list[str]) -> list[str]: + """ + Convert a list of unit strings to their IVOA-compliant representations. 
+ + Parameters + ---------- + unit_strings : list of str + List of unit strings to be converted. Can include None, empty strings, or 'unitless'. + + Returns + ------- + list of str + List of IVOA-compliant unit strings. Dimensionless or unrecognized units are mapped to "1". + """ + from astropy import units as u + + ivoa_list: list[str] = [] + for s in unit_strings: + # Standardize dimensionless/null inputs to IVOA "1" + if s is None or str(s).lower() in ("none", "", "unitless"): + ivoa_list.append("1") + continue + try: + unit_obj = u.Unit(s) + if isinstance(unit_obj, u.function.core.FunctionUnitBase): + unit_str = unit_obj.to_string("generic") + if unit_str.startswith("mag"): + ivoa_list.append("mag") + else: + ivoa_list.append(unit_str) + else: + ivoa_list.append(unit_obj.to_string(format="vounit", deprecations="convert")) + except Exception as e: + warnings.warn( + f"Could not parse unit '{s}' to IVOA format: {e}. Using dimensionless unit '1'.", UserWarning, stacklevel=2 + ) + ivoa_list.append("1") + + return ivoa_list + + +def create_synchronized_table( + arrs: list, + names: list[str], + units: list[str], + dtypes: list | None, + global_meta: dict | None, + ivoa_compliant: bool = False, + descriptions: list[str] | None = None, + table_meta: Mapping | None = None, +): + """ + Create a PyArrow table with synchronized field metadata and Astropy YAML metadata. + + Parameters + ---------- + arrs : list + List of arrays or PyArrow columns. + names : list of str + Column names. + units : list of str + Unit strings for each column. + dtypes : list, optional + PyArrow data types. + global_meta : dict, optional + Existing global metadata to preserve. + ivoa_compliant : bool, optional + If True, convert units to IVOA-compliant strings using parse_units_to_ivoa. + Defaults to False (uses units as-is). + descriptions : list of str, optional + Description strings for each column. 
+ table_meta : Mapping, optional + Table-level metadata to be embedded in the Astropy YAML sidecar and + restored when reading the Parquet file back into an Astropy Table. + + Returns + ------- + pyarrow.Table + A PyArrow Table with synchronized field-level unit metadata and Astropy YAML metadata embedded in the schema. + """ + import astropy.table.meta + import pyarrow as pa + from astropy.table import Table + + # Determine final units to use + if ivoa_compliant: + # This uses your unified gatekeeper logic + final_units = parse_units_to_ivoa(units) + else: + # Default: Use exactly what was passed in, but ensure strings for .encode() + # We still handle None -> "" or "1" here to prevent encode errors + final_units = [str(u) if u is not None else "" for u in units] + + # Build Fields with Field-Level Metadata + fields = [] + for i, (name, unit) in enumerate(zip(names, final_units, strict=False)): + col_type = dtypes[i] if dtypes else arrs[i].type + # Only add metadata if the unit isn't an empty string + meta = {b"unit": unit.encode()} if unit else {} + fields.append(pa.field(name, type=col_type, metadata=meta)) + + # Build Temp Astropy Table for YAML Synchronization + temp_table = Table() + for i, name in enumerate(names): + # Convert to numpy for Astropy compatibility + temp_table[name] = arrs[i] if isinstance(arrs[i], np.ndarray) else arrs[i].to_numpy() + # Apply the final unit + temp_table[name].unit = final_units[i] if final_units[i] else None # type: ignore[attr-defined] + # Apply description if provided + if descriptions and descriptions[i]: + temp_table[name].description = descriptions[i] # type: ignore[attr-defined] + + # Attach table-level metadata (e.g., aperture_radii, ee_fractions) so it is + # serialized into the YAML sidecar and restored on read. 
+ if table_meta: + temp_table.meta.update(table_meta) + + # Update Global Metadata (The "Astropy Sidecar") + updated_meta = dict(global_meta) if global_meta else {} + new_yaml = astropy.table.meta.get_yaml_from_table(temp_table) + updated_meta[b"table_meta_yaml"] = "\n".join(new_yaml).encode() + + # Build and return + schema = pa.schema(fields, metadata=updated_meta) + return pa.Table.from_arrays(arrs, schema=schema) diff --git a/tests/test_ivoa_compliant_units.py b/tests/test_ivoa_compliant_units.py new file mode 100644 index 000000000..b09db9f80 --- /dev/null +++ b/tests/test_ivoa_compliant_units.py @@ -0,0 +1,413 @@ +""" +Unit tests for IVOA-compliant unit conversion and Parquet export functionality. +""" + +import pathlib +import tempfile + +import numpy as np +import pyarrow.parquet as pq +import pytest +from astropy import units as u +from astropy.table import Table + +from roman_datamodels.datamodels._utils import ( + create_synchronized_table, + parse_units_to_ivoa, +) + + +class TestParseUnitsToIVOA: + """Test suite for parse_units_to_ivoa function.""" + + def test_standard_units(self): + """Test conversion of standard physical units.""" + input_units = ["m", "m/s", "kg", "erg/(cm2 s Angstrom)"] + result = parse_units_to_ivoa(input_units) + + assert result[0] == "m" + assert result[1] == "m.s**-1" + assert result[2] == "kg" + # Complex unit conversion - vounit format converts to IVOA standard representation + # erg/(cm2 s Angstrom) -> 10g.nm**-1.s**-3 in IVOA vounit format + assert result[3] == "10g.nm**-1.s**-3" + + def test_magnitude_units(self): + """Test conversion of magnitude units (FunctionUnitBase).""" + input_units = ["mag", "mag(AB)", "mag(ST)"] + result = parse_units_to_ivoa(input_units) + + # All magnitude units should map to "mag" + assert all(unit == "mag" for unit in result) + + def test_dimensionless_units(self): + """Test handling of dimensionless/null unit representations.""" + input_units = [None, "", "unitless", "None", "UNITLESS"] + 
"""
Unit tests for IVOA-compliant unit conversion and Parquet export functionality.
"""

import warnings

import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import pytest
from astropy import units as u
from astropy.table import Table

from roman_datamodels.datamodels._utils import (
    create_synchronized_table,
    parse_units_to_ivoa,
)


class TestParseUnitsToIVOA:
    """Test suite for parse_units_to_ivoa function."""

    def test_standard_units(self):
        """Test conversion of standard physical units."""
        result = parse_units_to_ivoa(["m", "m/s", "kg", "erg/(cm2 s Angstrom)"])

        assert result[0] == "m"
        assert result[1] == "m.s**-1"
        assert result[2] == "kg"
        # erg/(cm2 s Angstrom) -> 10g.nm**-1.s**-3 in IVOA vounit format
        assert result[3] == "10g.nm**-1.s**-3"

    def test_magnitude_units(self):
        """Test conversion of magnitude units (FunctionUnitBase)."""
        result = parse_units_to_ivoa(["mag", "mag(AB)", "mag(ST)"])

        # All magnitude units should map to "mag"
        assert all(unit == "mag" for unit in result)

    def test_dimensionless_units(self):
        """Test handling of dimensionless/null unit representations."""
        result = parse_units_to_ivoa([None, "", "unitless", "None", "UNITLESS"])

        # All should map to IVOA dimensionless "1"
        assert all(unit == "1" for unit in result)

    def test_invalid_units_fallback(self):
        """Test that invalid units fall back to '1' with warning."""
        with pytest.warns(UserWarning, match="Could not parse unit"):
            result = parse_units_to_ivoa(["invalid_unit_xyz", "not-a-unit"])

        # Invalid units should fallback to "1"
        assert all(unit == "1" for unit in result)

    def test_mixed_units(self):
        """Test a realistic mix of unit types."""
        input_units = ["m/s", None, "mag", "erg/s", "", "invalid", "Jy"]

        # The fallback warning for "invalid" is covered by
        # test_invalid_units_fallback; silence it here.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            result = parse_units_to_ivoa(input_units)

        assert result[0] == "m.s**-1"  # Standard unit
        assert result[1] == "1"  # None
        assert result[2] == "mag"  # Magnitude
        assert result[3] == "cm**2.g.s**-3"  # erg/s in IVOA vounit format
        assert result[4] == "1"  # Empty string
        assert result[5] == "1"  # Invalid (with warning)
        assert result[6] == "Jy"  # Jansky

    def test_astropy_unit_objects(self):
        """Test with Astropy Unit objects (as strings)."""
        input_units = [
            str(u.meter),
            str(u.m / u.s),
            str(u.erg / (u.cm**2 * u.s * u.Angstrom)),
        ]
        result = parse_units_to_ivoa(input_units)

        assert result[0] == "m"
        assert result[1] == "m.s**-1"
        # erg/(cm2 s Angstrom) -> 10g.nm**-1.s**-3 in IVOA vounit format
        assert result[2] == "10g.nm**-1.s**-3"

    def test_empty_list(self):
        """Test with empty input list."""
        assert parse_units_to_ivoa([]) == []


class TestCreateSynchronizedTable:
    """Test suite for create_synchronized_table function."""

    def test_basic_table_creation(self):
        """Test basic table creation without IVOA compliance."""
        arrs = [np.array([1, 2, 3]), np.array([4.0, 5.0, 6.0])]
        names = ["col1", "col2"]
        units = ["m", "m/s"]
        dtypes = [pa.int64(), pa.float64()]

        table = create_synchronized_table(arrs, names, units, dtypes, global_meta=None, ivoa_compliant=False)

        assert table.num_columns == 2
        assert table.num_rows == 3
        assert table.column_names == ["col1", "col2"]

        # Check field metadata
        assert table.schema.field("col1").metadata[b"unit"] == b"m"
        assert table.schema.field("col2").metadata[b"unit"] == b"m/s"

    def test_ivoa_compliant_table(self):
        """Test table creation with IVOA compliance enabled."""
        arrs = [np.array([1, 2, 3]), np.array([4.0, 5.0, 6.0])]
        names = ["col1", "col2"]
        units = ["m/s", None]  # None should convert to "1"
        dtypes = [pa.int64(), pa.float64()]

        table = create_synchronized_table(arrs, names, units, dtypes, global_meta=None, ivoa_compliant=True)

        # Check IVOA-compliant units
        assert table.schema.field("col1").metadata[b"unit"] == b"m.s**-1"
        assert table.schema.field("col2").metadata[b"unit"] == b"1"

    def test_with_descriptions(self):
        """Test table creation with column descriptions."""
        table = create_synchronized_table(
            [np.array([1, 2, 3])],
            ["col1"],
            ["m"],
            [pa.int64()],
            global_meta=None,
            ivoa_compliant=False,
            descriptions=["This is a test column"],
        )

        # Descriptions should be in Astropy YAML metadata
        assert b"table_meta_yaml" in table.schema.metadata
        yaml_str = table.schema.metadata[b"table_meta_yaml"].decode()
        assert "This is a test column" in yaml_str

    def test_with_global_metadata(self):
        """Test preservation of global metadata."""
        global_meta = {"instrument": "WFI", "filter": "F158"}

        table = create_synchronized_table(
            [np.array([1, 2, 3])], ["col1"], ["m"], [pa.int64()], global_meta=global_meta, ivoa_compliant=False
        )

        # Global metadata should be preserved (as bytes)
        assert b"instrument" in table.schema.metadata
        assert table.schema.metadata[b"instrument"] == b"WFI"

    def test_with_table_metadata(self):
        """Test preservation of table-level metadata."""
        table_meta = {"aperture_radii": [1.0, 2.0, 3.0], "ee_fractions": [0.5, 0.7, 0.9]}

        table = create_synchronized_table(
            [np.array([1, 2, 3])],
            ["col1"],
            ["m"],
            [pa.int64()],
            global_meta=None,
            ivoa_compliant=False,
            table_meta=table_meta,
        )

        # Table metadata should be in YAML
        yaml_str = table.schema.metadata[b"table_meta_yaml"].decode()
        assert "aperture_radii" in yaml_str
        assert "ee_fractions" in yaml_str

    def test_empty_units(self):
        """Test handling of empty unit strings."""
        table = create_synchronized_table(
            [np.array([1, 2, 3])], ["col1"], [""], [pa.int64()], global_meta=None, ivoa_compliant=False
        )

        # Empty units should not create metadata
        assert table.schema.field("col1").metadata == {}


class TestParquetRoundTrip:
    """Test round-trip preservation of data and metadata through Parquet files."""

    @pytest.fixture
    def temp_parquet_file(self, tmp_path):
        """Provide a Parquet file path inside pytest's per-test temporary directory."""
        # tmp_path is created and cleaned up by pytest, so no manual unlink is needed.
        return tmp_path / "catalog.parquet"

    def test_basic_roundtrip(self, temp_parquet_file):
        """Test basic round-trip without IVOA compliance."""
        arrs = [np.array([1, 2, 3]), np.array([4.0, 5.0, 6.0])]
        names = ["int_col", "float_col"]
        units = ["m", "m/s"]
        dtypes = [pa.int64(), pa.float64()]

        # Create and save table
        table = create_synchronized_table(
            arrs, names, units, dtypes, global_meta={"test_key": "test_value"}, ivoa_compliant=False
        )
        pq.write_table(table, temp_parquet_file)

        # Read back
        loaded_table = pq.read_table(temp_parquet_file)

        # Verify data
        assert loaded_table.num_rows == 3
        assert loaded_table.column_names == ["int_col", "float_col"]
        np.testing.assert_array_equal(loaded_table["int_col"].to_numpy(), [1, 2, 3])
        np.testing.assert_array_equal(loaded_table["float_col"].to_numpy(), [4.0, 5.0, 6.0])

        # Verify metadata
        assert loaded_table.schema.field("int_col").metadata[b"unit"] == b"m"
        assert loaded_table.schema.field("float_col").metadata[b"unit"] == b"m/s"
        assert loaded_table.schema.metadata[b"test_key"] == b"test_value"

    def test_ivoa_roundtrip(self, temp_parquet_file):
        """Test round-trip with IVOA compliance enabled."""
        # Test data with mixed units
        arrs = [np.array([1, 2, 3]), np.array([4.0, 5.0, 6.0]), np.array([7.0, 8.0, 9.0])]
        names = ["velocity", "magnitude", "dimensionless"]
        units = ["m/s", "mag", None]
        dtypes = [pa.int64(), pa.float64(), pa.float64()]

        # Create and save table with IVOA compliance
        table = create_synchronized_table(arrs, names, units, dtypes, global_meta=None, ivoa_compliant=True)
        pq.write_table(table, temp_parquet_file)

        # Read back
        loaded_table = pq.read_table(temp_parquet_file)

        # Verify IVOA-compliant units
        assert loaded_table.schema.field("velocity").metadata[b"unit"] == b"m.s**-1"
        assert loaded_table.schema.field("magnitude").metadata[b"unit"] == b"mag"
        assert loaded_table.schema.field("dimensionless").metadata[b"unit"] == b"1"

    def test_astropy_table_roundtrip(self, temp_parquet_file):
        """Test round-trip through Astropy Table."""
        arrs = [np.array([1, 2, 3]), np.array([4.0, 5.0, 6.0])]
        names = ["col1", "col2"]
        units = ["m/s", "erg/s"]
        dtypes = [pa.int64(), pa.float64()]
        descriptions = ["Velocity column", "Energy flux column"]

        # Create table with IVOA compliance
        table = create_synchronized_table(
            arrs, names, units, dtypes, global_meta=None, ivoa_compliant=True, descriptions=descriptions
        )
        pq.write_table(table, temp_parquet_file)

        # Read back as Astropy Table
        loaded_astropy = Table.read(temp_parquet_file)

        # Verify data
        assert len(loaded_astropy) == 3
        np.testing.assert_array_equal(loaded_astropy["col1"], [1, 2, 3])
        np.testing.assert_array_equal(loaded_astropy["col2"], [4.0, 5.0, 6.0])

        # Verify units (Astropy should parse them)
        assert loaded_astropy["col1"].unit is not None
        assert loaded_astropy["col2"].unit is not None

    def test_metadata_preservation(self, temp_parquet_file):
        """Test comprehensive metadata preservation."""
        # Test data with all metadata types
        global_meta = {"instrument": "WFI", "filter": "F158", "exposure_time": "100.5"}
        table_meta = {"aperture_radii": [1.0, 2.0, 3.0], "processing_level": "L2"}
        descriptions = ["Test column with metadata"]

        # Create and save
        table = create_synchronized_table(
            [np.array([1, 2, 3])],
            ["test_col"],
            ["m"],
            [pa.int64()],
            global_meta=global_meta,
            ivoa_compliant=False,
            descriptions=descriptions,
            table_meta=table_meta,
        )
        pq.write_table(table, temp_parquet_file)

        # Read back
        loaded_table = pq.read_table(temp_parquet_file)

        # Verify all metadata types
        assert loaded_table.schema.metadata[b"instrument"] == b"WFI"
        assert loaded_table.schema.metadata[b"filter"] == b"F158"
        assert b"table_meta_yaml" in loaded_table.schema.metadata

        yaml_str = loaded_table.schema.metadata[b"table_meta_yaml"].decode()
        assert "aperture_radii" in yaml_str
        assert "processing_level" in yaml_str
        assert "Test column with metadata" in yaml_str


class TestBackwardCompatibility:
    """Test backward compatibility of the changes."""

    def test_default_ivoa_compliant_false(self):
        """Test that ivoa_compliant defaults to False."""
        # Call without ivoa_compliant parameter (should default to False)
        table = create_synchronized_table([np.array([1, 2, 3])], ["col1"], ["m/s"], [pa.int64()], global_meta=None)

        # Unit should be unchanged (not IVOA-converted)
        assert table.schema.field("col1").metadata[b"unit"] == b"m/s"

    def test_none_handling_backward_compatible(self):
        """Test that None units are handled gracefully."""
        # Without IVOA compliance, None should become empty string
        table = create_synchronized_table(
            [np.array([1, 2, 3])], ["col1"], [None], [pa.int64()], global_meta=None, ivoa_compliant=False
        )

        # No metadata should be set for empty units
        assert table.schema.field("col1").metadata == {}


class TestEdgeCases:
    """Test edge cases and error conditions."""

    def test_mismatched_array_lengths(self):
        """Test behavior with mismatched input lengths."""
        arrs = [np.array([1, 2, 3]), np.array([4, 5])]  # Different lengths
        names = ["col1", "col2"]
        units = ["m", "m/s"]
        dtypes = [pa.int64(), pa.int64()]

        # PyArrow should raise an error for mismatched array lengths
        with pytest.raises((ValueError, pa.lib.ArrowInvalid)):
            create_synchronized_table(arrs, names, units, dtypes, global_meta=None)

    def test_special_characters_in_units(self):
        """Test handling of special characters in unit strings."""
        result = parse_units_to_ivoa(["m^2", "kg*m/s^2", "erg/(cm^2*s*Angstrom)"])

        # Should handle special characters gracefully
        assert len(result) == 3
        assert all(isinstance(unit, str) for unit in result)

    def test_unicode_in_metadata(self):
        """Test handling of unicode characters in metadata."""
        # Written as an escape so the code point is unambiguous: U+00B5 (micro
        # sign), whose UTF-8 encoding is the b"\xc2\xb5" asserted below.
        units = ["\u00b5m"]

        table = create_synchronized_table(
            [np.array([1, 2, 3])], ["col1"], units, [pa.int64()], global_meta=None, ivoa_compliant=False
        )

        # Should handle unicode properly
        assert b"\xc2\xb5m" in table.schema.field("col1").metadata[b"unit"]