-
Notifications
You must be signed in to change notification settings - Fork 71
Open
Description
I was trying to rename columns of a spatialdata points object and ran into issues regarding the column that is labeled as feature_key.
Here are some reproducible examples of what I tried:
Sdata setup
import pandas as pd
import dask.dataframe as dd
import spatialdata as sd
df = pd.DataFrame({"gene": ["A", "B", "C"], "x": [1, 2, 3], "y": [1, 1, 1]})
df = dd.from_pandas(df, npartitions=1)
sdata = sd.SpatialData(points={"points": sd.models.PointsModel.parse(df, feature_key="gene")})
Example 1
sdata['points'] = sdata['points'].rename(columns={"gene": "gene_symbol"})
leads to
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
3811 try:
-> 3812 return self._engine.get_loc(casted_key)
3813 except KeyError as err:
File pandas/_libs/index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:7096, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'gene'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[60], line 9
6 df = dd.from_pandas(df, npartitions=1)
8 sdata = sd.SpatialData(points={"points": sd.models.PointsModel.parse(df, feature_key="gene")})
----> 9 sdata['points'] = sdata['points'].rename(columns={"gene": "gene_symbol"})
12 sdata['points']['gene_symbol'] = sdata['points']['gene']
13 del sdata['points']['gene']
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/spatialdata.py:2393, in SpatialData.__setitem__(self, key, value)
2382 def __setitem__(self, key: str, value: SpatialElement | AnnData) -> None:
2383 """
2384 Add the element to the SpatialData object.
2385
(...)
2391 The element.
2392 """
-> 2393 schema = get_model(value)
2394 if schema in (Image2DModel, Image3DModel):
2395 self.images[key] = value
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/models/models.py:1193, in get_model(e)
1191 return _validate_and_return(ShapesModel, e)
1192 if isinstance(e, DaskDataFrame):
-> 1193 return _validate_and_return(PointsModel, e)
1194 if isinstance(e, AnnData):
1195 return _validate_and_return(TableModel, e)
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/models/models.py:1178, in get_model.<locals>._validate_and_return(schema, e)
1174 def _validate_and_return(
1175 schema: Schema_t,
1176 e: SpatialElement,
1177 ) -> Schema_t:
-> 1178 schema().validate(e)
1179 return schema
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/models/models.py:653, in PointsModel.validate(cls, data)
651 if ATTRS_KEY in data.attrs and "feature_key" in data.attrs[ATTRS_KEY]:
652 feature_key = data.attrs[ATTRS_KEY][cls.FEATURE_KEY]
--> 653 if not isinstance(data[feature_key].dtype, CategoricalDtype):
654 logger.info(f"Feature key `{feature_key}`could be of type `pd.Categorical`. Consider casting it.")
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/dask/dataframe/core.py:4955, in DataFrame.__getitem__(self, key)
4952 return self.loc[key]
4954 # error is raised from pandas
-> 4955 meta = self._meta[_extract_meta(key)]
4956 dsk = partitionwise_graph(operator.getitem, name, self, key)
...
3822 # InvalidIndexError. Otherwise we fall through and re-raise
3823 # the TypeError.
3824 self._check_indexing_error(key)
KeyError: 'gene'
Example 2 (error occurs after writing and reading)
sdata['points']['gene_symbol'] = sdata['points']['gene']
del sdata['points']['gene']
sdata.write("sdata_test.zarr")
sd.read_zarr("sdata_test.zarr")
leads to
---------------------------------------------------------------------------
ValidationError Traceback (most recent call last)
Cell In[59], line 17
14 #sdata['points'].attrs["spatialdata_attrs"]["feature_key"] = "gene_symbol"
16 sdata.write("sdata_test.zarr")
---> 17 sd.read_zarr("sdata_test.zarr")
20 #sdata = sd.SpatialData(
21 # transcripts=sdata['transcripts'],
22 # counts=sdata['counts'],
(...)
25 #)
26 #sdata
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_io/io_zarr.py:229, in read_zarr(store, selection, on_bad_files)
226 else:
227 attrs = None
--> 229 sdata = SpatialData(
230 images=images,
231 labels=labels,
232 points=points,
233 shapes=shapes,
234 tables=tables,
235 attrs=attrs,
236 )
237 sdata.path = Path(store)
238 return sdata
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_utils.py:270, in _deprecation_alias.<locals>.deprecation_decorator.<locals>.wrapper(*args, **kwargs)
268 raise ValueError("version for deprecation must be specified")
269 rename_kwargs(f.__name__, kwargs, alias_copy, class_name, library, version)
--> 270 return f(*args, **kwargs)
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/spatialdata.py:156, in SpatialData.__init__(self, images, labels, points, shapes, tables, attrs)
151 duplicates = {x for x in element_names if element_names.count(x) > 1}
152 raise KeyError(
153 f"Element names must be unique. The following element names are used multiple times: {duplicates}"
154 )
--> 156 with raise_validation_errors(
157 title="Cannot construct SpatialData object, input contains invalid elements.\n"
158 "For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .",
159 exc_type=(ValueError, KeyError),
160 ) as collect_error:
161 if images is not None:
162 for k, v in images.items():
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/validation.py:382, in raise_validation_errors.__exit__(self, exc_type, exc_val, exc_tb)
380 # Exceptions were collected that we want to raise as a combined validation error.
381 if self._collector.errors:
--> 382 raise ValidationError(title=self._message, errors=self._collector.errors)
383 return True
ValidationError: Cannot construct SpatialData object, input contains invalid elements.
For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .
points/points: gene
Solution
I got it working with a small adjustment of example 2:
sdata['points']['gene_symbol'] = sdata['points']['gene']
del sdata['points']['gene']
sdata['points'].attrs["spatialdata_attrs"]["feature_key"] = "gene_symbol"
sdata.write("sdata_test.zarr")
sd.read_zarr("sdata_test.zarr")
Expected behaviour
I think the expected behaviour would be that:
- sdata['points'] = sdata['points'].rename(columns={"gene": "gene_symbol"})
  works out of the box, renaming the feature_key (sdata['points'].attrs["spatialdata_attrs"]["feature_key"]) as well.
- del sdata['points']['gene']
  would delete the column, but also the entry in sdata['points'].attrs["spatialdata_attrs"]["feature_key"]
  (maybe with a warning, telling the user that the feature_key column is being deleted and not just a standard column).
Metadata
Metadata
Assignees
Labels
No labels