Skip to content

Indexed column breaks generation of icephys hierarchical dataframe #665

Closed
@lvsltz

Description

@lvsltz

Description

Some of the metadata in the icephys tables are arrays. Writing these tables to NWB succeeds, but on read, to_hierarchical_dataframe fails with "TypeError: unhashable type: 'list'" (full traceback below) if one of the columns in the DynamicTable is indexed.

Steps to Reproduce

from pynwb.testing import create_icephys_testfile
from hdmf.common.hierarchicaltable import to_hierarchical_dataframe
nwbfile = create_icephys_testfile()
nwbfile.icephys_experimental_conditions.add_column('newcol', 'abc', data = [1,2,3], index = [2,3])
to_hierarchical_dataframe(nwbfile.icephys_experimental_conditions)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath, copy)
    430             try:
--> 431                 codes, categories = factorize(values, sort=True)
    432             except TypeError as err:

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize(values, sort, na_sentinel, size_hint)
    759 
--> 760         codes, uniques = factorize_array(
    761             values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize_array(values, na_sentinel, size_hint, na_value, mask)
    561     table = hash_klass(size_hint or len(values))
--> 562     uniques, codes = table.factorize(
    563         values, na_sentinel=na_sentinel, na_value=na_value, mask=mask

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.factorize()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable._unique()

TypeError: unhashable type: 'list'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-3-295943c1aba2> in <module>
      3 nwbfile = create_icephys_testfile()
      4 nwbfile.icephys_experimental_conditions.add_column('newcol', 'abc', data = [1,2,3], index = [2,3])
----> 5 to_hierarchical_dataframe(nwbfile.icephys_experimental_conditions)

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/hdmf/utils.py in func_call(*args, **kwargs)
    585             def func_call(*args, **kwargs):
    586                 pargs = _check_args(args, kwargs)
--> 587                 return func(**pargs)
    588 
    589         _rtype = rtype

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/hdmf/common/hierarchicaltable.py in to_hierarchical_dataframe(dynamic_table)
    136 
    137     # Construct the pandas dataframe with the hierarchical multi-index
--> 138     multi_index = pd.MultiIndex.from_tuples(index, names=index_names)
    139     out_df = pd.DataFrame(data=data, index=multi_index, columns=columns)
    140     return out_df

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/indexes/multi.py in new_meth(self_or_cls, *args, **kwargs)
    200             kwargs["names"] = kwargs.pop("name")
    201 
--> 202         return meth(self_or_cls, *args, **kwargs)
    203 
    204     return new_meth

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/indexes/multi.py in from_tuples(cls, tuples, sortorder, names)
    558             arrays = cast(List[Sequence[Hashable]], arrs)
    559 
--> 560         return cls.from_arrays(arrays, sortorder=sortorder, names=names)
    561 
    562     @classmethod

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/indexes/multi.py in from_arrays(cls, arrays, sortorder, names)
    481                 raise ValueError("all arrays must be same length")
    482 
--> 483         codes, levels = factorize_from_iterables(arrays)
    484         if names is lib.no_default:
    485             names = [getattr(arr, "name", None) for arr in arrays]

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/arrays/categorical.py in factorize_from_iterables(iterables)
   2779         return [], []
   2780 
-> 2781     codes, categories = zip(*(factorize_from_iterable(it) for it in iterables))
   2782     return list(codes), list(categories)

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/arrays/categorical.py in <genexpr>(.0)
   2779         return [], []
   2780 
-> 2781     codes, categories = zip(*(factorize_from_iterable(it) for it in iterables))
   2782     return list(codes), list(categories)

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/arrays/categorical.py in factorize_from_iterable(values)
   2752         # but only the resulting categories, the order of which is independent
   2753         # from ordered. Set ordered to False as default. See GH #15457
-> 2754         cat = Categorical(values, ordered=False)
   2755         categories = cat.categories
   2756         codes = cat.codes

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath, copy)
    431                 codes, categories = factorize(values, sort=True)
    432             except TypeError as err:
--> 433                 codes, categories = factorize(values, sort=False)
    434                 if dtype.ordered:
    435                     # raise, as we don't have a sortable data structure and so

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize(values, sort, na_sentinel, size_hint)
    758             na_value = None
    759 
--> 760         codes, uniques = factorize_array(
    761             values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value
    762         )

~/miniconda3/envs/icephys-dev/lib/python3.8/site-packages/pandas/core/algorithms.py in factorize_array(values, na_sentinel, size_hint, na_value, mask)
    560 
    561     table = hash_klass(size_hint or len(values))
--> 562     uniques, codes = table.factorize(
    563         values, na_sentinel=na_sentinel, na_value=na_value, mask=mask
    564     )

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.factorize()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable._unique()

TypeError: unhashable type: 'list'

Environment

Python Executable: Conda
Python Version: 3.8
Operating System: macOS
Versions: hdmf==3.1.1, pandas==1.3.0, pynwb==2.0.0

Checklist

  • Have you ensured the bug was not already reported?
  • Have you included a brief and descriptive title?
  • Have you included a clear description of the problem you are trying to solve?
  • Have you included a minimal code snippet that reproduces the issue you are encountering?
  • Have you checked our Contributing document?

Metadata

Metadata

Assignees

Labels

category: bug — errors in the code or code behavior

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions