-
Notifications
You must be signed in to change notification settings - Fork 41
Open
Labels
bug: Something isn't working
Description
Report
After updating scirpy from v0.13.0 to v0.22.2 and awkward from v2.3.1 to v2.8.7, I am getting an IndexError when calling pp.index_chains on the following AnnData object:
AnnData object with n_obs × n_vars = 66912 × 0
obs: 'sample'
uns: 'scirpy_version'
obsm: 'airr'
I believe the error is caused by a mixture of awkward array types: I concatenated several AnnData objects, and some of them have extra fields, depending on whether the input was just the 10x VDJ output or was supplemented with MiXCR AIRR output.
Is there a way to concatenate the airr component such that the resulting type contains the union of fields?
adata.obsm['airr'].type.show()
66912 * union[
var * {
c_call: ?string,
cdr1: ?string,
cdr1_aa: ?string,
cdr2: ?string,
cdr2_aa: ?string,
cdr3: ?string,
cdr3_aa: ?string,
consensus_count: int64,
d_call: ?string,
d_cigar: ?string,
fwr1: ?string,
fwr1_aa: ?string,
fwr2: ?string,
fwr2_aa: ?string,
fwr3: ?string,
fwr3_aa: ?string,
fwr4: ?string,
fwr4_aa: ?string,
germline_alignment: ?string,
high_confidence: ?bool,
is_cell: ?bool,
j_call: ?string,
j_cigar: ?string,
junction: ?string,
junction_aa: ?string,
locus: ?string,
np1_length: ?int64,
np2_length: ?int64,
productive: bool,
rev_comp: ?bool,
sequence: string,
sequence_aa: ?string,
sequence_alignment: ?string,
sequence_id: string,
umi_count: int64,
v_call: ?string,
v_cigar: ?string,
c_alignment_end: ?unknown,
c_alignment_start: ?unknown,
c_cigar: ?unknown,
c_germline_end: ?unknown,
c_germline_start: ?unknown,
c_score: ?unknown,
c_sequence_end: ?unknown,
c_sequence_start: ?unknown,
cell_ids: ?string,
clone_id: ?string,
complete_vdj: ?bool,
d_alignment_end: ?int64,
d_alignment_start: ?int64,
d_germline_end: ?int64,
d_germline_start: ?int64,
d_score: ?float64,
d_sequence_end: ?int64,
d_sequence_start: ?int64,
duplicate_count: ?int64,
j_alignment_end: ?int64,
j_alignment_start: ?int64,
j_germline_end: ?int64,
j_germline_start: ?int64,
j_score: ?float64,
j_sequence_end: ?int64,
j_sequence_start: ?int64,
junction_length: ?int64,
np1: ?string,
np2: ?string,
v_alignment_end: ?int64,
v_alignment_start: ?int64,
v_germline_end: ?int64,
v_germline_start: ?int64,
v_score: ?float64,
v_sequence_end: ?int64,
v_sequence_start: ?int64
},
var * {
c_call: ?string,
cdr1: ?string,
cdr1_aa: ?string,
cdr2: ?string,
cdr2_aa: ?string,
cdr3: ?string,
cdr3_aa: ?string,
consensus_count: int64,
d_call: ?string,
d_cigar: ?unknown,
fwr1: ?string,
fwr1_aa: ?string,
fwr2: ?string,
fwr2_aa: ?string,
fwr3: ?string,
fwr3_aa: ?string,
fwr4: ?string,
fwr4_aa: ?string,
...
v_call: ?string,
v_cigar: ?unknown
}
]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[9], [line 2](vscode-notebook-cell:?execution_count=9&line=2)
1 #%%
----> [2](vscode-notebook-cell:?execution_count=9&line=2) ir.pp.index_chains(airr_nosupp)
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/scirpy/pp/_index_chains.py:112, in index_chains(adata, filter, sort_chains_by, airr_mod, airr_key, key_added)
109 airr_idx = ak.local_index(airr, axis=1)
110 # Filter out chains that do not match the filter criteria
111 # we need an initial value that selects all chains in case filter is an empty list
--> [112](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/scirpy/pp/_index_chains.py:112) airr_idx = airr_idx[reduce(operator.and_, (f(airr) for f in filter), ak.ones_like(airr_idx, dtype=bool))]
114 res = {}
115 is_multichain = np.zeros(len(airr), dtype=bool)
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/highlevel.py:1104, in Array.__getitem__(self, where)
675 def __getitem__(self, where):
676 """
677 Args:
678 where (many types supported; see below): Index of positions to
(...) 1102 have the same dimension as the array being indexed.
1103 """
-> [1104](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/highlevel.py:1104) with ak._errors.SlicingErrorContext(self, where):
1105 # Handle named axis
1106 (_, ndim) = self._layout.minmax_depth
1107 named_axis = _get_named_axis(self)
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/_errors.py:80, in ErrorContext.__exit__(self, exception_type, exception_value, traceback)
78 self._slate.__dict__.clear()
79 # Handle caught exception
---> [80](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/_errors.py:80) raise self.decorate_exception(exception_type, exception_value)
81 else:
82 # Step out of the way so that another ErrorContext can become primary.
83 if self.primary() is self:
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/highlevel.py:1112, in Array.__getitem__(self, where)
1108 where = _normalize_named_slice(named_axis, where, ndim)
1110 NamedAxis.mapping = named_axis
-> [1112](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/highlevel.py:1112) indexed_layout = prepare_layout(self._layout._getitem(where, NamedAxis))
1114 if NamedAxis.mapping:
1115 return ak.operations.ak_with_named_axis._impl(
1116 indexed_layout,
1117 named_axis=NamedAxis.mapping,
(...) 1120 attrs=self._attrs,
1121 )
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/content.py:657, in Content._getitem(self, where, named_axis)
654 return out._getitem_at(0)
656 elif isinstance(where, ak.highlevel.Array):
--> [657](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/content.py:657) return self._getitem(where.layout, named_axis)
659 # Convert between nplikes of different backends
660 elif (
661 isinstance(where, ak.contents.Content)
662 and where.backend is not self._backend
663 ):
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/content.py:734, in Content._getitem(self, where, named_axis)
731 return where.to_NumpyArray(np.int64)
733 elif isinstance(where, Content):
--> [734](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/content.py:734) return self._getitem((where,), named_axis)
736 elif is_sized_iterable(where):
737 # Do we have an array
738 nplike = nplike_of_obj(where, default=None)
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/content.py:649, in Content._getitem(self, where, named_axis)
640 named_axis.mapping = _named_axis
642 next = ak.contents.RegularArray(
643 this,
644 this.length,
645 1,
646 parameters=None,
647 )
--> [649](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/content.py:649) out = next._getitem_next(nextwhere[0], nextwhere[1:], None)
651 if out.length is not unknown_length and out.length == 0:
652 return out._getitem_nothing()
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/regulararray.py:723, in RegularArray._getitem_next(self, head, tail, advanced)
707 assert head.offsets.nplike is nplike
708 self._maybe_index_error(
709 self._backend[
710 "awkward_RegularArray_getitem_jagged_expand",
(...) 721 slicer=head,
722 )
--> [723](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/regulararray.py:723) down = self._content._getitem_next_jagged(
724 multistarts, multistops, head._content, tail
725 )
727 return RegularArray(
728 down, headlength, self.length, parameters=self._parameters
729 )
731 elif isinstance(head, ak.contents.IndexedOptionArray):
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/unionarray.py:869, in UnionArray._getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail)
866 def _getitem_next_jagged(
867 self, slicestarts: Index, slicestops: Index, slicecontent: Content, tail
868 ) -> Content:
--> [869](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/unionarray.py:869) return self._getitem_next_jagged_generic(
870 slicestarts, slicestops, slicecontent, tail
871 )
File ~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/unionarray.py:857, in UnionArray._getitem_next_jagged_generic(self, slicestarts, slicestops, slicecontent, tail)
855 def _getitem_next_jagged_generic(self, slicestarts, slicestops, slicecontent, tail):
856 if isinstance(self, ak.contents.UnionArray):
--> [857](https://vscode-remote+ssh-002dremote-002bheath1-002esystemsbiology-002enet.vscode-resource.vscode-cdn.net/users/rng/proj/tlc/~/mambaforge/envs/compbio-8-25/lib/python3.11/site-packages/awkward/contents/unionarray.py:857) raise ak._errors.index_error(
858 self,
859 ak.contents.ListArray(
860 slicestarts, slicestops, slicecontent, parameters=None
861 ),
862 "cannot apply jagged slices to irreducible union arrays",
863 )
864 return self._getitem_next_jagged(slicestarts, slicestops, slicecontent, tail)
IndexError: cannot slice UnionArray (of length 66912) with [[0], [0, 1], [0], [], [], [0, 1], [0], ..., [0, 1], [0, 1], [0], [0], [], [0]]: cannot apply jagged slices to irreducible union arrays
This error occurred while attempting to slice
<Array [[0], [0, 1], [0], ..., [0], [0, 1]] type='66912 * union[var * i...'>
with
<Array [[True], [True, ...], ..., [True, False]] type='66912 * var * bool'>
Versions
| Package | Version |
| ----------- | ------- |
| anndata | 0.12.1 |
| awkward | 2.8.7 |
| scanpy | 1.11.4 |
| scirpy | 0.22.2 |
| scanwrap | 0.0.1 |
| seaborn | 0.13.2 |
| pandas | 2.3.1 |
| matplotlib | 3.10.5 |
| Levenshtein | 0.27.1 |
| numpy | 2.2.6 |
| Dependency | Version |
| --------------------- | --------------------- |
| jaraco.functools | 4.2.1 |
| texttable | 1.7.0 |
| decorator | 5.2.1 |
| pyparsing | 3.2.3 |
| charset-normalizer | 3.4.3 |
| airr | 1.5.1 |
| llvmlite | 0.44.0 |
| statsmodels | 0.14.5 |
| pluggy | 1.6.0 |
| logomaker | 0.8.6 |
| numba | 0.61.2 |
| cycler | 0.12.1 |
| comm | 0.2.3 |
| lxml | 6.0.0 |
| Jinja2 | 3.1.6 |
| jaraco.context | 6.0.1 |
| pynndescent | 0.5.13 |
| kiwisolver | 1.4.9 |
| jaraco.classes | 3.4.0 |
| scipy | 1.16.0 |
| fsspec | 2025.7.0 |
| urllib3 | 2.5.0 |
| pytz | 2025.2 |
| Pygments | 2.19.2 |
| scikit-learn | 1.7.1 |
| mudata | 0.3.2 |
| louvain | 0.8.2 |
| ipywidgets | 8.1.7 |
| jedi | 0.19.2 |
| crc32c | 2.7.1 |
| PyYAML | 6.0.2 |
| ipython | 9.4.0 |
| pooch | 1.8.2 (v1.8.2) |
| typing_extensions | 4.14.1 |
| legacy-api-wrap | 1.4.1 |
| hatchling | 1.27.0 |
| patsy | 1.0.1 |
| idna | 3.10 |
| numcodecs | 0.16.1 |
| awkward_cpp | 48 |
| threadpoolctl | 3.6.0 |
| umap-learn | 0.5.9.post2 |
| jaraco.text | 3.12.1 |
| debugpy | 1.8.16 |
| six | 1.17.0 |
| soupsieve | 2.7 |
| donfig | 0.8.1.post1 |
| certifi | 2025.8.3 (2025.08.03) |
| matplotlib-inline | 0.1.7 |
| platformdirs | 4.3.8 |
| MarkupSafe | 3.0.2 |
| more-itertools | 10.7.0 |
| pillow | 11.3.0 |
| wcwidth | 0.2.13 |
| leidenalg | 0.10.2 |
| jaraco.collections | 5.1.0 |
| setuptools | 80.9.0 |
| jupyter_core | 5.8.1 |
| argcomplete | 3.6.2 |
| jupyter_client | 8.6.3 |
| parso | 0.8.4 |
| h5py | 3.14.0 |
| natsort | 8.4.0 |
| tqdm | 4.67.1 |
| pure_eval | 0.2.3 |
| ipykernel | 6.30.1 |
| networkx | 3.5 |
| backports.tarfile | 1.2.0 |
| pyzmq | 27.0.1 |
| Brotli | 1.1.0 |
| cffi | 1.17.1 |
| zstandard | 0.23.0 |
| rpy2-robjects | 3.6.1 |
| pycparser | 2.22 |
| rpy2-rinterface | 3.6.2 |
| tornado | 6.5.2 |
| prompt_toolkit | 3.0.51 |
| igraph | 0.11.9 |
| msgpack | 1.1.1 |
| packaging | 25.0 |
| stack_data | 0.6.3 |
| zarr | 3.1.1 |
| colorama | 0.4.6 |
| requests | 2.32.4 |
| traitlets | 5.14.3 |
| beautifulsoup4 | 4.13.4 |
| joblib | 1.5.1 |
| psutil | 7.0.0 |
| adjustText | 1.3.0 |
| RapidFuzz | 3.13.0 |
| importlib_metadata | 8.7.0 |
| asttokens | 3.0.0 |
| PySocks | 1.7.1 |
| python-dateutil | 2.9.0.post0 |
| tzlocal | 5.3 |
| session-info2 | 0.2 |
| zipp | 3.23.0 |
| muon | 0.1.6 |
| yamlordereddictloader | 0.4.0 |
| executing | 2.2.0 |
| Component | Info |
| --------- | ------------------------------------------------------------------------------ |
| Python | 3.11.13 | packaged by conda-forge | (main, Jun 4 2025, 14:48:23) [GCC 13.3.0] |
| OS | Linux-5.4.0-215-generic-x86_64-with-glibc2.31 |
| Updated | 2025-10-01 19:03 |
Metadata
Metadata
Assignees
Labels
bug: Something isn't working