Skip to content

Commit 6350007

Browse files
committed
make indices unique upon concatenation
1 parent 69abd62 commit 6350007

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

anndata/base.py

+16 -9
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from numpy import ma
1010
import pandas as pd
1111
from pandas.core.index import RangeIndex
12+
from pandas.api.types import is_string_dtype, is_categorical
1213
from scipy import sparse
1314
from scipy.sparse import issparse
1415
from scipy.sparse.sputils import IndexMixin
@@ -156,7 +157,6 @@ def _gen_keys_from_multicol_key(key_multicol, n_keys):
156157

157158

158159
def df_to_records_fixed_width(df):
159-
from pandas.api.types import is_string_dtype, is_categorical
160160
uns = {} # unstructured dictionary for storing categories
161161
names = ['index']
162162
if is_string_dtype(df.index):
@@ -1155,16 +1155,14 @@ def _remove_unused_categories(self, df_full, df_sub, uns):
11551155
df_sub[k].cat.remove_unused_categories(inplace=True)
11561156
# also correct the colors...
11571157
if k + '_colors' in uns:
1158-
uns[k + '_colors'] = uns[
1159-
k + '_colors'][
1160-
np.where(np.in1d(
1161-
all_categories, df_sub[k].cat.categories))[0]]
1158+
uns[k + '_colors'] = np.array(uns[k + '_colors'])[
1159+
np.where(np.in1d(
1160+
all_categories, df_sub[k].cat.categories))[0]]
11621161

11631162
def _sanitize(self):
11641163
"""Transform string arrays to categorical data types, if they store less
11651164
categories than the total number of samples.
11661165
"""
1167-
from pandas.api.types import is_string_dtype
11681166
for ann in ['obs', 'var']:
11691167
for key in getattr(self, ann).columns:
11701168
df = getattr(self, ann)
@@ -1299,7 +1297,7 @@ def copy(self, filename=None):
12991297
copyfile(self.filename, filename)
13001298
return AnnData(filename=filename)
13011299

1302-
def concatenate(self, adatas, batch_key='batch', batch_categories=None):
1300+
def concatenate(self, adatas, batch_key='batch', batch_categories=None, index_unique='-'):
13031301
"""Concatenate along the observations axis after intersecting the variables names.
13041302
13051303
The `.var`, `.varm`, and `.uns` attributes of the passed adatas are ignored.
@@ -1310,8 +1308,11 @@ def concatenate(self, adatas, batch_key='batch', batch_categories=None):
13101308
AnnData matrices to concatenate with.
13111309
batch_key : `str` (default: 'batch')
13121310
Add the batch annotation to `.obs` using this key.
1313-
batch_categories : list (default: `range(len(adatas)+1)`)
1311+
batch_categories : list, optional (default: `range(len(adatas)+1)`)
13141312
Use these as categories for the batch annotation.
1313+
index_unique : `str` or `None`, optional (default: '-')
1314+
Make the index unique by joining the previous index name with the
1315+
batch category. Provide `None` to keep previous indices.
13151316
13161317
Returns
13171318
-------
@@ -1356,17 +1357,23 @@ def concatenate(self, adatas, batch_key='batch', batch_categories=None):
13561357
for adata2 in adatas:
13571358
joint_variables = np.intersect1d(
13581359
joint_variables, adata2.var_names, assume_unique=True)
1359-
adatas_to_concat = []
13601360
if batch_categories is None:
13611361
categories = [str(i) for i in range(len(adatas)+1)]
13621362
elif len(batch_categories) == len(adatas)+1:
13631363
categories = batch_categories
13641364
else:
13651365
raise ValueError('Provide as many `batch_categories` as `adatas`.')
1366+
adatas_to_concat = []
13661367
for i, ad in enumerate([self] + adatas):
1368+
ad.obs.index.values
13671369
ad = ad[:, joint_variables]
13681370
ad.obs[batch_key] = pd.Categorical(
13691371
ad.n_obs*[categories[i]], categories=categories)
1372+
ad.obs.index.values
1373+
if index_unique is not None:
1374+
if not is_string_dtype(ad.obs.index):
1375+
ad.obs.index = ad.obs.index.astype(str)
1376+
ad.obs.index = ad.obs.index.values + index_unique + categories[i]
13701377
adatas_to_concat.append(ad)
13711378
Xs = [ad.X for ad in adatas_to_concat]
13721379
if issparse(self.X):

0 commit comments

Comments
 (0)