9
9
from numpy import ma
10
10
import pandas as pd
11
11
from pandas .core .index import RangeIndex
12
+ from pandas .api .types import is_string_dtype , is_categorical
12
13
from scipy import sparse
13
14
from scipy .sparse import issparse
14
15
from scipy .sparse .sputils import IndexMixin
@@ -156,7 +157,6 @@ def _gen_keys_from_multicol_key(key_multicol, n_keys):
156
157
157
158
158
159
def df_to_records_fixed_width (df ):
159
- from pandas .api .types import is_string_dtype , is_categorical
160
160
uns = {} # unstructured dictionary for storing categories
161
161
names = ['index' ]
162
162
if is_string_dtype (df .index ):
@@ -1155,16 +1155,14 @@ def _remove_unused_categories(self, df_full, df_sub, uns):
1155
1155
df_sub [k ].cat .remove_unused_categories (inplace = True )
1156
1156
# also correct the colors...
1157
1157
if k + '_colors' in uns :
1158
- uns [k + '_colors' ] = uns [
1159
- k + '_colors' ][
1160
- np .where (np .in1d (
1161
- all_categories , df_sub [k ].cat .categories ))[0 ]]
1158
+ uns [k + '_colors' ] = np .array (uns [k + '_colors' ])[
1159
+ np .where (np .in1d (
1160
+ all_categories , df_sub [k ].cat .categories ))[0 ]]
1162
1161
1163
1162
def _sanitize (self ):
1164
1163
"""Transform string arrays to categorical data types, if they store fewer
1165
1164
categories than the total number of samples.
1166
1165
"""
1167
- from pandas .api .types import is_string_dtype
1168
1166
for ann in ['obs' , 'var' ]:
1169
1167
for key in getattr (self , ann ).columns :
1170
1168
df = getattr (self , ann )
@@ -1299,7 +1297,7 @@ def copy(self, filename=None):
1299
1297
copyfile (self .filename , filename )
1300
1298
return AnnData (filename = filename )
1301
1299
1302
- def concatenate (self , adatas , batch_key = 'batch' , batch_categories = None ):
1300
+ def concatenate (self , adatas , batch_key = 'batch' , batch_categories = None , index_unique = '-' ):
1303
1301
"""Concatenate along the observations axis after intersecting the variable names.
1304
1302
1305
1303
The `.var`, `.varm`, and `.uns` attributes of the passed adatas are ignored.
@@ -1310,8 +1308,11 @@ def concatenate(self, adatas, batch_key='batch', batch_categories=None):
1310
1308
AnnData matrices to concatenate with.
1311
1309
batch_key : `str` (default: 'batch')
1312
1310
Add the batch annotation to `.obs` using this key.
1313
- batch_categories : list (default: `range(len(adatas)+1)`)
1311
+ batch_categories : list, optional (default: `[str(i) for i in range(len(adatas)+1)]`)
1314
1312
Use these as categories for the batch annotation.
1313
+ index_unique : `str` or `None`, optional (default: '-')
1314
+ Make the index unique by joining the previous index name with the
1315
+ batch category, using `index_unique` as the separator. Provide `None` to keep previous indices.
1315
1316
1316
1317
Returns
1317
1318
-------
@@ -1356,17 +1357,23 @@ def concatenate(self, adatas, batch_key='batch', batch_categories=None):
1356
1357
for adata2 in adatas :
1357
1358
joint_variables = np .intersect1d (
1358
1359
joint_variables , adata2 .var_names , assume_unique = True )
1359
- adatas_to_concat = []
1360
1360
if batch_categories is None :
1361
1361
categories = [str (i ) for i in range (len (adatas )+ 1 )]
1362
1362
elif len (batch_categories ) == len (adatas )+ 1 :
1363
1363
categories = batch_categories
1364
1364
else :
1365
1365
raise ValueError ('Provide as many `batch_categories` as `adatas`.' )
1366
+ adatas_to_concat = []
1366
1367
for i , ad in enumerate ([self ] + adatas ):
1368
+ ad .obs .index .values
1367
1369
ad = ad [:, joint_variables ]
1368
1370
ad .obs [batch_key ] = pd .Categorical (
1369
1371
ad .n_obs * [categories [i ]], categories = categories )
1372
+ ad .obs .index .values
1373
+ if index_unique is not None :
1374
+ if not is_string_dtype (ad .obs .index ):
1375
+ ad .obs .index = ad .obs .index .astype (str )
1376
+ ad .obs .index = ad .obs .index .values + index_unique + categories [i ]
1370
1377
adatas_to_concat .append (ad )
1371
1378
Xs = [ad .X for ad in adatas_to_concat ]
1372
1379
if issparse (self .X ):
0 commit comments