@@ -197,22 +197,33 @@ def read_text(filename, delimiter=None, first_column_names=None, dtype='float32'
197
197
return _read_text (filename , delimiter , first_column_names , dtype )
198
198
199
199
200
def iter_lines(file_like):
    """Yield the non-empty lines of ``file_like`` without line breaks.

    Parameters
    ----------
    file_like
        Any iterable producing text lines, for example an open text file
        or a list of strings.

    Yields
    ------
    str
        Each line with its trailing carriage-return and newline characters
        removed. Lines that are empty after this stripping are skipped.
    """
    for raw_line in file_like:
        # Strip only line-break characters: other trailing whitespace is
        # kept because it may belong to the last field of the line.
        line = raw_line.rstrip('\r\n')
        if line:
            yield line
206
+
207
+
200
208
def _read_text (f , delimiter , first_column_names , dtype ) -> AnnData :
201
- header = ''
209
+ comments = []
202
210
data = []
203
- lines = ( l . rstrip ( ' \r \n ' ) for l in f )
211
+ lines = iter_lines ( f )
204
212
col_names = []
205
213
row_names = []
206
214
# read header and column names
207
215
for line in lines :
208
216
if line .startswith ('#' ):
209
- header += line
217
+ comment = line .lstrip ('# ' )
218
+ if comment :
219
+ comments .append (comment )
210
220
else :
211
221
if delimiter is not None and delimiter not in line :
212
222
raise ValueError ('Did not find delimiter "{}" in first line.'
213
223
.format (delimiter ))
214
224
line_list = line .split (delimiter )
215
- if not is_float (line_list [0 ]):
225
+ # the first column might be row names, so check the last
226
+ if not is_float (line_list [- 1 ]):
216
227
col_names = line_list
217
228
# logg.msg(' assuming first line in file stores column names', v=4)
218
229
else :
@@ -225,9 +236,9 @@ def _read_text(f, delimiter, first_column_names, dtype) -> AnnData:
225
236
break
226
237
if not col_names :
227
238
# try reading col_names from the last comment line
228
- if len (header ) > 0 :
239
+ if len (comments ) > 0 :
229
240
# logg.msg(' assuming last comment line stores variable names', v=4)
230
- col_names = np .array (header . split ( ' \n ' )[ - 2 ]. strip ( '#' ) .split ())
241
+ col_names = np .array (comments [ - 1 ] .split ())
231
242
# just numbers as col_names
232
243
else :
233
244
# logg.msg(' did not find column names in file', v=4)
@@ -269,7 +280,7 @@ def _read_text(f, delimiter, first_column_names, dtype) -> AnnData:
269
280
# a lot of memory and CPU time
270
281
if data [0 ].size != data [- 1 ].size :
271
282
raise ValueError (
272
- 'length of first line {} is different from length of last line {} '
283
+ 'length of first line ({}) is different from length of last line ({}) '
273
284
.format (data [0 ].size , data [- 1 ].size ))
274
285
data = np .array (data , dtype = dtype )
275
286
# logg.msg(' constructed array from list of list', t=True, v=4)
0 commit comments