@@ -11,7 +11,7 @@ def loadfile(filename,verbose=True):
 
     try:
         if verbose == True:
-            print 'Loading', filename
+            print('Loading', filename)
         # obtaining sample names and number from 3rd line in header
         num_header_lines = 0
         with open(filename) as f:
@@ -26,12 +26,12 @@ def loadfile(filename,verbose=True):
                 num_header_lines += 1
         sample_number = len(sample_names)
         if verbose == True:
-            print '--------------------------------------'
-            print sample_number, ' samples in the file'
-            print '--------------------------------------'
+            print('--------------------------------------')
+            print(sample_number, ' samples in the file')
+            print('--------------------------------------')
             for elem in sample_names:
-                print elem
-            print '--------------------------------------'
+                print(elem)
+            print('--------------------------------------')
 
 
 
@@ -59,11 +59,11 @@ def loadfile(filename,verbose=True):
         num_attr = len(attr_names)  # number of attributes
         # expression_colindex=attr_names.index('Expression')  # position of the expression column in the attr column
         if verbose == True:
-            print num_attr, ' attributes in the file '
-            print '--------------------------------------'
+            print(num_attr, ' attributes in the file ')
+            print('--------------------------------------')
             for attr in attr_names:
-                print attr
-            print '--------------------------------------'
+                print(attr)
+            print('--------------------------------------')
 
         # joining rows of attributes without the descriptor
         for row in range(atr_data.shape[0]):
@@ -113,7 +113,7 @@ def loadfile(filename,verbose=True):
                     data.at[row.Index, var] = np.nan
         return data
     except:
-        print 'Error loading the file'
+        print('Error loading the file')
 
 
 """
@@ -137,7 +137,7 @@ def load_check_gff3(filename):
                     data_1 = rowfile.split('\t')
                     break
         if coldata_found == False:
-            print 'No COLDATA, bad header'
+            print('No COLDATA, bad header')
             return False
 
         # Number of columns without breaking down attributes column
@@ -155,33 +155,33 @@ def load_check_gff3(filename):
         for attr in list_attr:
             if attr not in possible_attr:
                 Error = True
-                print attr, 'is not a possible attribute'
+                print(attr, 'is not a possible attribute')
                 break
         if Error:
-            print 'File format error'
+            print('File format error')
             return False
         # If not format error, loading content
         try:
             dataframe = loadfile(filename, True)
         except:
-            print 'Error loading file'
+            print('Error loading file')
             return False
-        print 'Checking content'
+        print('Checking content')
         for i in range(dataframe.shape[0]):
             # Labels in type column
             if dataframe.loc[i, 'type'] not in ['ref_miRNA', 'isomiR']:
                 Error = True
-                print 'line', i, 'bad type error'
+                print('line', i, 'bad type error')
 
             # start<end
             if dataframe.loc[i, 'start'] >= dataframe.loc[i, 'end']:
                 Error = True
-                print 'line', i, 'start >=end error'
+                print('line', i, 'start >=end error')
 
             # Strand + or -
             if dataframe.loc[i, 'strand'] not in ['+', '-']:
                 Error = True
-                print 'line', i, 'bad strand error'
+                print('line', i, 'bad strand error')
             # Variant checking
             possible_variant = ['iso_5p', 'iso_3p', 'iso_add', 'iso_snp_seed', 'iso_snp_central_offset', 'iso_snp_central',
                                 'iso_central_supp', 'iso_snp_central_supp', 'iso_snp']
@@ -191,36 +191,36 @@ def load_check_gff3(filename):
             if len(variant_i) == 1 and variant_i[0] != 'NA':
                 if variant_i[0].split(':')[0] not in possible_variant:
                     Error = True
-                    print 'Variant error', variant_i[0].split(':')[0], 'line', i
+                    print('Variant error', variant_i[0].split(':')[0], 'line', i)
             elif variant_i[0] != 'NA':
                 for var in range(len(variant_i)):
                     if variant_i[var].split(':')[0] not in possible_variant:
                         Error = True
-                        print 'Variant error', variant_i[0].split(':')[0], 'line', i
+                        print('Variant error', variant_i[0].split(':')[0], 'line', i)
 
         # Checking expression data
         expression_cols = [col for col in dataframe.columns if 'Expression_' in col]
         for col in expression_cols:
             for i in range(dataframe.shape[0]):
                 if not dataframe.loc[i, col].isdigit():
-                    print dataframe.loc[i, col].isdigit()
-                    print 'Expression count error line', i
+                    print(dataframe.loc[i, col].isdigit())
+                    print('Expression count error line', i)
                     Error = True
             dataframe[col] = dataframe[col].astype(int)  # setting the datatype of counts
             dataframe[col] = dataframe[col].replace(0, np.nan)  # Setting 0 reads to NaN
         if 'Filter' in dataframe.columns:
             for i in range(dataframe.shape[0]):
                 if dataframe.loc[i, 'Filter'] != 'Pass':
-                    print 'Warning non-pass filter in line', i
+                    print('Warning non-pass filter in line', i)
         if Error:
-            print 'File format error'
+            print('File format error')
             return False
 
-        print '--------------------------------------'
-        print dataframe.dtypes
-        print '--------------------------------------'
-        print 'Format ok'
+        print('--------------------------------------')
+        print(dataframe.dtypes)
+        print('--------------------------------------')
+        print('Format ok')
         return dataframe
     except:
-        print 'Error checking the file'
+        print('Error checking the file')
 