@@ -29,7 +29,10 @@ def _resolve_usi(usi, temp_folder="temp"):
2929 if not os .path .isfile (converted_local_filename ):
3030 temp_filename = os .path .join (temp_folder , str (uuid .uuid4 ()) + ".mzML" )
3131 # Lets do a conversion
32- _convert_mzML (local_filename , temp_filename )
32+ if file_extension == ".cdf" :
33+ _convert_cdf_to_mzML (local_filename , temp_filename )
34+ else :
35+ _convert_mzML (local_filename , temp_filename )
3336
3437 os .rename (temp_filename , converted_local_filename )
3538
@@ -120,8 +123,9 @@ def _resolve_usi(usi, temp_folder="temp"):
120123 # Getting Data Local, TODO: likely should serialize it
121124 local_filename = os .path .join (temp_folder , werkzeug .utils .secure_filename (remote_link ))
122125 filename , file_extension = os .path .splitext (local_filename )
126+
123127 converted_local_filename = filename + ".mzML"
124-
128+
125129 if not os .path .isfile (converted_local_filename ):
126130 temp_filename = os .path .join (temp_folder , str (uuid .uuid4 ()) + file_extension )
127131 wget_cmd = "wget '{}' -O {}" .format (remote_link , temp_filename )
@@ -130,12 +134,14 @@ def _resolve_usi(usi, temp_folder="temp"):
130134
131135 temp_filename = os .path .join (temp_folder , str (uuid .uuid4 ()) + ".mzML" )
132136 # Lets do a conversion
133- _convert_mzML (local_filename , temp_filename )
137+ if file_extension == ".cdf" :
138+ _convert_cdf_to_mzML (local_filename , temp_filename )
139+ else :
140+ _convert_mzML (local_filename , temp_filename )
134141
142+ # Renaming the temp
135143 os .rename (temp_filename , converted_local_filename )
136144
137- local_filename = converted_local_filename
138-
139145 return remote_link , converted_local_filename
140146
141147# First try msconvert, if the output fails, then we will do pyteomics to mzML and then msconvert
@@ -220,8 +226,72 @@ def _convert_mzML(input_mzXML, output_mzML):
220226 except :
221227 pass
222228
229+ # in python doing a conversion from cdf to mzML
def _convert_cdf_to_mzML(input_cdf, output_mzML):
    """Convert a netCDF mass-spec file to mzML without calling msconvert.

    The input is read through ``netCDF4`` and must expose the variables
    ``mass_values``, ``intensity_values``, ``scan_acquisition_time`` and
    ``scan_index``. ``scan_index`` is used as the offset of the first data
    point of each scan inside the flat ``mass_values`` / ``intensity_values``
    arrays. Every scan is written as an MS1 spectrum through ``psims``.

    The conversion is best-effort: a failure while writing spectra or while
    moving the result into place is reported on stdout and not raised, so the
    caller should check that ``output_mzML`` exists.

    Args:
        input_cdf: Path of the netCDF file to read.
        output_mzML: Path where the finished mzML file is placed.
    """
    from netCDF4 import Dataset
    from psims.mzml.writer import MzMLWriter
    import numpy as np

    # Stage the output next to its final location so the closing rename never
    # has to cross directories that may live on different filesystems.
    staging_dir = os.path.dirname(output_mzML) or "."
    temp_filename = os.path.join(staging_dir, str(uuid.uuid4()) + ".mzML")

    # Copy everything into plain arrays and close the dataset right away.
    with Dataset(input_cdf, "r") as cdf_reader:
        mass_values = np.array(cdf_reader.variables["mass_values"][:])
        intensity_values = np.array(cdf_reader.variables["intensity_values"][:])
        time_values = np.array(cdf_reader.variables["scan_acquisition_time"][:])
        scan_values = np.array(cdf_reader.variables["scan_index"][:])

    # Removing empty scans: a scan whose start offset equals the next scan's
    # start offset holds no points. The last scan is always kept.
    if scan_values.size:
        keep = np.append(np.diff(scan_values) != 0, True)
        time_values = time_values[keep]
        scan_values = scan_values[keep]

    # Exclusive end offset of each scan: the start of the next kept scan, or
    # the end of the data for the last one. Using an exclusive bound keeps the
    # final peak of every scan when slicing below.
    scan_stop_values = np.append(scan_values[1:], mass_values.shape[0])
    scan_count = int(scan_values.shape[0])

    # Writing everything out
    with open(temp_filename, "wb") as handle, MzMLWriter(handle) as out:
        out.controlled_vocabularies()
        with out.run(id="my_analysis"):
            with out.spectrum_list(count=scan_count):
                try:
                    for i, (start, stop) in enumerate(zip(scan_values, scan_stop_values)):
                        # presumably seconds -> minutes; confirm against the
                        # units of scan_acquisition_time in the input files
                        time_min_rt = time_values[i] / 60

                        _mz_array = np.array(mass_values[start:stop])
                        _i_array = np.array(intensity_values[start:stop])

                        out.write_spectrum(
                            _mz_array, _i_array,
                            id=i, params=[
                                "MS1 Spectrum",
                                {"ms level": 1},
                                {"total ion current": float(np.sum(_i_array))}
                            ],
                            scan_start_time=time_min_rt)
                except Exception as e:
                    # Best-effort: keep the spectra written so far and let the
                    # writer close the document cleanly.
                    print("Reading Failed, skipping to end", e)

    try:
        os.rename(temp_filename, output_mzML)
    except OSError as e:
        # Still non-fatal, but no longer silent.
        print("Failed to move converted mzML into place", e)
223294
224-
225295
226296import subprocess , io
227297
0 commit comments