Skip to content

Commit b008eea

Browse files
authored
Merge pull request #57 from mwang87/cdf-gc-support
[CDF Support] CDF support from GC and single quad instruments
2 parents d041198 + 7f6c73d commit b008eea

File tree

7 files changed

+103
-13
lines changed

7 files changed

+103
-13
lines changed

app.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@
106106
),
107107
dbc.Nav(
108108
[
109-
dbc.NavItem(dbc.NavLink("GNPS LCMS Dashboard - Version 0.14", href="/")),
109+
dbc.NavItem(dbc.NavLink("GNPS LCMS Dashboard - Version 0.15", href="/")),
110110
],
111111
navbar=True)
112112
],
@@ -726,6 +726,8 @@
726726
html.Br(),
727727
html.A("Thermo GCMS", href="/?usi=mzspec:MSV000086150:BA1.mzML"),
728728
html.Br(),
729+
html.A("GCMS in CDF Format", href="/?usi=mzspec:GNPS:TASK-ce31b7fdd01244dbb31478147889de1e-f.aaksenov/GC_data/Sterols_data/Samples/0104006.cdf"),
730+
html.Br(),
729731
html.A("Thermo LCMS from GNPS Analysis Classical Molecular Networking Task", href="/?usi=mzspec:GNPS:TASK-5ecfcf81cb3c471698995b194d8246a0-f.MSV000085444/ccms_peak/peak/Hui_N1_fe.mzML#%7B%7D"),
730732
html.Br(),
731733
]
@@ -1147,8 +1149,11 @@ def update_output(search, filecontent, filename, filedate):
11471149
usi2 = ""
11481150

11491151
if filecontent is not None:
1152+
if len(filecontent) > 100000000:
1153+
raise Exception
1154+
11501155
extension = os.path.splitext(filename)[1]
1151-
if extension == ".mzML" and len(filecontent) < 100000000:
1156+
if extension == ".mzML":
11521157
temp_filename = os.path.join("temp", "{}.mzML".format(str(uuid.uuid4())))
11531158
data = filecontent.encode("utf8").split(b";base64,")[1]
11541159

@@ -1159,7 +1164,7 @@ def update_output(search, filecontent, filename, filedate):
11591164

11601165
return [usi, usi2, "FILE Uploaded {}".format(filename)]
11611166

1162-
if extension == ".mzXML" and len(filecontent) < 100000000:
1167+
if extension == ".mzXML":
11631168
mangled_name = str(uuid.uuid4())
11641169
temp_filename_mzXML = os.path.join("temp", "{}.mzXML".format(mangled_name))
11651170
temp_filename = os.path.join("temp", "{}.mzXML".format(mangled_name))
@@ -1172,6 +1177,19 @@ def update_output(search, filecontent, filename, filedate):
11721177

11731178
return [usi, usi2, "FILE Uploaded {}".format(filename)]
11741179

1180+
if extension.lower() == ".cdf":
1181+
mangled_name = str(uuid.uuid4())
1182+
temp_filename = os.path.join("temp", "{}.cdf".format(mangled_name))
1183+
data = filecontent.encode("utf8").split(b";base64,")[1]
1184+
1185+
with open(temp_filename, "wb") as temp_file:
1186+
temp_file.write(base64.decodebytes(data))
1187+
1188+
usi = "mzspec:LOCAL:{}".format(os.path.basename(temp_filename))
1189+
1190+
return [usi, usi2, "FILE Uploaded {}".format(filename)]
1191+
1192+
11751193

11761194
# Resolving USI
11771195
usi = _get_param_from_url(search, "usi", usi)

lcms_map.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11

2+
import os
23
import pymzml
34
import numpy as np
45
import datashader as ds
@@ -11,7 +12,6 @@
1112
import plotly.graph_objects as go
1213

1314

14-
1515
def _gather_lcms_data(filename, min_rt, max_rt, min_mz, max_mz, polarity_filter="None"):
1616
all_mz = []
1717
all_rt = []

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,5 @@ Flask-Caching
4343
scipy
4444
pyteomics
4545
psims
46-
molmass
46+
molmass
47+
netcdf4

test/bin

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../bin/

test/test.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,3 @@ def test_resolve():
4444
print(record["usi"])
4545
remote_link, local_filename = utils._resolve_usi(record["usi"])
4646
lcms_map._create_map_fig(local_filename)
47-

test/usi_list.tsv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@ mzspec:GNPS:GNPS-EMBL-MCF:accession:CCMSLIB00000479565
1515
mzspec:MTBLS138:QXA01PDNEG20181113_KGCO0418VWSS7000000126_RAT_SERUM1_03.mzML
1616
mzspec:MSV000083500:ccms_peak/9177.mzML
1717
mzspec:ST000763:20160411_MB_CS00000074-1_P.mzXML
18-
mzspec:MSV000086456:peak/Pilot_plate_5_NEG_mzXML/PLT5_B1_BB1_01_60017.mzXML
18+
mzspec:MSV000086456:peak/Pilot_plate_5_NEG_mzXML/PLT5_B1_BB1_01_60017.mzXML
19+
mzspec:GNPS:TASK-ce31b7fdd01244dbb31478147889de1e-f.aaksenov/GC_data/Sterols_data/Samples/0104006.cdf

utils.py

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ def _resolve_usi(usi, temp_folder="temp"):
2929
if not os.path.isfile(converted_local_filename):
3030
temp_filename = os.path.join(temp_folder, str(uuid.uuid4()) + ".mzML")
3131
# Lets do a conversion
32-
_convert_mzML(local_filename, temp_filename)
32+
if file_extension == ".cdf":
33+
_convert_cdf_to_mzML(local_filename, temp_filename)
34+
else:
35+
_convert_mzML(local_filename, temp_filename)
3336

3437
os.rename(temp_filename, converted_local_filename)
3538

@@ -120,8 +123,9 @@ def _resolve_usi(usi, temp_folder="temp"):
120123
# Getting Data Local, TODO: likely should serialize it
121124
local_filename = os.path.join(temp_folder, werkzeug.utils.secure_filename(remote_link))
122125
filename, file_extension = os.path.splitext(local_filename)
126+
123127
converted_local_filename = filename + ".mzML"
124-
128+
125129
if not os.path.isfile(converted_local_filename):
126130
temp_filename = os.path.join(temp_folder, str(uuid.uuid4()) + file_extension)
127131
wget_cmd = "wget '{}' -O {}".format(remote_link, temp_filename)
@@ -130,12 +134,14 @@ def _resolve_usi(usi, temp_folder="temp"):
130134

131135
temp_filename = os.path.join(temp_folder, str(uuid.uuid4()) + ".mzML")
132136
# Lets do a conversion
133-
_convert_mzML(local_filename, temp_filename)
137+
if file_extension == ".cdf":
138+
_convert_cdf_to_mzML(local_filename, temp_filename)
139+
else:
140+
_convert_mzML(local_filename, temp_filename)
134141

142+
# Renaming the temp
135143
os.rename(temp_filename, converted_local_filename)
136144

137-
local_filename = converted_local_filename
138-
139145
return remote_link, converted_local_filename
140146

141147
# First try msconvert, if the output fails, then we will do pyteomics to mzML and then msconvert
@@ -220,8 +226,72 @@ def _convert_mzML(input_mzXML, output_mzML):
220226
except:
221227
pass
222228

229+
# Pure-Python conversion from netCDF (.cdf) mass spec data to mzML
def _cdf_scan_slices(scan_index, point_count):
    """Turn per-scan start offsets into half-open [start, end) slices.

    Args:
        scan_index: 1-D sequence of offsets into the flat mass/intensity
            arrays, one per scan, giving where each scan's points begin.
        point_count: total number of points in the flat arrays; used as the
            end of the final scan.

    Returns:
        (starts, ends, keep): ``starts``/``ends`` are int64 arrays for the
        non-empty scans only; ``keep`` is a boolean mask over the original
        scans, so callers can filter parallel per-scan arrays the same way.
    """
    import numpy as np

    starts = np.asarray(scan_index, dtype=np.int64)
    if starts.size == 0:
        empty = np.zeros(0, dtype=np.int64)
        return empty, empty, np.zeros(0, dtype=bool)

    # A scan ends where the next one starts; the last one ends at the end of
    # the data. Ends are EXCLUSIVE so they can be used directly as slice bounds
    # (using next_start - 1 as a slice bound silently drops the last peak).
    ends = np.append(starts[1:], np.int64(point_count))

    # Scans whose end does not lie past their start carry no points.
    keep = ends > starts
    return starts[keep], ends[keep], keep


def _convert_cdf_to_mzML(input_cdf, output_mzML):
    """Convert a netCDF mass spec file into an mzML file of MS1 spectra.

    Reads the ``mass_values``, ``intensity_values``, ``scan_acquisition_time``
    and ``scan_index`` variables from ``input_cdf``, skips scans without data
    points, and writes every remaining scan as an MS1 spectrum. The result is
    written to a uniquely named temporary file beside ``output_mzML`` and then
    moved into place.

    Args:
        input_cdf: path of the netCDF file to read.
        output_mzML: path the mzML file should end up at.

    Raises:
        Whatever netCDF4 raises when the input cannot be opened or a required
        variable is missing, and OSError if the temporary file cannot be
        created. Failures while writing spectra and while moving the file into
        place are reported on stdout and otherwise tolerated (best effort).
    """
    from netCDF4 import Dataset
    from psims.mzml.writer import MzMLWriter
    import numpy as np

    # Write beside the destination so the final move stays on one filesystem.
    out_dir = os.path.dirname(output_mzML) or "."
    temp_filename = os.path.join(out_dir, str(uuid.uuid4()) + ".mzML")

    # Copy everything into memory, then release the netCDF handle.
    with Dataset(input_cdf, "r") as cdf_reader:
        mass_values = np.array(cdf_reader.variables["mass_values"][:])
        intensity_values = np.array(cdf_reader.variables["intensity_values"][:])
        time_values = np.array(cdf_reader.variables["scan_acquisition_time"][:])
        scan_values = np.array(cdf_reader.variables["scan_index"][:])

    # Scan boundaries, with empty scans removed
    scan_starts, scan_ends, keep = _cdf_scan_slices(scan_values, mass_values.shape[0])
    time_values = time_values[keep]

    # Writing everything out
    with open(temp_filename, "wb") as out_file, MzMLWriter(out_file) as out:
        out.controlled_vocabularies()
        with out.run(id="my_analysis"):
            with out.spectrum_list(count=len(scan_starts)):
                try:
                    for i, (start, end) in enumerate(zip(scan_starts, scan_ends)):
                        # NOTE(review): presumably the acquisition time is in
                        # seconds and the writer expects minutes - confirm.
                        time_min_rt = time_values[i] / 60

                        _mz_array = mass_values[start:end]
                        _i_array = intensity_values[start:end]

                        out.write_spectrum(
                            _mz_array, _i_array,
                            id=i, params=[
                                "MS1 Spectrum",
                                {"ms level": 1},
                                {"total ion current": float(np.sum(_i_array, dtype=np.float64))}
                            ],
                            scan_start_time=time_min_rt)
                except Exception as exc:
                    # Best effort: keep whatever spectra were written so far.
                    print("Reading Failed, skipping to end", exc)

    try:
        os.replace(temp_filename, output_mzML)
    except OSError as exc:
        # Stay non-raising like before, but do not hide the reason.
        print("Failed to move converted mzML into place", exc)
223294

224-
225295

226296
import subprocess, io
227297

0 commit comments

Comments
 (0)