11import json
2+ import io
23import zipfile
4+ import tarfile
35import tempfile
46import glob # noqa: F401
57import os
@@ -98,6 +100,29 @@ def __init__(self, file, molfile=None, params=False, multiple_files=False):
98100 self .params = params
99101 self .multiple_files = multiple_files
100102
103+ @staticmethod
104+ def _is_tarball (name : str ) -> bool :
105+ lname = (name or "" ).lower ()
106+ return lname .endswith (".tar.gz" ) or lname .endswith (".tgz" ) or lname .endswith (".tar" ) or lname .endswith (".tar.xz" )
107+
108+ def _detect_archive_type (self ) -> str | None :
109+ if not getattr (self .file , "bcore" , None ):
110+ return None
111+ try :
112+ if zipfile .is_zipfile (io .BytesIO (self .file .bcore )):
113+ return "zip"
114+ except Exception :
115+ pass
116+ try :
117+ with tempfile .NamedTemporaryFile (suffix = ".tar" ) as tf :
118+ tf .write (self .file .bcore )
119+ tf .flush ()
120+ if tarfile .is_tarfile (tf .name ):
121+ return "tar"
122+ except Exception :
123+ pass
124+ return None
125+
101126 def convert2jcamp (self ):
102127 cmpsr , _ = self .to_composer ()
103128 if isinstance (cmpsr , BagItBaseConverter ):
@@ -119,17 +144,23 @@ def convert2jcamp_img(self):
119144 return cmpsr .tf_jcamp (), cmpsr .tf_img (), cmpsr .tf_csv ()
120145
121146 def to_composer (self ):
147+ archive_type = self ._detect_archive_type ()
122148 is_raw_mzml = self .file .name .split ('.' )[- 1 ].lower () in ['raw' , 'mzml' , 'mzxml' ] # noqa: E501
123149 is_cdf = self .file .name .split ('.' )[- 1 ].lower () in ['cdf' ]
124- is_zip = self .file .name .split ('.' )[- 1 ].lower () in ['zip' ]
150+ is_zip = self .file .name .split ('.' )[- 1 ].lower () in ['zip' ] or archive_type == "zip"
151+ is_tar = self ._is_tarball (self .file .name ) or archive_type == "tar"
125152 is_raw_mzml_by_params = self .params ['ext' ] in ['raw' , 'mzml' , 'mzxml' ]
126153 is_cdf_by_params = self .params ['ext' ] in ['cdf' ]
127154 is_zip_by_params = self .params ['ext' ] in ['zip' ]
155+ is_tar_by_params = self .params ['ext' ] in ['tar' , 'tar.gz' , 'tgz' , 'tar.xz' ]
128156 if is_raw_mzml or is_raw_mzml_by_params :
129157 return self .ms2composer (), False
130158 if is_cdf or is_cdf_by_params :
131159 _ , cp = self .cdf2cvp ()
132160 return cp , False
161+ if is_tar or is_tar_by_params :
162+ _ , cp , invalid_molfile = self .tar2cvp ()
163+ return cp , invalid_molfile
133164 if is_zip or is_zip_by_params :
134165 _ , cp , invalid_molfile = self .zip2cvp ()
135166 return cp , invalid_molfile
@@ -138,17 +169,23 @@ def to_composer(self):
138169 return cp , invalid_molfile
139170
140171 def to_converter (self ):
172+ archive_type = self ._detect_archive_type ()
141173 is_raw_mzml = self .file .name .split ('.' )[- 1 ].lower () in ['raw' , 'mzml' , 'mzxml' ] # noqa: E501
142174 is_cdf = self .file .name .split ('.' )[- 1 ].lower () in ['cdf' ]
143- is_zip = self .file .name .split ('.' )[- 1 ].lower () in ['zip' ]
175+ is_zip = self .file .name .split ('.' )[- 1 ].lower () in ['zip' ] or archive_type == "zip"
176+ is_tar = self ._is_tarball (self .file .name ) or archive_type == "tar"
144177 is_raw_mzml_by_params = self .params ['ext' ] in ['raw' , 'mzml' , 'mzxml' ]
145178 is_cdf_by_params = self .params ['ext' ] in ['cdf' ]
146179 is_zip_by_params = self .params ['ext' ] in ['zip' ]
180+ is_tar_by_params = self .params ['ext' ] in ['tar' , 'tar.gz' , 'tgz' , 'tar.xz' ]
147181 if is_raw_mzml or is_raw_mzml_by_params :
148182 return self .ms2composer ()
149183 if is_cdf or is_cdf_by_params :
150184 cv , _ = self .cdf2cvp ()
151185 return cv
186+ if is_tar or is_tar_by_params :
187+ cv , _ , _ = self .tar2cvp ()
188+ return cv
152189 if is_zip or is_zip_by_params :
153190 cv , _ , _ = self .zip2cvp ()
154191 return cv
@@ -271,6 +308,64 @@ def zip2cvp(self):
271308
272309 return False , False , False
273310
def tar2cvp(self):
    """Convert an uploaded tar archive into an LCMS converter/composer pair.

    The archive bytes (``self.file.bcore``) are unpacked into a temporary
    directory. If ``_find_dir_with_cdf`` locates a directory in it, LC and
    MS frames are loaded (via ``lcms_frames_from_converter_app`` or, failing
    that, the readers from ``get_openlab_readers``), validated, written as
    CSV files into a ``normalized`` sub-directory, and handed to
    ``LCMSConverter`` / ``LCMSComposer``.

    Returns:
        ``(converter, composer, False)`` on success, or
        ``(False, False, False)`` when the archive holds unsafe members,
        no suitable directory is found, or the readers are unavailable.

    Raises:
        RuntimeError: if a required LC or MS column is missing.
    """
    with tempfile.TemporaryDirectory() as td:
        suffix = '.tar.gz' if self._is_tarball(self.file.name) else '.tar'
        tt = store_byte_in_tmp(self.file.bcore, suffix=suffix)
        with tarfile.open(tt.name, 'r:*') as t:
            # The archive is user supplied: never let members escape ``td``
            # (absolute paths, "..", links or device nodes).
            if hasattr(tarfile, 'data_filter'):
                try:
                    t.extractall(td, filter='data')
                except tarfile.FilterError:
                    return False, False, False
            else:
                # Older Pythons lack extraction filters; accept only plain
                # files and directories that resolve inside ``td``.
                root = os.path.realpath(td)
                for member in t.getmembers():
                    target = os.path.realpath(os.path.join(root, member.name))
                    inside = target == root or target.startswith(root + os.sep)
                    if not inside or not (member.isfile() or member.isdir()):
                        return False, False, False
                t.extractall(td)

        openlab_dir = _find_dir_with_cdf(td)
        if not openlab_dir:
            return False, False, False

        normalized_dir = os.path.join(td, 'normalized')
        os.makedirs(normalized_dir, exist_ok=True)

        converter_frames = lcms_frames_from_converter_app(openlab_dir)
        if converter_frames is not None:
            lc_df, minus_df, plus_df = converter_frames
        else:
            read_lc, read_ms = get_openlab_readers()
            if read_lc is None or read_ms is None:
                return False, False, False
            lc_df = read_lc(openlab_dir)
            minus_df, plus_df = read_ms(openlab_dir)

        required_lc_cols = ['RetentionTime', 'DetectorSignal', 'wavelength']
        for col in required_lc_cols:
            if col not in lc_df.columns:
                raise RuntimeError(f'LC output missing required column {col}')
        lc_df = lc_df[required_lc_cols]

        for label, df in {'MINUS': minus_df, 'PLUS': plus_df}.items():
            for col in ('mz', 'intensities', 'time'):
                if col not in df.columns:
                    raise RuntimeError(f'MS {label} missing column {col}')

        tic_minus = compute_tic(minus_df)
        tic_plus = compute_tic(plus_df)

        lc_df.to_csv(os.path.join(normalized_dir, 'LCMS.csv'), index=False)
        tic_minus.to_csv(os.path.join(normalized_dir, 'TIC_MINUS.csv'), index=False)
        tic_plus.to_csv(os.path.join(normalized_dir, 'TIC_PLUS.csv'), index=False)
        minus_df[['time', 'mz', 'intensities']].to_csv(
            os.path.join(normalized_dir, 'MZ_MINUS_Spectra.csv'), index=False
        )
        plus_df[['time', 'mz', 'intensities']].to_csv(
            os.path.join(normalized_dir, 'MZ_PLUS_Spectra.csv'), index=False
        )

        # NOTE(review): ``normalized_dir`` is deleted when this ``with``
        # block exits; presumably LCMSConverter reads the CSVs eagerly in
        # its constructor — confirm.
        lcms_cv = LCMSConverter(normalized_dir, self.params, os.path.basename(self.file.name))
        lcms_peaks = None
        if self.params and 'lcms_peaks' in self.params:
            try:
                lcms_peaks = json.loads(self.params['lcms_peaks'])
            except (json.JSONDecodeError, TypeError):
                # Malformed peak data is treated as "no peaks supplied".
                lcms_peaks = None
        lcms_np = LCMSComposer(lcms_cv, lcms_peaks)
        return lcms_cv, lcms_np, False
368+
274369 def zip2cv_with_processed_file (self , target_dir , params , file_name ):
275370 fid_brucker = FidHasBruckerProcessed (target_dir , params , file_name )
276371 if not fid_brucker :
0 commit comments