diff --git a/nmrglue/fileio/jcampdx.py b/nmrglue/fileio/jcampdx.py index 1d41e2b5..a2864d18 100644 --- a/nmrglue/fileio/jcampdx.py +++ b/nmrglue/fileio/jcampdx.py @@ -136,7 +136,6 @@ def _parsejcampdx(filename): if currentkey is None: warn(f"JCAMP-DX data line without associated key: {line}") continue - currentvaluestrings.append(commentsplit[0]) # push possible non-closed blocks @@ -375,7 +374,8 @@ def _parse_pseudo(datalines): valuechar = _DUP_DIGITS[char] newmode = 3 except KeyError: - warn(f"Unknown pseudo-digit: {char} at line: {dataline}") + warn( + f"Unknown pseudo-digit: {char} at line: {dataline}") return None # finish previous number @@ -746,6 +746,23 @@ def _find_firstx_lastx(dic): return firstx, lastx, isppm +def get_complex_array(data): + """ + By default JCAMP FID data are read as two separate arrays + for real and imaginary parts. This function returns + them combined for single complex array. + """ + + if len(data) != 2: + warn("data is not list of arrays [real, imag]") + return None + + complexdata = np.empty((len(data[0]), ), dtype='complex128') + complexdata.real = data[0][:] + complexdata.imag = data[1][:] + return complexdata + + def guess_udic(dic, data): """ Guess parameters of universal dictionary from dic, data pair. @@ -790,32 +807,63 @@ def guess_udic(dic, data): pass # "size" - if isinstance(data, list): - data = data[0] # if list [R,I] + npoints = None if data is not None: - udic[0]["size"] = len(data) + if isinstance(data, list): + npoints = len(data[0]) # if list [R,I] + else: + npoints = len(data) + udic[0]["size"] = npoints else: warn('No data, cannot set udic size') - # "sw" - # get firstx, lastx and unit - firstx, lastx, isppm = _find_firstx_lastx(dic) + # check if this is fid or processed: + is_processed = True # by default, expect processed data + try: + datatype = dic["DATATYPE"][0] + if datatype.strip().upper().replace(" ", "") == "NMRFID": + is_processed = False + except KeyError: + pass + # "sw" and "car" + # get firstx, lastx and unit + firstx, lastx, is_ppm = _find_firstx_lastx(dic) + if firstx is not None and lastx is not None: + if is_processed: # ppm data: convert to Hz - if isppm: + if is_ppm: if obs_freq: firstx = firstx * obs_freq lastx = lastx * obs_freq else: firstx, lastx = (None, None) - warn('Data is in ppm but have no frequency, cannot set udic sweep') - + warn('Data is in ppm but base frequency is unknown, \ + cannot set udic spectral width') if firstx is not None and lastx is not None: udic[0]["sw"] = abs(lastx - firstx) + udic[0]["car"] = (lastx + firstx) / 2 else: - warn('Cannot set udic sweep') - - # keys not found in standard&required JCAMP-DX keys and thus left default: - # car, complex, encoding + # FID: + if npoints: + aqtime = lastx - firstx + sw = npoints / aqtime + udic[0]["sw"] = sw + # note: in FIDs "car" is left to default as there is no required + # standard tag in JCAMP for it. Quite often manufacturers store + # it under their own tags though. + else: + warn('No data ranges found from JCAMP, cannot set udic sw & car') + + # "time" & "freq" + udic[0]["freq"] = is_processed + udic[0]["time"] = not is_processed + + # "complex" always false in JCAMP (R&I in separate arrays) + udic[0]["complex"] = False + # ...unless combined by nmrglue user with get_complex_array + if not isinstance(data, list): + if data.dtype == "complex128": + udic[0]["complex"] = True return udic diff --git a/tests/test_jcampdx.py b/tests/test_jcampdx.py index ce9173a0..c23439aa 100644 --- a/tests/test_jcampdx.py +++ b/tests/test_jcampdx.py @@ -109,10 +109,13 @@ def test_jcampdx1(): # check udic: udic = ng.jcampdx.guess_udic(dic, data) - assert np.abs(udic[0]["obs"]-100.4) < epsilon_e - assert np.abs(udic[0]["sw"]-24038.5) < epsilon_e + assert np.abs(udic[0]["obs"] - 100.4) < epsilon_e + assert np.abs(udic[0]["sw"] - 24038.5) < epsilon_e assert udic[0]["size"] == npoints_target assert udic[0]["label"] == "13C" + assert udic[0]["time"] is False + assert udic[0]["freq"] is True + assert udic[0]["complex"] is False def test_jcampdx2(): @@ -122,7 +125,7 @@ def test_jcampdx2(): # npoints, first, last, freq, sweep # note: first and last are raw values from datalines for convenience, # i.e. not scaled with YFACTORS - cases.append(("TESTFID.DX", 16384, 573, -11584, 100.4, 0.6815317)) + cases.append(("TESTFID.DX", 16384, 573, -11584, 100.4, 16384/0.6815317)) cases.append(("bruker1.dx", 16384, -5, -51, 200.13, 4098.3606557377)) cases.append(("bruker2.dx", 16384, 42, 422, 300.13336767, 6024.096385479)) cases.append(("bruker3.dx", 16384, 22, -313, 300.13336729, 6024.096385479)) @@ -143,9 +146,11 @@ def test_jcampdx2(): print(case[0]) # read casepath = os.path.join(DATA_DIR, "jcampdx", case[0]) - dic, data = ng.jcampdx.read(casepath) - if isinstance(data, list): - data = data[0] # for data with both R&I, check only R + dic, rawdata = ng.jcampdx.read(casepath) + if isinstance(rawdata, list): + data = rawdata[0] # for data with both R&I, check only R + else: + data = rawdata # since first and last are raw values, do yfactor # back-scaling here @@ -164,8 +169,22 @@ def test_jcampdx2(): # check udic udic = ng.jcampdx.guess_udic(dic, data) - assert np.abs(udic[0]["obs"]-case[4]) < epsilon - assert np.abs(udic[0]["sw"]-case[5]) < epsilon + assert np.abs(udic[0]["obs"] - case[4]) < epsilon + assert np.abs(udic[0]["sw"] - case[5]) < epsilon + if dic["DATATYPE"][0] == "NMR FID": + assert udic[0]["time"] is True + assert udic[0]["freq"] is False + assert udic[0]["complex"] is False + assert len(rawdata) == 2 # [R,I] + # FID: merge raw arrays and assert changes + data = ng.jcampdx.get_complex_array(rawdata) + assert len(data) == case[1] + udic = ng.jcampdx.guess_udic(dic, data) + assert udic[0]["complex"] is True + else: + assert udic[0]["time"] is False + assert udic[0]["freq"] is True + assert udic[0]["complex"] is False def test_jcampdx_dicstructure(): @@ -195,6 +214,7 @@ def test_jcampdx_dicstructure(): assert dic["_datatype_NA"][0]["_comments"][0] == "comment line" assert dic["_datatype_NA"][0]["_comments"][1] == "another comment" + def test_jcampdx_dicstructure2(): '''JCAMP-DX read: ensure correct dic structure (nested LINKs, multiple spectra)'''