|
| 1 | +import datetime |
| 2 | +import logging |
| 3 | +import re |
| 4 | + |
| 5 | +from .base import Reader |
| 6 | + |
| 7 | +logger = logging.getLogger(__name__) |
| 8 | + |
| 9 | + |
class GcdReader(Reader):
    """Reader for ``.txt`` files that are organised in ``[Section]`` blocks.

    A file is accepted by :meth:`check` when its suffix is ``.txt`` and it
    contains both a ``[Chromatogram (Ch1)]`` and a ``[Compound Results(Ch1)]``
    section. :meth:`get_tables` then produces one table with the general
    header sections, followed by one table per channel for the compound
    results and one table per channel for the chromatogram data.

    NOTE(review): ``self.file`` (with ``suffix`` and ``string``) and
    ``self.append_table`` come from the base ``Reader``; ``append_table`` is
    assumed to append a new table dict with ``'header'``, ``'metadata'``,
    ``'columns'`` and ``'rows'`` entries to the given list and return it.
    """

    identifier = 'gcd_reader'
    priority = 5

    # Total channel count; overwritten by get_tables() when a
    # '# of Channels' entry is found in one of the header sections.
    _number_of_ch = 0

    # All patterns are raw strings: '\d', '\[' and '\s' are invalid escape
    # sequences in ordinary string literals.
    _SECTION_RE = re.compile(r'^\[.+]$')
    _DATA_LINE_RE = re.compile(r'^\d')
    _TIME_PATTERN = r'\d{1,2}:\d{1,2}:\d{1,2} [AP]M'
    _DATE_PATTERN = r'\d{1,2}/\d{1,2}/\d{4}'

    # (pattern, strptime format used for reading, strftime format for writing)
    _TIMESTAMP_FORMATS = (
        (re.compile(rf'{_DATE_PATTERN}\s{_TIME_PATTERN}'),
         '%m/%d/%Y %I:%M:%S %p', '%d.%m.%Y %H:%M:%S'),
        (re.compile(_TIME_PATTERN), '%I:%M:%S %p', '%H:%M:%S'),
        (re.compile(_DATE_PATTERN), '%m/%d/%Y', '%d.%m.%Y'),
    )

    # Sections whose lines are collected into the first (header) table.
    _HEADER_SECTIONS = (
        '[Header]',
        '[File Information]',
        '[Sample Information]',
        '[Configuration]',
        '[Original Files]',
    )

    # Keys whose value is itself a comma-separated list; each item is stored
    # under its own 1-based index.
    _PER_ITEM_KEYS = ('Detector ID', 'Detector Name', '# of Channels')

    def _parse_input(self):
        """Group the non-empty lines of ``self.file.string`` by section.

        Returns:
            dict mapping each section header line (e.g. ``'[Header]'``) to the
            list of lines that follow it. Lines that appear before the first
            section header are stored under ``'[NO HEADER]'``. A section
            header that occurs twice restarts its list.
        """
        current_header = '[NO HEADER]'
        content = {current_header: []}
        for line in self.file.string.splitlines():
            if line == '':
                continue
            if self._SECTION_RE.match(line) is not None:
                current_header = line
                content[line] = []
            else:
                content[current_header].append(line)
        return content

    def check(self):
        """Return True when the file looks like a supported export.

        The parsed sections are stored in ``self.lines`` as a side effect
        (only when the suffix is ``.txt``); :meth:`get_tables` relies on it.
        """
        result = self.file.suffix.lower() == '.txt'
        if result:
            self.lines = self._parse_input()
            result = ('[Chromatogram (Ch1)]' in self.lines
                      and '[Compound Results(Ch1)]' in self.lines)
        logger.debug('result=%s', result)
        return result

    @staticmethod
    def _split_key_value(line):
        """Split ``line`` at its first comma into a stripped (key, value) pair.

        Returns None when the line contains no comma, so callers can skip it
        instead of failing while unpacking.
        """
        key, separator, value = line.partition(',')
        if not separator:
            return None
        return key.strip(), value.strip()

    @classmethod
    def _convert_timestamp(cls, value):
        """Rewrite a US-style date/time string into ``dd.mm.YYYY HH:MM:SS`` form.

        Only values that consist entirely of a date, a 12-hour time, or both
        are converted; everything else is returned unchanged. Values that fit
        the pattern but are not valid dates/times are also returned unchanged.
        """
        for pattern, read_format, write_format in cls._TIMESTAMP_FORMATS:
            if pattern.fullmatch(value) is None:
                continue
            try:
                parsed = datetime.datetime.strptime(value, read_format)
            except ValueError:
                # e.g. month 13 or hour 0 with AM/PM: keep the raw text.
                return value
            return parsed.strftime(write_format)
        return value

    def _fill_header_table(self, table):
        """Populate ``table`` from the general header sections.

        Every line is appended verbatim to ``table['header']``. Lines of the
        form ``key,value`` are additionally stored in ``table['metadata']``
        under ``'<section>.<key>'``. As a side effect ``self._number_of_ch``
        is set to the sum of the ``# of Channels`` items.
        """
        metadata = table['metadata']
        for section in self._HEADER_SECTIONS:
            table['header'].append('')
            table['header'].append(section)
            for line in self.lines.get(section, []):
                table['header'].append(line)
                pair = self._split_key_value(line)
                if pair is None:
                    # No 'key,value' structure: kept in the header text only.
                    continue
                key, value = pair
                value = self._convert_timestamp(value)
                if key in self._PER_ITEM_KEYS:
                    items = value.split(',')
                    if key == '# of Channels':
                        self._number_of_ch = sum(int(item) for item in items)
                        metadata[key] = str(self._number_of_ch)
                    for position, item in enumerate(items, start=1):
                        metadata[f'{section}.{key}.{position}'] = item
                else:
                    metadata[f'{section}.{key}'] = value

        # The header table carries no tabular data.
        table['columns'] = []
        table['rows'] = []
        metadata['rows'] = str(len(table['rows']))
        metadata['columns'] = str(len(table['columns']))

    def _add_compound_table(self, tables, channel):
        """Append the ``[Compound Results(Ch<channel>)]`` table, if present.

        The first line of the section is stored as a ``key,value`` metadata
        entry, the second line provides the column names and all remaining
        lines are rows. Each row field is also stored in the metadata under
        ``'Ch<channel>.Id <first field>.<column name>'``.
        """
        header = f"[Compound Results(Ch{channel})]"
        lines = self.lines.get(header)
        if lines is None:
            # The channel count announced more channels than the file holds.
            logger.warning('section %s not found, skipping', header)
            return

        table = self.append_table(tables)
        table['header'] += lines
        table['metadata']['Header'] = header

        pair = self._split_key_value(lines[0]) if lines else None
        if pair is not None:
            key, number_of_ids = pair
            table['metadata'][key] = number_of_ids

        # A section may end right after its first line.
        col_names = [name.strip() for name in lines[1].split(',')] if len(lines) > 1 else []
        for line in lines[2:]:
            entries = line.split(',')
            table['rows'].append(entries)
            # zip() ignores fields beyond the known column names instead of
            # raising IndexError; the complete row is still kept in 'rows'.
            for name, entry in zip(col_names, entries):
                table['metadata'][f"Ch{channel}.Id {entries[0]}.{name}"] = entry

        table['columns'] = [
            {'key': str(position), 'name': name}
            for position, name in enumerate(col_names)
        ]
        table['metadata']['rows'] = str(len(table['rows']))
        table['metadata']['columns'] = str(len(table['columns']))

    def _add_chromatogram_table(self, tables, channel):
        """Append the ``[Chromatogram (Ch<channel>)]`` table, if present.

        Lines starting with a digit are parsed as comma-separated floats and
        become rows. Of the remaining lines the last one provides the column
        names, and the others are stored as ``key,value`` metadata under
        ``'<section>.<key>'``.
        """
        header = f"[Chromatogram (Ch{channel})]"
        lines = self.lines.get(header)
        if lines is None:
            # The channel count announced more channels than the file holds.
            logger.warning('section %s not found, skipping', header)
            return

        table = self.append_table(tables)
        table['metadata']['Header'] = header

        metas = []
        values = []
        for line in lines:
            if self._DATA_LINE_RE.match(line) is not None:
                values.append([float(field) for field in line.split(',')])
            else:
                metas.append(line)

        for line in metas[:-1]:
            pair = self._split_key_value(line)
            if pair is None:
                continue
            key, value = pair
            table['metadata'][f"{header}.{key}"] = value
        table['rows'] = values

        # Without any non-numeric line there is no column header to read.
        column_names = metas[-1].split(',') if metas else []
        table['columns'] = [
            {'key': str(position), 'name': name}
            for position, name in enumerate(column_names)
        ]
        table['metadata']['rows'] = str(len(table['rows']))
        table['metadata']['columns'] = str(len(table['columns']))

    def get_tables(self):
        """Convert the parsed sections into a list of tables.

        Requires ``self.lines`` to have been set by :meth:`check`.

        Returns:
            list of tables in this order: the header table, then the compound
            results table of every channel, then the chromatogram table of
            every channel. Channels whose section is missing from the file
            are skipped with a warning.
        """
        tables = []
        # Also determines self._number_of_ch for the loops below.
        self._fill_header_table(self.append_table(tables))

        for channel in range(1, self._number_of_ch + 1):
            self._add_compound_table(tables, channel)

        for channel in range(1, self._number_of_ch + 1):
            self._add_chromatogram_table(tables, channel)

        return tables
0 commit comments