|
| 1 | +# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- |
| 2 | +# vi: set ft=python sts=4 ts=4 sw=4 et: |
| 3 | +""" |
| 4 | +Parse the "ASCCONV" meta data format found in a variety of Siemens MR files. |
| 5 | +""" |
| 6 | +import re |
| 7 | +import ast |
| 8 | +from collections import OrderedDict |
| 9 | + |
| 10 | + |
# Matches one complete ASCCONV section anchored at the match position.
# Group 1: the optional whitespace-separated ``key=value`` attributes that
# follow "ASCCONV BEGIN" on the opening marker line.
# Group 2: everything between the BEGIN and END marker lines; DOTALL lets
# ``.`` span the newlines inside the section body.
ASCCONV_RE = re.compile(
    r'### ASCCONV BEGIN((?:\s*[^=\s]+=[^=\s]+)*) ###\n(.*?)\n### ASCCONV END ###',
    flags=re.MULTILINE | re.DOTALL,
)
| 14 | + |
| 15 | + |
class AscconvParseError(Exception):
    """ Raised when ASCCONV text cannot be interpreted by this module """
| 18 | + |
| 19 | + |
class Atom:
    """ Object to hold operation, object type and object identifier

    An atom represents one element of the left-hand side of an assignment.
    For example::

        a.b[0].c

    has four elements (``a``, ``.b``, ``[0]`` and ``.c``).  We call these
    elements "atoms".

    Objects (like ``a``) are represented as dict-likes for convenience.

    The last element (``.c``) is an attribute access.  The object type
    (`obj_type`) of ``c`` is not constrained, because the operation alone
    does not tell us what type it is.  Its `obj_id` is the attribute name,
    "c".

    The second to last element (``[0]``) is a subscript.  We know from the
    subsequent operation ``.c`` that the element at index 0 must be an
    object, so its `obj_type` is a dict-like.  Its `obj_id` is the index 0.

    Parameters
    ----------
    op : object
        Marker for the kind of access.  Within this module it is the
        ``ast`` node of the element (``ast.Name`` for the root namespace,
        ``ast.Attribute`` for an attribute, ``ast.Subscript`` for a list
        element); consumers dispatch on it with ``isinstance``.
    obj_type : type
        Type of the object this element refers to.  It is called with no
        arguments to create the object when it does not exist yet.
    obj_id : str or int
        Key (name / attribute access) or index (subscript access).
    """

    def __init__(self, op, obj_type, obj_id):
        self.op = op
        self.obj_type = obj_type
        self.obj_id = obj_id

    def __repr__(self):
        # Readable form for debugging and for error messages that embed atoms.
        return (f'{type(self).__name__}(op={self.op!r}, '
                f'obj_type={self.obj_type!r}, obj_id={self.obj_id!r})')
| 55 | + |
| 56 | + |
class NoValue:
    """ Sentinel marking the absence of a value

    The class object itself (not an instance) is used as the marker, so it
    can be told apart from any real stored value with an ``is`` comparison.
    """
| 59 | + |
| 60 | + |
def assign2atoms(assign_ast, default_class=int):
    """ Parse single assignment ast from ascconv line into atoms

    Parameters
    ----------
    assign_ast : ast.Assign
        ast derived from a single line of an ascconv file.
    default_class : class, optional
        Class recorded as the object type of the final (rightmost) element,
        whose type cannot be known from the assignment target alone.

    Returns
    -------
    atoms : iterator of Atom
        Reversed iterator over the collected :class:`Atom` objects, giving
        them in left to right order of the assignment target (root name
        first).

    Raises
    ------
    AscconvParseError
        If `assign_ast` is not a plain assignment, has more than one target,
        contains a subscript that is not an integer literal, or contains an
        element that is not a name, attribute or subscript.
    """
    if not isinstance(assign_ast, ast.Assign):
        raise AscconvParseError(f'Expected an assignment, got {assign_ast}')
    if len(assign_ast.targets) != 1:
        raise AscconvParseError('Too many targets in assign')
    target = assign_ast.targets[0]
    atoms = []
    prev_target_type = default_class  # Placeholder for any scalar value
    # Walk the target from its outermost (rightmost) element inwards; the
    # kind of each element fixes the type of the element to its left.
    while True:
        if isinstance(target, ast.Name):
            atoms.append(Atom(target, prev_target_type, target.id))
            break
        if isinstance(target, ast.Attribute):
            atoms.append(Atom(target, prev_target_type, target.attr))
            target = target.value
            prev_target_type = OrderedDict
        elif isinstance(target, ast.Subscript):
            index_node = target.slice
            # Python < 3.9 wraps the subscript expression in ``ast.Index``;
            # from 3.9 on ``slice`` is the expression node itself.  Compare
            # by name so we do not depend on the deprecated class existing.
            if type(index_node).__name__ == 'Index':
                index_node = index_node.value
            index = (index_node.value
                     if isinstance(index_node, ast.Constant) else None)
            # Only plain integer literals are usable as list indices.
            if isinstance(index, bool) or not isinstance(index, int):
                raise AscconvParseError(
                    f'Unexpected subscript in LHS element {target}')
            atoms.append(Atom(target, prev_target_type, index))
            target = target.value
            prev_target_type = list
        else:
            raise AscconvParseError(f'Unexpected LHS element {target}')
    return reversed(atoms)
| 99 | + |
| 100 | + |
def _create_obj_in(atom, root):
    """ Fetch, or create and store, the object `atom` names in `root`

    `root` is a dict-like.  When it already holds an entry under
    ``atom.obj_id``, that entry is returned untouched.

    When there is no such entry, a fresh object is built by calling
    ``atom.obj_type()``, stored in `root` under ``atom.obj_id``, and
    returned.

    `root` may therefore be modified in place.
    """
    key = atom.obj_id
    # NoValue as the default lets us distinguish "missing" from any stored
    # value, including falsy ones such as 0 or ''.
    found = root.get(key, NoValue)
    if found is NoValue:
        found = atom.obj_type()
        root[key] = found
    return found
| 119 | + |
| 120 | + |
def _create_subscript_in(atom, root):
    """ Find / create and insert object defined by `atom` from list `root`

    The `atom` has an index, defined in ``atom.obj_id``.

    * If `root` already holds an object at that index, return it.
    * If the slot at that index exists but only holds the ``None`` padding
      left by an earlier, higher index, create a new object via
      ``atom.obj_type()``, store it in that slot and return it.  This lets
      list elements be assigned in any order.
    * Otherwise extend `root` with ``None`` elements up to the index, create
      a new object via ``atom.obj_type()``, append it, and return it.

    Can therefore modify `root` in place.
    """
    index = atom.obj_id
    curr_n = len(root)
    if curr_n > index:
        existing = root[index]
        if existing is not None:
            return existing
        # Slot is padding from an out-of-order assignment: fill it now.
        obj = atom.obj_type()
        root[index] = obj
        return obj
    obj = atom.obj_type()
    # In-place extension so the caller's list object is the one modified.
    root += [None] * (index - curr_n) + [obj]
    return obj
| 139 | + |
| 140 | + |
def obj_from_atoms(atoms, namespace):
    """ Locate the container and key addressed by `atoms` in `namespace`

    Walks `atoms` from left to right, creating any intermediate containers
    (and a placeholder for the final element) that do not exist yet.

    Parameters
    ----------
    atoms : iterable of Atom
        Elements of the assignment target, root name first.
    namespace : dict-like
        Namespace in which the object will be defined.  May be modified in
        place.

    Returns
    -------
    obj_root : object
        Container holding the final element, such that the desired value
        can be stored with ``obj_root[obj_key] = value``.
    obj_key : str or int
        Key into dict-like, or index into list, for `obj_root`.

    Raises
    ------
    AscconvParseError
        If an object found or created along the way is not an instance of
        the type the corresponding atom expects.
    """
    current = namespace
    for atom in atoms:
        parent = current
        # Names and attributes are keys in a dict-like; anything else is
        # treated as a list index.
        keyed = isinstance(atom.op, (ast.Attribute, ast.Name))
        resolver = _create_obj_in if keyed else _create_subscript_in
        current = resolver(atom, parent)
        if isinstance(current, atom.obj_type):
            continue
        raise AscconvParseError(
            f'Unexpected type for {atom.obj_id} in {parent}')
    return parent, atom.obj_id
| 170 | + |
| 171 | + |
def _get_value(assign):
    """ Return the Python value on the right-hand side of `assign`

    Parameters
    ----------
    assign : ast.Assign
        Assignment statement node.

    Returns
    -------
    value : int, float, complex or str
        The literal number or string being assigned.  A leading unary minus
        on a number is applied to the result.

    Raises
    ------
    AscconvParseError
        If the right-hand side is anything other than a number literal, a
        negated number literal, or a string literal.
    """
    node = assign.value
    negate = isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.USub)
    literal = node.operand if negate else node
    # ``ast.Constant`` replaces the deprecated ``ast.Num`` / ``ast.Str``
    # node classes and their ``.n`` / ``.s`` attributes.
    if isinstance(literal, ast.Constant):
        const = literal.value
        # bool is a subclass of int but was never accepted as a number here.
        is_number = (isinstance(const, (int, float, complex))
                     and not isinstance(const, bool))
        if is_number:
            return -const if negate else const
        if isinstance(const, str) and not negate:
            return const
    raise AscconvParseError(f'Unexpected RHS of assignment: {node}')
| 181 | + |
| 182 | + |
def parse_ascconv(ascconv_str, str_delim='"'):
    '''Parse the 'ASCCONV' format from `ascconv_str`.

    Parameters
    ----------
    ascconv_str : str
        The string we are parsing.  The ASCCONV section must start at the
        beginning of the string.
    str_delim : str, optional
        String delimiter. Typically '"' or '""'

    Returns
    -------
    prot_dict : OrderedDict
        Meta data pulled from the ASCCONV section.
    attrs : OrderedDict
        Any attributes stored in the 'ASCCONV BEGIN' line

    Raises
    ------
    AscconvParseError
        No ASCCONV section was found at the start of `ascconv_str`, or a
        line of the ASCCONV section could not be interpreted.
    SyntaxError
        Propagated unchanged from ``ast.parse`` when the section body is
        not valid Python after the string delimiters are normalized.
    '''
    match = ASCCONV_RE.match(ascconv_str)
    if match is None:
        # Without this the failure would surface as an AttributeError on None.
        raise AscconvParseError('No ASCCONV section found at start of input')
    attrs, content = match.groups()
    attrs = OrderedDict(tuple(x.split('=')) for x in attrs.split())
    # Normalize string start / end markers to something Python understands
    content = content.replace(str_delim, '"""')
    # Use Python's own parser to parse modified ASCCONV assignments
    tree = ast.parse(content)

    prot_dict = OrderedDict()
    for assign in tree.body:
        atoms = assign2atoms(assign)
        obj_to_index, key = obj_from_atoms(atoms, prot_dict)
        obj_to_index[key] = _get_value(assign)

    return prot_dict, attrs
0 commit comments