
Commit 678b350

[ENH] annotation order (#1220)
* wip: add order to note_keys
* add utils and fix style

1 parent 06e9b2b

File tree

7 files changed: +187 −33 lines changed

store/backend/neurostore/ingest/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -364,7 +364,8 @@ def ingest_neurosynth(max_rows=None):
 
         # add notes to annotation
         annot.note_keys = {
-            k: _check_type(v) for k, v in annotation_row._asdict().items()
+            k: {"type": _check_type(v) or "string", "order": idx}
+            for idx, (k, v) in enumerate(annotation_row._asdict().items())
         }
         annot.annotation_analyses = notes
         for note in notes:
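For orientation, a minimal standalone sketch of what the new comprehension produces. The sample row and the _check_type stand-in below are illustrative assumptions; the real helper lives in the ingest module and is not shown in this diff, and the real row is a namedtuple-like object accessed via _asdict() above.

def _check_type(value):
    # Hypothetical stand-in for the ingest module's _check_type helper.
    if isinstance(value, bool):
        return "boolean"
    if isinstance(value, (int, float)):
        return "number"
    return "string" if value is not None else None

row = {"id": "study_1", "space": "MNI", "n_subjects": 23}  # sample data only

note_keys = {
    k: {"type": _check_type(v) or "string", "order": idx}
    for idx, (k, v) in enumerate(row.items())
}
# {'id': {'type': 'string', 'order': 0},
#  'space': {'type': 'string', 'order': 1},
#  'n_subjects': {'type': 'number', 'order': 2}}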

store/backend/neurostore/resources/data.py

Lines changed: 84 additions & 5 deletions
@@ -393,6 +393,84 @@ def get_affected_ids(self, ids):
         }
         return unique_ids
 
+    @staticmethod
+    def _ordered_note_keys(note_keys):
+        if not note_keys:
+            return []
+        keys = list(note_keys.keys())
+        alphabetical = sorted(keys)
+        if keys == alphabetical:
+            return alphabetical
+        return keys
+
+    @classmethod
+    def _normalize_note_keys(cls, note_keys):
+        if note_keys is None:
+            return None
+        if not isinstance(note_keys, dict):
+            abort_validation("`note_keys` must be an object.")
+
+        ordered_keys = cls._ordered_note_keys(note_keys)
+        normalized = OrderedDict()
+        used_orders = set()
+        next_order = 0
+
+        for key in ordered_keys:
+            descriptor = note_keys.get(key) or {}
+            note_type = descriptor.get("type")
+            if note_type not in {"string", "number", "boolean"}:
+                abort_validation(
+                    "Invalid `type` for note_keys entry "
+                    f"'{key}', choose from: ['boolean', 'number', 'string']."
+                )
+
+            order = descriptor.get("order")
+            if isinstance(order, bool) or (
+                order is not None and not isinstance(order, int)
+            ):
+                order = None
+
+            if isinstance(order, int) and order not in used_orders:
+                used_orders.add(order)
+                if order >= next_order:
+                    next_order = order + 1
+            else:
+                while next_order in used_orders:
+                    next_order += 1
+                order = next_order
+                used_orders.add(order)
+                next_order += 1
+
+            normalized[key] = {"type": note_type, "order": order}
+
+        return normalized
+
+    @classmethod
+    def _merge_note_keys(cls, existing, additions):
+        """
+        additions is a mapping of key -> type
+        """
+        base = cls._normalize_note_keys(existing or {}) or OrderedDict()
+        used_orders = {v.get("order") for v in base.values() if isinstance(v, dict)}
+        used_orders = {o for o in used_orders if isinstance(o, int)}
+        next_order = max(used_orders, default=-1) + 1
+
+        for key, value_type in additions.items():
+            if key in base:
+                descriptor = base[key] or {}
+                descriptor["type"] = value_type or descriptor.get("type") or "string"
+                base[key] = descriptor
+                continue
+
+            descriptor = {
+                "type": value_type or "string",
+                "order": next_order,
+            }
+            base[key] = descriptor
+            next_order += 1
+
+        return base
+
     @classmethod
     def load_nested_records(cls, data, record=None):
         if not data:
@@ -554,6 +632,9 @@ def put(self, id):
         schema = self._schema()
         data = schema.load(request_data)
 
+        if "note_keys" in data:
+            data["note_keys"] = self._normalize_note_keys(data["note_keys"])
+
         pipeline_payload = data.pop("pipelines", [])
 
         args = {}
@@ -942,12 +1023,10 @@ def _apply_pipeline_columns(self, annotation, data, specs, column_counter):
 
         if column_types:
             if data.get("note_keys") is None:
-                note_keys = dict(annotation.note_keys or {})
+                note_keys = self._normalize_note_keys(annotation.note_keys or {})
             else:
-                note_keys = dict(data["note_keys"])
-            for key, value_type in column_types.items():
-                note_keys[key] = value_type or "string"
-            data["note_keys"] = note_keys
+                note_keys = self._normalize_note_keys(data["note_keys"])
+            data["note_keys"] = self._merge_note_keys(note_keys, column_types)
 
         data["annotation_analyses"] = list(note_map.values())
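As a rough guide to the ordering rule these helpers implement, here is a standalone sketch of the order-filling loop from _normalize_note_keys, with the class machinery and the abort_validation type check stripped out; it is an approximation for illustration, not the resource method itself. Valid, previously unused integer orders are kept; booleans, non-integers, duplicates, and missing orders are reassigned to the next free slot.

def assign_orders(note_keys):
    # Sketch of the order-assignment rule (type validation omitted).
    used_orders, next_order, normalized = set(), 0, {}
    for key, descriptor in note_keys.items():
        descriptor = descriptor or {}
        order = descriptor.get("order")
        if isinstance(order, bool) or not isinstance(order, int):
            order = None  # booleans and non-integers are ignored
        if order is not None and order not in used_orders:
            used_orders.add(order)
            if order >= next_order:
                next_order = order + 1
        else:
            while next_order in used_orders:
                next_order += 1
            order = next_order
            used_orders.add(order)
            next_order += 1
        normalized[key] = {"type": descriptor.get("type"), "order": order}
    return normalized

assign_orders({"b": {"type": "string", "order": 2}, "a": {"type": "boolean"}})
# {'b': {'type': 'string', 'order': 2}, 'a': {'type': 'boolean', 'order': 3}}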

store/backend/neurostore/schemas/data.py

Lines changed: 65 additions & 1 deletion
@@ -663,6 +663,70 @@ class AnnotationPipelineSchema(BaseSchema):
     columns = fields.List(fields.String(), required=True)
 
 
+class NoteKeysField(fields.Field):
+    allowed_types = {"string", "number", "boolean"}
+
+    def _serialize(self, value, attr, obj, **kwargs):
+        if not value:
+            return {}
+        serialized = {}
+        for key, descriptor in value.items():
+            if not isinstance(descriptor, dict):
+                continue
+            serialized[key] = {
+                "type": descriptor.get("type"),
+                "order": descriptor.get("order"),
+            }
+        return serialized
+
+    def _deserialize(self, value, attr, data, **kwargs):
+        if value is None:
+            return {}
+        if not isinstance(value, dict):
+            raise ValidationError("`note_keys` must be an object.")
+
+        normalized = {}
+        used_orders = set()
+        explicit_orders = []
+        for descriptor in value.values():
+            if isinstance(descriptor, dict) and isinstance(
+                descriptor.get("order"), int
+            ):
+                explicit_orders.append(descriptor["order"])
+        next_order = max(explicit_orders, default=-1) + 1
+
+        for key, descriptor in value.items():
+            if not isinstance(descriptor, dict):
+                raise ValidationError("Each note key must map to an object.")
+
+            note_type = descriptor.get("type")
+            if note_type not in self.allowed_types:
+                raise ValidationError(
+                    f"Invalid note type for '{key}', choose from: {sorted(self.allowed_types)}"
+                )
+
+            order = descriptor.get("order")
+            if isinstance(order, bool) or (
+                order is not None and not isinstance(order, int)
+            ):
+                order = None
+
+            if isinstance(order, int) and order not in used_orders:
+                used_orders.add(order)
+                if order >= next_order:
+                    next_order = order + 1
+            else:
+                while next_order in used_orders:
+                    next_order += 1
+                order = next_order
+                used_orders.add(order)
+                next_order += 1
+
+            normalized[key] = {"type": note_type, "order": order}
+
+        return normalized
+
+
 class AnnotationSchema(BaseDataSchema):
     # serialization
     studyset_id = fields.String(data_key="studyset")
@@ -675,7 +739,7 @@ class AnnotationSchema(BaseDataSchema):
     source_id = fields.String(dump_only=True, allow_none=True)
     source_updated_at = fields.DateTime(dump_only=True, allow_none=True)
 
-    note_keys = fields.Dict()
+    note_keys = NoteKeysField()
     metadata = fields.Dict(attribute="metadata_", dump_only=True)
     # deserialization
     metadata_ = fields.Dict(data_key="metadata", load_only=True, allow_none=True)
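For context, the load-side behavior this implies, shown as a sketch rather than code from the commit: an entry with an explicit integer order keeps it, and entries without one are appended after the highest explicit order. The import path is an assumption, and the field is exercised directly through marshmallow's public deserialize method.

from neurostore.schemas.data import NoteKeysField  # assumed import path

field = NoteKeysField()
loaded = field.deserialize(
    {
        "included": {"type": "boolean", "order": 5},
        "quality": {"type": "string"},  # no order supplied
    }
)
# loaded == {'included': {'type': 'boolean', 'order': 5},
#            'quality': {'type': 'string', 'order': 6}}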

store/backend/neurostore/tests/api/test_annotations.py

Lines changed: 18 additions & 17 deletions
@@ -13,6 +13,7 @@
     PipelineConfig,
     PipelineStudyResult,
 )
+from ..utils import ordered_note_keys
 
 
 def _create_annotation_with_two_analyses(session, user):
@@ -39,7 +40,7 @@ def _create_annotation_with_two_analyses(session, user):
         name="Test Annotation",
         studyset=studyset,
         user=user,
-        note_keys={"existing": "string"},
+        note_keys=ordered_note_keys({"existing": "string"}),
     )
 
     session.add(annotation)
@@ -92,7 +93,7 @@ def test_blank_annotation_populates_note_fields(
     auth_client, ingest_neurosynth, session
 ):
     dset = Studyset.query.first()
-    note_keys = {"included": "boolean", "quality": "string"}
+    note_keys = ordered_note_keys({"included": "boolean", "quality": "string"})
     payload = {
         "studyset": dset.id,
         "note_keys": note_keys,
@@ -121,7 +122,7 @@ def test_annotation_rejects_empty_note(auth_client, ingest_neurosynth, session):
                 "note": {},
             }
         ],
-        "note_keys": {"included": "boolean"},
+        "note_keys": ordered_note_keys({"included": "boolean"}),
        "name": "invalid annotation",
    }
 
@@ -143,7 +144,7 @@ def test_post_annotation(auth_client, ingest_neurosynth, session):
     payload = {
         "studyset": dset.id,
         "notes": data,
-        "note_keys": {"foo": "string"},
+        "note_keys": ordered_note_keys({"foo": "string"}),
         "name": "mah notes",
     }
     resp = auth_client.post("/api/annotations/", data=payload)
@@ -280,7 +281,7 @@ def test_blank_slate_creation(auth_client, session):
     # create annotation
     annotation_data = {
         "studyset": ss_id,
-        "note_keys": {"include": "boolean"},
+        "note_keys": ordered_note_keys({"include": "boolean"}),
         "name": "mah notes",
     }
     annotation_post = auth_client.post("/api/annotations/", data=annotation_data)
@@ -342,7 +343,7 @@ def test_mismatched_notes(auth_client, ingest_neurosynth, session):
         for s in dset.studies
         for a in s.analyses
     ]
-    note_keys = {"foo": "string", "doo": "string"}
+    note_keys = ordered_note_keys({"foo": "string", "doo": "string"})
     payload = {
         "studyset": dset.id,
         "notes": data,
@@ -377,7 +378,7 @@ def test_put_nonexistent_analysis(auth_client, ingest_neurosynth, session):
         for s in dset.studies
         for a in s.analyses
     ]
-    note_keys = {"foo": "string", "doo": "string"}
+    note_keys = ordered_note_keys({"foo": "string", "doo": "string"})
     payload = {
         "studyset": dset.id,
         "notes": data,
@@ -412,7 +413,7 @@ def test_post_put_subset_of_analyses(auth_client, ingest_neurosynth, session):
     # remove last note
     data.pop()
 
-    note_keys = {"foo": "string", "doo": "string"}
+    note_keys = ordered_note_keys({"foo": "string", "doo": "string"})
     payload = {
         "studyset": dset.id,
         "notes": data,
@@ -444,7 +445,7 @@ def test_correct_note_overwrite(auth_client, ingest_neurosynth, session):
     payload = {
         "studyset": dset.id,
         "notes": data,
-        "note_keys": {"foo": "string", "doo": "string"},
+        "note_keys": ordered_note_keys({"foo": "string", "doo": "string"}),
         "name": "mah notes",
     }
 
@@ -514,10 +515,10 @@ def test_put_annotation_applies_pipeline_columns(auth_client, session):
     assert resp.status_code == 200
     body = resp.json()
 
-    assert body["note_keys"]["existing"] == "string"
-    assert body["note_keys"]["string_field"] == "string"
-    assert body["note_keys"]["numeric_field"] == "number"
-    assert body["note_keys"]["name"] == "string"
+    assert body["note_keys"]["existing"]["type"] == "string"
+    assert body["note_keys"]["string_field"]["type"] == "string"
+    assert body["note_keys"]["numeric_field"]["type"] == "number"
+    assert body["note_keys"]["name"]["type"] == "string"
 
     notes = body["notes"]
     assert len(notes) == 2
@@ -603,9 +604,9 @@ def test_put_annotation_pipeline_column_conflict_suffix(auth_client, session):
 
     assert key_one in body["note_keys"]
     assert key_two in body["note_keys"]
-    assert body["note_keys"][key_one] == "string"
-    assert body["note_keys"][key_two] == "string"
-    assert body["note_keys"]["name"] == "string"
+    assert body["note_keys"][key_one]["type"] == "string"
+    assert body["note_keys"][key_two]["type"] == "string"
+    assert body["note_keys"]["name"]["type"] == "string"
 
     for entry in body["notes"]:
         note = entry["note"]
@@ -625,7 +626,7 @@ def test_annotation_analyses_post(auth_client, ingest_neurosynth, session):
     payload = {
         "studyset": dset.id,
         "notes": data,
-        "note_keys": {"foo": "string", "doo": "string"},
+        "note_keys": ordered_note_keys({"foo": "string", "doo": "string"}),
         "name": "mah notes",
     }

store/backend/neurostore/tests/api/test_studysets.py

Lines changed: 1 addition & 1 deletion
@@ -164,7 +164,7 @@ def _create_studyset_with_annotation(auth_client, study_ids, name="clone-source"
 
     annotation_payload = {
         "studyset": studyset_id,
-        "note_keys": {"include": "boolean"},
+        "note_keys": {"include": {"type": "boolean", "order": 0}},
         "name": "annotation for clone",
     }
     annotation_resp = auth_client.post("/api/annotations/", data=annotation_payload)

store/backend/neurostore/tests/conftest.py

Lines changed: 9 additions & 8 deletions
@@ -29,6 +29,7 @@
 import vcr
 
 import logging
+from .utils import ordered_note_keys
 
 LOGGER = logging.getLogger(__name__)
 
@@ -602,7 +603,7 @@ def user_data(session, mock_add_users):
         annotation = Annotation(
             name=name + "annotation",
             source="neurostore",
-            note_keys={"food": "string"},
+            note_keys=ordered_note_keys({"food": "string"}),
             studyset=studyset,
             user=user,
         )
@@ -639,13 +640,13 @@ def simple_neurosynth_annotation(session, ingest_neurosynth):
         )
     )
 
-    smol_annot = Annotation(
-        name="smol " + annot.name,
-        source="neurostore",
-        studyset=annot.studyset,
-        note_keys={"animal": "number"},
-        annotation_analyses=smol_notes,
-    )
+    smol_annot = Annotation(
+        name="smol " + annot.name,
+        source="neurostore",
+        studyset=annot.studyset,
+        note_keys=ordered_note_keys({"animal": "number"}),
+        annotation_analyses=smol_notes,
+    )
     session.add(smol_annot)
     session.commit()

store/backend/neurostore/tests/utils.py

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+def ordered_note_keys(type_map):
+    """
+    Build ordered note key descriptors from a simple name->type mapping.
+    """
+    return {
+        key: {"type": value_type, "order": idx}
+        for idx, (key, value_type) in enumerate(type_map.items())
+    }
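A quick usage note: the helper mirrors the ordered descriptors built during ingestion, so the tests can keep writing plain name-to-type mappings. For example:

ordered_note_keys({"included": "boolean", "quality": "string"})
# {'included': {'type': 'boolean', 'order': 0},
#  'quality': {'type': 'string', 'order': 1}}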
