
Commit 5990096

wip: add order
1 parent 876ee4c commit 5990096


6 files changed: +146 -16 lines changed


store/backend/neurostore/ingest/__init__.py

Lines changed: 4 additions & 3 deletions
@@ -28,6 +28,7 @@
     Entity,
 )
 from neurostore.models.data import StudysetStudy, _check_type
+from neurostore.utils import normalize_note_keys

 META_ANALYSIS_WORDS = ["meta analysis", "meta-analysis", "systematic review"]

@@ -356,9 +357,9 @@ def ingest_neurosynth(max_rows=None):
             notes.append(aa)

         # add notes to annotation
-        annot.note_keys = {
-            k: _check_type(v) for k, v in annotation_row._asdict().items()
-        }
+        annot.note_keys = normalize_note_keys(
+            {k: _check_type(v) for k, v in annotation_row._asdict().items()}
+        )
         annot.annotation_analyses = notes
         for note in notes:
             to_commit.append(note.analysis)
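
A minimal sketch (not part of the commit) of what this changes about the stored ingest output: plain value-type strings are promoted to metadata dicts carrying an explicit zero-based order. The column names below are made up for illustration.

from neurostore.utils import normalize_note_keys

# Before this commit, note_keys held plain type strings; wrapping the mapping
# in normalize_note_keys stores each entry as {"type": ..., "order": ...}.
note_keys = normalize_note_keys({"space": "string", "year": "number"})
# note_keys == {
#     "space": {"type": "string", "order": 0},
#     "year": {"type": "number", "order": 1},
# }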

store/backend/neurostore/models/data.py

Lines changed: 7 additions & 0 deletions
@@ -14,6 +14,7 @@
 from .migration_types import TSVector, VectorType
 from ..database import db
 from ..utils import parse_json_filter, build_jsonpath
+from ..utils import normalize_note_keys

 # status of pipeline run
 STATUS_ENUM = PGEnum(
@@ -129,6 +130,12 @@ class Annotation(BaseMixin, db.Model):
         cascade_backrefs=False,
     )

+    @validates("note_keys")
+    def _validate_note_keys(self, key, value):
+        if isinstance(value, dict):
+            return normalize_note_keys(value)
+        return value
+

 class AnnotationAnalysis(db.Model):
     __tablename__ = "annotation_analyses"
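
A hedged sketch of what the new @validates hook buys at the ORM layer: any plain mapping assigned to Annotation.note_keys should land in normalized form. The import path and the bare Annotation() construction are assumptions; this is illustrative only, not a test from the commit.

# Sketch only: assumes Annotation is exported from neurostore.models and that
# the application/DB configuration needed to import the models is in place.
from neurostore.models import Annotation

annot = Annotation()
annot.note_keys = {"contrast": "string", "n_subjects": "number"}

# The validator routes the assignment through normalize_note_keys, so:
assert annot.note_keys["contrast"] == {"type": "string", "order": 0}
assert annot.note_keys["n_subjects"] == {"type": "number", "order": 1}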

store/backend/neurostore/resources/data.py

Lines changed: 11 additions & 4 deletions
@@ -51,7 +51,7 @@
 )
 from ..models.data import StudysetStudy, BaseStudy, _check_type
 from ..utils import parse_json_filter, build_jsonpath
-
+from ..utils import normalize_note_keys

 from ..schemas import (
     BooleanOrString,
@@ -330,7 +330,9 @@ def _clone_annotations(self, source_record, cloned_record):
                 ),
                 public=annotation.public,
                 note_keys=(
-                    deepcopy(annotation.note_keys) if annotation.note_keys else {}
+                    normalize_note_keys(annotation.note_keys)
+                    if annotation.note_keys
+                    else {}
                 ),
             )
             clone_annotation.studyset = cloned_record
@@ -944,8 +946,13 @@ def _apply_pipeline_columns(self, annotation, data, specs, column_counter):
         else:
             note_keys = dict(data["note_keys"])
         for key, value_type in column_types.items():
-            note_keys[key] = value_type or "string"
-        data["note_keys"] = note_keys
+            existing_value = note_keys.get(key)
+            normalized_type = value_type or "string"
+            if isinstance(existing_value, dict):
+                existing_value["type"] = normalized_type
+            else:
+                note_keys[key] = {"type": normalized_type}
+        data["note_keys"] = normalize_note_keys(note_keys)

         data["annotation_analyses"] = list(note_map.values())
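
For context, a standalone sketch of the merge logic added to _apply_pipeline_columns (the key names and column types are made up): a pipeline column that matches an already-normalized entry only updates its type in place, a new column gets a fresh metadata dict, and the whole mapping is re-normalized at the end.

from neurostore.utils import normalize_note_keys

# Hypothetical stand-ins for the values the resource works with.
note_keys = {
    "existing": {"type": "string", "order": 0},  # already-normalized entry
    "name": "string",                            # legacy plain-string entry
}
column_types = {"existing": "number", "score": "float"}

for key, value_type in column_types.items():
    existing_value = note_keys.get(key)
    normalized_type = value_type or "string"
    if isinstance(existing_value, dict):
        existing_value["type"] = normalized_type    # update type, keep order/extras
    else:
        note_keys[key] = {"type": normalized_type}  # new column gets a fresh entry

note_keys = normalize_note_keys(note_keys)
# note_keys == {
#     "existing": {"type": "number", "order": 0},
#     "name": {"type": "string", "order": 1},
#     "score": {"type": "float", "order": 2},
# }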

store/backend/neurostore/schemas/data.py

Lines changed: 13 additions & 0 deletions
@@ -16,6 +16,7 @@

 from neurostore.core import db
 from neurostore.models import Analysis, Point
+from neurostore.utils import normalize_note_keys

 # context parameters
 # clone: create a new object with new ids (true or false)
@@ -667,6 +668,12 @@ class AnnotationSchema(BaseDataSchema):
         fields.Nested(AnnotationPipelineSchema), load_only=True, required=False
     )

+    @pre_load
+    def normalize_note_keys_field(self, data, **kwargs):
+        if data.get("note_keys") is not None:
+            data["note_keys"] = normalize_note_keys(data["note_keys"])
+        return data
+
     @pre_load
     def add_studyset_id(self, data, **kwargs):
         if data.get("studyset") and data.get("notes"):
@@ -694,6 +701,12 @@ def validate_notes(self, data, **kwargs):
         if invalid:
             raise ValidationError({"notes": invalid})

+    @post_dump
+    def ensure_note_keys_order(self, data, **kwargs):
+        if data.get("note_keys"):
+            data["note_keys"] = normalize_note_keys(data["note_keys"])
+        return data
+

class BaseSnapshot(object):
    def __init__(self):
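
Worth noting why running the same helper in both @pre_load and @post_dump is safe: normalize_note_keys is idempotent, so re-normalizing an already-normalized mapping is a no-op. A small sketch with made-up keys:

from neurostore.utils import normalize_note_keys

once = normalize_note_keys({"a": "string", "b": {"type": "number", "order": 0}})
# "b" carries an explicit order, so it sorts ahead of "a":
# {"b": {"type": "number", "order": 0}, "a": {"type": "string", "order": 1}}
twice = normalize_note_keys(once)
assert twice == once  # re-normalizing changes nothing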

store/backend/neurostore/tests/api/test_annotations.py

Lines changed: 28 additions & 9 deletions
@@ -15,6 +15,16 @@
 )


+def _assert_note_keys(note_keys, expected_entries):
+    keys = list(note_keys.keys())
+    expected_keys = [key for key, _ in expected_entries]
+    assert keys == expected_keys
+    for order, (key, expected_type) in enumerate(expected_entries):
+        entry = note_keys[key]
+        assert entry["type"] == expected_type
+        assert entry["order"] == order
+
+
 def _create_annotation_with_two_analyses(session, user):
     base_study = BaseStudy(name="Test Base Study", level="group", user=user)
     study = Study(
@@ -514,10 +524,15 @@ def test_put_annotation_applies_pipeline_columns(auth_client, session):
     assert resp.status_code == 200
     body = resp.json()

-    assert body["note_keys"]["existing"] == "string"
-    assert body["note_keys"]["string_field"] == "string"
-    assert body["note_keys"]["numeric_field"] == "number"
-    assert body["note_keys"]["name"] == "string"
+    _assert_note_keys(
+        body["note_keys"],
+        [
+            ("existing", "string"),
+            ("string_field", "string"),
+            ("numeric_field", "number"),
+            ("name", "string"),
+        ],
+    )

     notes = body["notes"]
     assert len(notes) == 2
@@ -601,11 +616,15 @@ def test_put_annotation_pipeline_column_conflict_suffix(auth_client, session):
     key_one = f"string_field_{pipeline_one.name}_{config_one.version}_{config_one.id}"
     key_two = f"string_field_{pipeline_two.name}_{config_two.version}_{config_two.id}"

-    assert key_one in body["note_keys"]
-    assert key_two in body["note_keys"]
-    assert body["note_keys"][key_one] == "string"
-    assert body["note_keys"][key_two] == "string"
-    assert body["note_keys"]["name"] == "string"
+    _assert_note_keys(
+        body["note_keys"],
+        [
+            ("existing", "string"),
+            (key_one, "string"),
+            ("name", "string"),
+            (key_two, "string"),
+        ],
+    )

     for entry in body["notes"]:
         note = entry["note"]

store/backend/neurostore/utils.py

Lines changed: 83 additions & 0 deletions
@@ -3,6 +3,8 @@
 """

 import re
+from copy import deepcopy
+from typing import Any, Dict, Iterable, Tuple


 def determine_value_type(value: str):
@@ -203,3 +205,84 @@ def parse_json_filter(filter_str: str) -> tuple:
         raise ValueError(f"Invalid numeric value '{value}' for operator {operator}")

     return pipeline_name, version, field_path, operator, value
+
+
+DEFAULT_NOTE_KEY_TYPE = "string"
+
+
+def _extract_note_key_type(value: Any) -> str:
+    """Derive the type string from a note_keys entry."""
+    if isinstance(value, dict):
+        for candidate in ("type", "value_type", "data_type", "datatype"):
+            candidate_value = value.get(candidate)
+            if candidate_value is not None:
+                return str(candidate_value)
+        return DEFAULT_NOTE_KEY_TYPE
+    if value is None:
+        return DEFAULT_NOTE_KEY_TYPE
+    return str(value)
+
+
+def _hydrate_note_key_entry(
+    key: str, value: Any, original_index: int
+) -> Tuple[str, Dict[str, Any], int, int]:
+    """
+    Build a normalized representation of a note_key entry while delaying order assignment.
+
+    Returns a tuple of:
+    - key name
+    - entry dict (deep copied when applicable)
+    - provided order (or None)
+    - original index (for stability when order is missing)
+    """
+    if isinstance(value, dict):
+        entry = deepcopy(value)
+        provided_order = entry.get("order")
+    else:
+        entry = {}
+        provided_order = None
+
+    entry["type"] = _extract_note_key_type(value)
+    if provided_order is not None:
+        try:
+            provided_order = int(provided_order)
+        except (ValueError, TypeError):
+            provided_order = None
+
+    return key, entry, provided_order, original_index
+
+
+def normalize_note_keys(note_keys: Any) -> Dict[str, Dict[str, Any]]:
+    """
+    Ensure note_keys are stored as an ordered mapping of key -> metadata, where
+    metadata includes a ``type`` and zero-based ``order`` attribute.
+
+    The returned mapping preserves any additional attributes present on each
+    entry and reindexes orders sequentially starting at 0. When no explicit
+    order is supplied, the original insertion order is used.
+    """
+    if not isinstance(note_keys, dict) or not note_keys:
+        return {}
+
+    hydrated_entries: Iterable[Tuple[str, Dict[str, Any], int, int]] = [
+        _hydrate_note_key_entry(key, value, original_index)
+        for original_index, (key, value) in enumerate(note_keys.items())
+    ]
+
+    sorted_entries = sorted(
+        hydrated_entries,
+        key=lambda item: (
+            item[2] is None,
+            item[2] if item[2] is not None else item[3],
+            item[0],
+        ),
+    )
+
+    normalized: Dict[str, Dict[str, Any]] = {}
+    for new_order, (key, entry, _provided_order, _original_index) in enumerate(
+        sorted_entries
+    ):
+        entry["order"] = new_order
+        normalized[key] = entry
+
+    return normalized
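
A usage sketch (illustrative keys) of the sorting contract implemented above: entries with an explicit order come first and are reindexed from 0, entries without one follow in their original insertion order, and every entry comes back as a dict with a type and an order.

from neurostore.utils import normalize_note_keys

mixed = {
    "b": {"type": "number", "order": 5},  # explicit order, will be reindexed
    "a": "string",                        # legacy plain type string, no order
    "c": {"order": 0},                    # explicit order, type defaults to "string"
}
normalized = normalize_note_keys(mixed)
# normalized == {
#     "c": {"type": "string", "order": 0},
#     "b": {"type": "number", "order": 1},
#     "a": {"type": "string", "order": 2},
# }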
