Skip to content

Commit 4c301de

Browse files
committed
add backfill-note-keys
1 parent 2c82757 commit 4c301de

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed

store/backend/manage.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from neurostore.core import app, db
1111
from neurostore import ingest
1212
from neurostore import models
13+
from collections import OrderedDict
1314

1415
if not getattr(app, "config", None):
1516
app = app._app
@@ -71,3 +72,53 @@ def ingest_neuroquery(max_rows):
7172
if max_rows is not None:
7273
max_rows = int(max_rows)
7374
ingest.ingest_neuroquery(max_rows=max_rows)
75+
76+
77+
def _note_value_type(value):
    """Return the note_keys type name for a non-null sample value.

    ``bool`` is tested before the numeric check because ``bool`` is a
    subclass of ``int``; otherwise ``True``/``False`` would be reported
    as ``"number"``. Anything that is neither boolean nor numeric is
    classified as ``"string"``.
    """
    if isinstance(value, bool):
        return "boolean"
    if isinstance(value, (int, float)):
        return "number"
    return "string"


def _infer_note_keys(notes):
    """Build a note_keys mapping from an iterable of note payloads.

    Each key maps to ``{"type": <"boolean"|"number"|"string">, "order": <int>}``
    where ``order`` is the position at which the key was first seen. The
    type comes from the first non-null value observed for the key; keys
    that only ever hold ``None`` default to ``"string"``.

    Payloads that are not dicts (``None``, lists, strings, ...) are skipped
    instead of raising, since they carry no key information.
    """
    inferred = OrderedDict()

    for note in notes:
        if not isinstance(note, dict):
            continue
        for key, value in note.items():
            if key not in inferred:
                inferred[key] = {"type": None, "order": len(inferred)}
            descriptor = inferred[key]
            # Keep searching until we see a non-null sample for this key.
            if value is None or descriptor["type"] is not None:
                continue
            descriptor["type"] = _note_value_type(value)

    # Default any keys that never had a non-null sample to string.
    for descriptor in inferred.values():
        if descriptor["type"] is None:
            descriptor["type"] = "string"

    return inferred


@app.cli.command("backfill-note-keys")
@click.option("--limit", default=None, type=int, help="limit number of annotations to process")
@click.option("--dry-run", is_flag=True, help="do not persist changes")
def backfill_note_keys(limit, dry_run):
    """Infer missing note_keys from existing annotation notes."""
    # NOTE: the docstring above is emitted by click as the command's --help
    # text, so it is kept short; implementation notes live in comments.
    #
    # Annotations are visited in ``created_at`` order. ``limit`` caps how many
    # annotations are *checked* (not how many are updated). Annotations whose
    # ``note_keys`` is already a non-empty dict are left untouched.
    query = models.Annotation.query.order_by(models.Annotation.created_at)
    # Compare against None so that an explicit ``--limit 0`` means "process
    # nothing" rather than silently falling back to "process everything".
    if limit is not None:
        query = query.limit(limit)

    checked = 0
    updated = 0

    for annotation in query:
        checked += 1

        existing = annotation.note_keys
        if isinstance(existing, dict) and existing:
            continue

        inferred = _infer_note_keys(
            aa.note for aa in annotation.annotation_analyses
        )
        if not inferred:
            continue

        # In dry-run mode the object is not mutated at all: a dirty object
        # could otherwise be autoflushed to the database by a later lazy load
        # inside this loop, even though nothing is ever committed.
        if not dry_run:
            annotation.note_keys = inferred
        updated += 1

    if updated and not dry_run:
        db.session.commit()

    click.echo(f"Checked {checked} annotations; updated {updated} (dry_run={dry_run}).")

0 commit comments

Comments
 (0)