|
10 | 10 | from neurostore.core import app, db |
11 | 11 | from neurostore import ingest |
12 | 12 | from neurostore import models |
| 13 | +from collections import OrderedDict |
13 | 14 |
|
# If the imported `app` has no truthy `config` attribute, rebind `app` to the
# object stored on its `_app` attribute, so the `@app.cli.command` decorators
# in this module are applied to that object instead.
# NOTE(review): presumably `app` is a wrapper around the Flask app in that
# case -- confirm against neurostore.core.
if not getattr(app, "config", None):
    app = app._app
@@ -71,3 +72,53 @@ def ingest_neuroquery(max_rows): |
71 | 72 | if max_rows is not None: |
72 | 73 | max_rows = int(max_rows) |
73 | 74 | ingest.ingest_neuroquery(max_rows=max_rows) |
| 75 | + |
| 76 | + |
def _note_value_type(value):
    """Return the note_keys type label for a non-null note value.

    ``bool`` is tested before the numeric types because ``bool`` is a
    subclass of ``int``; anything that is neither boolean nor numeric is
    labelled ``"string"``.
    """
    if isinstance(value, bool):
        return "boolean"
    if isinstance(value, (int, float)):
        return "number"
    return "string"


def _infer_note_keys(notes):
    """Build a note_keys mapping from an iterable of note objects.

    Each key is recorded in first-seen order as
    ``{"type": <label>, "order": <index>}``. The type comes from the first
    non-null value observed for that key; keys that only ever hold ``None``
    default to ``"string"``. Entries of *notes* that are not dicts
    (including ``None``) are ignored.
    """
    inferred = OrderedDict()

    for note in notes:
        if not isinstance(note, dict):
            # A missing or malformed note carries no key information; skip it
            # rather than abort the whole backfill.
            continue
        for key, value in note.items():
            if key not in inferred:
                inferred[key] = {"type": None, "order": len(inferred)}
            descriptor = inferred[key]
            # Keep searching until we see a non-null sample for this key.
            if value is not None and descriptor["type"] is None:
                descriptor["type"] = _note_value_type(value)

    # Default any keys that never had a non-null sample to string.
    for descriptor in inferred.values():
        if descriptor["type"] is None:
            descriptor["type"] = "string"

    return inferred


@app.cli.command("backfill-note-keys")
@click.option("--limit", default=None, type=int, help="limit number of annotations to process")
@click.option("--dry-run", is_flag=True, help="do not persist changes")
def backfill_note_keys(limit, dry_run):
    """Infer missing note_keys from existing annotation notes.

    Annotations are visited in creation order. Those that already have a
    non-empty note_keys mapping are counted as checked but left untouched.
    """
    updated = 0
    checked = 0

    q = models.Annotation.query.order_by(models.Annotation.created_at)
    # Compare against None so that an explicit ``--limit 0`` means "process
    # nothing" instead of silently falling back to "process everything".
    # The limit bounds the annotations examined, not the number updated.
    if limit is not None:
        q = q.limit(limit)

    for annotation in q:
        checked += 1
        existing = annotation.note_keys
        if isinstance(existing, dict) and existing:
            continue

        inferred = _infer_note_keys(
            aa.note for aa in annotation.annotation_analyses
        )
        if inferred:
            annotation.note_keys = inferred
            updated += 1

    if dry_run:
        # Explicitly discard the pending attribute changes so a dry run can
        # never be persisted by a later flush/commit on this session.
        db.session.rollback()
    elif updated:
        db.session.commit()

    click.echo(f"Checked {checked} annotations; updated {updated} (dry_run={dry_run}).")
0 commit comments