Skip to content

Commit 06c5b07

Browse files
committed
try again
1 parent c652a4b commit 06c5b07

File tree

1 file changed

+35
-15
lines changed

1 file changed

+35
-15
lines changed

store/backend/manage.py

Lines changed: 35 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -78,33 +78,49 @@ def ingest_neuroquery(max_rows):
7878
@click.option("--limit", default=None, type=int, help="limit number of annotations to process")
7979
@click.option("--dry-run", is_flag=True, help="do not persist changes")
8080
def backfill_note_keys(limit, dry_run):
81-
"""Infer missing note_keys from existing annotation notes."""
81+
"""Infer missing note_keys from existing annotation notes.
82+
83+
We target annotations whose note_keys is null/empty, collect all keys present
84+
in their annotation_analyses.note payloads, assign order by first appearance,
85+
and infer type from the first non-null sample (defaulting to string).
86+
"""
8287
updated = 0
8388
checked = 0
8489

85-
# Collect note keys across analyses first, then write back once per annotation.
86-
q = models.Annotation.query.order_by(models.Annotation.created_at)
90+
conn = db.session.connection()
91+
ids_sql = """
92+
SELECT id
93+
FROM annotations
94+
WHERE note_keys IS NULL OR note_keys = '{}'::jsonb
95+
ORDER BY created_at
96+
"""
8797
if limit:
88-
q = q.limit(limit)
98+
ids_sql += " LIMIT :limit"
99+
ids = [row.id for row in conn.execute(sa.text(ids_sql), {"limit": limit} if limit else {})]
89100

90-
for annotation in q:
101+
for annot_id in ids:
91102
checked += 1
92-
current = annotation.note_keys if isinstance(annotation.note_keys, dict) else {}
93-
if current:
94-
continue
103+
notes = conn.execute(
104+
sa.text("SELECT note FROM annotation_analyses WHERE annotation_id = :id"),
105+
{"id": annot_id},
106+
).fetchall()
95107

96108
inferred: OrderedDict[str, dict] = OrderedDict()
97109

98-
# First pass: collect all keys present in any note with their first-seen order.
99-
for aa in annotation.annotation_analyses:
100-
note = aa.note or {}
110+
# First pass: record all keys in order of first appearance across notes
111+
for row in notes:
112+
note = row.note or {}
113+
if not isinstance(note, dict):
114+
continue
101115
for key in note.keys():
102116
if key not in inferred:
103117
inferred[key] = {"type": None, "order": len(inferred)}
104118

105-
# Second pass: try to find a non-null sample for each key to set its type.
106-
for aa in annotation.annotation_analyses:
107-
note = aa.note or {}
119+
# Second pass: find first non-null sample for each key to determine type
120+
for row in notes:
121+
note = row.note or {}
122+
if not isinstance(note, dict):
123+
continue
108124
for key, value in note.items():
109125
if key not in inferred or inferred[key]["type"] is not None:
110126
continue
@@ -123,8 +139,12 @@ def backfill_note_keys(limit, dry_run):
123139
descriptor["type"] = "string"
124140

125141
if inferred:
126-
annotation.note_keys = inferred
127142
updated += 1
143+
if not dry_run:
144+
conn.execute(
145+
sa.text("UPDATE annotations SET note_keys = :note_keys WHERE id = :id"),
146+
{"id": annot_id, "note_keys": inferred},
147+
)
128148

129149
if updated and not dry_run:
130150
db.session.commit()

0 commit comments

Comments
 (0)