@@ -78,33 +78,49 @@ def ingest_neuroquery(max_rows):
7878@click .option ("--limit" , default = None , type = int , help = "limit number of annotations to process" )
7979@click .option ("--dry-run" , is_flag = True , help = "do not persist changes" )
8080def backfill_note_keys (limit , dry_run ):
81- """Infer missing note_keys from existing annotation notes."""
81+ """Infer missing note_keys from existing annotation notes.
82+
83+ We target annotations whose note_keys is null/empty, collect all keys present
84+ in their annotation_analyses.note payloads, assign order by first appearance,
85+ and infer type from the first non-null sample (defaulting to string).
86+ """
8287 updated = 0
8388 checked = 0
8489
85- # Collect note keys across analyses first, then write back once per annotation.
86- q = models .Annotation .query .order_by (models .Annotation .created_at )
90+ conn = db .session .connection ()
91+ ids_sql = """
92+ SELECT id
93+ FROM annotations
94+ WHERE note_keys IS NULL OR note_keys = '{}'::jsonb
95+ ORDER BY created_at
96+ """
8797 if limit :
88- q = q .limit (limit )
98+ ids_sql += " LIMIT :limit"
99+ ids = [row .id for row in conn .execute (sa .text (ids_sql ), {"limit" : limit } if limit else {})]
89100
90- for annotation in q :
101+ for annot_id in ids :
91102 checked += 1
92- current = annotation .note_keys if isinstance (annotation .note_keys , dict ) else {}
93- if current :
94- continue
103+ notes = conn .execute (
104+ sa .text ("SELECT note FROM annotation_analyses WHERE annotation_id = :id" ),
105+ {"id" : annot_id },
106+ ).fetchall ()
95107
96108 inferred : OrderedDict [str , dict ] = OrderedDict ()
97109
98- # First pass: collect all keys present in any note with their first-seen order.
99- for aa in annotation .annotation_analyses :
100- note = aa .note or {}
110+ # First pass: record all keys in order of first appearance across notes
111+ for row in notes :
112+ note = row .note or {}
113+ if not isinstance (note , dict ):
114+ continue
101115 for key in note .keys ():
102116 if key not in inferred :
103117 inferred [key ] = {"type" : None , "order" : len (inferred )}
104118
105- # Second pass: try to find a non-null sample for each key to set its type.
106- for aa in annotation .annotation_analyses :
107- note = aa .note or {}
119+ # Second pass: find first non-null sample for each key to determine type
120+ for row in notes :
121+ note = row .note or {}
122+ if not isinstance (note , dict ):
123+ continue
108124 for key , value in note .items ():
109125 if key not in inferred or inferred [key ]["type" ] is not None :
110126 continue
@@ -123,8 +139,12 @@ def backfill_note_keys(limit, dry_run):
123139 descriptor ["type" ] = "string"
124140
125141 if inferred :
126- annotation .note_keys = inferred
127142 updated += 1
143+ if not dry_run :
144+ conn .execute (
145+ sa .text ("UPDATE annotations SET note_keys = :note_keys WHERE id = :id" ),
146+ {"id" : annot_id , "note_keys" : inferred },
147+ )
128148
129149 if updated and not dry_run :
130150 db .session .commit ()
0 commit comments