Skip to content

Commit 3aef55c

Browse files
committed
Fix meta-data loaded for Analysis screening
1 parent 3d2e183 commit 3aef55c

7 files changed

Lines changed: 112 additions & 95 deletions

File tree

autonima.egg-info/PKG-INFO

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ Autonima enables end-to-end automation of systematic review steps:
8888

8989
3. **Full-Text Retrieval**
9090

91-
* Downloads full texts of candidate papers via a combination of **PubGet** and **ACE**.
91+
* Downloads full texts of candidate papers via **PubGet**.
9292

9393
4. **Eligibility & Inclusion**
9494

autonima.egg-info/SOURCES.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ autonima/cli.py
66
autonima/config.py
77
autonima/meta.py
88
autonima/pipeline.py
9-
autonima/utils.py
109
autonima.egg-info/PKG-INFO
1110
autonima.egg-info/SOURCES.txt
1211
autonima.egg-info/dependency_links.txt
@@ -29,7 +28,6 @@ autonima/llm/client.py
2928
autonima/models/__init__.py
3029
autonima/models/types.py
3130
autonima/retrieval/__init__.py
32-
autonima/retrieval/ace.py
3331
autonima/retrieval/base.py
3432
autonima/retrieval/pubget.py
3533
autonima/retrieval/utils.py
@@ -42,10 +40,16 @@ autonima/screening/screener.py
4240
autonima/search/__init__.py
4341
autonima/search/base.py
4442
autonima/search/pubmed.py
43+
autonima/utils/__init__.py
44+
autonima/utils/base.py
45+
autonima/utils/criteria.py
46+
tests/test_annotation_incremental_caching.py
4547
tests/test_basic.py
4648
tests/test_confidence_reporting.py
49+
tests/test_criteria_mapping.py
4750
tests/test_fulltext_loading.py
4851
tests/test_fulltext_screening.py
52+
tests/test_multi_annotation.py
4953
tests/test_objective_in_prompt.py
5054
tests/test_parallel_screening.py
5155
tests/test_pipeline_retrieval.py

autonima/annotation/client.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import List, Dict, Any
66
from pydantic import BaseModel
77
from ..llm.client import GenericLLMClient
8-
from .schema import AnalysisMetadata, AnnotationCriteriaConfig, AnnotationDecision, StudyAnalysisGroup
8+
from .schema import AnalysisMetadata, AnnotationConfig, AnnotationCriteriaConfig, AnnotationDecision, StudyAnalysisGroup
99

1010
logger = logging.getLogger(__name__)
1111

@@ -122,14 +122,14 @@ def make_multi_decision(
122122
metadata: AnalysisMetadata,
123123
criteria_list: List[AnnotationCriteriaConfig],
124124
model: str = "gpt-4o-mini",
125-
prompt_type: str = "multi_analysis_table",
125+
prompt_type: str = "single_analysis",
126126
study_group: StudyAnalysisGroup = None
127127
) -> List[AnnotationDecision]:
128128
"""
129129
Make decisions about whether an analysis should be included in multiple annotations.
130130
131131
Args:
132-
metadata: Analysis metadata (used for multi_analysis_table)
132+
metadata: Analysis metadata
133133
criteria_list: List of annotation criteria configurations
134134
model: LLM model to use
135135
prompt_type: Type of prompt ("single_analysis" or "multi_analysis")
@@ -142,25 +142,28 @@ def make_multi_decision(
142142
return []
143143

144144
try:
145+
# Extract metadata fields that are actually present in the metadata object
146+
# (non-None values, excluding required fields)
147+
metadata_fields = []
148+
metadata_dict = metadata.model_dump()
149+
for field_name, field_value in metadata_dict.items():
150+
# Skip required fields and None values
151+
if field_name not in ['analysis_id', 'study_id', 'table_id', 'custom_fields'] and field_value is not None:
152+
metadata_fields.append(field_name)
153+
145154
# Select the appropriate prompt based on prompt_type
146155
if prompt_type == "multi_analysis":
147156
if study_group is None:
148157
raise ValueError(
149158
"study_group is required for multi_analysis prompt type"
150159
)
151160
from .prompts import create_study_multi_annotation_prompt
152-
metadata_fields = getattr(
153-
criteria_list[0], 'metadata_fields', None
154-
)
155161
prompt = create_study_multi_annotation_prompt(
156162
study_group, criteria_list, metadata_fields
157163
)
158164
else: # Default to single_analysis
159-
from .prompts import create_multi_annotation_prompt
160-
metadata_fields = getattr(
161-
criteria_list[0], 'metadata_fields', None
162-
)
163-
prompt = create_multi_annotation_prompt(
165+
from .prompts import create_single_study_annotation_prompt
166+
prompt = create_single_study_annotation_prompt(
164167
metadata, criteria_list, metadata_fields
165168
)
166169

@@ -238,22 +241,12 @@ def make_multi_decision(
238241
exclusion_criteria_applied=decision_output.exclusion_criteria_applied
239242
)
240243
decisions.append(decision)
241-
242-
# If we didn't get enough responses, fill in with fallback
243-
while len(decisions) < len(criteria_list):
244-
criteria = criteria_list[len(decisions)]
245-
decision = self.make_decision(metadata, criteria, model)
246-
decisions.append(decision)
247244

248245
return decisions
249246

250247
except Exception as e:
251248
logger.error(f"Error making multi annotation decisions: {e}")
252-
# Return individual decisions as fallback
253-
return [
254-
self.make_decision(metadata, criteria, model)
255-
for criteria in criteria_list
256-
]
249+
raise
257250

258251
def chat_completion(self, messages, model, response_format=None):
259252
"""

autonima/annotation/processor.py

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
import json
55
from pathlib import Path
6-
from typing import List, Set
6+
from typing import List, Set, Optional
77
from concurrent.futures import ThreadPoolExecutor, as_completed
88
from tqdm import tqdm
99

@@ -252,7 +252,7 @@ def _process_custom_annotations_by_study(self, studies: List[Study], model: str,
252252
for study, annotations_to_process in tqdm(studies_to_process, desc="Processing studies"):
253253
try:
254254
study_decisions = self._process_single_study_annotations(
255-
study, annotations_to_process, model
255+
study, self.config.metadata_fields, annotations_to_process, model
256256
)
257257
decisions.extend(study_decisions)
258258
except Exception as e:
@@ -263,7 +263,7 @@ def _process_custom_annotations_by_study(self, studies: List[Study], model: str,
263263
with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
264264
# Submit all studies for processing
265265
future_to_study = {
266-
executor.submit(self._process_single_study_annotations, study, annotations_to_process, model): (study, annotations_to_process)
266+
executor.submit(self._process_single_study_annotations, study, self.config.metadata_fields, annotations_to_process, model): (study, annotations_to_process)
267267
for study, annotations_to_process in studies_to_process
268268
}
269269

@@ -279,12 +279,13 @@ def _process_custom_annotations_by_study(self, studies: List[Study], model: str,
279279

280280
return decisions
281281

282-
def _process_single_study_annotations(self, study: Study, annotations_to_process: List[AnnotationCriteriaConfig], model: str) -> List[AnnotationDecision]:
282+
def _process_single_study_annotations(self, study: Study, metadata_fields: List[str], annotations_to_process: List[AnnotationCriteriaConfig], model: str) -> List[AnnotationDecision]:
283283
"""
284284
Process annotations for a single study.
285285
286286
Args:
287287
study: Study to process
288+
metadata_fields: List of metadata fields to extract
288289
annotations_to_process: List of annotations to process
289290
model: LLM model to use
290291
@@ -298,8 +299,8 @@ def _process_single_study_annotations(self, study: Study, annotations_to_process
298299
# Create a unique analysis ID
299300
analysis_id = f"{study.pmid}_analysis_{i}"
300301

301-
# Extract metadata for this analysis
302-
metadata = self._extract_analysis_metadata(study, analysis, analysis_id)
302+
# Extract metadata for the analysis
303+
metadata = self._extract_analysis_metadata(study, analysis, analysis_id, metadata_fields)
303304

304305
# Make multi-annotation decision for this analysis
305306
analysis_decisions = self.client.make_multi_decision(
@@ -401,42 +402,72 @@ def _process_single_decision(self, metadata: AnalysisMetadata, criteria: Annotat
401402
"""
402403
return self.client.make_decision(metadata, criteria, model)
403404

404-
def _extract_analysis_metadata(self, study: Study, analysis: Analysis, analysis_id: str) -> AnalysisMetadata:
405+
def _extract_analysis_metadata(
406+
self,
407+
study: Study,
408+
analysis: Analysis,
409+
analysis_id: str,
410+
metadata_fields: Optional[List[str]] = None
411+
) -> AnalysisMetadata:
405412
"""
406413
Extract metadata for an analysis from a study.
407414
408415
Args:
409416
study: Study containing the analysis
410417
analysis: Analysis to extract metadata for
411418
analysis_id: Unique ID for the analysis
419+
metadata_fields: Optional list of fields to include. If None, includes all fields.
412420
413421
Returns:
414-
Analysis metadata
422+
Analysis metadata with only the requested fields populated
415423
"""
416424
# Extract table information if available
425+
table_id = None
417426
table_caption = None
418427
table_footer = None
419428
if study.activation_tables:
420429
# Use the first table's information for now
421430
# In the future, we might want to associate analyses with specific tables
431+
table_id = study.activation_tables[0].table_id
422432
table_caption = study.activation_tables[0].table_caption
423433
table_footer = study.activation_tables[0].table_foot
424434

435+
# If no activation tables exist, generate a default table_id
436+
if table_id is None:
437+
table_id = f"{study.pmid}_default_table"
438+
439+
# Build kwargs with required fields
440+
kwargs = {
441+
"analysis_id": analysis_id,
442+
"study_id": study.pmid,
443+
"table_id": table_id,
444+
}
445+
446+
# Map of field names to their values
447+
field_mapping = {
448+
"analysis_name": analysis.name,
449+
"analysis_description": analysis.description,
450+
"table_caption": table_caption,
451+
"table_footer": table_footer,
452+
"study_title": study.title,
453+
"study_abstract": study.abstract,
454+
"study_authors": study.authors,
455+
"study_journal": study.journal,
456+
"study_publication_date": study.publication_date,
457+
"study_fulltext": study.full_text,
458+
}
459+
460+
# If metadata_fields is None, include all fields
461+
if metadata_fields is None:
462+
kwargs.update(field_mapping)
463+
else:
464+
# Only include requested fields
465+
for field in metadata_fields:
466+
if field in field_mapping:
467+
kwargs[field] = field_mapping[field]
468+
425469
# Create the metadata object
426-
metadata = AnalysisMetadata(
427-
analysis_id=analysis_id,
428-
study_id=study.pmid,
429-
analysis_name=analysis.name,
430-
analysis_description=analysis.description,
431-
table_caption=table_caption,
432-
table_footer=table_footer,
433-
study_title=study.title,
434-
study_abstract=study.abstract,
435-
study_authors=study.authors,
436-
study_journal=study.journal,
437-
study_publication_date=study.publication_date,
438-
study_fulltext=study.full_text # Add full text
439-
)
470+
metadata = AnalysisMetadata(**kwargs)
440471

441472
return metadata
442473

0 commit comments

Comments
 (0)