Skip to content

Commit 10716bf

Browse files
authored
Merge pull request #32 from adelavega/enh/tag_exclusion
ENH: Tag with inclusion/exclusion reasons
2 parents b3b40d2 + 203c779 commit 10716bf

18 files changed

Lines changed: 1110 additions & 212 deletions

File tree

autonima/annotation/processor.py

Lines changed: 71 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,19 @@ def __init__(self, config: AnnotationConfig):
2828
self.client = AnnotationClient()
2929
self.annotation_results: List[AnnotationDecision] = []
3030

31-
def process_studies(self, studies: List[Study], output_dir: str) -> List[AnnotationDecision]:
31+
def process_studies(
32+
self,
33+
included_studies: List[Study],
34+
all_studies: List[Study] = None,
35+
output_dir: str = None
36+
) -> List[AnnotationDecision]:
3237
"""
33-
Process all studies and annotate their analyses.
38+
Process studies and annotate their analyses.
3439
3540
Args:
36-
studies: List of studies with parsed analyses
41+
included_studies: List of INCLUDED studies with parsed analyses
42+
all_studies: Optional list of ALL studies (INCLUDED + EXCLUDED)
43+
with parsed analyses for the all_studies annotation
3744
output_dir: Output directory for caching results
3845
3946
Returns:
@@ -44,33 +51,53 @@ def process_studies(self, studies: List[Study], output_dir: str) -> List[Annotat
4451
if cached_results:
4552
# Check if cached results are still valid
4653
if self._are_cached_results_valid(cached_results):
47-
logger.info(f"Loaded {len(cached_results)} cached annotation results")
54+
logger.info(
55+
f"Loaded {len(cached_results)} cached annotation "
56+
"results"
57+
)
4858
self.annotation_results = cached_results
4959
return cached_results
5060
else:
51-
logger.info("Cached results are outdated, processing fresh annotations")
52-
53-
# Filter studies to only those that are included and have analyses
54-
included_studies = [
55-
study for study in studies
56-
if study.status == StudyStatus.INCLUDED and study.analyses
57-
]
61+
logger.info(
62+
"Cached results are outdated, processing fresh "
63+
"annotations"
64+
)
5865

5966
if not included_studies:
60-
logger.info("No studies with analyses found for annotation")
67+
logger.info(
68+
"No INCLUDED studies with analyses found for annotation"
69+
)
6170
return []
6271

63-
logger.info(f"Processing {len(included_studies)} studies with analyses for annotation")
72+
logger.info(
73+
f"Processing {len(included_studies)} INCLUDED studies with "
74+
"analyses for annotation"
75+
)
6476

6577
# Process all analysis-annotation combinations
6678
all_decisions = []
6779

68-
# Process the "all_analyses" annotation if enabled
69-
if self.config.include_all_analyses:
70-
all_analyses_decisions = self._create_all_analyses_annotations(included_studies)
80+
# Process the "all_analyses" annotation for INCLUDED studies
81+
if self.config.create_all_included_annotation:
82+
all_analyses_decisions = self._create_all_analyses_annotations(
83+
included_studies,
84+
annotation_name="all_analyses"
85+
)
7186
all_decisions.extend(all_analyses_decisions)
7287

73-
# Process custom annotations if any are defined
88+
# Process the "all_studies" annotation for ALL studies if enabled
89+
if self.config.create_all_from_search_annotation and all_studies:
90+
logger.info(
91+
f"Creating 'all_studies' annotation for "
92+
f"{len(all_studies)} studies (INCLUDED + EXCLUDED)"
93+
)
94+
all_studies_decisions = self._create_all_analyses_annotations(
95+
all_studies,
96+
annotation_name="all_studies"
97+
)
98+
all_decisions.extend(all_studies_decisions)
99+
100+
# Process custom annotations on INCLUDED studies only
74101
if self.config.annotations:
75102
custom_decisions = self._process_custom_annotations(
76103
included_studies,
@@ -84,12 +111,17 @@ def process_studies(self, studies: List[Study], output_dir: str) -> List[Annotat
84111

85112
return all_decisions
86113

87-
def _create_all_analyses_annotations(self, studies: List[Study]) -> List[AnnotationDecision]:
114+
def _create_all_analyses_annotations(
115+
self,
116+
studies: List[Study],
117+
annotation_name: str = "all_analyses"
118+
) -> List[AnnotationDecision]:
88119
"""
89-
Create annotation decisions for the "all_analyses" annotation.
120+
Create annotation decisions for a default annotation.
90121
91122
Args:
92-
studies: List of included studies with analyses
123+
studies: List of studies with analyses
124+
annotation_name: Name of the annotation to create
93125
94126
Returns:
95127
List of annotation decisions (all marked as included)
@@ -101,18 +133,21 @@ def _create_all_analyses_annotations(self, studies: List[Study]) -> List[Annotat
101133
# Create a unique analysis ID
102134
analysis_id = f"{study.pmid}_analysis_{i}"
103135

104-
# Create decision for all_analyses annotation
136+
# Create decision for the annotation
105137
decision = AnnotationDecision(
106-
annotation_name="all_analyses",
138+
annotation_name=annotation_name,
107139
analysis_id=analysis_id,
108140
study_id=study.pmid,
109141
include=True,
110-
reasoning="All analyses included by default",
142+
reasoning=f"All analyses included in '{annotation_name}'",
111143
model_used="none"
112144
)
113145
decisions.append(decision)
114146

115-
logger.info(f"Created {len(decisions)} decisions for 'all_analyses' annotation")
147+
logger.info(
148+
f"Created {len(decisions)} decisions for '{annotation_name}' "
149+
"annotation"
150+
)
116151
return decisions
117152

118153
def _process_custom_annotations(self, studies: List[Study], model: str) -> List[AnnotationDecision]:
@@ -307,15 +342,24 @@ def _are_cached_results_valid(self, cached_results: List[AnnotationDecision]) ->
307342
# Check if we have the right number of annotations
308343
# (all_analyses + all_studies + custom annotations, as enabled in config)
309344
expected_annotation_count = 0
310-
if self.config.include_all_analyses:
345+
if self.config.create_all_included_annotation:
346+
expected_annotation_count += 1
347+
if self.config.create_all_from_search_annotation:
311348
expected_annotation_count += 1
312349
expected_annotation_count += len(self.config.annotations)
313350

314351
# Get unique annotation names from cached results
315-
cached_annotation_names = set(result.annotation_name for result in cached_results)
352+
cached_annotation_names = set(
353+
result.annotation_name for result in cached_results
354+
)
316355

317356
# Check if we have the expected annotations
318-
if self.config.include_all_analyses and "all_analyses" not in cached_annotation_names:
357+
if (self.config.create_all_included_annotation and
358+
"all_analyses" not in cached_annotation_names):
359+
return False
360+
361+
if (self.config.create_all_from_search_annotation and
362+
"all_studies" not in cached_annotation_names):
319363
return False
320364

321365
# Check if we have all custom annotations

autonima/annotation/prompts.py

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Prompt templates for annotation decisions."""
22

3-
from typing import List, Optional
3+
from typing import List
44
from .schema import AnalysisMetadata, AnnotationCriteriaConfig
55

66

@@ -10,12 +10,15 @@ def create_annotation_prompt(
1010
metadata_fields: List[str] = None
1111
) -> str:
1212
"""
13-
Create a prompt for the LLM to decide if an analysis should be included in an annotation.
13+
Create a prompt for the LLM to decide if an analysis should be included
14+
in an annotation.
1415
1516
Args:
1617
metadata: Analysis metadata to include in the prompt
1718
criteria: Annotation criteria configuration
19+
metadata_fields: List of metadata fields to include
1820
21+
Returns:
1922
Formatted prompt string
2023
"""
2124
# Use the provided metadata_fields or fall back to criteria.metadata_fields
@@ -48,31 +51,50 @@ def create_annotation_prompt(
4851
metadata_lines.append(f"- Study Abstract: {metadata.study_abstract}")
4952

5053
if "study_authors" in fields_to_use and metadata.study_authors:
51-
metadata_lines.append(f"- Study Authors: {', '.join(metadata.study_authors)}")
54+
authors = ', '.join(metadata.study_authors)
55+
metadata_lines.append(f"- Study Authors: {authors}")
5256

5357
if "study_journal" in fields_to_use and metadata.study_journal:
5458
metadata_lines.append(f"- Study Journal: {metadata.study_journal}")
5559

56-
if "study_publication_date" in fields_to_use and metadata.study_publication_date:
57-
metadata_lines.append(f"- Study Publication Date: {metadata.study_publication_date}")
60+
if ("study_publication_date" in fields_to_use and
61+
metadata.study_publication_date):
62+
date_str = metadata.study_publication_date
63+
metadata_lines.append(f"- Study Publication Date: {date_str}")
5864

5965
# Add any custom fields
6066
for field_name, field_value in metadata.custom_fields.items():
6167
if field_name in fields_to_use and field_value:
62-
metadata_lines.append(f"- {field_name.replace('_', ' ').title()}: {field_value}")
68+
formatted_name = field_name.replace('_', ' ').title()
69+
metadata_lines.append(f"- {formatted_name}: {field_value}")
6370

64-
# Format inclusion and exclusion criteria
65-
inclusion_criteria_text = "\n".join([f" - {c}" for c in criteria.inclusion_criteria])
66-
exclusion_criteria_text = "\n".join([f" - {c}" for c in criteria.exclusion_criteria])
71+
# Format criteria with IDs if mapping is provided
72+
if criteria.criteria_mapping:
73+
inclusion_items = criteria.criteria_mapping.get('inclusion', {}).items()
74+
inclusion_list = [f"{id}: {text}" for id, text in inclusion_items]
75+
inclusion_text = "\n".join(inclusion_list)
76+
77+
exclusion_items = criteria.criteria_mapping.get('exclusion', {}).items()
78+
exclusion_list = [f"{id}: {text}" for id, text in exclusion_items]
79+
exclusion_text = "\n".join(exclusion_list)
80+
else:
81+
inclusion_list = [f" - {c}" for c in criteria.inclusion_criteria]
82+
inclusion_text = "\n".join(inclusion_list)
83+
exclusion_list = [f" - {c}" for c in criteria.exclusion_criteria]
84+
exclusion_text = "\n".join(exclusion_list)
6785

6886
# Create the prompt
6987
prompt = f"""
70-
You are a neuroimaging meta-analysis expert evaluating whether an analysis meets specific inclusion criteria.
88+
You are a neuroimaging meta-analysis expert evaluating whether an analysis meets
89+
specific inclusion criteria.
7190
72-
The following analysis has been extracted from within a table of a published fMRI/neuroimaging article.
73-
You will be provided with metadata about the analysis, the table it was extracted from, and the study it belongs to.
74-
Note that since each table may have contained multiple analyses, the table caption may describe multiple analyses that are not relevant to this specific analysis.
75-
As such, while taking into account the table caption, please focus primarily on the analysis name and description for your decision.
91+
The following analysis has been extracted from within a table of a published
92+
fMRI/neuroimaging article. You will be provided with metadata about the
93+
analysis, the table it was extracted from, and the study it belongs to. Note
94+
that since each table may have contained multiple analyses, the table caption
95+
may describe multiple analyses that are not relevant to this specific analysis.
96+
As such, while taking into account the table caption, please focus primarily on
97+
the analysis name and description for your decision.
7698
7799
STUDY CONTEXT:
78100
{chr(10).join(metadata_lines) if metadata_lines else "No metadata available"}
@@ -81,17 +103,25 @@ def create_annotation_prompt(
81103
Description: {criteria.description or "No description provided"}
82104
83105
INCLUSION CRITERIA:
84-
{inclusion_criteria_text or "No inclusion criteria specified"}
106+
{inclusion_text or "No inclusion criteria specified"}
85107
86108
EXCLUSION CRITERIA:
87-
{exclusion_criteria_text or "No exclusion criteria specified"}
109+
{exclusion_text or "No exclusion criteria specified"}
110+
111+
Based on the provided information, should this analysis be included in the
112+
"{criteria.name}" annotation?
88113
89-
Based on the provided information, should this analysis be included in the "{criteria.name}" annotation?
114+
IMPORTANT: In your response, you must specify which specific criteria IDs apply
115+
to this analysis.
116+
- For included analyses: List the inclusion criteria IDs that are satisfied
117+
- For excluded analyses: List the exclusion criteria IDs that apply
90118
91119
Respond with JSON:
92120
{{
93121
"include": true/false,
94-
"reasoning": "Brief explanation of decision"
122+
"reasoning": "Brief explanation of decision",
123+
"inclusion_criteria_applied": ["I1", "I2"],
124+
"exclusion_criteria_applied": []
95125
}}
96126
"""
97127

autonima/annotation/schema.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from pydantic import BaseModel, field_validator
44
from typing import List, Optional, Dict, Any
5-
from typing_extensions import Literal
65
from datetime import datetime
76

87

@@ -13,13 +12,18 @@ class AnnotationCriteriaConfig(BaseModel):
1312
inclusion_criteria: List[str] = []
1413
exclusion_criteria: List[str] = []
1514
metadata_fields: List[str] = []
16-
15+
16+
# Optional mapping of criterion ID -> criterion text, keyed by 'inclusion' / 'exclusion'
17+
criteria_mapping: Optional[Dict[str, Dict[str, str]]] = None
1718

1819

1920
class AnnotationConfig(BaseModel):
2021
"""Configuration for the annotation phase."""
2122
model: str = "gpt-4o-mini"
22-
include_all_analyses: bool = True
23+
# Create "all_analyses" annotation with all analyses from INCLUDED studies
24+
create_all_included_annotation: bool = True
25+
# Create "all_studies" annotation from INCLUDED and EXCLUDED studies
26+
create_all_from_search_annotation: bool = False
2327
annotations: List[AnnotationCriteriaConfig] = []
2428
enabled: bool = True
2529
metadata_fields: List[str] = [
@@ -42,6 +46,10 @@ class AnnotationDecision(BaseModel):
4246
confidence: Optional[float] = None
4347
model_used: str
4448
timestamp: datetime = datetime.now()
49+
50+
# IDs of the inclusion / exclusion criteria reported as applying to this decision
51+
inclusion_criteria_applied: List[str] = []
52+
exclusion_criteria_applied: List[str] = []
4553

4654

4755
class AnalysisMetadata(BaseModel):
@@ -60,10 +68,14 @@ class AnalysisMetadata(BaseModel):
6068
# Add any other fields as needed
6169
custom_fields: Dict[str, Any] = {}
6270

63-
@field_validator('analysis_name', 'analysis_description', 'table_caption', 'table_footer', 'study_title', 'study_abstract', 'study_journal', 'study_publication_date', mode='before')
71+
@field_validator('analysis_name', 'analysis_description', 'table_caption',
72+
'table_footer', 'study_title', 'study_abstract',
73+
'study_journal', 'study_publication_date', mode='before')
6474
@classmethod
6575
def validate_string_fields(cls, v):
66-
"""Validate that string fields are properly formatted and handle nan values."""
76+
"""
77+
Validate string fields and handle nan values.
78+
"""
6779
if v is None:
6880
return None
6981
# Handle nan values (both float nan and string 'nan')

0 commit comments

Comments
 (0)