Skip to content

Commit 7360c86

Browse files
committed
Create system-level annotations (e.g. all_studies, all_abstract)
1 parent 4787211 commit 7360c86

11 files changed

Lines changed: 200 additions & 77 deletions

File tree

autonima.egg-info/SOURCES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ tests/test_cli.py
5353
tests/test_confidence_reporting.py
5454
tests/test_criteria_mapping.py
5555
tests/test_docs.py
56+
tests/test_fulltext_incomplete_outputs.py
5657
tests/test_fulltext_loading.py
5758
tests/test_fulltext_screening.py
5859
tests/test_multi_annotation.py

autonima/annotation/processor.py

Lines changed: 73 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,19 @@ def process_studies(
3838
self,
3939
included_studies: List[Study],
4040
all_studies: List[Study] = None,
41+
all_abstract_studies: List[Study] = None,
4142
output_dir: str = None
4243
) -> List[AnnotationDecision]:
4344
"""
4445
Process studies and annotate their analyses.
4546
4647
Args:
4748
included_studies: List of INCLUDED studies with parsed analyses
48-
all_studies: Optional list of ALL studies (INCLUDED + EXCLUDED)
49-
with parsed analyses for the all_studies annotation
49+
all_studies: Optional list of all studies from search with parsed
50+
analyses for the all_studies annotation
51+
all_abstract_studies: Optional list of studies included after
52+
abstract screening, with parsed analyses for the all_abstract
53+
annotation
5054
output_dir: Output directory for caching results
5155
5256
Returns:
@@ -55,55 +59,80 @@ def process_studies(
5559
# Load existing cached results
5660
existing_cached_results = self._load_cached_results(output_dir) or []
5761

58-
if not included_studies:
62+
if (
63+
not included_studies
64+
and not all_studies
65+
and not all_abstract_studies
66+
):
67+
logger.info("No studies with analyses found for annotation")
68+
return existing_cached_results
69+
70+
if included_studies:
5971
logger.info(
60-
"No INCLUDED studies with analyses found for annotation"
72+
f"Processing {len(included_studies)} INCLUDED studies with "
73+
"analyses for annotation"
6174
)
62-
return existing_cached_results
63-
64-
logger.info(
65-
f"Processing {len(included_studies)} INCLUDED studies with "
66-
"analyses for annotation"
67-
)
6875

6976
if output_dir:
7077
for study in included_studies:
7178
if not study.full_text_output_dir:
7279
study.full_text_output_dir = output_dir
73-
if all_studies:
74-
for study in all_studies:
75-
if not study.full_text_output_dir:
76-
study.full_text_output_dir = output_dir
80+
for study in all_studies or []:
81+
if not study.full_text_output_dir:
82+
study.full_text_output_dir = output_dir
83+
for study in all_abstract_studies or []:
84+
if not study.full_text_output_dir:
85+
study.full_text_output_dir = output_dir
7786

7887
# Process all analysis-annotation combinations incrementally by study
7988
all_decisions = []
80-
81-
# Process the "all_analyses" annotation for INCLUDED studies
82-
if self.config.create_all_included_annotation:
83-
all_analyses_decisions = self._create_all_analyses_annotations_by_study(
84-
included_studies,
85-
annotation_name="all_analyses",
86-
output_dir=output_dir,
87-
existing_results=existing_cached_results
88-
)
89-
all_decisions.extend(all_analyses_decisions)
90-
91-
# Process the "all_studies" annotation for ALL studies if enabled
92-
if self.config.create_all_from_search_annotation and all_studies:
93-
logger.info(
94-
f"Creating 'all_studies' annotation for "
95-
f"{len(all_studies)} studies (INCLUDED + EXCLUDED)"
96-
)
97-
all_studies_decisions = self._create_all_analyses_annotations_by_study(
98-
all_studies,
99-
annotation_name="all_studies",
100-
output_dir=output_dir,
101-
existing_results=existing_cached_results
102-
)
103-
all_decisions.extend(all_studies_decisions)
89+
90+
# Process system annotations from search/screening/full-text inclusion.
91+
if self.config.create_all_included_annotations:
92+
if all_studies:
93+
logger.info(
94+
f"Creating 'all_studies' annotation for "
95+
f"{len(all_studies)} studies from search"
96+
)
97+
all_studies_decisions = (
98+
self._create_all_analyses_annotations_by_study(
99+
all_studies,
100+
annotation_name="all_studies",
101+
output_dir=output_dir,
102+
existing_results=existing_cached_results
103+
)
104+
)
105+
all_decisions.extend(all_studies_decisions)
106+
107+
if all_abstract_studies:
108+
logger.info(
109+
f"Creating 'all_abstract' annotation for "
110+
f"{len(all_abstract_studies)} "
111+
"abstract-screened studies"
112+
)
113+
all_abstract_decisions = (
114+
self._create_all_analyses_annotations_by_study(
115+
all_abstract_studies,
116+
annotation_name="all_abstract",
117+
output_dir=output_dir,
118+
existing_results=existing_cached_results
119+
)
120+
)
121+
all_decisions.extend(all_abstract_decisions)
122+
123+
if included_studies:
124+
all_analyses_decisions = (
125+
self._create_all_analyses_annotations_by_study(
126+
included_studies,
127+
annotation_name="all_analyses",
128+
output_dir=output_dir,
129+
existing_results=existing_cached_results
130+
)
131+
)
132+
all_decisions.extend(all_analyses_decisions)
104133

105134
# Process custom annotations on INCLUDED studies only
106-
if self.config.annotations:
135+
if self.config.annotations and included_studies:
107136
custom_decisions = self._process_custom_annotations_by_study(
108137
included_studies,
109138
self.config.model,
@@ -691,7 +720,11 @@ def _save_results_by_study(self, new_decisions: List[AnnotationDecision], output
691720
updated_annotation_names = {
692721
decision.annotation_name for decision in study_decisions
693722
}
694-
keep_system_annotations = {"all_analyses", "all_studies"}
723+
keep_system_annotations = {
724+
"all_analyses",
725+
"all_studies",
726+
"all_abstract",
727+
}
695728
has_custom_updates = any(
696729
name not in keep_system_annotations
697730
for name in updated_annotation_names

autonima/annotation/schema.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ class AnnotationCriteriaConfig(BaseModel):
1919
class AnnotationConfig(BaseModel):
2020
"""Configuration for the annotation phase."""
2121
model: str = "gpt-4o-mini"
22-
# Create "all_analyses" annotation with all analyses from INCLUDED studies
23-
create_all_included_annotation: bool = True
24-
# Create "all_studies" annotation from INCLUDED and EXCLUDED studies
25-
create_all_from_search_annotation: bool = False
22+
# Create system annotations:
23+
# - "all_studies" (all studies with parsed analyses)
24+
# - "all_abstract" (studies included after abstract screening)
25+
# - "all_analyses" (studies included after full-text screening)
26+
create_all_included_annotations: bool = True
2627
annotations: List[AnnotationCriteriaConfig] = []
2728
enabled: bool = True
2829
# Options: "single_analysis" (per-analysis) or

autonima/config.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -345,21 +345,32 @@ def _load_annotation_config(
345345
for criteria_dict in annotation_dict['annotations']:
346346
criteria = AnnotationCriteriaConfig(**criteria_dict)
347347
annotations.append(criteria)
348+
349+
legacy_keys = [
350+
key for key in (
351+
'create_all_included_annotation',
352+
'create_all_studies_annotations',
353+
'create_all_from_search_annotation',
354+
)
355+
if key in annotation_dict
356+
]
357+
if legacy_keys:
358+
legacy_list = ", ".join(legacy_keys)
359+
raise ConfigurationError(
360+
"Deprecated annotation config key(s): "
361+
f"{legacy_list}. Use "
362+
"'create_all_included_annotations' only."
363+
)
348364

349365
# Get annotation configuration values
350-
create_all_included = annotation_dict.get(
351-
'create_all_included_annotation', True
352-
)
353-
354-
create_all_from_search = annotation_dict.get(
355-
'create_all_from_search_annotation', False
366+
create_all_included_annotations = annotation_dict.get(
367+
'create_all_included_annotations', True
356368
)
357369

358370
# Create annotation config
359371
annotation_config = AnnotationConfig(
360372
model=annotation_dict.get('model', 'gpt-4o-mini'),
361-
create_all_included_annotation=create_all_included,
362-
create_all_from_search_annotation=create_all_from_search,
373+
create_all_included_annotations=create_all_included_annotations,
363374
annotations=annotations,
364375
enabled=annotation_dict.get('enabled', True),
365376
prompt_type=annotation_dict.get('prompt_type', 'single_analysis'),

autonima/models/types.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -383,11 +383,8 @@ def serialize_screening_dict(
383383
},
384384
"annotation": {
385385
"model": self.annotation.model,
386-
"create_all_included_annotation": (
387-
self.annotation.create_all_included_annotation
388-
),
389-
"create_all_from_search_annotation": (
390-
self.annotation.create_all_from_search_annotation
386+
"create_all_included_annotations": (
387+
self.annotation.create_all_included_annotations
391388
),
392389
"metadata_fields": self.annotation.metadata_fields,
393390
"annotations": [

autonima/pipeline.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,19 @@ async def _execute_retrieval_phase(self):
434434
# Save intermediary results
435435
output_dir = Path(self.config.output.directory)
436436
retrieval_results_file = output_dir / "outputs" / "fulltext_retrieval_results.json"
437+
438+
def _study_has_coordinates(study) -> bool:
439+
"""Return True when at least one valid coordinate point is present."""
440+
for analysis in study.analyses or []:
441+
for point in getattr(analysis, "points", []) or []:
442+
coordinates = getattr(point, "coordinates", None)
443+
if (
444+
isinstance(coordinates, list)
445+
and len(coordinates) == 3
446+
):
447+
return True
448+
return False
449+
437450
retrieval_data = {
438451
"studies_with_fulltext": [
439452
{
@@ -446,7 +459,8 @@ async def _execute_retrieval_phase(self):
446459
),
447460
"status": study.status.value,
448461
"full_text_path": study.full_text_path,
449-
"fulltext_available": study.fulltext_available
462+
"fulltext_available": study.fulltext_available,
463+
"coordinates_found": _study_has_coordinates(study),
450464
}
451465
for study in self.results.studies
452466
if study.fulltext_available or study.pmcid
@@ -608,6 +622,11 @@ async def _execute_coordinate_parsing(self):
608622
"tables_processed": 0,
609623
}
610624
return
625+
626+
logger.info(
627+
f"Starting coordinate parsing for {len(table_jobs)} tables "
628+
f"from {len(studies_with_tables)} studies"
629+
)
611630

612631
# Process tables with or without parallelization
613632
if self.num_workers <= 1 or len(table_jobs) <= 1:
@@ -689,17 +708,27 @@ async def _execute_annotation_phase(self):
689708
if s.status == StudyStatus.INCLUDED_FULLTEXT and s.analyses
690709
]
691710

692-
# Get ALL studies if create_all_from_search_annotation is enabled
711+
# Get studies for system-wide annotations when enabled.
693712
all_studies = None
713+
all_abstract_studies = None
694714
if getattr(
695-
self.config.annotation, 'create_all_from_search_annotation', False
715+
self.config.annotation, 'create_all_included_annotations', True
696716
):
697717
all_studies = [
698718
s for s in self.results.studies
699719
if s.analyses
700720
]
721+
# "all_abstract" includes studies not excluded at abstract stage.
722+
all_abstract_studies = [
723+
s for s in all_studies
724+
if s.status != StudyStatus.EXCLUDED_ABSTRACT
725+
]
701726

702-
if not included_studies and not all_studies:
727+
if (
728+
not included_studies
729+
and not all_studies
730+
and not all_abstract_studies
731+
):
703732
logger.debug("No studies with parsed analyses found for annotation")
704733
self.results.execution_stats["annotation"] = {
705734
"enabled": True,
@@ -766,6 +795,7 @@ async def _execute_annotation_phase(self):
766795
annotation_results = processor.process_studies(
767796
included_studies=included_studies,
768797
all_studies=all_studies,
798+
all_abstract_studies=all_abstract_studies,
769799
output_dir=self.config.output.directory
770800
)
771801

autonima/templates/sample_config.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,7 @@ annotation:
111111
model: "gpt-4o-mini"
112112
# Options: "single_analysis" or "multi_analysis".
113113
prompt_type: "single_analysis"
114-
create_all_included_annotation: true
115-
create_all_from_search_annotation: false
114+
create_all_included_annotations: true
116115
# metadata_fields:
117116
# - "analysis_name"
118117
# - "analysis_description"

docs/guides/configuration.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,7 @@ Common fields:
205205
- `prompt_type`
206206
Type: string
207207
Values: `single_analysis`, `multi_analysis`
208-
- `create_all_included_annotation`
209-
Type: boolean
210-
- `create_all_from_search_annotation`
208+
- `create_all_included_annotations`
211209
Type: boolean
212210
- `metadata_fields`
213211
Type: list of strings

examples/sample_config.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,7 @@ annotation:
111111
model: "gpt-4o-mini"
112112
# Options: "single_analysis" or "multi_analysis".
113113
prompt_type: "single_analysis"
114-
create_all_included_annotation: true
115-
create_all_from_search_annotation: false
114+
create_all_included_annotations: true
116115
# metadata_fields:
117116
# - "analysis_name"
118117
# - "analysis_description"

0 commit comments

Comments
 (0)