neurostuff
diff --git a/autonima/annotation/processor.py b/autonima/annotation/processor.py
Lines changed: 71 additions & 27 deletions
diff --git a/autonima/annotation/prompts.py b/autonima/annotation/prompts.py
Lines changed: 48 additions & 18 deletions
diff --git a/autonima/annotation/schema.py b/autonima/annotation/schema.py
Lines changed: 17 additions & 5 deletions
@@ -28,12 +28,19 @@ def __init__(self, config: AnnotationConfig):
         self.client = AnnotationClient()
         self.annotation_results: List[AnnotationDecision] = []
 
-    def process_studies(self, studies: List[Study], output_dir: str) -> List[AnnotationDecision]:
+    def process_studies(
+        self,
+        included_studies: List[Study],
+        all_studies: List[Study] = None,
+        output_dir: str = None
+    ) -> List[AnnotationDecision]:
         """
-        Process all studies and annotate their analyses.
+        Process studies and annotate their analyses.
         
         Args:
-            studies: List of studies with parsed analyses
+            included_studies: List of INCLUDED studies with parsed analyses
+            all_studies: Optional list of ALL studies (INCLUDED + EXCLUDED)
+                        with parsed analyses for the all_studies annotation
             output_dir: Output directory for caching results
             
         Returns:
@@ -44,33 +51,53 @@ def process_studies(self, studies: List[Study], output_dir: str) -> List[Annotat
         if cached_results:
             # Check if cached results are still valid
             if self._are_cached_results_valid(cached_results):
-                logger.info(f"Loaded {len(cached_results)} cached annotation results")
+                logger.info(
+                    f"Loaded {len(cached_results)} cached annotation "
+                    "results"
+                )
                 self.annotation_results = cached_results
                 return cached_results
             else:
-                logger.info("Cached results are outdated, processing fresh annotations")
-        
-        # Filter studies to only those that are included and have analyses
-        included_studies = [
-            study for study in studies
-            if study.status == StudyStatus.INCLUDED and study.analyses
-        ]
+                logger.info(
+                    "Cached results are outdated, processing fresh "
+                    "annotations"
+                )
 
         if not included_studies:
-            logger.info("No studies with analyses found for annotation")
+            logger.info(
+                "No INCLUDED studies with analyses found for annotation"
+            )
             return []
 
-        logger.info(f"Processing {len(included_studies)} studies with analyses for annotation")
+        logger.info(
+            f"Processing {len(included_studies)} INCLUDED studies with "
+            "analyses for annotation"
+        )
 
         # Process all analysis-annotation combinations
         all_decisions = []
 
-        # Process the "all_analyses" annotation if enabled
-        if self.config.include_all_analyses:
-            all_analyses_decisions = self._create_all_analyses_annotations(included_studies)
+        # Process the "all_analyses" annotation for INCLUDED studies
+        if self.config.create_all_included_annotation:
+            all_analyses_decisions = self._create_all_analyses_annotations(
+                included_studies,
+                annotation_name="all_analyses"
+            )
             all_decisions.extend(all_analyses_decisions)
 
-        # Process custom annotations if any are defined
+        # Process the "all_studies" annotation for ALL studies if enabled
+        if self.config.create_all_from_search_annotation and all_studies:
+            logger.info(
+                f"Creating 'all_studies' annotation for "
+                f"{len(all_studies)} studies (INCLUDED + EXCLUDED)"
+            )
+            all_studies_decisions = self._create_all_analyses_annotations(
+                all_studies,
+                annotation_name="all_studies"
+            )
+            all_decisions.extend(all_studies_decisions)
+        
+        # Process custom annotations on INCLUDED studies only
         if self.config.annotations:
             custom_decisions = self._process_custom_annotations(
                 included_studies,
@@ -84,12 +111,17 @@ def process_studies(self, studies: List[Study], output_dir: str) -> List[Annotat
 
         return all_decisions
 
-    def _create_all_analyses_annotations(self, studies: List[Study]) -> List[AnnotationDecision]:
+    def _create_all_analyses_annotations(
+        self,
+        studies: List[Study],
+        annotation_name: str = "all_analyses"
+    ) -> List[AnnotationDecision]:
         """
-        Create annotation decisions for the "all_analyses" annotation.
+        Create annotation decisions for a default annotation.
         
         Args:
-            studies: List of included studies with analyses
+            studies: List of studies with analyses
+            annotation_name: Name of the annotation to create
             
         Returns:
             List of annotation decisions (all marked as included)
@@ -101,18 +133,21 @@ def _create_all_analyses_annotations(self, studies: List[Study]) -> List[Annotat
                 # Create a unique analysis ID
                 analysis_id = f"{study.pmid}_analysis_{i}"
 
-                # Create decision for all_analyses annotation
+                # Create decision for the annotation
                 decision = AnnotationDecision(
-                    annotation_name="all_analyses",
+                    annotation_name=annotation_name,
                     analysis_id=analysis_id,
                     study_id=study.pmid,
                     include=True,
-                    reasoning="All analyses included by default",
+                    reasoning=f"All analyses included in '{annotation_name}'",
                     model_used="none"
                 )
                 decisions.append(decision)
 
-        logger.info(f"Created {len(decisions)} decisions for 'all_analyses' annotation")
+        logger.info(
+            f"Created {len(decisions)} decisions for '{annotation_name}' "
+            "annotation"
+        )
         return decisions
 
     def _process_custom_annotations(self, studies: List[Study], model: str) -> List[AnnotationDecision]:
@@ -307,15 +342,24 @@ def _are_cached_results_valid(self, cached_results: List[AnnotationDecision]) ->
             # Check if we have the right number of annotations
             # (all_analyses + custom annotations)
             expected_annotation_count = 0
-            if self.config.include_all_analyses:
+            if self.config.create_all_included_annotation:
+                expected_annotation_count += 1
+            if self.config.create_all_from_search_annotation:
                 expected_annotation_count += 1
             expected_annotation_count += len(self.config.annotations)
 
             # Get unique annotation names from cached results
-            cached_annotation_names = set(result.annotation_name for result in cached_results)
+            cached_annotation_names = set(
+                result.annotation_name for result in cached_results
+            )
 
             # Check if we have the expected annotations
-            if self.config.include_all_analyses and "all_analyses" not in cached_annotation_names:
+            if (self.config.create_all_included_annotation and
+                    "all_analyses" not in cached_annotation_names):
+                return False
+            
+            if (self.config.create_all_from_search_annotation and
+                    "all_studies" not in cached_annotation_names):
                 return False
 
             # Check if we have all custom annotations
 
@@ -1,6 +1,6 @@
 """Prompt templates for annotation decisions."""
 
-from typing import List, Optional
+from typing import List
 from .schema import AnalysisMetadata, AnnotationCriteriaConfig
 
 
@@ -10,12 +10,15 @@ def create_annotation_prompt(
     metadata_fields: List[str] = None
 ) -> str:
     """
-    Create a prompt for the LLM to decide if an analysis should be included in an annotation.
+    Create a prompt for the LLM to decide if an analysis should be included 
+    in an annotation.
     
     Args:
         metadata: Analysis metadata to include in the prompt
         criteria: Annotation criteria configuration
+        metadata_fields: List of metadata fields to include
         
+    Returns:
         Formatted prompt string
     """
     # Use the provided metadata_fields or fall back to criteria.metadata_fields
@@ -48,31 +51,50 @@ def create_annotation_prompt(
         metadata_lines.append(f"- Study Abstract: {metadata.study_abstract}")
 
     if "study_authors" in fields_to_use and metadata.study_authors:
-        metadata_lines.append(f"- Study Authors: {', '.join(metadata.study_authors)}")
+        authors = ', '.join(metadata.study_authors)
+        metadata_lines.append(f"- Study Authors: {authors}")
 
     if "study_journal" in fields_to_use and metadata.study_journal:
         metadata_lines.append(f"- Study Journal: {metadata.study_journal}")
 
-    if "study_publication_date" in fields_to_use and metadata.study_publication_date:
-        metadata_lines.append(f"- Study Publication Date: {metadata.study_publication_date}")
+    if ("study_publication_date" in fields_to_use and 
+            metadata.study_publication_date):
+        date_str = metadata.study_publication_date
+        metadata_lines.append(f"- Study Publication Date: {date_str}")
 
     # Add any custom fields
     for field_name, field_value in metadata.custom_fields.items():
         if field_name in fields_to_use and field_value:
-            metadata_lines.append(f"- {field_name.replace('_', ' ').title()}: {field_value}")
+            formatted_name = field_name.replace('_', ' ').title()
+            metadata_lines.append(f"- {formatted_name}: {field_value}")
 
-    # Format inclusion and exclusion criteria
-    inclusion_criteria_text = "\n".join([f"  - {c}" for c in criteria.inclusion_criteria])
-    exclusion_criteria_text = "\n".join([f"  - {c}" for c in criteria.exclusion_criteria])
+    # Format criteria with IDs if mapping is provided
+    if criteria.criteria_mapping:
+        inclusion_items = criteria.criteria_mapping.get('inclusion', {}).items()
+        inclusion_list = [f"{id}: {text}" for id, text in inclusion_items]
+        inclusion_text = "\n".join(inclusion_list)
+        
+        exclusion_items = criteria.criteria_mapping.get('exclusion', {}).items()
+        exclusion_list = [f"{id}: {text}" for id, text in exclusion_items]
+        exclusion_text = "\n".join(exclusion_list)
+    else:
+        inclusion_list = [f"  - {c}" for c in criteria.inclusion_criteria]
+        inclusion_text = "\n".join(inclusion_list)
+        exclusion_list = [f"  - {c}" for c in criteria.exclusion_criteria]
+        exclusion_text = "\n".join(exclusion_list)
 
     # Create the prompt
     prompt = f"""
-You are a neuroimaging meta-analysis expert evaluating whether an analysis meets specific inclusion criteria.
+You are a neuroimaging meta-analysis expert evaluating whether an analysis meets 
+specific inclusion criteria.
 
-The following analysis has been extracted from within a table of a published fMRI/neuroimaging article.
-You will be provided with metadata about the analysis, the table it was extracted from, and the study it belongs to.
-Note that since each table may have contained multiple analyses, the table caption may describe multiple analyses that are not relevant to this specific analysis.
-As such, while taking into account the table caption, please focus primarily on the analysis name and description for your decision.
+The following analysis has been extracted from within a table of a published 
+fMRI/neuroimaging article. You will be provided with metadata about the 
+analysis, the table it was extracted from, and the study it belongs to. Note 
+that since each table may have contained multiple analyses, the table caption 
+may describe multiple analyses that are not relevant to this specific analysis.
+As such, while taking into account the table caption, please focus primarily on
+the analysis name and description for your decision.
 
 STUDY CONTEXT:
 {chr(10).join(metadata_lines) if metadata_lines else "No metadata available"}
@@ -81,17 +103,25 @@ def create_annotation_prompt(
 Description: {criteria.description or "No description provided"}
 
 INCLUSION CRITERIA:
-{inclusion_criteria_text or "No inclusion criteria specified"}
+{inclusion_text or "No inclusion criteria specified"}
 
 EXCLUSION CRITERIA:
-{exclusion_criteria_text or "No exclusion criteria specified"}
+{exclusion_text or "No exclusion criteria specified"}
+
+Based on the provided information, should this analysis be included in the 
+"{criteria.name}" annotation?
 
-Based on the provided information, should this analysis be included in the "{criteria.name}" annotation?
+IMPORTANT: In your response, you must specify which specific criteria IDs apply
+to this analysis.
+- For included analyses: List the inclusion criteria IDs that are satisfied
+- For excluded analyses: List the exclusion criteria IDs that apply
 
 Respond with JSON:
 {{
   "include": true/false,
-  "reasoning": "Brief explanation of decision"
+  "reasoning": "Brief explanation of decision",
+  "inclusion_criteria_applied": ["I1", "I2"],
+  "exclusion_criteria_applied": []
 }}
 """
 
 
@@ -2,7 +2,6 @@
 
 from pydantic import BaseModel, field_validator
 from typing import List, Optional, Dict, Any
-from typing_extensions import Literal
 from datetime import datetime
 
 
@@ -13,13 +12,18 @@ class AnnotationCriteriaConfig(BaseModel):
     inclusion_criteria: List[str] = []
     exclusion_criteria: List[str] = []
     metadata_fields: List[str] = []
-
+    
+    # NEW: Store criteria mappings
+    criteria_mapping: Optional[Dict[str, Dict[str, str]]] = None
 
 
 class AnnotationConfig(BaseModel):
     """Configuration for the annotation phase."""
     model: str = "gpt-4o-mini"
-    include_all_analyses: bool = True
+    # Create "all_analyses" annotation with all analyses from INCLUDED studies
+    create_all_included_annotation: bool = True
+    # Create "all_studies" annotation from INCLUDED and EXCLUDED studies
+    create_all_from_search_annotation: bool = False
     annotations: List[AnnotationCriteriaConfig] = []
     enabled: bool = True
     metadata_fields: List[str] = [
@@ -42,6 +46,10 @@ class AnnotationDecision(BaseModel):
     confidence: Optional[float] = None
     model_used: str
     timestamp: datetime = datetime.now()
+    
+    # NEW: Track which criteria were applied
+    inclusion_criteria_applied: List[str] = []
+    exclusion_criteria_applied: List[str] = []
 
 
 class AnalysisMetadata(BaseModel):
@@ -60,10 +68,14 @@ class AnalysisMetadata(BaseModel):
     # Add any other fields as needed
     custom_fields: Dict[str, Any] = {}
 
-    @field_validator('analysis_name', 'analysis_description', 'table_caption', 'table_footer', 'study_title', 'study_abstract', 'study_journal', 'study_publication_date', mode='before')
+    @field_validator('analysis_name', 'analysis_description', 'table_caption', 
+                     'table_footer', 'study_title', 'study_abstract', 
+                     'study_journal', 'study_publication_date', mode='before')
     @classmethod
     def validate_string_fields(cls, v):
-        """Validate that string fields are properly formatted and handle nan values."""
+        """
+        Validate string fields and handle nan values.
+        """
         if v is None:
             return None
         # Handle nan values (both float nan and string 'nan')