Skip to content

Commit e0523c5

Browse files
committed
Add all ids option to config
1 parent a7a4ab3 commit e0523c5

8 files changed

Lines changed: 232 additions & 233 deletions

File tree

autonima/config.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,14 @@ def _validate_config(self, config: PipelineConfig) -> None:
134134
ConfigurationError: If configuration is invalid
135135
"""
136136
# Validate search configuration
137-
if not config.search.query.strip():
138-
raise ConfigurationError("Search query cannot be empty")
137+
if config.search.pmids_file or config.search.pmids_list:
138+
# PMID-based search
139+
if config.search.query.strip():
140+
raise ConfigurationError("Cannot specify both search query and PMIDs list/file")
141+
else:
142+
# Query-based search
143+
if not config.search.query.strip():
144+
raise ConfigurationError("Search query cannot be empty when not using PMIDs list/file")
139145

140146
if config.search.max_results <= 0:
141147
raise ConfigurationError("max_results must be positive")

autonima/coordinates/processor.py

Lines changed: 13 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -6,74 +6,25 @@
66
from typing import List
77

88
from .openai_client import CoordinateParsingClient
9-
from .schema import Analysis
109

1110
logger = logging.getLogger(__name__)
1211

1312

1413
class CoordinateProcessor:
1514
"""Processor for parsing coordinates from activation tables."""
1615

17-
def __init__(self, model: str = "gpt-4o-mini"):
16+
def __init__(self, model: str = "gpt-4o-mini",
17+
path_preference: List[str] = ['table_raw_path', 'table_data_path']):
1818
"""
1919
Initialize the coordinate processor.
2020
2121
Args:
2222
model: The model to use for parsing
2323
"""
2424
self.model = model
25+
self.path_preference = path_preference
2526
self.client = CoordinateParsingClient()
26-
27-
def process_study(self, study):
28-
"""
29-
Process all activation tables for a study and extract analyses.
30-
31-
Args:
32-
study: The study to process
33-
34-
Returns:
35-
List of analyses extracted from the study's tables
36-
"""
37-
# Import locally to avoid circular imports
38-
from autonima.models.types import Study, ActivationTable
39-
40-
if not study.activation_tables:
41-
return []
42-
43-
all_analyses = []
44-
45-
for table in study.activation_tables:
46-
try:
47-
# Load the table data
48-
table_path = Path(table.table_path)
49-
if not table_path.exists():
50-
logger.warning(f"Table file not found: {table_path}")
51-
continue
52-
53-
# Read the table as text
54-
with open(table_path, "r", encoding="utf-8") as f:
55-
reader = csv.reader(f)
56-
rows = list(reader)
57-
table_text = "\n".join([",".join(r) for r in rows])
58-
59-
# Create a prompt for the table
60-
prompt = self._create_table_prompt(
61-
table_text,
62-
table_caption=table.table_caption or "",
63-
table_foot=table.table_foot or ""
64-
)
65-
66-
# Parse the table
67-
result = self.client.parse_analyses(prompt, model=self.model)
68-
69-
# Add the analyses to our list
70-
all_analyses.extend(result.analyses)
71-
72-
except Exception as e:
73-
logger.warning(f"Error processing table {table.table_path}: {e}")
74-
continue
75-
76-
return all_analyses
27+
7728

7829
def process_single_table(self, table):
7930
"""
@@ -87,7 +38,15 @@ def process_single_table(self, table):
8738
"""
8839
try:
8940
# Load the table data
90-
table_path = Path(table.table_path)
41+
for path_attr in self.path_preference:
42+
table_path_value = getattr(table, path_attr, None)
43+
if table_path_value:
44+
table_path = Path(table_path_value)
45+
break
46+
else:
47+
logger.warning(f"No valid table path found for table: {table.table_id}")
48+
return []
49+
table_path = Path(table_path)
9150
if not table_path.exists():
9251
logger.warning(f"Table file not found: {table_path}")
9352
return []
@@ -107,7 +66,6 @@ def process_single_table(self, table):
10766

10867
# Parse the table
10968
result = self.client.parse_analyses(prompt, model=self.model)
110-
11169
return result.analyses
11270

11371
except Exception as e:

autonima/models/types.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ class ActivationTable:
2525
"""Represents a table containing activation coordinates from a study."""
2626
table_id: str # New identifier for the table
2727
table_label: str # Label or identifier for the table
28-
table_path: str # Path to the raw table file (HTML/CSV/etc)
2928
table_caption: Optional[str] = None # Caption of the table
3029
table_foot: Optional[str] = None # Footer of the table
3130
table_data_path: Optional[str] = None # Path to processed table data file
31+
table_raw_path: Optional[str] = None # Path to raw table data file
3232

3333

3434
@dataclass
@@ -86,10 +86,10 @@ def to_dict(self) -> Dict[str, Any]:
8686
{
8787
"table_id": table.table_id, # Added table_id
8888
"table_label": table.table_label,
89-
"table_path": table.table_path,
9089
"table_caption": table.table_caption,
9190
"table_foot": table.table_foot,
92-
"table_data_path": table.table_data_path
91+
"table_data_path": table.table_data_path,
92+
"table_raw_path": table.table_raw_path
9393
} for table in self.activation_tables
9494
],
9595
"analyses": [
@@ -145,6 +145,8 @@ class SearchConfig:
145145
date_from: Optional[str] = None
146146
date_to: Optional[str] = None
147147
email: Optional[str] = None # Required for NCBI API
148+
pmids_file: Optional[str] = None # Path to file with PMIDs (one per line)
149+
pmids_list: Optional[List[str]] = None # Direct list of PMIDs
148150

149151

150152
@dataclass
@@ -201,7 +203,6 @@ class OutputConfig:
201203
nimads: bool = False
202204

203205

204-
205206
@dataclass
206207
class PipelineConfig:
207208
"""Main configuration for the Autonima pipeline."""
@@ -222,6 +223,8 @@ def to_dict(self) -> Dict[str, Any]:
222223
"date_from": self.search.date_from,
223224
"date_to": self.search.date_to,
224225
"email": self.search.email,
226+
"pmids_file": self.search.pmids_file,
227+
"pmids_list": self.search.pmids_list,
225228
},
226229
"screening": {
227230
"abstract": self.screening.abstract,

0 commit comments

Comments
 (0)