33import logging
44import json
55from pathlib import Path
6- from typing import List , Set
6+ from typing import List , Set , Optional
77from concurrent .futures import ThreadPoolExecutor , as_completed
88from tqdm import tqdm
99
@@ -252,7 +252,7 @@ def _process_custom_annotations_by_study(self, studies: List[Study], model: str,
252252 for study , annotations_to_process in tqdm (studies_to_process , desc = "Processing studies" ):
253253 try :
254254 study_decisions = self ._process_single_study_annotations (
255- study , annotations_to_process , model
255+ study , self . config . metadata_fields , annotations_to_process , model
256256 )
257257 decisions .extend (study_decisions )
258258 except Exception as e :
@@ -263,7 +263,7 @@ def _process_custom_annotations_by_study(self, studies: List[Study], model: str,
263263 with ThreadPoolExecutor (max_workers = self .num_workers ) as executor :
264264 # Submit all studies for processing
265265 future_to_study = {
266- executor .submit (self ._process_single_study_annotations , study , annotations_to_process , model ): (study , annotations_to_process )
266+ executor .submit (self ._process_single_study_annotations , study , self . config . metadata_fields , annotations_to_process , model ): (study , annotations_to_process )
267267 for study , annotations_to_process in studies_to_process
268268 }
269269
@@ -279,12 +279,13 @@ def _process_custom_annotations_by_study(self, studies: List[Study], model: str,
279279
280280 return decisions
281281
282- def _process_single_study_annotations (self , study : Study , annotations_to_process : List [AnnotationCriteriaConfig ], model : str ) -> List [AnnotationDecision ]:
282+ def _process_single_study_annotations (self , study : Study , metadata_fields : List [ str ], annotations_to_process : List [AnnotationCriteriaConfig ], model : str ) -> List [AnnotationDecision ]:
283283 """
284284 Process annotations for a single study.
285285
286286 Args:
287287 study: Study to process
288+ metadata_fields: List of metadata fields to extract
288289 annotations_to_process: List of annotations to process
289290 model: LLM model to use
290291
@@ -298,8 +299,8 @@ def _process_single_study_annotations(self, study: Study, annotations_to_process
298299 # Create a unique analysis ID
299300 analysis_id = f"{ study .pmid } _analysis_{ i } "
300301
301- # Extract metadata for this analysis
302- metadata = self ._extract_analysis_metadata (study , analysis , analysis_id )
302+ # Extract metadata for the analysis
303+ metadata = self ._extract_analysis_metadata (study , analysis , analysis_id , metadata_fields )
303304
304305 # Make multi-annotation decision for this analysis
305306 analysis_decisions = self .client .make_multi_decision (
@@ -401,42 +402,72 @@ def _process_single_decision(self, metadata: AnalysisMetadata, criteria: Annotat
401402 """
402403 return self .client .make_decision (metadata , criteria , model )
403404
def _extract_analysis_metadata(
    self,
    study: Study,
    analysis: Analysis,
    analysis_id: str,
    metadata_fields: Optional[List[str]] = None,
) -> AnalysisMetadata:
    """
    Extract metadata for an analysis from a study.

    ``analysis_id``, ``study_id`` and ``table_id`` are always passed to
    ``AnalysisMetadata``. The remaining fields (analysis name/description,
    table caption/footer, and the study title, abstract, authors, journal,
    publication date and full text) are passed only when requested.

    Args:
        study: Study containing the analysis
        analysis: Analysis to extract metadata for
        analysis_id: Unique ID for the analysis
        metadata_fields: Optional list of fields to include. If None,
            includes all fields. Names that do not match a known field are
            ignored and reported with a warning.

    Returns:
        Analysis metadata with only the requested fields populated
    """
    # Extract table information if available
    table_id = None
    table_caption = None
    table_footer = None
    if study.activation_tables:
        # Use the first table's information for now
        # In the future, we might want to associate analyses with specific tables
        first_table = study.activation_tables[0]
        table_id = first_table.table_id
        table_caption = first_table.table_caption
        table_footer = first_table.table_foot

    # Fall back to a generated ID when there is no table, or when the
    # first table carries no ID of its own.
    if table_id is None:
        table_id = f"{study.pmid}_default_table"

    # Fields that are always supplied, regardless of `metadata_fields`
    kwargs = {
        "analysis_id": analysis_id,
        "study_id": study.pmid,
        "table_id": table_id,
    }

    # Fields that can be selected through `metadata_fields`
    optional_fields = {
        "analysis_name": analysis.name,
        "analysis_description": analysis.description,
        "table_caption": table_caption,
        "table_footer": table_footer,
        "study_title": study.title,
        "study_abstract": study.abstract,
        "study_authors": study.authors,
        "study_journal": study.journal,
        "study_publication_date": study.publication_date,
        "study_fulltext": study.full_text,
    }

    if metadata_fields is None:
        # No selection given: include every optional field
        kwargs.update(optional_fields)
    else:
        # Surface misspelled or unsupported names instead of dropping them
        # silently; the always-included keys are not treated as unknown.
        unknown = [
            name
            for name in metadata_fields
            if name not in optional_fields and name not in kwargs
        ]
        if unknown:
            logging.getLogger(__name__).warning(
                "Ignoring unknown metadata fields for %s: %s",
                analysis_id,
                ", ".join(map(str, unknown)),
            )
        # Only include requested fields
        kwargs.update(
            {
                name: optional_fields[name]
                for name in metadata_fields
                if name in optional_fields
            }
        )

    # Create the metadata object
    return AnalysisMetadata(**kwargs)
442473
0 commit comments