From c811c56ce0765e0585ce0847a037715366fe24eb Mon Sep 17 00:00:00 2001 From: Pedro Assis Date: Mon, 15 Jun 2026 13:34:20 -1000 Subject: [PATCH] reading simple_sample_summaries from AnalysisSet obj --- data/adapters/file_fileset_adapter.py | 46 ++------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/data/adapters/file_fileset_adapter.py b/data/adapters/file_fileset_adapter.py index 40778e80f..1ac59b354 100644 --- a/data/adapters/file_fileset_adapter.py +++ b/data/adapters/file_fileset_adapter.py @@ -488,49 +488,6 @@ def parse_sample_donor_treatment_igvf( treatment_object['treatment_term_id']) # Add support for treatment vs. untreated analyses later - # special case STARR-seq for inclusion of 1000 Genomes donors in the simple sample summary - if method == 'STARR-seq': - thousand_genomes_ids = set() - construct_library_set_accessions = set() - integrated_content_files_accessions = set() - curated_set_accessions = set() - donors_accessions = set() - - for construct_library_set in sample_object.get('construct_library_sets', []): - construct_library_set_accessions.add( - construct_library_set['accession']) - construct_library_set_objects = FileFileSet.get_batch_objects( - list(construct_library_set_accessions), ['integrated_content_files'], api_url=FileFileSet.IGVF_API) - - for construct_library_set_object in construct_library_set_objects: - integrated_content_files = construct_library_set_object.get( - 'integrated_content_files', []) - for integrated_content_file in integrated_content_files: - integrated_content_files_accessions.add( - integrated_content_file['accession']) - integrated_content_files_objects = FileFileSet.get_batch_objects( - list(integrated_content_files_accessions), ['file_set'], api_url=FileFileSet.IGVF_API) - for integrated_content_file_object in integrated_content_files_objects: - curated_set = integrated_content_file_object['file_set'] - curated_set_accessions.add(curated_set['accession']) - curated_sets_objects = FileFileSet.get_batch_objects( - list(curated_set_accessions), ['donors'], api_url=FileFileSet.IGVF_API) - for curated_set_object in curated_sets_objects: - for donor in curated_set_object.get('donors', []): - donors_accessions.add(donor['accession']) - donors_objects = FileFileSet.get_batch_objects( - list(donors_accessions), ['dbxrefs'], api_url=FileFileSet.IGVF_API) - for donor_object in donors_objects: - dbxrefs = donor_object.get('dbxrefs', []) - for dbxref in dbxrefs: - if dbxref.startswith('IGSR'): - thousand_genomes_id = dbxref.split(':')[1] - thousand_genomes_ids.add(thousand_genomes_id) - if thousand_genomes_ids: - thousand_genomes_ids = ', '.join( - sorted(thousand_genomes_ids)) - simple_sample_summary = f'{simple_sample_summary} with variants from 1000 Genomes donors: {thousand_genomes_ids}' - simple_sample_summaries.add(simple_sample_summary) if len(crispr_modalities) > 1: raise ValueError( @@ -808,6 +765,9 @@ def query_fileset_files_props_igvf(file_object): samples, method) + if fileset_object_type == 'AnalysisSet': + simple_sample_summaries = [fileset_object['sample_summary']] + sample_term_ids = [sample_term_id.replace( ':', '_') for sample_term_id in sample_term_ids]