# Columns that must all be present for a sheet to be treated as a phenotype
# sheet (checked with ``issubset`` against the sheet's column names).
PHENOTYPE_KEY_COLUMNS = {"hpo_id", "date_of_observation", "status"}

# Key columns to identify additional sheets
DISEASE_KEY_COLUMNS = {"disease_term", "disease_onset"}
MEASUREMENT_KEY_COLUMNS = {
    "measurement_type",
    "measurement_value",
    "measurement_unit",
}
BIOSAMPLE_KEY_COLUMNS = {"biosample_id", "biosample_type", "collection_date"}
5757
5858
5959# Map raw zygosity abbreviations to allowed dataclass zygosity values
@@ -110,34 +110,37 @@ def apply_mapping(
110110 list [DiseaseRecord ],
111111 list [MeasurementRecord ],
112112 list [BiosampleRecord ],
113- ]:
113+ ]:
114114 """
115115 1) classify each sheet as genotype / phenotype / disease / measurement / biosample
116116 2) call the matching mapper
117117 """
118118 # initialize the lists to return
119- genotype_records : list [Genotype ] = []
120- phenotype_records : list [Phenotype ] = []
121- disease_records : list [DiseaseRecord ] = []
122- measurement_records : list [MeasurementRecord ] = []
123- biosample_records : list [BiosampleRecord ] = []
124-
119+ genotype_records : list [Genotype ] = []
120+ phenotype_records : list [Phenotype ] = []
121+ disease_records : list [DiseaseRecord ] = []
122+ measurement_records : list [MeasurementRecord ] = []
123+ biosample_records : list [BiosampleRecord ] = []
125124
126125 for sheet_name , df in tables .items ():
127126 columns = set (df .columns )
128127 """ 1) classify: does this look like genotype, phenotype, or something to skip? """
129128 has_raw = RAW_VARIANT_COLUMNS .issubset (columns )
130129 has_hgvs = bool (HGVS_VARIANT_COLUMNS & columns )
131130 """Send each sheet to the right extractor and collect all records."""
132- is_genotype_sheet = GENOTYPE_BASE_COLUMNS .issubset (columns ) and (has_raw or has_hgvs )
131+ is_genotype_sheet = GENOTYPE_BASE_COLUMNS .issubset (columns ) and (
132+ has_raw or has_hgvs
133+ )
133134 is_phenotype_sheet = PHENOTYPE_KEY_COLUMNS .issubset (columns )
134135
135136 if is_genotype_sheet == is_phenotype_sheet :
136137 # if we have both raw & HGVS notations, we need to validate that they match
137138 if has_raw and has_hgvs :
138139 self ._check_hgvs_consistency (sheet_name , df , notepad )
139140 # ambiguous sheet should give a warning instead of an error
140- notepad .add_warning (f"Skipping { sheet_name !r} : cannot unambiguously classify as genotype or phenotype" )
141+ notepad .add_warning (
142+ f"Skipping { sheet_name !r} : cannot unambiguously classify as genotype or phenotype"
143+ )
141144 continue
142145
143146 # rename the former-index column
@@ -157,10 +160,14 @@ def apply_mapping(
157160 disease_records .extend (self ._map_disease (sheet_name , working , notepad ))
158161 continue
159162 if MEASUREMENT_KEY_COLUMNS .issubset (columns ):
160- measurement_records .extend (self ._map_measurement (sheet_name , working , notepad ))
163+ measurement_records .extend (
164+ self ._map_measurement (sheet_name , working , notepad )
165+ )
161166 continue
162167 if BIOSAMPLE_KEY_COLUMNS .issubset (columns ):
163- biosample_records .extend (self ._map_biosample (sheet_name , working , notepad ))
168+ biosample_records .extend (
169+ self ._map_biosample (sheet_name , working , notepad )
170+ )
164171 continue
165172
166173 return (
@@ -376,26 +383,30 @@ def _map_phenotype(
376383
377384 return records
378385
379- def _map_disease (self , sheet_name : str , df : pd .DataFrame , notepad : Notepad ) -> list [DiseaseRecord ]:
386+ def _map_disease (
387+ self , sheet_name : str , df : pd .DataFrame , notepad : Notepad
388+ ) -> list [DiseaseRecord ]:
389+ # TODO: implement row→DiseaseRecord, row→MeasurementRecord conversion, and row→BiosampleRecord conversions
380390 """
381391 Map each row in a disease sheet to a DiseaseRecord.
382392 """
383- records : list [DiseaseRecord ] = []
384- # TODO: implement row→DiseaseRecord conversion
393+ # TODO: fix as this is not in use now: records: list[DiseaseRecord] = []
385394 raise NotImplementedError
386395
387- def _map_measurement (self , sheet_name : str , df : pd .DataFrame , notepad : Notepad ) -> list [MeasurementRecord ]:
396+ def _map_measurement (
397+ self , sheet_name : str , df : pd .DataFrame , notepad : Notepad
398+ ) -> list [MeasurementRecord ]:
388399 """
389400 Map each row in a measurement sheet to a MeasurementRecord.
390401 """
391- records : list [MeasurementRecord ] = []
392- # TODO: implement row→MeasurementRecord conversion
402+ # TODO: fix as this is not in use now: records: list[MeasurementRecord] = []
393403 raise NotImplementedError
394404
395- def _map_biosample (self , sheet_name : str , df : pd .DataFrame , notepad : Notepad ) -> list [BiosampleRecord ]:
405+ def _map_biosample (
406+ self , sheet_name : str , df : pd .DataFrame , notepad : Notepad
407+ ) -> list [BiosampleRecord ]:
396408 """
397409 Map each row in a biosample sheet to a BiosampleRecord.
398410 """
399- records : list [BiosampleRecord ] = []
400- # TODO: implement row→BiosampleRecord conversion
411+ # TODO: fix as this is not in use now: records: list[BiosampleRecord] = []
401412 raise NotImplementedError
0 commit comments