Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/P6/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def _write_phenopackets(
for phenotype in patient_data["phenotype_records"]:
feature = phenopacket.phenotypic_features.add()
feature.type.id = phenotype.HPO_ID

# mark as excluded if status is False
if not phenotype.status:
feature.excluded = True
Expand Down
39 changes: 39 additions & 0 deletions src/P6/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
import pandas as pd
import re
import typing
from collections import defaultdict

from phenopackets.schema.v2.phenopackets_pb2 import Phenopacket
import phenopackets
from stairval.notepad import Notepad

from .genotype import Genotype
Expand Down Expand Up @@ -168,9 +170,39 @@ def _map_genotype(
notepad.add_error(f"Sheet {sheet_name!r}, row {idx}: {e}")
return records



def create_phenotypic_feature(
    self,
    hpo_id: str,
    hpo_label: str,
    status: str
) -> phenopackets.PhenotypicFeature:
    """
    Create a PhenotypicFeature from input strings.

    Args:
        hpo_id: HPO term identifier; it must be contained in ``self._hpo``.
        hpo_label: Label copied verbatim into the feature's ``type.label``.
        status: One of ``"e"``, ``"o"`` or ``"na"``. Only ``"e"`` marks the
            feature as excluded; the other two leave ``excluded`` unset.

    Returns:
        A PhenotypicFeature with ``type.id`` and ``type.label`` populated.

    Raises:
        ValueError: If ``hpo_id`` is not found in ``self._hpo`` or if
            ``status`` is not one of the allowed entries.
    """
    if hpo_id not in self._hpo:
        raise ValueError(f"Could not find HPO Term Id {hpo_id} in HPO")

    # Validate the status before building the message so that bad input is
    # rejected without allocating a feature that would be thrown away.
    # TODO check input: the comparison is exact (case- and whitespace-
    # sensitive), so callers must pass the status already normalised.
    allowable_status_entries = {"e", "o", "na"}
    if status not in allowable_status_entries:
        raise ValueError(f"Unrecognized status {status}")

    # NOTE(review): hpo_label is stored as given and is not compared with the
    # label held by the ontology for hpo_id - confirm whether it should be.
    pf = phenopackets.PhenotypicFeature()
    pf.type.id = hpo_id
    pf.type.label = hpo_label
    if status == "e":
        pf.excluded = True

    return pf


def _map_phenotype(
self, sheet_name: str, df: pd.DataFrame, notepad: Notepad
) -> list[Phenotype]:
phenotypic_feature_d = defaultdict(list) # key: Patient id, value: List of phenotypic features
records: list[Phenotype] = []
# Collect every HPO ID in this sheet, so we can validate propagation later:
all_ids: list[hpotk.TermId] = []
Expand Down Expand Up @@ -220,10 +252,17 @@ def _map_phenotype(
Phenotype(
phenotype_patient_ID=str(row["phenotype_patient_ID"]),
HPO_ID=curie,
hpo_label=raw_label,
date_of_observation=date_str,
status=bool(row["status"]),
)
)
patient_id = row["phenotype_patient_ID"]
try:
pf = self.create_phenotypic_feature(hpo_id=curie, hpo_label=raw_label, status=row["status"])
phenotypic_feature_d[patient_id].append(pf)
except Exception as exc:
print(f"Could not parse line {exc}")

# 3) The IDs must exist in the ontology:
# Validate ID against ontology
Expand Down
1 change: 1 addition & 0 deletions src/P6/phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Phenotype:

phenotype_patient_ID: str
HPO_ID: str
hpo_label: str
date_of_observation: str
status: bool

Expand Down
Loading