Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f9c93b9
newline so there is a diff to open a draft PR
VarenyaJ Aug 15, 2025
0abdf7b
Add pyphetools as a package
VarenyaJ Aug 19, 2025
7103b19
feat(genotype): enrich VariationDescriptor with gene, zygosity, and i…
VarenyaJ Aug 19, 2025
3c64805
remove excess comments
VarenyaJ Aug 19, 2025
a2d1cd7
deprecate hpo-toolkit package due to version conflict with pyphetools
VarenyaJ Aug 20, 2025
e25f91f
fix(genotype): remove blanket exception handling in VariationDescript…
VarenyaJ Aug 20, 2025
baac56f
fix(genotype): remove blanket exception handling, add explicit guards…
VarenyaJ Aug 20, 2025
83b5096
further deprecate
VarenyaJ Aug 20, 2025
d827ca6
expand variant validator implementation
VarenyaJ Aug 28, 2025
ec0340e
ruff checks
VarenyaJ Aug 28, 2025
72e61a1
feat: add VariantValidator gene cross-ref lookup utility (vv_lookup.py)
VarenyaJ Aug 28, 2025
612d53e
Improved code readability and maintainability in `genotype.py`
VarenyaJ Aug 28, 2025
008b0a7
feat(vv_lookup): add resilient VariantValidator gene xref helper with…
VarenyaJ Aug 28, 2025
eb1d54f
refactor(genotype): clarify names, narrow exceptions, and add VV xref…
VarenyaJ Aug 28, 2025
170c29b
Ruff checks
VarenyaJ Aug 28, 2025
0a135e8
refactor(genotype): split VariationDescriptor builder into helpers to…
VarenyaJ Aug 28, 2025
782966a
Ruff checks and format
VarenyaJ Aug 28, 2025
df13ab3
Deduplicate g.HGVS expressions in VariationDescriptor; add comments &…
VarenyaJ Aug 28, 2025
4610ea5
change text size in README
VarenyaJ Sep 9, 2025
ed8b678
Merge pull request #18 from VarenyaJ/exp/attempt_pyphetools
VarenyaJ Sep 10, 2025
c054d14
Merge pull request #19 from VarenyaJ/feature/expand-phenopackets
VarenyaJ Sep 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,4 @@ tests/.DS_Store
tests/data/.DS_Store
/tests/data/hp.json
/src/P6/hp.json
/uv.lock
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,13 @@ A simple, extensible CLI for downloading the Human Phenotype Ontology, parsing g
```

2. (Recommended) Create a virtual environment (venv or Conda):
# === Simple Venv setup ===
### === Simple Venv setup ===
```bash
python3 -m venv .venv
source .venv/bin/activate
```
# === or with Conda ===

### === or with Conda ===
```bash
conda env create -f requirements/environment.yml -y
conda activate P6
Expand Down Expand Up @@ -207,3 +208,4 @@ This project is licensed under the AGPL-3.0. See LICENSE for details.
Varenya Jain
varenyajj@gmail.com
GitHub: @VarenyaJ

4 changes: 3 additions & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
click==8.2.1
hpo-toolkit==0.7.0
#hpo-toolkit==0.7.0
hpo-toolkit==0.5.5
pandas==2.3.1
phenopackets==2.0.2.post4
protobuf==3.20.3
openpyxl==3.1.5
requests==2.32.4
stairval==0.2.1
pyphetools==0.9.118
60 changes: 14 additions & 46 deletions src/P6/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from google.protobuf.json_format import MessageToJson
from stairval.notepad import create_notepad
from phenopackets.schema.v2.phenopackets_pb2 import Phenopacket
import phenopackets.schema.v2 as pps2

from .loader import load_sheets_as_tables
from .mapper import DefaultMapper
Expand Down Expand Up @@ -237,6 +236,8 @@ def _locate_hpo_file(hpo_path: typing.Optional[str]) -> pathlib.Path:
hpo_file = pathlib.Path(hpo_path)
else:
hpo_file = pathlib.Path("tests/data") / "hp.json"
# Explicit file check avoids try/except (Ruff BLE001) while providing clear error flow.
# More efficient than catching an IOError later because we fail fast and early.
if not hpo_file.is_file():
click.echo(f"Error: HPO file not found at {hpo_file}", err=True)
sys.exit(1)
Expand Down Expand Up @@ -347,54 +348,21 @@ def _write_phenopackets(
genomic_interpretation_entry.InterpretationStatus.CONTRIBUTORY
)

# TODO: Revise VariationDescriptor and gene_context later, omit setting gene_context for now.
# variation_descriptor = genomic_interpretation_entry.variant_interpretation.variation_descriptor
# we can also set variation_descriptor.gene_context and variation_descriptor.allelic_state here then serialize out as before
# variation_descriptor.gene_context.gene_symbol = genotype_record.gene_symbol
# variation_descriptor.allelic_state = variation_descriptor.AllelicState.Value(genotype_record.zygosity.upper())
# TODO: Revise VariationDescriptor and gene_context
# Build a complete VariationDescriptor directly from the Genotype

# Grab the VariantInterpretation and its descriptor
# Build a complete VariationDescriptor directly from the Genotype
vd = genotype_record.to_variation_descriptor()
variant_interpretation = genomic_interpretation_entry.variant_interpretation
variation_descriptor = variant_interpretation.variation_descriptor

# 1) Gene symbol & allelic state
# 'gene_context' is a message; we need to CopyFrom if setting a message,
# but for its scalar fields we can still assign directly:
variation_descriptor.gene_context.symbol = genotype_record.gene_symbol
variation_descriptor.allelic_state.CopyFrom(
pps2.OntologyClass(
id="GENO:"
+ genotype_record.zygosity_code, # or however we decide to construct this later on
label=genotype_record.zygosity,
)
# Prefer CopyFrom (protobuf Message API) when the generated message class exposes it.
# Feature-detecting it with getattr, instead of wrapping the call in a blanket
# `except Exception` (Ruff BLE001), keeps us compatible with protobuf builds
# where CopyFrom may or may not exist on the generated message class.
copy_from = getattr(
variant_interpretation.variation_descriptor, "CopyFrom", None
)

# 2) HGVS expression
hgvs_expr = variation_descriptor.expressions.add()
# Attempt to set the HGVS syntax enum if available; otherwise skip.
try:
hgvs_expr.syntax = pps2.VariationDescriptor.Expression.HGVS
except AttributeError:
pass
hgvs_expr.value = genotype_record.hgvsg

# 3) Genomic location (exact interval) and alleles, if supported
try:
loc_ctx = variation_descriptor.location
# use the nested VariationDescriptor.Location enum
loc_ctx.interval.interval_type = (
pps2.VariationDescriptor.Location.Interval.Type.EXACT
)
loc_ctx.interval.start = genotype_record.start_position
loc_ctx.interval.end = genotype_record.end_position
loc_ctx.reference_sequence_id = genotype_record.chromosome

# 4) Reference & alternate alleles
variation_descriptor.reference = genotype_record.reference
variation_descriptor.alternate = genotype_record.alternate
except AttributeError:
# some protobuf builds have trouble exposing location/alleles, so just skip them
pass
if callable(copy_from):
copy_from(vd)
else:
# Fallback when CopyFrom is absent: MergeFrom retains the previous behavior without catching a blanket Exception
variant_interpretation.variation_descriptor.MergeFrom(vd) # type: ignore[attr-defined]

# 3c) Add optional entries (if any):
for d in patient_data["disease_records"]:
Expand Down
Loading