Skip to content

feat: --maf option for filtering output of transform command #274

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"extensions": [
"ms-python.python",
"ms-vscode.live-server",
"ms-python.black-formatter",
"ms-python.black-formatter@2025.2.0",
"GitHub.vscode-github-actions"
],
"settings": {
Expand Down
9 changes: 9 additions & 0 deletions haptools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,13 @@ def simphenotype(
default=False,
help="Also transform using VCF 'POP' FORMAT field and 'ancestry' .hap extra field",
)
@click.option(
"--maf",
type=float,
default=None,
show_default="all haplotypes",
help="Do not output haplotypes with an MAF below this value",
)
@click.option(
"-o",
"--output",
Expand Down Expand Up @@ -649,6 +656,7 @@ def transform(
chunk_size: int = None,
discard_missing: bool = False,
ancestry: bool = False,
maf: float = None,
output: Path = Path("-"),
verbosity: str = "INFO",
):
Expand Down Expand Up @@ -694,6 +702,7 @@ def transform(
chunk_size,
discard_missing,
ancestry,
maf,
output,
log,
)
Expand Down
1 change: 0 additions & 1 deletion haptools/data/breakpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

from .data import Data


# A haplotype block consists of
# 1) pop - A population label (str), like 'YRI'
# 2) chrom - A chromosome name (str), like 'chr19' or simply '19'
Expand Down
3 changes: 1 addition & 2 deletions haptools/data/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,7 @@ def check_biallelic(self, discard_also=False):
self._var_idx = None
else:
raise ValueError(
"Variant with ID {} at POS {}:{} is multiallelic for sample {}"
.format(
"Variant with ID {} at POS {}:{} is multiallelic for sample {}".format(
*tuple(self.variants[variant_idx[0]])[:3],
self.samples[samp_idx[0]],
)
Expand Down
1 change: 0 additions & 1 deletion haptools/data/haplotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from .data import Data
from .genotypes import GenotypesVCF


# the current version of the hap format spec
HAP_VERSION = "0.2.0"

Expand Down
10 changes: 4 additions & 6 deletions haptools/data/tr_harmonizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Utilities for harmonizing tandem repeat VCF records.
Handles VCFs generated by various TR genotyping tools
"""

import re
import math
import enum
Expand Down Expand Up @@ -462,8 +463,7 @@ def _HarmonizeHipSTRRecord(vcfrecord: cyvcf2.Variant):
):
raise TypeError(
"Record at {}:{} is missing one of the mandatory HipSTR info fields START,"
" END, PERIOD. ".format(vcfrecord.CHROM, vcfrecord.POS)
+ _beagle_error
" END, PERIOD. ".format(vcfrecord.CHROM, vcfrecord.POS) + _beagle_error
)
# determine full alleles and trimmed alleles
pos = int(vcfrecord.POS)
Expand Down Expand Up @@ -536,8 +536,7 @@ def _HarmonizeAdVNTRRecord(vcfrecord: cyvcf2.Variant):
if vcfrecord.INFO.get("RU") is None or vcfrecord.INFO.get("VID") is None:
raise TypeError(
"Record at {}:{} is missing one of the mandatory ADVNTR info fields RU,"
" VID. ".format(vcfrecord.CHROM, vcfrecord.POS)
+ _beagle_error
" VID. ".format(vcfrecord.CHROM, vcfrecord.POS) + _beagle_error
)
ref_allele = vcfrecord.REF.upper()
if vcfrecord.ALT:
Expand Down Expand Up @@ -651,8 +650,7 @@ def _HarmonizeEHRecord(vcfrecord: cyvcf2.Variant):
if vcfrecord.INFO.get("VARID") is None or vcfrecord.INFO.get("RU") is None:
raise TypeError(
"Record at {}:{} is missing one of the mandatory ExpansionHunter info"
" fields VARID, RU. ".format(vcfrecord.CHROM, vcfrecord.POS)
+ _beagle_error
" fields VARID, RU. ".format(vcfrecord.CHROM, vcfrecord.POS) + _beagle_error
)
record_id = vcfrecord.INFO["VARID"]
motif = vcfrecord.INFO["RU"].upper()
Expand Down
10 changes: 8 additions & 2 deletions haptools/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,7 @@ def check_biallelic(self, discard_also=False):
self._var_idx = None
else:
raise ValueError(
"Variant with ID {} at POS {}:{} is multiallelic for sample {}"
.format(
"Variant with ID {} at POS {}:{} is multiallelic for sample {}".format(
*tuple(self.variants[variant_idx[0]])[:3],
self.samples[samp_idx[0]],
)
Expand Down Expand Up @@ -537,6 +536,7 @@ def transform_haps(
chunk_size: int = None,
discard_missing: bool = False,
ancestry: bool = False,
maf: float = None,
output: Path = Path("-"),
log: logging.Logger = None,
):
Expand Down Expand Up @@ -571,6 +571,8 @@ def transform_haps(
ancestry : bool, optional
Whether to also match ancestral population labels from the VCF against those in
the .hap file
maf : float, optional
If specified, only haplotypes with an MAF above this value will be output
output : Path, optional
The location to which to write output
log : Logger, optional
Expand Down Expand Up @@ -666,6 +668,10 @@ def transform_haps(
log.info("Transforming genotypes via haplotypes")
hp.transform(gt, hp_gt)

if maf is not None:
log.info(f"Removing haplotypes with MAF < {maf}")
hp_gt.check_maf(threshold=maf, discard_also=True)

log.info(f"Writing haplotypes to {out_file_type} file")
hp_gt.write()

Expand Down
4 changes: 2 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Nox sessions."""

import os
import shutil
from pathlib import Path
Expand All @@ -7,7 +8,6 @@
from nox_poetry import Session # type: ignore
from nox_poetry import session # type: ignore


package = "haptools"
python_versions = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
locked_python_version = "3.9" # keep in sync with dev-env.yml
Expand Down Expand Up @@ -37,7 +37,7 @@ def docs(session: Session) -> None:
def lint(session: Session) -> None:
"""Lint our code."""
session.install("black")
session.run("black", "--verbose", "--check", ".")
session.run("black", "--diff", "--verbose", "--check", ".")


# detect whether conda/mamba is installed
Expand Down
Loading