broadinstitute
diff --git a/‎.cargo/config.toml‎
Lines changed: 9 additions & 0 deletions b/‎.cargo/config.toml‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎Cargo.lock‎
Lines changed: 35 additions & 9 deletions b/‎Cargo.lock‎
Lines changed: 35 additions & 9 deletions
diff --git a/‎scripts/run_dummy_pipeline.sh‎
Lines changed: 51 additions & 0 deletions b/‎scripts/run_dummy_pipeline.sh‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎src/hidive/Cargo.toml‎
Lines changed: 2 additions & 1 deletion b/‎src/hidive/Cargo.toml‎
Lines changed: 2 additions & 1 deletion
@@ -0,0 +1,16 @@
+# Point the build at the libtorch that ships inside the repo-local Python venv.
+# `relative = true` makes Cargo resolve each value against the directory that
+# contains `.cargo/`, so these settings work wherever the checkout lives.
+[env]
+LIBTORCH = { value = "venv/lib/python3.12/site-packages/torch", relative = true }
+LD_LIBRARY_PATH = { value = "venv/lib/python3.12/site-packages/torch/lib", relative = true }
+
+# NOTE(review): the rpath entries below are still machine-specific. Cargo passes
+# rustflags through verbatim and has no `relative` option for them, so each
+# developer must point these at their own checkout (ideally from an untracked
+# config) until a portable rpath is agreed on.
+[target.x86_64-apple-darwin]
+rustflags = ["-C", "link-args=-Wl,-rpath,/Users/kiran/repositories/hidive/venv/lib/python3.12/site-packages/torch/lib"]
+
+[target.aarch64-apple-darwin]
+rustflags = ["-C", "link-args=-Wl,-rpath,/Users/kiran/repositories/hidive/venv/lib/python3.12/site-packages/torch/lib"]
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+DATA_DIR="${REPO_ROOT}/examples/dummy_data"
+OUT_DIR="${1:-${DATA_DIR}/output}"
+
+GFA_PATH="${OUT_DIR}/pangenome.gfa"
+MODEL_PATH="${OUT_DIR}/crf_model.json"
+HAP_PATH="${OUT_DIR}/haplotypes.fa"
+
+mkdir -p "${OUT_DIR}"
+
+if ! command -v seqwish >/dev/null 2>&1; then
+  echo "Error: seqwish is not available in PATH. Install it before running this script." >&2
+  exit 1
+fi
+
+echo "=== Building dummy pangenome graph ==="
+cargo run --bin hidive -- build-pangenome \
+  --tier1-fasta-paths "${DATA_DIR}/tier1.fa" \
+  --tier2-fasta-paths "${DATA_DIR}/tier2.fa" \
+  --tier3-fasta-paths "${DATA_DIR}/tier3.fa" \
+  --output "${GFA_PATH}" \
+  --kmer-size 11 \
+  --min-aln-len 30
+
+echo "=== Training CRF on dummy data ==="
+cargo run --bin hidive -- train-crf \
+  --graph "${GFA_PATH}" \
+  --reads "${DATA_DIR}/reads.fa" \
+  --truth-haplotypes "${DATA_DIR}/truth_hap1.fa" \
+  --truth-haplotypes "${DATA_DIR}/truth_hap2.fa" \
+  --output "${MODEL_PATH}" \
+  --kmer-size 11 \
+  --iterations 5
+
+echo "=== Inferring haplotypes on dummy data ==="
+cargo run --bin hidive -- infer-haplotypes \
+  --graph "${GFA_PATH}" \
+  --model "${MODEL_PATH}" \
+  --reads "${DATA_DIR}/reads.fa" \
+  --output "${HAP_PATH}" \
+  --kmer-size 11
+
+echo "Dummy pipeline complete."
+echo "GFA:      ${GFA_PATH}"
+echo "Model:    ${MODEL_PATH}"
+echo "Haplotypes: ${HAP_PATH}"
+
@@ -9,7 +9,7 @@ resolver = "2"
 [dependencies]
 bio = "=2.0.1"
 chrono = "=0.4.38"
-clap = { version = "=4.5.1", features = ["derive"] }
+clap = { version = "=4.5.41", features = ["derive"] }
 flate2 = "=1.0.30"
 gbdt = "=0.1.3"
 gaoya = "=0.2.0"
@@ -23,6 +23,7 @@ needletail = "=0.5.1"
 ndarray = { version = "=0.16.1", features = ["rayon"] }
 ndarray-stats = "=0.6.0"
 num-format = "=0.4.4"
+parfait-gfa = "=0.1.2"
 path-absolutize = "=3.1.1"
 petgraph = "=0.6.5"
 rand = "=0.8.5"