diff --git a/.gitignore b/.gitignore index 0233c583..ae94401e 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ glnexus-test 6tf_ipsc_wt all_Ngn2-KOLF all_KOLF21J +26apr10_Ngn2-KOLF hifisomatic_resources hifi-wdl-resources-v* @@ -18,6 +19,7 @@ wgs_backlog *.bam *.tbi *.bai +*.fai *.gz *.bak *.gz diff --git a/genbank_to_crispr_json.py b/genbank_to_crispr_json.py index 2c6ec7a3..b4006108 100755 --- a/genbank_to_crispr_json.py +++ b/genbank_to_crispr_json.py @@ -223,6 +223,15 @@ def parse_genbank_to_crispr_json(genbank_path: str, edit_id: str, right_ha_seq = get_feature_sequence(right_ha_feature, record) payload_seq = get_feature_sequence(payload_feature, record) + # Extract donor context (e.g. plasmid backbone) flanking the HDR cassette. + # Taken linearly from the GenBank record: everything before left_ha and + # after right_ha. For circular plasmids exported with backbone at the + # record boundaries (the common case), this captures the full backbone. + lha_start = int(left_ha_feature.location.start) + rha_end = int(right_ha_feature.location.end) + donor_context_before = str(record.seq[:lha_start]) + donor_context_after = str(record.seq[rha_end:]) + # Find payload components payload_start = int(payload_feature.location.start) payload_end = int(payload_feature.location.end) @@ -263,6 +272,11 @@ def parse_genbank_to_crispr_json(genbank_path: str, edit_id: str, ] } + if donor_context_before: + crispr_json['edits'][0]['edit']['donor_context_before'] = donor_context_before + if donor_context_after: + crispr_json['edits'][0]['edit']['donor_context_after'] = donor_context_after + # Add optional symbol if available if gene_info.get('symbol'): crispr_json['edits'][0]['target']['symbol'] = gene_info['symbol'] @@ -328,6 +342,11 @@ def main(): print(f" Gene: {crispr_json['edits'][0]['target']['symbol']}", file=sys.stderr) print(f" Payload components: {len(crispr_json['edits'][0]['edit'].get('payload_components', []))}", file=sys.stderr) + edit_out = crispr_json['edits'][0]['edit'] + if 'donor_context_before' in edit_out or 'donor_context_after' in edit_out: + print(f" Donor context: {len(edit_out.get('donor_context_before', ''))} bp before, " + f"{len(edit_out.get('donor_context_after', ''))} bp after", + file=sys.stderr) if __name__ == '__main__': diff --git a/scripts/process_input_config.py b/scripts/process_input_config.py index da1ef136..cb906457 100755 --- a/scripts/process_input_config.py +++ b/scripts/process_input_config.py @@ -72,30 +72,38 @@ def main(): all_exist = all(os.path.exists(hifi_read_path) for hifi_read_path in sample["hifi_reads"]) if all_exist and should_copy_file(dest_path): - with tempfile.TemporaryDirectory() as tmpdirname: - tmpdir_path = Path(tmpdirname) - temp_path = tmpdir_path / f"{sample['sample_id']}_hifi_reads_temp.bam" - print(f"Merging {len(sample['hifi_reads'])} HiFi read files -> {dest_path} (stripping tags: fi,ri,fp,rp,ip,pw,HP,PS,PC)") - - # Step 1: Concatenate BAM files - print(f" Step 1/2: Concatenating BAM files...") - subprocess.run([ - "samtools", "cat", - "-o", str(temp_path) - ] + sample["hifi_reads"], check=True) - - # Step 2: Strip tags using samtools reset - print(f" Step 2/2: Stripping tags...") - subprocess.run([ + print(f"Merging {len(sample['hifi_reads'])} HiFi read files -> {dest_path} (stripping tags: fi,ri,fp,rp,ip,pw,HP,PS,PC)") + + # Write to a local /tmp file first to avoid slow NFS streaming writes, + # then copy to NFS destination in one sequential transfer. 
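# Annotation (not part of the patch): a minimal, standalone sketch of the /tmp
# staging pattern described in this comment, assuming the same samtools tools
# and tag list used below; the helper name merge_strip_stage is hypothetical.
import shutil, subprocess, tempfile
from pathlib import Path

def merge_strip_stage(hifi_reads, dest_path, tags="fi,ri,fp,rp,ip,pw,HP,PS,PC"):
    with tempfile.NamedTemporaryFile(dir="/tmp", suffix=".bam", delete=False) as tmp:
        local_bam = Path(tmp.name)
    try:
        # Stream the concatenated BAMs straight into samtools reset; the
        # stripped output lands on fast local disk, not NFS.
        cat = subprocess.Popen(["samtools", "cat"] + list(hifi_reads),
                               stdout=subprocess.PIPE)
        subprocess.run(["samtools", "reset", "--remove-tag", tags,
                        "-o", str(local_bam), "-"],
                       stdin=cat.stdout, check=True)
        cat.stdout.close()
        if cat.wait() != 0:
            raise subprocess.CalledProcessError(cat.returncode, "samtools cat")
        shutil.copy2(local_bam, dest_path)   # one sequential write to NFS
    finally:
        local_bam.unlink(missing_ok=True)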
+ # samtools cat stdout is piped directly into samtools reset (no temp BAM on disk). + tmp_fd, tmp_out_str = tempfile.mkstemp(dir="/tmp", suffix=".bam") + os.close(tmp_fd) + tmp_out = Path(tmp_out_str) + try: + # Step 1 + 2 (pipelined): cat BAMs from NFS | reset/strip tags -> local /tmp + print(f" Step 1/2: Streaming cat | reset to local /tmp...") + cat_proc = subprocess.Popen( + ["samtools", "cat"] + sample["hifi_reads"], + stdout=subprocess.PIPE + ) + reset_proc = subprocess.run([ "samtools", "reset", "--thread", "15", "--remove-tag", "fi,ri,fp,rp,ip,pw,HP,PS,PC", - "-o", str(dest_path), - str(temp_path) - ], check=True) - - # Clean up temporary files - temp_path.unlink() + "-o", str(tmp_out), + "-" + ], stdin=cat_proc.stdout, check=True) + cat_proc.stdout.close() + cat_proc.wait() + if cat_proc.returncode != 0: + raise subprocess.CalledProcessError(cat_proc.returncode, "samtools cat") + + # Step 2/2: Copy finished BAM from local /tmp to NFS destination + print(f" Step 2/2: Copying local /tmp result to NFS destination...") + shutil.copy2(str(tmp_out), str(dest_path)) + finally: + tmp_out.unlink(missing_ok=True) new_sample["hifi_reads"] = [str(dest_path.absolute())] elif all_exist: diff --git a/workflows/assembly/assembly_tasks.wdl b/workflows/assembly/assembly_tasks.wdl index 2e5fcb52..e6fde86b 100755 --- a/workflows/assembly/assembly_tasks.wdl +++ b/workflows/assembly/assembly_tasks.wdl @@ -223,8 +223,8 @@ task pav { >>> output { - File? pav_vcf = "pav_outputs_~{sub(sample_name, "\\.", "_")}/~{sub(sample_name, "\\.", "_")}.vcf.gz" - File? pav_vcf_index = "pav_outputs_~{sub(sample_name, "\\.", "_")}/~{sub(sample_name, "\\.", "_")}.vcf.gz.tbi" + File? pav_vcf = "pav_outputs_~{sample_name}/~{sub(sample_name, "\\.", "_")}.vcf.gz" + File? pav_vcf_index = "pav_outputs_~{sample_name}/~{sub(sample_name, "\\.", "_")}.vcf.gz.tbi" } runtime { diff --git a/workflows/edit-qc/bcftools_aux.wdl b/workflows/edit-qc/bcftools_aux.wdl index 07911cbc..a1ad2fa9 100644 --- a/workflows/edit-qc/bcftools_aux.wdl +++ b/workflows/edit-qc/bcftools_aux.wdl @@ -116,8 +116,8 @@ task count_vcf_variants { runtime { docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" - cpu: 1 - memory: "2 GB" + cpu: 2 + memory: "8 GB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: runtime_attributes.preemptible_tries @@ -162,8 +162,83 @@ task count_tsv_rows { runtime { docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" - cpu: 1 - memory: "2 GB" + cpu: 2 + memory: "8 GB" + disk: disk_size + " GB" + disks: "local-disk " + disk_size + " HDD" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + +task bcftools_qual_filter { + meta { + description: "Filter variants to FILTER=PASS. Small variants and sawfish SVs additionally require GQ>=20 and, if AD is present, VAF>0.1. PAV assembly SVs (identified by presence of INFO/COV_PROP) skip the GQ filter and instead require all haplotype COV_PROP values >0.1." 
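# Annotation (not part of the task): a rough pysam restatement of the filter
# rule stated in the description above, for readability only. The task itself
# applies the equivalent bcftools -i expression shown below; this sketch
# assumes a single-sample VCF and the usual GQ/AD FORMAT fields.
import pysam

def keep_variant(rec: pysam.VariantRecord,
                 min_gq=20, min_vaf=0.1, min_cov_prop=0.1) -> bool:
    if list(rec.filter.keys()) not in ([], ["PASS"]):   # FILTER must be PASS or "."
        return False
    cov_prop = rec.info.get("COV_PROP")                  # present only on PAV assembly SVs
    if cov_prop is not None:
        vals = cov_prop if isinstance(cov_prop, tuple) else (cov_prop,)
        return min(float(v) for v in vals) > min_cov_prop   # all haplotypes > 0.1
    smp = rec.samples[0]
    gq = smp.get("GQ")
    if gq is None or gq < min_gq:                         # small variants / sawfish SVs
        return False
    ad = smp.get("AD")
    if ad and ad[0] is not None and sum(ad[:2]) > 0:
        return ad[1] / sum(ad[:2]) > min_vaf              # VAF check only if AD usable
    return True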
+ } + + parameter_meta { + vcf: { name: "Input VCF" } + out_prefix: { name: "Output file prefix" } + runtime_attributes: { name: "Runtime attribute structure" } + filtered_vcf: { name: "Filtered VCF" } + filtered_vcf_index: { name: "Filtered VCF index" } + } + + input { + File vcf + String out_prefix + RuntimeAttributes runtime_attributes + } + + Int disk_size = ceil(size(vcf, "GB") * 2 + 10) + + command <<< + set -euo pipefail + + bcftools --version + + # PAV assembly SVs carry INFO/COV_PROP typed as String in their VCF header. + # Reheader to Float (or add the tag as Float if absent) so bcftools can + # evaluate MIN(INFO/COV_PROP) numerically. Records without the tag remain + # absent (compared as ".") in either case. + if bcftools view -h ~{vcf} | grep -q '##INFO= fixed_header.txt + else + bcftools view -h ~{vcf} | grep -v '^#CHROM' > fixed_header.txt + echo '##INFO=' \ + >> fixed_header.txt + bcftools view -h ~{vcf} | grep '^#CHROM' >> fixed_header.txt + fi + bcftools reheader -h fixed_header.txt -o reheaded.vcf.gz ~{vcf} + bcftools index --tbi reheaded.vcf.gz + + # Unified filter: + # Non-PAV (INFO/COV_PROP absent): require GQ>=20; if AD present, VAF>0.1. + # PAV (INFO/COV_PROP present): require MIN(COV_PROP)>0.1; no GQ check. + bcftools view \ + --output-type z \ + -i '(FILTER="PASS" || FILTER=".") && ( + (INFO/COV_PROP="." && GQ>=20 && (AD[0:0]="." || (AD[0:0]+AD[0:1])=0 || AD[0:1]/(AD[0:0]+AD[0:1]) > 0.1)) || + (INFO/COV_PROP!="." && MIN(INFO/COV_PROP) > 0.1) + )' \ + reheaded.vcf.gz \ + -o ~{out_prefix}.vcf.gz + bcftools index --tbi ~{out_prefix}.vcf.gz + >>> + + output { + File filtered_vcf = "~{out_prefix}.vcf.gz" + File filtered_vcf_index = "~{out_prefix}.vcf.gz.tbi" + } + + runtime { + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 4 + memory: "8 GB" disk: disk_size + " GB" disks: "local-disk " + disk_size + " HDD" preemptible: runtime_attributes.preemptible_tries @@ -182,6 +257,9 @@ task bcftools_merge_assembly_align { vcfs: { name: "VCFs" } + sample_id: { + name: "Sample ID to normalize VCF sample names to before merging" + } out_prefix: { name: "Output VCF name" } @@ -190,7 +268,7 @@ task bcftools_merge_assembly_align { } merged_vcf: { name: "Merged VCF" - } + } merged_vcf_index: { name: "Merged VCF index" } @@ -198,6 +276,7 @@ task bcftools_merge_assembly_align { input { Array[File] vcfs + String sample_id String out_prefix @@ -210,9 +289,11 @@ task bcftools_merge_assembly_align { command <<< set -euo pipefail + echo "~{sample_id}" > sample_name.txt for vcf in ~{sep=' ' vcfs}; do - cp "$vcf" ./ - bcftools index --tbi "$(basename $vcf)" + base="$(basename $vcf)" + bcftools reheader -s sample_name.txt "$vcf" -o "$base" + bcftools index --tbi "$base" done bcftools --version @@ -247,4 +328,4 @@ task bcftools_merge_assembly_align { awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey zones: runtime_attributes.zones } -} \ No newline at end of file +} diff --git a/workflows/edit-qc/crispr_consensus_tasks.wdl b/workflows/edit-qc/crispr_consensus_tasks.wdl index c7b06662..e45c58b3 100644 --- a/workflows/edit-qc/crispr_consensus_tasks.wdl +++ b/workflows/edit-qc/crispr_consensus_tasks.wdl @@ -2,531 +2,20 @@ version 1.1 import "../wdl-common/wdl/structs.wdl" -task create_sample_reference_consensus { - meta { - description: "Create sample-specific reference consensus for edit region using bcftools consensus with sample variants" - } - - parameter_meta { - ref_fasta: 
"Reference genome FASTA" - ref_fasta_index: "Reference genome FASTA index" - small_variant_vcf: "Sample small variant VCF (phased)" - small_variant_vcf_index: "Index for small variant VCF" - sv_vcf: "Sample structural variant VCF (phased)" - sv_vcf_index: "Index for structural variant VCF" - crispr_edit_json: "JSON file describing CRISPR edit with target region" - } - - input { - File ref_fasta - File ref_fasta_index - File small_variant_vcf - File small_variant_vcf_index - File sv_vcf - File sv_vcf_index - File crispr_edit_json - String sample_id - RuntimeAttributes runtime_attributes - } - - String out_prefix = "~{sample_id}_sample_ref" - Int padding = 5000 # Add padding around edit region for better alignment context - - command <<< - set -euxo pipefail - - python3 <<'EOF' -import json - -# Read CRISPR edit JSON to get target region -with open("~{crispr_edit_json}", 'r') as f: - crispr_data = json.load(f) - -# Get first edit's target region -edit = crispr_data['edits'][0] -chr = edit['target']['chr'] -start = max(1, edit['target']['start'] - ~{padding}) # Add padding but don't go below 1 -end = edit['target']['end'] + ~{padding} -symbol = edit['target']['symbol'] - -# Write region info for bash -with open('region.txt', 'w') as f: - f.write(f"{chr}:{start}-{end}\n") -with open('symbol.txt', 'w') as f: - f.write(f"{symbol}\n") -with open('chr.txt', 'w') as f: - f.write(f"{chr}\n") -EOF - - REGION=$(cat region.txt) - SYMBOL=$(cat symbol.txt) - CHR=$(cat chr.txt) - - echo "Creating sample-specific reference for region: ${REGION}" - - # Extract region from reference genome - samtools faidx ~{ref_fasta} "${REGION}" > region_ref.fasta - - # Index the small variant VCF if needed and extract region - tabix -p vcf ~{small_variant_vcf} || true - bcftools view -r "${REGION}" ~{small_variant_vcf} -O z -o small_variants_region.vcf.gz - tabix -p vcf small_variants_region.vcf.gz - - # Index the SV VCF if needed and extract region - tabix -p vcf ~{sv_vcf} || true - bcftools view -r "${REGION}" ~{sv_vcf} -O z -o sv_region.vcf.gz - tabix -p vcf sv_region.vcf.gz - - # Merge small variants and SVs - bcftools concat -a -D small_variants_region.vcf.gz sv_region.vcf.gz -O z -o merged_variants.vcf.gz - tabix -p vcf merged_variants.vcf.gz - - # Filter out symbolic alleles (except , <*>, which are supported) - # This removes alleles like , , , , etc. 
- bcftools view -e 'ALT~"" || ALT~"" || ALT~"" || ALT~""' \ - merged_variants.vcf.gz -O z -o merged_variants_filtered.vcf.gz - tabix -p vcf merged_variants_filtered.vcf.gz - - echo "Filtered VCF stats:" - echo " Total variants: $(bcftools view -H merged_variants.vcf.gz | wc -l)" - echo " After filtering: $(bcftools view -H merged_variants_filtered.vcf.gz | wc -l)" - - # Apply variants to reference using bcftools consensus - # This creates a sample-specific reference that includes the variants - cat region_ref.fasta | bcftools consensus merged_variants_filtered.vcf.gz > sample_ref_consensus.fasta - - # Rename header to include sample info - python3 <<'EOF' -symbol = open('symbol.txt').read().strip() -region = open('region.txt').read().strip() - -with open('sample_ref_consensus.fasta', 'r') as f: - lines = f.readlines() - -# Replace header -with open('~{out_prefix}_consensus.fasta', 'w') as out: - if lines: - out.write(f">{symbol}_sample_ref_{region}\n") - for line in lines[1:]: - out.write(line) -EOF - - # Index the consensus - samtools faidx ~{out_prefix}_consensus.fasta - - echo "Sample-specific reference consensus created for ${SYMBOL}" - >>> - - output { - File sample_ref_consensus_fasta = "~{out_prefix}_consensus.fasta" - File sample_ref_consensus_fasta_index = "~{out_prefix}_consensus.fasta.fai" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" - cpu: 2 - memory: "8 GB" - disk: "20 GB" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey - zones: runtime_attributes.zones - } -} - -task extract_region_reads { - meta { - description: "Extract aligned reads from edit region preserving haplotype tags" - } - - parameter_meta { - haplotagged_bam: "Haplotagged BAM file from phasing" - haplotagged_bam_index: "Index for haplotagged BAM" - crispr_edit_json: "JSON file describing CRISPR edit with target region" - } - - input { - File haplotagged_bam - File haplotagged_bam_index - File crispr_edit_json - String sample_id - Int threads = 4 - RuntimeAttributes runtime_attributes - } - - String out_prefix = "~{sample_id}_region_reads" - Int padding = 5000 # Must match padding in create_sample_reference_consensus - - command <<< - set -euxo pipefail - - python3 <<'EOF' -import json - -# Read CRISPR edit JSON to get target region -with open("~{crispr_edit_json}", 'r') as f: - crispr_data = json.load(f) - -# Get first edit's target region -edit = crispr_data['edits'][0] -chr = edit['target']['chr'] -start = max(1, edit['target']['start'] - ~{padding}) -end = edit['target']['end'] + ~{padding} -symbol = edit['target']['symbol'] - -# Write region info for bash -with open('region.txt', 'w') as f: - f.write(f"{chr}:{start}-{end}\n") -with open('symbol.txt', 'w') as f: - f.write(f"{symbol}\n") -EOF - - REGION=$(cat region.txt) - SYMBOL=$(cat symbol.txt) - - echo "Extracting reads from region: ${REGION}" - - # Extract reads from the edit region (with haplotype tags preserved) - samtools view -h -@ ~{threads} ~{haplotagged_bam} "${REGION}" | \ - samtools fastq -T HP,PS - | \ - gzip > ~{out_prefix}.fastq.gz - - # Count extracted reads - READ_COUNT=$(zcat ~{out_prefix}.fastq.gz | grep -c "^@" || echo 0) - echo "Extracted ${READ_COUNT} reads from ${REGION}" - >>> - - output { - File region_reads_fastq = "~{out_prefix}.fastq.gz" - } - - runtime { - docker: 
"~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" - cpu: threads - memory: "8 GB" - disk: "50 GB" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey - zones: runtime_attributes.zones - } -} - -task minimap2_align_to_sample_reference { - meta { - description: "Align extracted reads to sample-specific reference using minimap2" - } - - parameter_meta { - region_reads_fastq: "FASTQ reads extracted from edit region" - sample_ref_consensus_fasta: "Sample-specific reference consensus FASTA" - } - - input { - File region_reads_fastq - File sample_ref_consensus_fasta - String sample_id - Int threads = 4 - RuntimeAttributes runtime_attributes - } - - String out_prefix = "~{sample_id}_realigned" - - command <<< - set -euxo pipefail - - # Align reads to sample-specific reference using minimap2 - minimap2 -ax map-hifi -t ~{threads} -Y \ - ~{sample_ref_consensus_fasta} \ - ~{region_reads_fastq} > ~{out_prefix}.sam - - echo "Alignment complete" - >>> - - output { - File realigned_sam = "~{out_prefix}.sam" - } - - runtime { - docker: "quay.io/biocontainers/minimap2@sha256:fdc9ef8bfbd31bab59a61b2e90dd226647deed971556175a4cd004f0bcdc7608" - cpu: threads - memory: "16 GB" - disk: "50 GB" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey - zones: runtime_attributes.zones - } -} - -task sam_to_sorted_tagged_bam { - meta { - description: "Convert SAM to sorted BAM, generate stats, and transfer haplotype tags from original BAM" - } - - parameter_meta { - sam_file: "SAM alignment file" - original_bam: "Original haplotagged BAM (source of HP/PS tags)" - original_bam_index: "Index for original BAM" - crispr_edit_json: "JSON file with region info" - } - - input { - File sam_file - File original_bam - File original_bam_index - File crispr_edit_json - String sample_id - Int threads = 4 - RuntimeAttributes runtime_attributes - } - - String out_prefix = "~{sample_id}_realigned" - String tagged_prefix = "~{sample_id}_realigned_tagged" - Int padding = 5000 - - command <<< - set -euxo pipefail - - # Step 1: Convert to BAM, sort, and index - samtools view -b -@ ~{threads} ~{sam_file} | \ - samtools sort -@ ~{threads} -o ~{out_prefix}.bam - - samtools index -@ ~{threads} ~{out_prefix}.bam - - # Count aligned reads and generate stats - ALIGNED_COUNT=$(samtools view -c ~{out_prefix}.bam) - echo "Sorted ${ALIGNED_COUNT} reads" - - samtools flagstat ~{out_prefix}.bam > ~{out_prefix}_flagstat.txt - - # Step 2: Transfer haplotype tags from original BAM - python3 <<'EOF' -import json -import pysam - -# Read CRISPR edit JSON to get target region -with open("~{crispr_edit_json}", 'r') as f: - crispr_data = json.load(f) - -edit = crispr_data['edits'][0] -chr = edit['target']['chr'] -start = max(1, edit['target']['start'] - ~{padding}) -end = edit['target']['end'] + ~{padding} - -# Open BAMs -original = pysam.AlignmentFile("~{original_bam}", "rb") -realigned = pysam.AlignmentFile("~{out_prefix}.bam", "rb") - -# Create output BAM with same header as realigned -output = pysam.AlignmentFile("~{tagged_prefix}.bam", "wb", template=realigned) - -# Build dict of read_name -> (HP, PS) from original BAM in the region -tags_by_read = {} -for read in original.fetch(chr, start, end): - hp_tag = read.get_tag("HP") if 
read.has_tag("HP") else None - ps_tag = read.get_tag("PS") if read.has_tag("PS") else None - if hp_tag is not None or ps_tag is not None: - tags_by_read[read.query_name] = (hp_tag, ps_tag) - -print(f"Extracted tags from {len(tags_by_read)} reads in original BAM") - -# Transfer tags to realigned reads -transferred = 0 -for read in realigned.fetch(): - if read.query_name in tags_by_read: - hp_tag, ps_tag = tags_by_read[read.query_name] - if hp_tag is not None: - read.set_tag("HP", hp_tag, value_type='i') - if ps_tag is not None: - read.set_tag("PS", ps_tag, value_type='i') - transferred += 1 - output.write(read) - -print(f"Transferred tags to {transferred} reads in realigned BAM") - -original.close() -realigned.close() -output.close() -EOF - - # Index the tagged BAM - samtools index ~{tagged_prefix}.bam - - echo "Combined SAM conversion, sorting, and tag transfer complete" - >>> - - output { - File sorted_bam = "~{out_prefix}.bam" - File sorted_bam_index = "~{out_prefix}.bam.bai" - File alignment_stats = "~{out_prefix}_flagstat.txt" - File tagged_bam = "~{tagged_prefix}.bam" - File tagged_bam_index = "~{tagged_prefix}.bam.bai" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" - cpu: threads - memory: "8 GB" - disk: "50 GB" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey - zones: runtime_attributes.zones - } -} - -task create_haplotype_consensus_from_realigned { - meta { - description: "Create haplotype consensus sequences from reads realigned to sample-specific reference" - } - - parameter_meta { - realigned_bam: "BAM file with reads realigned to sample-specific reference" - realigned_bam_index: "Index for realigned BAM" - sample_ref_consensus_fasta: "Sample-specific reference consensus FASTA (used as coordinate system)" - crispr_edit_json: "JSON file describing CRISPR edit with target region" - } - - input { - File realigned_bam - File realigned_bam_index - File sample_ref_consensus_fasta - File crispr_edit_json - String sample_id - RuntimeAttributes runtime_attributes - } - - String out_prefix = "~{sample_id}_haplotype_consensus" - - command <<< - set -euxo pipefail - - python3 <<'EOF' -import json - -# Read CRISPR edit JSON to get target info -with open("~{crispr_edit_json}", 'r') as f: - crispr_data = json.load(f) - -# Get first edit's metadata -edit = crispr_data['edits'][0] -symbol = edit['target']['symbol'] - -# Write symbol for bash -with open('symbol.txt', 'w') as f: - f.write(f"{symbol}\n") -EOF - - SYMBOL=$(cat symbol.txt) - - echo "Creating haplotype consensus sequences for ${SYMBOL}" - - # Get the sequence name from the sample reference (should be first/only sequence) - REF_NAME=$(head -n 1 ~{sample_ref_consensus_fasta} | cut -d' ' -f1 | sed 's/>//') - echo "Reference sequence name: ${REF_NAME}" - - # Split BAM by haplotype tag (HP) - samtools split -d HP --output-fmt BAM --write-index -f "hap%#.%." -u "unphased.bam" ~{realigned_bam} - - # Check if haplotype files were created and generate consensus for each - if [ -f "hap0.bam" ]; then - echo "Generating consensus for haplotype 1..." 
- samtools consensus -a -m simple --min-MQ 10 --min-BQ 10 -f fasta -X hifi hap0.bam > hap1_consensus.fasta || echo ">hap1_no_consensus" > hap1_consensus.fasta - - # Get depth info - samtools depth hap0.bam | awk '{sum+=$3; count++} END {if(count>0) print "Haplotype 1 mean depth:", sum/count; else print "Haplotype 1 mean depth: 0"}' > hap1_depth.txt - cat hap1_depth.txt - else - echo "No haplotype 1 reads found" - echo ">hap1_no_consensus" > hap1_consensus.fasta - echo "Haplotype 1 mean depth: 0" > hap1_depth.txt - fi - - if [ -f "hap1.bam" ]; then - echo "Generating consensus for haplotype 2..." - samtools consensus -a -m simple --min-MQ 10 --min-BQ 10 -f fasta -X hifi hap1.bam > hap2_consensus.fasta || echo ">hap2_no_consensus" > hap2_consensus.fasta - - # Get depth info - samtools depth hap1.bam | awk '{sum+=$3; count++} END {if(count>0) print "Haplotype 2 mean depth:", sum/count; else print "Haplotype 2 mean depth: 0"}' > hap2_depth.txt - cat hap2_depth.txt - else - echo "No haplotype 2 reads found" - echo ">hap2_no_consensus" > hap2_consensus.fasta - echo "Haplotype 2 mean depth: 0" > hap2_depth.txt - fi - - # Combine consensus sequences with informative headers - python3 <<'EOF' -import sys - -symbol = open('symbol.txt').read().strip() - -# Combine haplotype consensus files -with open('~{out_prefix}_combined.fasta', 'w') as out: - # Haplotype 1 - with open('hap1_consensus.fasta', 'r') as f: - lines = f.readlines() - if len(lines) > 1: - seq = ''.join(line.strip() for line in lines[1:]) - out.write(f">{symbol}_hap1_realigned\n{seq}\n") - else: - out.write(f">{symbol}_hap1_no_consensus\nN\n") - - # Haplotype 2 - with open('hap2_consensus.fasta', 'r') as f: - lines = f.readlines() - if len(lines) > 1: - seq = ''.join(line.strip() for line in lines[1:]) - out.write(f">{symbol}_hap2_realigned\n{seq}\n") - else: - out.write(f">{symbol}_hap2_no_consensus\nN\n") - -print("Combined haplotype consensus sequences created") -EOF - - # Create summary file - echo "Haplotype Consensus Summary for ${SYMBOL}" > ~{out_prefix}_summary.txt - echo "==========================================" >> ~{out_prefix}_summary.txt - echo "" >> ~{out_prefix}_summary.txt - cat hap1_depth.txt >> ~{out_prefix}_summary.txt - cat hap2_depth.txt >> ~{out_prefix}_summary.txt - echo "" >> ~{out_prefix}_summary.txt - echo "Consensus sequences:" >> ~{out_prefix}_summary.txt - grep -c "^>" ~{out_prefix}_combined.fasta >> ~{out_prefix}_summary.txt || echo "0" >> ~{out_prefix}_summary.txt - >>> - - output { - File haplotype_consensus_fasta = "~{out_prefix}_combined.fasta" - File consensus_summary = "~{out_prefix}_summary.txt" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" - cpu: 2 - memory: "8 GB" - disk: "20 GB" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey - zones: runtime_attributes.zones - } -} - task annotate_consensus_genbank { meta { description: "Create GenBank files annotating edit parts on haplotype consensus sequences" } parameter_meta { - parts_alignment_paf: "PAF file from minimap2 alignment of edit parts to consensus sequences" + parts_alignment_tsv: "Blastn tabular output from blastn_remap_parts (edit parts aligned to consensus sequences)" consensus_fasta: "FASTA file with haplotype consensus sequences" crispr_edit_json: "JSON file describing CRISPR edit structure" 
coverage_threshold: "Minimum coverage threshold for faithful alignments" } input { - File parts_alignment_paf + File parts_alignment_tsv File consensus_fasta File crispr_edit_json Float coverage_threshold = 0.8 @@ -562,37 +51,42 @@ from Bio.SeqFeature import SeqFeature, FeatureLocation # Type aliases Hit = Tuple[int, int, int, int, str, int] # q_start, q_end, t_start, t_end, strand, n_match -def parse_paf_line(line: str) -> Dict: - """Parse a single PAF format line""" +def parse_blastn_line(line: str) -> Dict: + """Parse a single blastn tabular line (outfmt 6 qseqid qlen qstart qend sseqid slen sstart send nident mismatch gapopen gaps length sstrand bitscore)""" f = line.rstrip("\n").split("\t") - if len(f) < 12: - raise ValueError("PAF line has fewer than 12 mandatory columns") + if len(f) < 15: + raise ValueError(f"Expected >=15 columns, got {len(f)}") + sstart, send = int(f[6]), int(f[7]) return dict( query_name=f[0], query_len=int(f[1]), - q_start=int(f[2]), q_end=int(f[3]), strand=f[4], - target_name=f[5], target_len=int(f[6]), - t_start=int(f[7]), t_end=int(f[8]), - n_match=int(f[9]), n_align=int(f[10]), mapq=int(f[11]), + q_start=int(f[2]) - 1, q_end=int(f[3]), + strand="+" if f[13] == "plus" else "-", + target_name=f[4], target_len=int(f[5]), + t_start=min(sstart, send) - 1, t_end=max(sstart, send), + n_match=int(f[8]), n_align=int(f[12]), ) -def load_faithful_alignments(paf_path: str, cov_thr: float) -> Tuple[Dict[str, Dict[str, List[Hit]]], Dict[str, int]]: +def load_faithful_alignments(tsv_path: str, cov_thr: float) -> Tuple[Dict[str, Dict[str, List[Hit]]], Dict[str, int]]: """ - Load only faithful alignments (high coverage, low gaps) from PAF. + Load only faithful alignments (high coverage, low gaps) from blastn tabular output. Returns: ({target_consensus_name: {query_part_name: [Hit, ...]}}, {query_name: length}) """ faithful = defaultdict(lambda: defaultdict(list)) qlen = {} - with open(paf_path) as fh: + with open(tsv_path) as fh: for ln in fh: - if not ln.strip(): + if not ln.strip() or ln.startswith('#'): + continue + try: + r = parse_blastn_line(ln) + except ValueError: continue - r = parse_paf_line(ln) qlen.setdefault(r["query_name"], r["query_len"]) # Calculate coverage and gap metrics cov_hit = r["n_match"] / r["query_len"] - gap_ok = abs(r["n_match"] - r["n_align"]) <= 0.05 * r["n_match"] + gap_ok = r["n_align"] - r["n_match"] <= 0.05 * r["n_match"] # Only keep faithful alignments if cov_hit >= cov_thr and gap_ok: @@ -620,21 +114,33 @@ def load_edit_metadata(json_path: str) -> Dict: 'symbol': edit['target']['symbol'] } -def create_source_and_gene_features(seq_len: int, metadata: Dict, haplotype: str) -> List[SeqFeature]: +def parse_group_label(seq_id: str, description: str) -> str: + """ + Extract a human-readable group label from MSA consensus FASTA headers. + Header format: '>group_{N} n_reads={K}' or '>group_{N}_singleton n_reads=1' + or '>group_{N}_hap_{L} n_reads={K}' for split haplotypes. + Falls back gracefully for anything else. + """ + import re + m = re.match(r'(group_\d+(?:_(?:singleton|hap_\w+))?)', seq_id) + if m: + nr = '' + for token in description.split(): + if token.startswith('n_reads='): + nr = token[8:] + parts = [m.group(1)] + if nr: + parts.append(f"n_reads={nr}") + return ' '.join(parts) + # Fallback + return seq_id + +def create_source_and_gene_features(seq_len: int, metadata: Dict, group_label: str) -> List[SeqFeature]: """ Create source and gene features for the genomic context of the consensus sequence. 
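# Annotation (not part of the script): worked example of the faithfulness test
# in load_faithful_alignments above, with made-up numbers. A left_HA part of
# qlen=800 aligned with nident=700 over alignment length 720:
cov_hit = 700 / 800                      # 0.875 >= 0.8 coverage threshold -> pass
gap_ok = (720 - 700) <= 0.05 * 700       # 20 bp of gaps <= 35 -> pass, hit is kept
# The same part with nident=600 would give coverage 0.75 and be discarded.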
- - Args: - seq_len: Length of the consensus sequence - metadata: Edit metadata with genomic coordinates - haplotype: 'hap1' or 'hap2' - - Returns: - List containing source and gene SeqFeature objects """ features = [] - # Source feature covering entire sequence source_feature = SeqFeature( location=FeatureLocation(0, seq_len, strand=+1), type="source", @@ -644,12 +150,11 @@ def create_source_and_gene_features(seq_len: int, metadata: Dict, haplotype: str "db_xref": ["taxon:9606"], "chromosome": [metadata['chromosome'].replace('chr', '')], "map": [f"{metadata['start']}-{metadata['end']}"], - "note": [f"Consensus sequence from ~{sample_id} {haplotype}"] + "note": [f"Consensus from ~{sample_id} {group_label}"] } ) features.append(source_feature) - # Gene feature covering entire sequence gene_feature = SeqFeature( location=FeatureLocation(0, seq_len, strand=+1), type="gene", @@ -756,17 +261,11 @@ def annotate_consensus_sequences(faithful_alignments: Dict[str, Dict[str, List[H # Get consensus sequence original_rec = consensus_seqs[seq_id] - # Determine haplotype from sequence ID - if '_hap1' in seq_id: - haplotype = 'hap1' - elif '_hap2' in seq_id: - haplotype = 'hap2' - else: - haplotype = 'unknown' + group_label = parse_group_label(seq_id, original_rec.description) # Create source and gene features source_gene_features = create_source_and_gene_features( - len(original_rec.seq), edit_metadata, haplotype + len(original_rec.seq), edit_metadata, group_label ) # Create edit part features @@ -780,7 +279,7 @@ def annotate_consensus_sequences(faithful_alignments: Dict[str, Dict[str, List[H seq=original_rec.seq, id=original_rec.id, name=original_rec.id[:16], # GenBank name max 16 chars - description=f"~{sample_id} {edit_metadata['symbol']} {haplotype} consensus {edit_metadata['chromosome']}:{edit_metadata['start']}-{edit_metadata['end']}", + description=f"~{sample_id} {edit_metadata['symbol']} {group_label} consensus {edit_metadata['chromosome']}:{edit_metadata['start']}-{edit_metadata['end']}", features=all_features, annotations={ "source": "~{sample_id}", @@ -804,7 +303,7 @@ def main(): parser = argparse.ArgumentParser( description="Create GenBank annotations for CRISPR edit parts on haplotype consensus sequences" ) - parser.add_argument("paf", help="PAF alignment file") + parser.add_argument("tsv", help="Blastn tabular alignment file") parser.add_argument("fasta", help="FASTA file with consensus sequences") parser.add_argument("json", help="CRISPR edit JSON file") parser.add_argument("-c", "--coverage_threshold", type=float, default=0.8, @@ -814,8 +313,8 @@ def main(): args = parser.parse_args() - print("Loading faithful alignments from PAF...") - faithful_alignments, query_lens = load_faithful_alignments(args.paf, args.coverage_threshold) + print("Loading faithful alignments from blastn TSV...") + faithful_alignments, query_lens = load_faithful_alignments(args.tsv, args.coverage_threshold) print(f"Found {len(faithful_alignments)} consensus sequences with faithful alignments") print("Loading edit metadata...") @@ -856,7 +355,7 @@ EOF # Run the annotation script python3 create_genbank_annotations.py \ - ~{parts_alignment_paf} \ + ~{parts_alignment_tsv} \ ~{consensus_fasta} \ ~{crispr_edit_json} \ -c ~{coverage_threshold} \ diff --git a/workflows/edit-qc/crispr_edit.schema.json b/workflows/edit-qc/crispr_edit.schema.json index 8b307efd..1e46159a 100644 --- a/workflows/edit-qc/crispr_edit.schema.json +++ b/workflows/edit-qc/crispr_edit.schema.json @@ -79,35 +79,43 @@ "type": "string", "description": 
"right homology arm sequence" }, + "donor_context_before": { + "type": "string", + "description": "optional donor sequence upstream of left_ha (e.g. plasmid backbone 5' of the HDR cassette); not used for edit validation but included when constructing the full donor sequence for knock-knock" + }, + "donor_context_after": { + "type": "string", + "description": "optional donor sequence downstream of right_ha (e.g. plasmid backbone 3' of the HDR cassette); not used for edit validation but included when constructing the full donor sequence for knock-knock" + }, "payload_components": { "type": "array", "items": { "$ref": "#/$defs/NamedSequence" } - }, - "required": [ - "type", - "strand", - "guide", - "left_ha", - "payload", - "right_ha" - ] - } - }, - "required": [ - "id", - "target", - "edit" - ] - } + } + }, + "required": [ + "type", + "strand", + "guide", + "left_ha", + "payload", + "right_ha" + ] + } + }, + "required": [ + "id", + "target", + "edit" + ] } - }, - "required": [ - "ref", - "edits" - ] + } }, + "required": [ + "ref", + "edits" + ], "$defs": { "NamedSequence": { "type": "object", diff --git a/workflows/edit-qc/crispr_edit_consensus.wdl b/workflows/edit-qc/crispr_edit_consensus.wdl index f1816a8e..2854871f 100644 --- a/workflows/edit-qc/crispr_edit_consensus.wdl +++ b/workflows/edit-qc/crispr_edit_consensus.wdl @@ -6,131 +6,55 @@ import "crispr_consensus_tasks.wdl" as CrisprConsensusTasks workflow crispr_edit_consensus { meta { - description: "Generate haplotype consensus sequences and annotate CRISPR edit parts in GenBank format using sample-specific reference" + description: "Annotate MSA-based group consensus sequences with CRISPR edit parts in GenBank format" } parameter_meta { sample_id: "Sample identifier" - haplotagged_bam: "Haplotagged BAM file from phasing" - haplotagged_bam_index: "Index for haplotagged BAM" - ref_fasta: "Reference genome FASTA" - ref_fasta_index: "Reference genome FASTA index" - small_variant_vcf: "Sample small variant VCF (phased)" - small_variant_vcf_index: "Index for small variant VCF" - sv_vcf: "Sample structural variant VCF (phased)" - sv_vcf_index: "Index for structural variant VCF" - crispr_edit_json: "JSON file describing expected CRISPR edit with target region" + consensus_fasta: "Per-group consensus FASTA from build_group_consensus (crispr_edit_qc output)" + crispr_edit_json: "JSON file describing expected CRISPR edit" coverage_threshold: "Minimum coverage threshold for faithful alignments (0-1)" } input { String sample_id - File haplotagged_bam - File haplotagged_bam_index - File ref_fasta - File ref_fasta_index - File small_variant_vcf - File small_variant_vcf_index - File sv_vcf - File sv_vcf_index + File consensus_fasta File crispr_edit_json Float coverage_threshold = 0.8 RuntimeAttributes runtime_attributes } - # Step 1: Create sample-specific reference consensus using bcftools consensus - call CrisprConsensusTasks.create_sample_reference_consensus { - input: - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - small_variant_vcf = small_variant_vcf, - small_variant_vcf_index = small_variant_vcf_index, - sv_vcf = sv_vcf, - sv_vcf_index = sv_vcf_index, - crispr_edit_json = crispr_edit_json, - sample_id = sample_id, - runtime_attributes = runtime_attributes - } - - # Step 2a: Extract reads from edit region - call CrisprConsensusTasks.extract_region_reads { - input: - haplotagged_bam = haplotagged_bam, - haplotagged_bam_index = haplotagged_bam_index, - crispr_edit_json = crispr_edit_json, - sample_id = sample_id, - 
runtime_attributes = runtime_attributes - } - - # Step 2b: Align reads to sample-specific reference using minimap2 - call CrisprConsensusTasks.minimap2_align_to_sample_reference { - input: - region_reads_fastq = extract_region_reads.region_reads_fastq, - sample_ref_consensus_fasta = create_sample_reference_consensus.sample_ref_consensus_fasta, - sample_id = sample_id, - runtime_attributes = runtime_attributes - } - - # Step 2c: Convert SAM to sorted BAM and transfer haplotype tags - call CrisprConsensusTasks.sam_to_sorted_tagged_bam { - input: - sam_file = minimap2_align_to_sample_reference.realigned_sam, - original_bam = haplotagged_bam, - original_bam_index = haplotagged_bam_index, - crispr_edit_json = crispr_edit_json, - sample_id = sample_id, - runtime_attributes = runtime_attributes - } - - # Step 3: Create haplotype consensus sequences from realigned reads - call CrisprConsensusTasks.create_haplotype_consensus_from_realigned { - input: - realigned_bam = sam_to_sorted_tagged_bam.tagged_bam, - realigned_bam_index = sam_to_sorted_tagged_bam.tagged_bam_index, - sample_ref_consensus_fasta = create_sample_reference_consensus.sample_ref_consensus_fasta, - crispr_edit_json = crispr_edit_json, - sample_id = sample_id, - runtime_attributes = runtime_attributes - } - - # Step 4: Create FASTA with edit parts for alignment (including payload_components for detailed annotation) + # Build parts query FASTA (including payload components for detailed annotation) call CrisprEditTasks.create_parts_query_fasta { input: - crispr_edit_json = crispr_edit_json, + crispr_edit_json = crispr_edit_json, include_payload_components = true, - runtime_attributes = runtime_attributes + runtime_attributes = runtime_attributes } - # Step 5: Align edit parts to haplotype consensus sequences - call CrisprEditTasks.minimap2_remap_parts { + # Align edit parts to group consensus sequences + call CrisprEditTasks.blastn_remap_parts as blastn_remap_consensus_parts { input: - filtered_reads_fasta = create_haplotype_consensus_from_realigned.haplotype_consensus_fasta, - parts_query_fasta = create_parts_query_fasta.parts_query_fasta, - sample_id = sample_id, - runtime_attributes = runtime_attributes + filtered_reads_fasta = consensus_fasta, + parts_query_fasta = create_parts_query_fasta.parts_query_fasta, + sample_id = sample_id, + runtime_attributes = runtime_attributes } - # Step 6: Create annotated GenBank files + # Annotate consensus sequences and emit GenBank call CrisprConsensusTasks.annotate_consensus_genbank { input: - parts_alignment_paf = minimap2_remap_parts.parts_alignment_paf, - consensus_fasta = create_haplotype_consensus_from_realigned.haplotype_consensus_fasta, - crispr_edit_json = crispr_edit_json, - coverage_threshold = coverage_threshold, - sample_id = sample_id, - runtime_attributes = runtime_attributes + parts_alignment_tsv = blastn_remap_consensus_parts.parts_alignment_tsv, + consensus_fasta = consensus_fasta, + crispr_edit_json = crispr_edit_json, + coverage_threshold = coverage_threshold, + sample_id = sample_id, + runtime_attributes = runtime_attributes } output { - File sample_ref_consensus_fasta = create_sample_reference_consensus.sample_ref_consensus_fasta - File realigned_bam = sam_to_sorted_tagged_bam.tagged_bam - File realigned_bam_index = sam_to_sorted_tagged_bam.tagged_bam_index - File alignment_stats = sam_to_sorted_tagged_bam.alignment_stats - File haplotype_consensus_fasta = create_haplotype_consensus_from_realigned.haplotype_consensus_fasta - File consensus_summary = 
create_haplotype_consensus_from_realigned.consensus_summary - File parts_alignment_paf = minimap2_remap_parts.parts_alignment_paf + File parts_alignment_tsv = blastn_remap_consensus_parts.parts_alignment_tsv File genbank_annotations = annotate_consensus_genbank.genbank_annotations - File annotation_summary = annotate_consensus_genbank.annotation_summary - File region_reads_fastq = extract_region_reads.region_reads_fastq + File annotation_summary = annotate_consensus_genbank.annotation_summary } } diff --git a/workflows/edit-qc/crispr_edit_qc.wdl b/workflows/edit-qc/crispr_edit_qc.wdl index 82c1a652..a7c26b8f 100644 --- a/workflows/edit-qc/crispr_edit_qc.wdl +++ b/workflows/edit-qc/crispr_edit_qc.wdl @@ -10,70 +10,187 @@ workflow crispr_edit_qc { parameter_meta { sample_id: "Sample identifier" - fasta_reads: "FASTA converted reads from upstream workflow" + fasta_reads: "FASTA converted reads from upstream workflow (whole genome; mutually exclusive with pre_filtered_reads_fasta)" + pre_filtered_reads_fasta: "Pre-filtered FASTA already restricted to the edit region (skips minimap2 filtering when provided)" + haplotagged_bam: "Optional whole-genome aligned BAM. When provided, reads with high-confidence primary alignments >offtarget_window_bp from the edit site are excluded before edit-part search; unmapped and low-MAPQ reads are retained." + haplotagged_bam_index: "Index for haplotagged_bam" + offtarget_window_bp: "Distance (bp) from edit target interval within which on-target primary alignments are kept" + offtarget_mapq_threshold: "Primary alignments with MAPQ <= this value are kept as low-confidence (not excluded as off-target)" crispr_edit_json: "JSON file describing expected CRISPR edit" min_identity: "Minimum alignment identity for initial filtering (0-1)" coverage_threshold: "Minimum coverage threshold for faithful alignments (0-1)" - threads: "Number of threads for minimap2" + threads: "Number of threads for minimap2 (initial read filtering step)" + clip_padding: "Flanking bases to retain around part-hit span when clipping reads for MSA" + gap_merge_threshold: "Max gap difference (bp) between two reads to merge into the same structural group" + wt_gap_threshold: "Max gap (bp) between left_HA and right_HA to call WT_candidate (covers cut-site sequence)" + min_flank_bp: "Minimum flanking bases beyond part hits required to call HDR or WT (vs inconclusive)" + min_part_hit_bp: "Minimum query-span (bp) for a blastn hit to be counted; filters repetitive short matches" } input { String sample_id - File fasta_reads + File? fasta_reads + File? pre_filtered_reads_fasta + File? haplotagged_bam + File? 
haplotagged_bam_index File crispr_edit_json Float min_identity = 0.80 Float coverage_threshold = 0.8 + Int offtarget_window_bp = 500000 + Int offtarget_mapq_threshold = 20 Int threads = 8 + Int clip_padding = 100 + Int gap_merge_threshold = 5 + Int wt_gap_threshold = 20 + Int min_flank_bp = 50 + Int min_part_hit_bp = 100 RuntimeAttributes runtime_attributes } + # ── Build parts query FASTA from edit JSON ──────────────────────────────── call CrisprEditTasks.create_parts_query_fasta { input: crispr_edit_json = crispr_edit_json, runtime_attributes = runtime_attributes } - call CrisprEditTasks.minimap2_align_reads { + # ── Optional off-target read exclusion using whole-genome BAM ───────────── + if (!defined(pre_filtered_reads_fasta) && defined(haplotagged_bam)) { + call CrisprEditTasks.filter_offtarget_reads { + input: + haplotagged_bam = select_first([haplotagged_bam]), + haplotagged_bam_index = select_first([haplotagged_bam_index]), + crispr_edit_json = crispr_edit_json, + sample_id = sample_id, + window_bp = offtarget_window_bp, + mapq_threshold = offtarget_mapq_threshold, + runtime_attributes = runtime_attributes + } + } + + # ── Initial read filter: minimap2 (skipped when pre_filtered_reads_fasta is supplied) ─ + if (!defined(pre_filtered_reads_fasta)) { + File minimap2_input_fasta = select_first([filter_offtarget_reads.filtered_reads_fasta, + fasta_reads]) + call CrisprEditTasks.minimap2_align_reads { + input: + fasta_reads = minimap2_input_fasta, + parts_query_fasta = create_parts_query_fasta.parts_query_fasta, + min_identity = min_identity, + threads = threads, + sample_id = sample_id, + runtime_attributes = runtime_attributes + } + + call CrisprEditTasks.extract_reads { + input: + fasta_reads = minimap2_input_fasta, + matched_read_names = minimap2_align_reads.matched_read_names, + sample_id = sample_id, + runtime_attributes = runtime_attributes + } + } + + File actual_filtered_fasta = select_first([pre_filtered_reads_fasta, + extract_reads.filtered_reads_fasta]) + + # ── Pass-1 blastn: map parts to filtered reads ──────────────────────────── + call CrisprEditTasks.blastn_remap_parts as blastn_pass1 { input: - fasta_reads = fasta_reads, + filtered_reads_fasta = actual_filtered_fasta, parts_query_fasta = create_parts_query_fasta.parts_query_fasta, - min_identity = min_identity, - threads = threads, + sample_id = sample_id + "_pass1", + runtime_attributes = runtime_attributes + } + + # ── Clip reads to union of part-hit spans ───────────────────────────────── + call CrisprEditTasks.clip_reads_to_parts { + input: + parts_alignment_tsv = blastn_pass1.parts_alignment_tsv, + filtered_reads_fasta = actual_filtered_fasta, sample_id = sample_id, + clip_padding = clip_padding, + min_part_hit_bp = min_part_hit_bp, runtime_attributes = runtime_attributes } - call CrisprEditTasks.extract_reads { + # ── Structural grouping: signature + prototype clustering ───────────────── + call CrisprEditTasks.structural_grouping { input: - fasta_reads = fasta_reads, - matched_read_names = minimap2_align_reads.matched_read_names, + parts_alignment_tsv = blastn_pass1.parts_alignment_tsv, sample_id = sample_id, + gap_merge_threshold = gap_merge_threshold, + min_part_hit_bp = min_part_hit_bp, runtime_attributes = runtime_attributes } - call CrisprEditTasks.minimap2_remap_parts { + # ── Per-group MSA and error-corrected consensus (three steps) ──────────── + call CrisprEditTasks.prepare_group_fastas { input: - filtered_reads_fasta = extract_reads.filtered_reads_fasta, - parts_query_fasta = 
create_parts_query_fasta.parts_query_fasta, + clipped_reads_fasta = clip_reads_to_parts.clipped_reads_fasta, + groups_tsv = structural_grouping.groups_tsv, sample_id = sample_id, runtime_attributes = runtime_attributes } - call CrisprEditTasks.parse_edit_parts { + call CrisprEditTasks.run_group_msa { input: - parts_alignment_paf = minimap2_remap_parts.parts_alignment_paf, + group_inputs_tarball = prepare_group_fastas.group_inputs_tarball, + sample_id = sample_id, + runtime_attributes = runtime_attributes + } + + call CrisprEditTasks.build_group_consensus { + input: + msa_tarball = run_group_msa.msa_tarball, + singletons_fasta = prepare_group_fastas.singletons_fasta, + groups_tsv = structural_grouping.groups_tsv, + sample_id = sample_id, + runtime_attributes = runtime_attributes + } + + # ── Wide per-part coverage table (backwards-compatible) ────────────────── + call CrisprEditTasks.make_wide_coverage_table { + input: + parts_alignment_tsv = blastn_pass1.parts_alignment_tsv, coverage_threshold = coverage_threshold, + min_part_hit_bp = min_part_hit_bp, + sample_id = sample_id, + runtime_attributes = runtime_attributes + } + + # ── Final categorization ────────────────────────────────────────────────── + call CrisprEditTasks.categorize_reads { + input: + filtered_reads_fasta = actual_filtered_fasta, + groups_tsv = structural_grouping.groups_tsv, + subhaps_tsv = build_group_consensus.subhaps_tsv, + pass2_alignment_tsv = blastn_pass1.parts_alignment_tsv, sample_id = sample_id, + gap_merge_threshold = gap_merge_threshold, + wt_gap_threshold = wt_gap_threshold, + min_flank_bp = min_flank_bp, + min_part_hit_bp = min_part_hit_bp, runtime_attributes = runtime_attributes } output { - File? parts_query_fasta = create_parts_query_fasta.parts_query_fasta - File? filtered_reads_fasta = extract_reads.filtered_reads_fasta - File? parts_alignment_paf = minimap2_remap_parts.parts_alignment_paf - File? summary_table = parse_edit_parts.summary_table - File? faithful_table = parse_edit_parts.faithful_table - File? wide_table = parse_edit_parts.wide_table - File? wide_faithful_table = parse_edit_parts.wide_faithful_table + # Intermediate / diagnostic + File? parts_query_fasta = create_parts_query_fasta.parts_query_fasta + File? filtered_reads_fasta = extract_reads.filtered_reads_fasta + File? filtered_reads_fastq = extract_reads.filtered_reads_fastq + File? offtarget_filter_counts_tsv = filter_offtarget_reads.counts_tsv + File? parts_alignment_tsv = blastn_pass1.parts_alignment_tsv + File? clipped_reads_fasta = clip_reads_to_parts.clipped_reads_fasta + + # Per-part coverage table (wide faithful format, backwards-compatible) + File? wide_faithful_table = make_wide_coverage_table.wide_faithful_table + + # MSA outputs + File? msa_tarball = build_group_consensus.all_msa_tarball + File? consensus_fasta = build_group_consensus.consensus_fasta + + # Primary result + File? 
categories_tsv = categorize_reads.categories_tsv } } diff --git a/workflows/edit-qc/crispr_edit_tasks.wdl b/workflows/edit-qc/crispr_edit_tasks.wdl index cbb41216..a374173d 100644 --- a/workflows/edit-qc/crispr_edit_tasks.wdl +++ b/workflows/edit-qc/crispr_edit_tasks.wdl @@ -24,38 +24,32 @@ task create_parts_query_fasta { command <<< set -euxo pipefail - python3 <{edit_id}_left_HA\n{left_ha}\n") fasta_out.write(f">{edit_id}_payload\n{payload}\n") fasta_out.write(f">{edit_id}_right_HA\n{right_ha}\n") fasta_out.write(f">{edit_id}_HDR_template\n{left_ha}{payload}{right_ha}\n") fasta_out.write(f">{edit_id}_WT_template\n{left_ha}{right_ha}\n") - # Optionally include payload components for detailed annotation if include_components and 'payload_components' in edit['edit']: for component in edit['edit']['payload_components']: comp_name = component['name'].replace(" ", "_") - comp_seq = component['seq'] - fasta_out.write(f">{comp_name}\n{comp_seq}\n") -EOF + fasta_out.write(f">{comp_name}\n{component['seq']}\n") +PYEOF >>> output { @@ -74,6 +68,99 @@ EOF } } +task filter_offtarget_reads { + meta { + description: "Exclude reads with high-confidence primary alignments far from the edit site (off-target integrations). Keeps on-target (within window_bp of cut site), low-confidence (primary MAPQ<=threshold), and unmapped reads. Emits a gzipped FASTA and a counts report." + } + + parameter_meta { + haplotagged_bam: "Whole-genome aligned BAM (expected to include unmapped reads)" + haplotagged_bam_index: "Index for aligned BAM" + crispr_edit_json: "JSON file describing CRISPR edit with target region" + window_bp: "Keep high-confidence reads whose primary alignment start is within this many bp of the edit target interval (same chromosome)" + mapq_threshold: "Primary alignments with MAPQ <= this value are treated as low-confidence and kept" + } + + input { + File haplotagged_bam + File haplotagged_bam_index + File crispr_edit_json + String sample_id + Int window_bp = 500000 + Int mapq_threshold = 20 + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{sample_id}_ontarget_filter" + Float file_size = ceil(size(haplotagged_bam, "GB") * 2) + + command <<< + set -euxo pipefail + + python3 <<'PYEOF' +import json +with open("~{crispr_edit_json}") as fh: + data = json.load(fh) +edit = data['edits'][0] +chrom = edit['target']['chr'] +lo = max(1, edit['target']['start'] - ~{window_bp}) +hi = edit['target']['end'] + ~{window_bp} +with open('region.env', 'w') as fh: + fh.write(f"CHR={chrom}\nLO={lo}\nHI={hi}\n") +PYEOF + source region.env + echo "On-target window: ${CHR}:${LO}-${HI}" + + # Single pass over primary records: classify and emit keep list + counts. + # Flag bit 4 (unmapped) detected via integer arithmetic for portable awk. 
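# Annotation (not part of the task): the same keep/exclude decision expressed
# with pysam, illustrative only; the task streams samtools view output through
# the awk one-pass below instead. The function name classify is hypothetical.
import pysam

def classify(aln: pysam.AlignedSegment, chrom: str, lo: int, hi: int, mapq_max: int) -> str:
    if aln.is_secondary or aln.is_supplementary:       # -F 2304: primary records only
        return "ignore"
    if aln.is_unmapped:
        return "unmapped"                               # kept
    if aln.mapping_quality <= mapq_max:
        return "low_confidence"                         # kept
    # reference_start is 0-based; the awk below compares 1-based POS ($4)
    if aln.reference_name == chrom and lo <= aln.reference_start + 1 <= hi:
        return "on_target"                              # kept
    return "off_target_excluded"                        # dropped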
+ samtools view -F 2304 ~{haplotagged_bam} \ + | awk -F'\t' -v OFS='\t' \ + -v chr="${CHR}" -v lo="${LO}" -v hi="${HI}" -v q=~{mapq_threshold} ' + { + flag = $2 + 0 + unmapped = (int(flag/4) % 2) + if (unmapped) { u++; print $1 > "keep.txt"; next } + if (($5 + 0) <= q) { l++; print $1 > "keep.txt"; next } + if ($3 == chr && ($4+0) >= lo && ($4+0) <= hi) { o++; print $1 > "keep.txt"; next } + x++ + } + END { + print "on_target", o+0 > "counts.tsv" + print "low_confidence", l+0 > "counts.tsv" + print "unmapped", u+0 > "counts.tsv" + print "off_target_excluded", x+0 > "counts.tsv" + }' + + { echo -e "category\tread_count"; cat counts.tsv; } > ~{out_prefix}_counts.tsv + cat ~{out_prefix}_counts.tsv + + # Deduplicate keep list (a read may appear on multiple primary records across references + # only when split — still a single primary, but guard anyway) + LC_ALL=C sort -u keep.txt > keep.uniq.txt + echo "Kept unique read names: $(wc -l < keep.uniq.txt)" + + samtools view -h -N keep.uniq.txt -F 2304 ~{haplotagged_bam} \ + | samtools fasta - \ + | gzip -c > ~{out_prefix}.fasta.gz + >>> + + output { + File filtered_reads_fasta = "~{out_prefix}.fasta.gz" + File counts_tsv = "~{out_prefix}_counts.tsv" + } + + runtime { + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 2 + memory: "8 GB" + disk: file_size + " GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + task minimap2_align_reads { meta { description: "Use minimap2 to identify ALL reads containing edit parts" @@ -155,10 +242,31 @@ task extract_reads { # Count reads MATCHED_READS=$(grep -c '^>' ~{out_prefix}_filtered_reads.fasta || echo 0) echo "Reads matching edit parts: $MATCHED_READS" + + # Convert FASTA to gzip FASTQ with synthetic Q40 quality scores (for tools requiring FASTQ) + python3 <<'PYEOF' +import gzip +name, buf = None, [] +with open("~{out_prefix}_filtered_reads.fasta") as fh, \ + gzip.open("~{out_prefix}_filtered_reads.fastq.gz", 'wt') as out: + for line in fh: + line = line.rstrip() + if line.startswith('>'): + if name is not None: + seq = ''.join(buf) + out.write(f"@{name}\n{seq}\n+\n{'I' * len(seq)}\n") + name, buf = line[1:], [] + else: + buf.append(line) + if name is not None: + seq = ''.join(buf) + out.write(f"@{name}\n{seq}\n+\n{'I' * len(seq)}\n") +PYEOF >>> output { File filtered_reads_fasta = "~{out_prefix}_filtered_reads.fasta" + File filtered_reads_fastq = "~{out_prefix}_filtered_reads.fastq.gz" } runtime { @@ -174,14 +282,101 @@ task extract_reads { } -task minimap2_remap_parts { +task extract_region_reads_fasta { meta { - description: "Remap edit parts to filtered reads with relaxed parameters" + description: "Extract reads from a haplotagged BAM overlapping the edit target region; output FASTA" } parameter_meta { - filtered_reads_fasta: "FASTA with reads containing edit parts" - parts_query_fasta: "FASTA file with edit parts sequences" + haplotagged_bam: "Haplotagged BAM file from phasing" + haplotagged_bam_index: "Index for haplotagged BAM" + crispr_edit_json: "JSON file describing CRISPR edit with target region" + padding: "Bases to add on each side of the target region" + } + + input { + File haplotagged_bam + File haplotagged_bam_index + File crispr_edit_json + String sample_id + Int padding = 10000 + RuntimeAttributes runtime_attributes + } + + String 
out_prefix = "~{sample_id}_region_reads" + + command <<< + set -euxo pipefail + + python3 <<'PYEOF' +import json +with open("~{crispr_edit_json}") as fh: + data = json.load(fh) +edit = data['edits'][0] +chrom = edit['target']['chr'] +start = max(1, edit['target']['start'] - ~{padding}) +end = edit['target']['end'] + ~{padding} +with open('region.txt', 'w') as fh: + fh.write(f"{chrom}:{start}-{end}\n") +PYEOF + + REGION=$(cat region.txt) + echo "Extracting reads from ${REGION}" + + samtools view -h ~{haplotagged_bam} "${REGION}" | \ + samtools fasta -F 2304 - \ + > ~{out_prefix}.fasta + + READ_COUNT=$(grep -c '^>' ~{out_prefix}.fasta || echo 0) + echo "Extracted ${READ_COUNT} reads" + + # Synthetic Q40 FASTQ for tools that require FASTQ + python3 <<'PYEOF' +import gzip +name, buf = None, [] +with open("~{out_prefix}.fasta") as fh, \ + gzip.open("~{out_prefix}.fastq.gz", 'wt') as out: + for line in fh: + line = line.rstrip() + if line.startswith('>'): + if name is not None: + seq = ''.join(buf) + out.write(f"@{name}\n{seq}\n+\n{'I' * len(seq)}\n") + name, buf = line[1:], [] + else: + buf.append(line) + if name is not None: + seq = ''.join(buf) + out.write(f"@{name}\n{seq}\n+\n{'I' * len(seq)}\n") +PYEOF + >>> + + output { + File region_reads_fasta = "~{out_prefix}.fasta" + File region_reads_fastq = "~{out_prefix}.fastq.gz" + } + + runtime { + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 2 + memory: "4 GiB" + disk: "20 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + + +task blastn_remap_parts { + meta { + description: "Remap edit parts to filtered reads using blastn" + } + + parameter_meta { + filtered_reads_fasta: "FASTA with reads containing edit parts (subject)" + parts_query_fasta: "FASTA file with edit parts sequences (query)" } input { @@ -193,25 +388,155 @@ task minimap2_remap_parts { String out_prefix = "~{sample_id}_edit_parts" + # Tabular columns: query(part) coords, subject(read) coords, alignment stats, strand + # Coordinates are 1-based; strand is "plus"/"minus". + # word_size=32 prevents spurious 31 bp hits from XTEN-family repeat sequences + # that share k-mers with the human genome (validated on czML1116 EEA1 dataset). + # r=1 p=-2 go=2 ge=1: best CIGAR accuracy in ground-truth evaluation. 
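# Annotation (not part of the task): how downstream parsers read one row of the
# tabular output declared just below (columns qseqid qlen qstart qend sseqid
# slen sstart send nident mismatch gapopen gaps length sstrand bitscore).
# The example values are made up; subject coordinates are swapped and converted
# to 0-based half-open, as in annotate_consensus_genbank and clip_reads_to_parts.
row = "edit1_left_HA\t800\t1\t800\tread42\t15000\t5200\t4401\t792\t6\t1\t2\t800\tminus\t1400"
f = row.split("\t")
q_start, q_end = int(f[2]) - 1, int(f[3])                  # part coords, 0-based half-open
sstart, send = int(f[6]), int(f[7])                        # blastn reports sstart > send on minus
t_start, t_end = min(sstart, send) - 1, max(sstart, send)  # read coords, orientation-free
strand = "+" if f[13] == "plus" else "-"
identity = int(f[8]) / int(f[1])                           # nident / qlen = 792/800 = 0.99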
+ String blastn_outfmt = "6 qseqid qlen qstart qend sseqid slen sstart send nident mismatch gapopen gaps length sstrand bitscore" + command <<< set -euxo pipefail - minimap2 -cx asm10 -N 1000 -p 0.5 \ - ~{filtered_reads_fasta} ~{parts_query_fasta} \ - > ~{out_prefix}_minimap2_crispr_parts.paf + blastn \ + -query ~{parts_query_fasta} \ + -subject ~{filtered_reads_fasta} \ + -outfmt "~{blastn_outfmt}" \ + -task blastn \ + -word_size 32 \ + -reward 1 \ + -penalty -2 \ + -gapopen 2 \ + -gapextend 1 \ + -evalue 1 \ + -perc_identity 80 \ + -max_target_seqs 10000 \ + -parse_deflines \ + > ~{out_prefix}_blastn_crispr_parts.tsv + + echo "Total alignments: $(wc -l < ~{out_prefix}_blastn_crispr_parts.tsv)" + >>> - # Count alignments - echo "Total alignments: $(wc -l < ~{out_prefix}_minimap2_crispr_parts.paf)" + output { + File parts_alignment_tsv = "~{out_prefix}_blastn_crispr_parts.tsv" + } + + runtime { + docker: "quay.io/biocontainers/blast@sha256:9dfc69f990c0aeb936276ee591ed32919a79f46dfa34060055e05a050a17959c" + cpu: 2 + memory: "8 GiB" + disk: "20 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + + + +task clip_reads_to_parts { + meta { + description: "Clip filtered reads to the union of all blastn part-hit spans plus padding" + } + + parameter_meta { + parts_alignment_tsv: "Pass-1 blastn tabular output (from blastn_remap_parts)" + filtered_reads_fasta: "Uncompressed FASTA with reads overlapping edit parts" + clip_padding: "Bases to retain on each side of the union hit span (default 100)" + min_part_hit_bp: "Minimum query-span (bp) for a blastn hit to be counted; filters repetitive short matches" + } + + input { + File parts_alignment_tsv + File filtered_reads_fasta + String sample_id + Int clip_padding = 100 + Int min_part_hit_bp = 100 + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{sample_id}_clipped" + + command <<< + set -euxo pipefail + + python3 <<'PYEOF' +import sys +from collections import defaultdict + +# Parse blastn TSV: collect hit t-spans per read (all parts, all hits) +# Columns: qseqid qlen qstart qend sseqid slen sstart send nident mismatch gapopen gaps length sstrand bitscore +hit_spans = defaultdict(list) # read_name -> [(t_start, t_end), ...] 
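+# Worked example of what ends up in hit_spans (hypothetical read): hits spanning
+# subject positions 1200-1700 and 2900-3400 on a 15,000 bp read reduce to the
+# union span 1200-3400; with the default clip_padding of 100 the read is clipped
+# to 1100-3500 below.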
+read_lens = {} + +MIN_HIT = ~{min_part_hit_bp} + +with open("~{parts_alignment_tsv}") as fh: + for ln in fh: + if not ln.strip() or ln.startswith('#'): + continue + f = ln.rstrip('\n').split('\t') + if len(f) < 15: + continue + if abs(int(f[3]) - int(f[2])) < MIN_HIT: + continue + read_name = f[4] + sstart, send = int(f[6]), int(f[7]) + t_start = min(sstart, send) - 1 # 0-based half-open + t_end = max(sstart, send) + hit_spans[read_name].append((t_start, t_end)) + read_lens[read_name] = int(f[5]) + +padding = ~{clip_padding} + +# Compute clip window per read +clip_regions = {} +for rname, spans in hit_spans.items(): + rlen = read_lens[rname] + cs = max(0, min(s[0] for s in spans) - padding) + ce = min(rlen, max(s[1] for s in spans) + padding) + clip_regions[rname] = (cs, ce) + +# Stream FASTA and clip +n_written = 0 +name = None +buf = [] + +with open("~{filtered_reads_fasta}") as fh, \ + open("~{out_prefix}.fasta", 'w') as out: + def flush(name, buf): + global n_written + if name not in clip_regions: + return + seq = ''.join(buf) + cs, ce = clip_regions[name] + out.write(f">{name} clip={cs}-{ce} orig_len={len(seq)}\n{seq[cs:ce]}\n") + n_written += 1 + for ln in fh: + ln = ln.rstrip('\n') + if ln.startswith('>'): + if name is not None: + flush(name, buf) + name = ln[1:].split()[0] + buf = [] + else: + buf.append(ln.upper()) + if name is not None: + flush(name, buf) + +print(f"Clipped {n_written} reads (padding={padding} bp)") +PYEOF >>> output { - File parts_alignment_paf = "~{out_prefix}_minimap2_crispr_parts.paf" + File clipped_reads_fasta = "~{out_prefix}.fasta" } runtime { - docker: "quay.io/biocontainers/minimap2@sha256:fdc9ef8bfbd31bab59a61b2e90dd226647deed971556175a4cd004f0bcdc7608" - cpu: 4 - memory: "8 GB" + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 1 + memory: "8 GiB" disk: "20 GB" preemptible: runtime_attributes.preemptible_tries maxRetries: runtime_attributes.max_retries @@ -220,170 +545,1038 @@ task minimap2_remap_parts { } } -task parse_edit_parts { + +task structural_grouping { meta { - description: "Parse PAF alignments to create summary tables with coverage metrics" + description: "Derive structural signatures from blastn part hits and cluster reads into groups" } parameter_meta { - parts_alignment_paf: "PAF file from minimap2 alignment of edit parts to reads" - coverage_threshold: "Minimum coverage threshold for faithful alignments" + parts_alignment_tsv: "Pass-1 blastn tabular output" + gap_merge_threshold: "Max gap difference (bp) between two reads to merge into the same structural group" + min_part_hit_bp: "Minimum query-span (bp) for a blastn hit to be counted; filters repetitive short matches" } input { - File parts_alignment_paf - Float coverage_threshold = 0.8 + File parts_alignment_tsv String sample_id + Int gap_merge_threshold = 5 + Int min_part_hit_bp = 100 RuntimeAttributes runtime_attributes } - String out_prefix = "~{sample_id}_edit_parts_summary" + String out_prefix = "~{sample_id}_structural" command <<< set -euxo pipefail - cat > parse_paf_to_table.py <<'EOF' -#!/usr/bin/env python3 -""" -parse_paf_to_table.py – 2025-07-03 + python3 <<'PYEOF' +from collections import defaultdict, Counter + +# ── Parse blastn TSV ────────────────────────────────────────────────────────── +# Collect hits for individual parts only (left_HA, payload, right_HA). 
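+# Parts are recognized by query-name suffix (_left_HA, _payload, _right_HA), as
+# in the suffix checks below.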
+# Skip HDR_template and WT_template — they are composite sequences and their +# hits are redundant with (and noisier than) the per-part hits. + +hits_by_read = defaultdict(list) # read_name -> [(part_base, strand, t_start, t_end)] + +MIN_HIT = ~{min_part_hit_bp} + +with open("~{parts_alignment_tsv}") as fh: + for ln in fh: + if not ln.strip() or ln.startswith('#'): + continue + f = ln.rstrip('\n').split('\t') + if len(f) < 15: + continue + qname = f[0] + read_name = f[4] + if abs(int(f[3]) - int(f[2])) < MIN_HIT: + continue + sstart, send = int(f[6]), int(f[7]) + t_start = min(sstart, send) - 1 + t_end = max(sstart, send) + strand = '+' if f[13] == 'plus' else '-' + + if qname.endswith('_left_HA'): part_base = 'left_HA' + elif qname.endswith('_right_HA'): part_base = 'right_HA' + elif qname.endswith('_payload'): part_base = 'payload' + else: continue # composite or unknown + + hits_by_read[read_name].append((part_base, strand, t_start, t_end)) + +# ── Build structural signature for each read ───────────────────────────────── +# Signature: list of (part_base, strand) in t_start order; gaps between them. + +def build_sig(hits): + hits_s = sorted(hits, key=lambda h: h[2]) # sort by t_start + parts_strands = [(h[0], h[1]) for h in hits_s] + lengths = [h[3] - h[2] for h in hits_s] + gaps = [hits_s[i+1][2] - hits_s[i][3] for i in range(len(hits_s)-1)] + return parts_strands, gaps, lengths + +def sig_str(ps, gs, ls): + tokens = [] + for i, (p, s) in enumerate(ps): + tokens.append(f"{p}:{s}:{ls[i]}") + if i < len(gs): + tokens.append(f"gap:{gs[i]}") + return '|'.join(tokens) if tokens else 'empty' + +# ── Cluster reads into structural groups ────────────────────────────────────── +# Prototype-based greedy clustering. +# Two reads join the same group iff: +# 1. Identical ordered (part, strand) sequence +# 2. 
|gap_i(A) - gap_i(B)| <= gap_merge_threshold for every gap position i + +GAP_THR = ~{gap_merge_threshold} + +prototypes = defaultdict(list) # pattern_key -> [(prototype_gaps, group_id)] +group_id_ctr = 0 +read_groups = {} # read_name -> (gid, sig_string) + +for read_name, hits in sorted(hits_by_read.items()): + ps, gs, ls = build_sig(hits) + key = tuple(ps) + + matched_gid = None + for proto_gs, gid in prototypes[key]: + if len(proto_gs) == len(gs) and all(abs(pg - g) <= GAP_THR + for pg, g in zip(proto_gs, gs)): + matched_gid = gid + break + + if matched_gid is None: + group_id_ctr += 1 + matched_gid = group_id_ctr + prototypes[key].append((gs, matched_gid)) + + read_groups[read_name] = (matched_gid, sig_str(ps, gs, ls)) + +# ── Write output ────────────────────────────────────────────────────────────── +group_sizes = Counter(gid for gid, _ in read_groups.values()) + +with open("~{out_prefix}_groups.tsv", 'w') as out: + out.write("read_name\tgroup_id\tsignature\tgroup_size\n") + for rname, (gid, sig) in sorted(read_groups.items()): + out.write(f"{rname}\t{gid}\t{sig}\t{group_sizes[gid]}\n") + +with open("~{out_prefix}_hits.tsv", 'w') as out: + out.write("read_name\tpart\tstrand\tt_start\tt_end\tmatch_len\n") + for rname, hits in sorted(hits_by_read.items()): + for part, strand, t_start, t_end in sorted(hits, key=lambda h: h[2]): + out.write(f"{rname}\t{part}\t{strand}\t{t_start}\t{t_end}\t{t_end - t_start}\n") + +print(f"Reads grouped : {len(read_groups)}") +print(f"Groups formed : {group_id_ctr}") +PYEOF + >>> -Adds per-query coverage columns computed as ∑n_match / query_len ----------------------------------------------------------------- + output { + File groups_tsv = "~{out_prefix}_groups.tsv" + File hits_tsv = "~{out_prefix}_hits.tsv" + } + + runtime { + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 1 + memory: "4 GiB" + disk: "10 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} -Long table header: - target_name target_len query_name query_len coverage num_hits hits -Wide table header: - target_name target_len all_one_hit - _len _cov _num_hits _hit_coordinates - _len _cov _num_hits _hit_coordinates ... 
-""" +task prepare_group_fastas { + meta { + description: "Partition clipped reads into per-group input FASTAs ready for MAFFT; emit singletons separately" + } -import argparse, csv + input { + File clipped_reads_fasta + File groups_tsv + String sample_id + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{sample_id}_msa" + + command <<< + set -euxo pipefail + + python3 <<'PYEOF' +import os, tarfile from collections import defaultdict -from typing import Dict, List, Tuple -# ──────────── type aliases ──────────── # -Hit = Tuple[int, int, int, int, str, int] # … + n_match -HitDict = Dict[str, Dict[str, List[Hit]]] # hits[target][query] = [Hit, …] -LenDict = Dict[str, int] - -# ──────────── parsing ──────────── # -def parse_paf_line(line: str) -> Dict: - f = line.rstrip("\n").split("\t") - if len(f) < 12: - raise ValueError("PAF line has fewer than 12 mandatory columns") - return dict( - query_name=f[0], query_len=int(f[1]), - q_start=int(f[2]), q_end=int(f[3]), strand=f[4], - target_name=f[5], target_len=int(f[6]), - t_start=int(f[7]), t_end=int(f[8]), - n_match=int(f[9]), n_align=int(f[10]), mapq=int(f[11]), - ) - -def load_hits(paf_path: str, cov_thr: float): - hits, faithful = defaultdict(lambda: defaultdict(list)), defaultdict(lambda: defaultdict(list)) - tlen, qlen = {}, {} - with open(paf_path) as fh: +groups = defaultdict(list) + +with open("~{groups_tsv}") as fh: + fh.readline() + for ln in fh: + f = ln.rstrip('\n').split('\t') + if len(f) < 4: + continue + rname, gid = f[0], int(f[1]) + groups[gid].append(rname) + +reads = {} +name = None +buf = [] +with open("~{clipped_reads_fasta}") as fh: + for ln in fh: + ln = ln.rstrip('\n') + if ln.startswith('>'): + if name is not None: + reads[name] = ''.join(buf) + name = ln[1:].split()[0] + buf = [] + else: + buf.append(ln.upper()) + if name is not None: + reads[name] = ''.join(buf) + +os.makedirs("msa_inputs", exist_ok=True) +singletons_out = open("~{out_prefix}_singletons.fasta", 'w') + +for gid in sorted(groups): + members = [r for r in groups[gid] if r in reads] + if not members: + continue + if len(members) == 1: + seq = reads[members[0]] + singletons_out.write(f">group_{gid}_singleton:{members[0]}\n{seq}\n") + continue + with open(f"msa_inputs/group_{gid}_input.fasta", 'w') as gf: + for r in members: + gf.write(f">{r}\n{reads[r]}\n") + +singletons_out.close() + +with tarfile.open("~{out_prefix}_inputs.tar.gz", "w:gz") as tar: + tar.add("msa_inputs", arcname="msa_inputs") + +print(f"Groups written: {sum(1 for gid in groups if len([r for r in groups[gid] if r in reads]) > 1)} multi-read, " + f"{sum(1 for gid in groups if len([r for r in groups[gid] if r in reads]) == 1)} singleton") +PYEOF + >>> + + output { + File group_inputs_tarball = "~{out_prefix}_inputs.tar.gz" + File singletons_fasta = "~{out_prefix}_singletons.fasta" + } + + runtime { + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 1 + memory: "8 GiB" + disk: "20 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + + +task run_group_msa { + meta { + description: "Run MAFFT --auto on each per-group input FASTA; pure bash, no Python" + } + + input { + File group_inputs_tarball + String sample_id + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{sample_id}_msa" + + command <<< + set 
-euxo pipefail + + mkdir msa_inputs msa_outputs + tar -xzf ~{group_inputs_tarball} -C msa_inputs --strip-components=1 + + shopt -s nullglob + for input_fasta in msa_inputs/group_*_input.fasta; do + gid=$(basename "$input_fasta" _input.fasta | sed 's/^group_//') + mafft --auto --quiet "$input_fasta" > "msa_outputs/group_${gid}.fasta" + done + + tar -czf ~{out_prefix}_msa.tar.gz -C msa_outputs . + echo "MAFFT done: $(ls msa_outputs/*.fasta 2>/dev/null | wc -l) groups aligned" + >>> + + output { + File msa_tarball = "~{out_prefix}_msa.tar.gz" + } + + runtime { + docker: "quay.io/biocontainers/mafft@sha256:b8ccc402a9156868fefda5caa8693f847b47dcc69aae848557f7115850e60790" + cpu: 4 + memory: "16 GiB" + disk: "50 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + + +task build_group_consensus { + meta { + description: "Parse per-group MSA FASTAs; build error-corrected consensus; emit group metadata" + } + + parameter_meta { + mask_min_support: "Mask consensus positions where plurality support <= this AND coverage >= mask_min_coverage" + mask_min_coverage: "Coverage threshold for masking (see mask_min_support)" + } + + input { + File msa_tarball + File singletons_fasta + File groups_tsv + String sample_id + Int mask_min_support = 2 + Int mask_min_coverage = 5 + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{sample_id}_msa" + + command <<< + set -euxo pipefail + + python3 <<'PYEOF' +import os, re, tarfile +from collections import defaultdict + +# ── Load group metadata ─────────────────────────────────────────────────────── +groups = defaultdict(list) +group_strand = {} # gid -> '+' or '-' +group_category = {} # gid -> 'HDR'|'WT'|'partial'|'misintegration'|'other' + +_FWD_HDR_PARTS = [('left_HA', '+'), ('payload', '+'), ('right_HA', '+')] +_REV_HDR_PARTS = [('right_HA', '-'), ('payload', '-'), ('left_HA', '-')] + +def _classify_sig(sig): + """Classify a group signature string into HDR / WT / partial / misintegration. + Signature format: '{part}:{strand}:{len}|gap:{bp}|...'. 
Structural only — does + not check gap sizes or flanking (partial consensus may represent a real + haplotype for inclusion decisions even if reads are truncated).""" + pairs = [] + for tok in sig.split('|'): + if tok.startswith('gap:') or tok == 'empty': + continue + parts = tok.split(':') + if len(parts) >= 2: + pairs.append((parts[0], parts[1])) + if not pairs: + return 'other' + strands = {s for _, s in pairs} + if len(strands) != 1: + return 'misintegration' + parts_only = [p for p, _ in pairs] + strand = next(iter(strands)) + if strand == '+': + if parts_only == ['left_HA', 'payload', 'right_HA']: + return 'HDR' + if parts_only == ['left_HA', 'right_HA']: + return 'WT' + if parts_only in (['left_HA', 'payload'], ['payload', 'right_HA'], + ['left_HA'], ['payload'], ['right_HA']): + return 'partial' + else: + if parts_only == ['right_HA', 'payload', 'left_HA']: + return 'HDR' + if parts_only == ['right_HA', 'left_HA']: + return 'WT' + if parts_only in (['payload', 'left_HA'], ['right_HA', 'payload'], + ['left_HA'], ['payload'], ['right_HA']): + return 'partial' + return 'misintegration' + +with open("~{groups_tsv}") as fh: + fh.readline() + for ln in fh: + f = ln.rstrip('\n').split('\t') + if len(f) < 4: + continue + rname, gid, sig = f[0], int(f[1]), f[2] + groups[gid].append(rname) + if gid not in group_strand: + m = re.search(r':([+-]):', sig) + if m: + group_strand[gid] = m.group(1) + if gid not in group_category: + group_category[gid] = _classify_sig(sig) + +def header_tags(gid): + tags = [] + if gid in group_strand: + tags.append(f"strand={group_strand[gid]}") + if gid in group_category: + tags.append(f"category={group_category[gid]}") + return (' ' + ' '.join(tags)) if tags else '' + +MASK_SUPP = ~{mask_min_support} +MASK_COV = ~{mask_min_coverage} + +os.makedirs("msa_groups", exist_ok=True) + +# ── Helpers ─────────────────────────────────────────────────────────────────── +def plurality_consensus(seqs_dict): + """Plurality-vote consensus with N-masking over {name: msa_seq}. + Gaps are treated as votes for deletion: if gaps are the plurality at a + column, that position is omitted from the consensus (correct for a + haplotype where most reads carry a deletion or the column is an insertion + in a minority of reads). + """ + width = len(next(iter(seqs_dict.values()))) + bases = [] + for col in range(width): + col_bases = [s[col] for s in seqs_dict.values()] + gap_count = col_bases.count('-') + non_gap = [b for b in col_bases if b != '-'] + if not non_gap or gap_count >= len(non_gap): + continue # all-gap or gap-plurality column → omit from consensus + freq = {} + for b in non_gap: + freq[b] = freq.get(b, 0) + 1 + best_base, best_count = max(freq.items(), key=lambda x: x[1]) + bases.append('N' if best_count <= MASK_SUPP and len(non_gap) >= MASK_COV + else best_base) + return ''.join(bases) + +def hamming(v1, v2): + # Gap vs base counts as a mismatch; only N positions are skipped + return sum(1 for a, b in zip(v1, v2) + if 'N' not in (a, b) and a != b) + +def retained_columns(seqs_dict): + """Return MSA column indices that plurality_consensus keeps (non-gap-plurality). 
+ Used to restrict haplotype clustering to error-corrected positions only.""" + width = len(next(iter(seqs_dict.values()))) + cols = [] + for col in range(width): + col_bases = [s[col] for s in seqs_dict.values()] + gap_count = col_bases.count('-') + non_gap = [b for b in col_bases if b != '-'] + if non_gap and gap_count < len(non_gap): + cols.append(col) + return cols + +def cluster_haplotypes(msa_seqs, cols=None): + """ + Detect variable sites (>= 2 distinct non-N alleles — including '-' for + indels — each in >= 2 reads) and cluster reads by identical allele vector + (Hamming = 0, skipping N positions only). Returns {read_name: hap_label} + where label is 'A', 'B', etc. Returns all 'A' if no variable sites exist + or any resulting cluster has fewer than 2 reads. + + cols: restrict variable-site search to these MSA column indices (pass-1 + error-corrected columns). Defaults to all columns. + """ + rnames = list(msa_seqs.keys()) + width = len(next(iter(msa_seqs.values()))) + if cols is None: + cols = range(width) + + var_cols = [] + for col in cols: + freq = {} + for r in rnames: + b = msa_seqs[r][col] + if b != 'N': # gaps count as a real allele + freq[b] = freq.get(b, 0) + 1 + if sum(1 for cnt in freq.values() if cnt >= 2) >= 2: + var_cols.append(col) + + if not var_cols: + return {r: 'A' for r in rnames} + + vecs = {r: tuple(msa_seqs[r][col] for col in var_cols) for r in rnames} + clusters = [] + for r in rnames: + for cluster in clusters: + if hamming(vecs[r], vecs[next(iter(cluster))]) == 0: + cluster.add(r) + break + else: + clusters.append({r}) + + # Require every cluster to have >= 2 reads; otherwise the split is unreliable + if any(len(c) < 2 for c in clusters): + return {r: 'A' for r in rnames} + + labels = [chr(65 + i) for i in range(len(clusters))] + return {r: labels[i] for i, cluster in enumerate(clusters) for r in cluster} + +# ── Sub-haplotype assignments (written at end) ──────────────────────────────── +subhap_assignments = {} # read_name -> "group_{gid}_hap_{label}" + +# ── Singletons pass-through ─────────────────────────────────────────────────── +name = None +buf = [] + +with open("~{out_prefix}_consensus.fasta", 'w') as consensus_out, \ + open("~{out_prefix}_group_meta.tsv", 'w') as group_meta_out: + group_meta_out.write("group_id\tn_reads\thas_consensus\n") + + def flush_singleton(header, buf): + tag = header.split()[0] # group_{gid}_singleton:{read_name} + try: + gid = int(tag.split('_')[1]) + except ValueError: + return + seq = ''.join(buf) + read_name = tag.split(':')[1] if ':' in tag else tag + consensus_out.write(f">group_{gid}_singleton n_reads=1{header_tags(gid)}\n{seq}\n") + with open(f"msa_groups/group_{gid}.fasta", 'w') as mf: + mf.write(f">{read_name}\n{seq}\n") + group_meta_out.write(f"{gid}\t1\tFALSE\n") + subhap_assignments[read_name] = f"group_{gid}_hap_A" + + with open("~{singletons_fasta}") as fh: for ln in fh: - if not ln.strip(): continue - r = parse_paf_line(ln) - tlen.setdefault(r["target_name"], r["target_len"]) - qlen.setdefault(r["query_name"], r["query_len"]) - hit: Hit = ( - r["q_start"], r["q_end"], - r["t_start"], r["t_end"], - r["strand"], r["n_match"] - ) - hits[r["target_name"]][r["query_name"]].append(hit) - - cov_hit = r["n_match"] / r["query_len"] - gap_ok = abs(r["n_match"] - r["n_align"]) <= 0.05 * r["n_match"] - if cov_hit >= cov_thr and gap_ok: - faithful[r["target_name"]][r["query_name"]].append(hit) - return hits, faithful, tlen, qlen - -# ──────────── writers ──────────── # -def _aggregate_coverage(hit_list: List[Hit], 
q_len: int) -> float: - total_nm = sum(h[5] for h in hit_list) - return min(1.0, total_nm / q_len) if q_len else 0.0 - -def write_long(h: HitDict, tlen: LenDict, qlen: LenDict, path: str): - with open(path,"w",newline="") as f: - w = csv.writer(f, delimiter="\t") - w.writerow(["target_name","target_len", - "query_name","query_len","coverage", - "num_hits","hits"]) - for tgt in sorted(h): - for qry in sorted(h[tgt]): - hl = h[tgt][qry] - cov = _aggregate_coverage(hl, qlen[qry]) - hits_str = ";".join(f"{qs}-{qe}:{ts}-{te}:{s}" - for qs,qe,ts,te,s,_ in hl) - w.writerow([tgt, tlen[tgt], - qry, qlen[qry], f"{cov:.3f}", - len(hl), hits_str]) - -def write_wide(h: HitDict, tlen: LenDict, qlen: LenDict, path: str): - all_q = sorted({q for t in h.values() for q in t}) - header=["target_name","target_len","all_one_hit"] - for q in all_q: - header.extend([f"{q}_len", f"{q}_cov", f"{q}_num_hits", f"{q}_hit_coordinates"]) - with open(path,"w",newline="") as f: - w=csv.writer(f,delimiter="\t"); w.writerow(header) - for tgt in sorted(h): - qdict=h[tgt] - num_hits=[len(qdict[q]) if q in qdict else 0 for q in all_q] - all_one=all(n==1 for n in num_hits) - row=[tgt, tlen[tgt], "TRUE" if all_one else "FALSE"] - for q,n in zip(all_q,num_hits): - row.append(qlen[q]) # _len - if n: - cov=_aggregate_coverage(qdict[q], qlen[q]) - hits_str=";".join(f"{qs}-{qe}:{ts}-{te}:{s}" - for qs,qe,ts,te,s,_ in qdict[q]) - row.extend([f"{cov:.3f}", str(n), hits_str]) + ln = ln.rstrip('\n') + if ln.startswith('>'): + if name is not None: + flush_singleton(name, buf) + name = ln[1:] + buf = [] + else: + buf.append(ln.upper()) + if name is not None: + flush_singleton(name, buf) + + # ── Multi-read groups: parse MSA, sub-haplotype, build consensus ───────── + with tarfile.open("~{msa_tarball}") as tar: + for member in tar.getmembers(): + bn = os.path.basename(member.name) + if not bn.endswith('.fasta'): + continue + try: + gid = int(bn.replace('group_', '').replace('.fasta', '')) + except ValueError: + continue + + fobj = tar.extractfile(member) + if not fobj: + continue + + msa_seqs = {} + mname, mbuf = None, [] + for ln in fobj.read().decode('utf-8').splitlines(): + if ln.startswith('>'): + if mname is not None: + msa_seqs[mname] = ''.join(mbuf) + mname = ln[1:] + mbuf = [] else: - row.extend(["", "", ""]) # cov, num_hits, coords blank - w.writerow(row) - -# ──────────── CLI ──────────── # -def main(): - ap=argparse.ArgumentParser(description="PAF summary tables with coverage") - ap.add_argument("paf"); ap.add_argument("-o","--output",default="paf_target_summary.tsv") - ap.add_argument("-c","--coverage_threshold",type=float,default=0.8) - a=ap.parse_args() - hits,faithful,tlen,qlen=load_hits(a.paf,a.coverage_threshold) - write_long(hits,tlen,qlen,a.output) - write_long(faithful,tlen,qlen,a.output.replace(".tsv","_faithful.tsv")) - write_wide(hits,tlen,qlen,a.output.replace(".tsv","_wide.tsv")) - write_wide(faithful,tlen,qlen,a.output.replace(".tsv","_wide_faithful.tsv")) - print("✓ tables with coverage written") - -if __name__=="__main__": - main() -EOF - - chmod +x parse_paf_to_table.py - - # Run the parser - python3 parse_paf_to_table.py \ - ~{parts_alignment_paf} \ - -o ~{out_prefix}.tsv \ - -c ~{coverage_threshold} + mbuf.append(ln.upper()) + if mname is not None: + msa_seqs[mname] = ''.join(mbuf) + + if not msa_seqs: + group_meta_out.write(f"{gid}\t{len(groups[gid])}\tFALSE\n") + continue + + with open(f"msa_groups/group_{gid}.fasta", 'w') as mf: + for rname, seq in msa_seqs.items(): + mf.write(f">{rname}\n{seq}\n") + + # 
Pass 1: identify error-corrected columns (indel noise filtered by + # plurality thresholds), then cluster haplotypes only on those columns. + kept_cols = retained_columns(msa_seqs) + hap_map = cluster_haplotypes(msa_seqs, kept_cols) # read -> 'A', 'B', ... + hap_labels = sorted(set(hap_map.values())) + for r, lbl in hap_map.items(): + subhap_assignments[r] = f"group_{gid}_hap_{lbl}" + + if len(hap_labels) > 1: + # Emit one consensus per haplotype + for lbl in hap_labels: + hap_seqs = {r: s for r, s in msa_seqs.items() + if hap_map[r] == lbl} + consensus = plurality_consensus(hap_seqs) + consensus_out.write( + f">group_{gid}_hap_{lbl} n_reads={len(hap_seqs)}{header_tags(gid)}\n" + f"{consensus}\n") + group_meta_out.write(f"{gid}\t{len(msa_seqs)}\tTRUE\n") + continue + + consensus = plurality_consensus(msa_seqs) + consensus_out.write( + f">group_{gid} n_reads={len(msa_seqs)}{header_tags(gid)}\n{consensus}\n") + group_meta_out.write(f"{gid}\t{len(msa_seqs)}\tTRUE\n") + +# ── Write sub-haplotype assignments ────────────────────────────────────────── +read_to_group = {r: gid for gid, rlist in groups.items() for r in rlist} +with open("~{out_prefix}_subhaps.tsv", 'w') as out: + out.write("read_name\tgroup_id\tsub_haplotype\n") + for r, hap in sorted(subhap_assignments.items()): + out.write(f"{r}\t{read_to_group.get(r, 'NA')}\t{hap}\n") + +# Tarball per-group MSA files (for diagnostic download) +with tarfile.open("~{out_prefix}_all_msa.tar.gz", "w:gz") as tar: + tar.add("msa_groups", arcname="msa_groups") + +n_multi = sum(1 for gid in set(read_to_group[r] for r in subhap_assignments) + if len({subhap_assignments[r] + for r in subhap_assignments if read_to_group.get(r) == gid}) > 1) +print(f"Consensus build complete; {len(subhap_assignments)} reads assigned sub-haplotypes, " + f"{n_multi} group(s) split by haplotype") +PYEOF >>> output { - File summary_table = "~{out_prefix}.tsv" - File faithful_table = "~{out_prefix}_faithful.tsv" - File wide_table = "~{out_prefix}_wide.tsv" - File wide_faithful_table = "~{out_prefix}_wide_faithful.tsv" + File all_msa_tarball = "~{out_prefix}_all_msa.tar.gz" + File consensus_fasta = "~{out_prefix}_consensus.fasta" + File group_meta_tsv = "~{out_prefix}_group_meta.tsv" + File subhaps_tsv = "~{out_prefix}_subhaps.tsv" + } + + runtime { + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 1 + memory: "8 GiB" + disk: "50 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + + +task categorize_reads { + meta { + description: "Assign final edit-outcome category to each read using structural groups, sub-haplotypes, and Pass-2 blastn evidence" + } + + parameter_meta { + filtered_reads_fasta: "Original (unclipped) filtered reads FASTA — used to enumerate all reads" + groups_tsv: "Structural group assignments (from structural_grouping)" + subhaps_tsv: "Sub-haplotype assignments (from build_group_consensus)" + pass2_alignment_tsv: "Pass-2 blastn tabular output run on unclipped filtered reads" + gap_merge_threshold: "Max gap (bp) between abutting HDR parts to call HDR_candidate" + wt_gap_threshold: "Max gap (bp) between left_HA and right_HA to call WT_candidate" + min_flank_bp: "Minimum bases flanking all part hits required to call HDR or WT (vs inconclusive)" + min_part_hit_bp: "Minimum query-span (bp) for a blastn hit 
to be counted; filters repetitive short matches" + } + + input { + File filtered_reads_fasta + File groups_tsv + File subhaps_tsv + File pass2_alignment_tsv + String sample_id + Int gap_merge_threshold = 5 + Int wt_gap_threshold = 20 + Int min_flank_bp = 50 + Int min_part_hit_bp = 100 + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{sample_id}_edit_categories" + + command <<< + set -euxo pipefail + + python3 <<'PYEOF' +from collections import defaultdict, Counter + +HDR_THR = ~{gap_merge_threshold} +WT_THR = ~{wt_gap_threshold} +MIN_HIT = ~{min_part_hit_bp} + +# HDR part order constants (used for truncated-read detection) +_FWD_HDR = [('left_HA', '+'), ('payload', '+'), ('right_HA', '+')] +_REV_HDR = [('right_HA', '-'), ('payload', '-'), ('left_HA', '-')] + +def is_partial_hdr(pairs, ordered): + """Return True iff pairs is a non-empty strict contiguous proper subset of ordered.""" + n = len(pairs) + if n == 0 or n >= len(ordered): + return False + for start in range(len(ordered) - n + 1): + if list(ordered[start:start + n]) == pairs: + return True + return False + +# ── Structural category from ordered part hits ──────────────────────────────── +# HiFi reads can be in either orientation. A reverse-strand HDR read has all +# three parts on '-' in t_start order: right_HA:- → payload:- → left_HA:-. +# WT on '-': right_HA:- → left_HA:-. +# Mixed strands → misintegration. +def structural_category(ps, gs): + """ps = [(part, strand), ...] sorted by t_start; gs = gaps between them.""" + if not ps: + return 'no_hit' + parts_list = [p for p, s in ps] + strands_list = [s for p, s in ps] + + if len(set(strands_list)) != 1: + return 'misintegration' + + strand = strands_list[0] + if strand == '+': + if parts_list == ['left_HA', 'payload', 'right_HA'] and all(abs(g) <= HDR_THR for g in gs): + return 'HDR_candidate' + if parts_list == ['left_HA', 'right_HA'] and len(gs) == 1 and abs(gs[0]) <= WT_THR: + return 'WT_candidate' + if parts_list in (['left_HA', 'payload'], ['payload', 'right_HA']) and all(abs(g) <= HDR_THR for g in gs): + return 'partial' + else: + if parts_list == ['right_HA', 'payload', 'left_HA'] and all(abs(g) <= HDR_THR for g in gs): + return 'HDR_candidate' + if parts_list == ['right_HA', 'left_HA'] and len(gs) == 1 and abs(gs[0]) <= WT_THR: + return 'WT_candidate' + if parts_list in (['payload', 'left_HA'], ['right_HA', 'payload']) and all(abs(g) <= HDR_THR for g in gs): + return 'partial' + + return 'misintegration' + +# ── Load structural groups ──────────────────────────────────────────────────── +group_info = {} # read_name -> {gid, sig, size} +with open("~{groups_tsv}") as fh: + fh.readline() + for ln in fh: + f = ln.rstrip('\n').split('\t') + if len(f) < 4: + continue + group_info[f[0]] = {'gid': int(f[1]), 'sig': f[2], 'size': int(f[3])} + +# ── Load sub-haplotype assignments ──────────────────────────────────────────── +subhap_info = {} +with open("~{subhaps_tsv}") as fh: + fh.readline() + for ln in fh: + f = ln.rstrip('\n').split('\t') + if len(f) >= 3: + subhap_info[f[0]] = f[2] + +# ── Load Pass-2 blastn hits ─────────────────────────────────────────────────── +# Collect both span tuples (for flanking) and ordered (part, strand) lists (for category). +# Two-pass: parse all hits first, then apply proximity-aware size filter. 
+# A hit is filtered out only if ALL of: +# (a) hit query-span < max(MIN_HIT, 0.40 * part_len) +# (b) not within WT_THR bp of any other hit on the same read +pass2_hits = defaultdict(list) # read_name -> [(t_start, t_end)] +hits_by_read = defaultdict(list) # read_name -> [(part, strand, t_start, t_end)] +read_lens = {} + +_raw_hits = defaultdict(list) # read_name -> [(part, strand, t_start, t_end, hit_size, qlen)] +_raw_slen = {} # read_name -> slen + +with open("~{pass2_alignment_tsv}") as fh: + for ln in fh: + if not ln.strip() or ln.startswith('#'): + continue + f = ln.rstrip('\n').split('\t') + if len(f) < 15: + continue + qname = f[0] + if not (qname.endswith('_left_HA') or qname.endswith('_right_HA') + or qname.endswith('_payload')): + continue + read_name = f[4] + qlen = int(f[1]) + hit_size = abs(int(f[3]) - int(f[2])) + sstart, send = int(f[6]), int(f[7]) + t_start = min(sstart, send) - 1 + t_end = max(sstart, send) + strand = '+' if f[13] == 'plus' else '-' + if qname.endswith('_left_HA'): part = 'left_HA' + elif qname.endswith('_right_HA'): part = 'right_HA' + else: part = 'payload' + _raw_hits[read_name].append((part, strand, t_start, t_end, hit_size, qlen)) + _raw_slen[read_name] = int(f[5]) + +def _keep_hit(hit, others): + """Return True if this hit should be kept (passes size OR is near a size-passing hit).""" + part, strand, t_start, t_end, hit_size, qlen = hit + min_size = max(MIN_HIT, int(0.40 * qlen)) + if hit_size >= min_size: + return True + # Keep if within WT_THR bp of another hit that itself passes the size threshold + for o in others: + o_hit_size, o_qlen = o[4], o[5] + o_min_size = max(MIN_HIT, int(0.40 * o_qlen)) + if o_hit_size < o_min_size: + continue + o_start, o_end = o[2], o[3] + gap = max(t_start, o_start) - min(t_end, o_end) # <=0 means overlap + if gap <= WT_THR: + return True + return False + +for read_name, raw in _raw_hits.items(): + others_by_idx = [raw[:i] + raw[i+1:] for i in range(len(raw))] + for hit, others in zip(raw, others_by_idx): + if not _keep_hit(hit, others): + continue + part, strand, t_start, t_end, hit_size, qlen = hit + pass2_hits[read_name].append((t_start, t_end)) + hits_by_read[read_name].append((part, strand, t_start, t_end)) + read_lens[read_name] = _raw_slen[read_name] + +# Merge overlapping hits to the same (part, strand) per read. +# The proximity-aware keep filter retains short sub-hits that overlap a +# full-length hit of the same part (they pass the "near a size-passing hit" +# condition). Without merging they appear as duplicate part entries in ps, +# turning a valid HDR signature into an apparent misintegration. 
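+# Illustrative (hypothetical spans): left_HA:+ hits at 0-500 and 120-380 on one
+# read merge to a single 0-500 entry, while disjoint left_HA:+ hits at 0-500 and
+# 5200-5700 stay separate (a genuine duplicated part).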
+def _merge_part_hits(hits): + by_key = defaultdict(list) + for part, strand, t_start, t_end in hits: + by_key[(part, strand)].append((t_start, t_end)) + merged = [] + for (part, strand), spans in by_key.items(): + spans.sort() + cs, ce = spans[0] + for s, e in spans[1:]: + if s <= ce: + ce = max(ce, e) + else: + merged.append((part, strand, cs, ce)) + cs, ce = s, e + merged.append((part, strand, cs, ce)) + return merged + +hits_by_read = {r: _merge_part_hits(h) for r, h in hits_by_read.items()} + +def get_ps_gs(read_name): + hits = sorted(hits_by_read[read_name], key=lambda h: h[2]) + ps = [(h[0], h[1]) for h in hits] + gs = [hits[i+1][2] - hits[i][3] for i in range(len(hits)-1)] + return ps, gs + +# ── Enumerate all reads from FASTA, collecting lengths ─────────────────────── +all_reads = [] +fasta_lens = {} # read_name -> sequence length +with open("~{filtered_reads_fasta}") as fh: + cur_name = None + cur_len = 0 + for ln in fh: + if ln.startswith('>'): + if cur_name is not None: + fasta_lens[cur_name] = cur_len + cur_name = ln[1:].split()[0] + cur_len = 0 + all_reads.append(cur_name) + else: + cur_len += len(ln.rstrip('\n')) + if cur_name is not None: + fasta_lens[cur_name] = cur_len + +MIN_FLANK = ~{min_flank_bp} + +def flanking(read_name): + hits = pass2_hits.get(read_name, []) + rlen = read_lens.get(read_name, 0) + if not hits: + return 0, 0 + return min(h[0] for h in hits), rlen - max(h[1] for h in hits) + +def categorize(read_name): + info = group_info.get(read_name) + lf, rf = flanking(read_name) + has_flank = lf >= MIN_FLANK or rf >= MIN_FLANK + + if not hits_by_read.get(read_name): + return 'inconclusive', 'no_parts_hit', '' + + ps, gs = get_ps_gs(read_name) + cat = structural_category(ps, gs) + sig = info['sig'] if info else '|'.join(f"{p}:{s}" for p, s in ps) + gid = info['gid'] if info else 'NA' + size = info['size'] if info else 1 + + if cat == 'no_hit': + return 'inconclusive', 'no_parts_hit', '' + + if cat == 'HDR_candidate': + if not has_flank: + return 'inconclusive', f'HDR_sig_no_flank|{sig}', '' + subhap = subhap_info.get(read_name, f'group_{gid}_hap_A') + evidence = f'singleton|{sig}' if size == 1 else sig + return 'HDR', evidence, subhap + + if cat == 'WT_candidate': + if not has_flank: + return 'inconclusive', f'WT_sig_no_flank|{sig}', '' + subhap = subhap_info.get(read_name, f'group_{gid}_hap_A') + return 'WT', sig, subhap + + # partial or misintegration + # A partial signature where the read is truncated on the missing-part side + # is inconclusive — the read simply ends before the edit is complete. 
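+    # (Hypothetical example: a read with hits left_HA:+ then payload:+ and only
+    # 12 bp of sequence past the payload hit — below the 50 bp min_flank_bp
+    # default — most likely ends inside the cassette, so it is reported as
+    # inconclusive/truncated_read rather than misintegration.)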
+ if lf < MIN_FLANK or rf < MIN_FLANK: + if is_partial_hdr(ps, _FWD_HDR) or is_partial_hdr(ps, _REV_HDR): + return 'inconclusive', f'truncated_read|{sig}', '' + return 'misintegration', sig, '' + +# ── Write output ────────────────────────────────────────────────────────────── +with open("~{out_prefix}.tsv", 'w') as out: + out.write("read_name\tcategory\tevidence\tsub_haplotype\t" + "group_id\tgroup_size\tread_length\tleft_flank_bp\tright_flank_bp\n") + cats = [] + for rname in all_reads: + cat, evidence, subhap = categorize(rname) + info = group_info.get(rname) + gid = info['gid'] if info else 'NA' + gsize = info['size'] if info else 0 + rlen = fasta_lens.get(rname, 0) + lf, rf = flanking(rname) + out.write(f"{rname}\t{cat}\t{evidence}\t{subhap}\t{gid}\t{gsize}\t{rlen}\t{lf}\t{rf}\n") + cats.append(cat) + +counts = Counter(cats) +print(f"Total reads : {len(all_reads)}") +for cat, n in sorted(counts.items()): + print(f" {cat}: {n}") +PYEOF + >>> + + output { + File categories_tsv = "~{out_prefix}.tsv" } runtime { docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" cpu: 2 - memory: "4 GB" + memory: "4 GiB" + disk: "10 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} + + +task make_wide_coverage_table { + meta { + description: "Build wide per-part coverage table from blastn part alignments (backwards-compatible replacement for parse_edit_parts)" + } + + parameter_meta { + parts_alignment_tsv: "Blastn tabular output from blastn_remap_parts" + coverage_threshold: "Minimum per-hit coverage (n_match / query_len) for faithful alignments" + min_part_hit_bp: "Minimum query-span (bp) for a blastn hit to be counted; filters repetitive short matches" + } + + input { + File parts_alignment_tsv + Float coverage_threshold = 0.8 + Int min_part_hit_bp = 100 + String sample_id + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{sample_id}_edit_parts_coverage" + + command <<< + set -euxo pipefail + + python3 <<'PYEOF' +import csv +from collections import defaultdict +from typing import Dict, List, Tuple + +# Hit tuple: (q_start, q_end, t_start, t_end, strand, n_match) +Hit = Tuple[int, int, int, int, str, int] +HitDict = Dict[str, Dict[str, List[Hit]]] +LenDict = Dict[str, int] + +# ── blastn tabular columns ──────────────────────────────────────────────────── +# 0 qseqid 1 qlen 2 qstart 3 qend 4 sseqid 5 slen 6 sstart 7 send +# 8 nident 9 mismatch 10 gapopen 11 gaps 12 length 13 sstrand 14 bitscore + +PART_SUFFIXES = ('_left_HA', '_payload', '_right_HA') +MIN_HIT = ~{min_part_hit_bp} +COV_THR = ~{coverage_threshold} + +faithful: HitDict = defaultdict(lambda: defaultdict(list)) +tlen: LenDict = {} +qlen: LenDict = {} + +with open("~{parts_alignment_tsv}") as fh: + for ln in fh: + ln = ln.rstrip("\n") + if not ln or ln.startswith("#"): + continue + f = ln.split("\t") + if len(f) < 15: + continue + qname = f[0] + if not any(qname.endswith(s) for s in PART_SUFFIXES): + continue + if abs(int(f[3]) - int(f[2])) < MIN_HIT: + continue + n_match = int(f[8]) + n_gaps = int(f[11]) + q_len = int(f[1]) + cov_hit = n_match / q_len + gap_ok = n_gaps <= 0.05 * n_match + if cov_hit < COV_THR or not gap_ok: + continue + sstart, send = int(f[6]), int(f[7]) + tname = f[4] + tlen.setdefault(tname, int(f[5])) + qlen.setdefault(qname, q_len) + hit: Hit = ( + 
int(f[2]) - 1, int(f[3]), + min(sstart, send) - 1, max(sstart, send), + "+" if f[13] == "plus" else "-", + n_match, + ) + faithful[tname][qname].append(hit) + +def _agg_cov(hit_list: List[Hit], q_len: int) -> float: + return min(1.0, sum(h[5] for h in hit_list) / q_len) if q_len else 0.0 + +all_q = sorted({q for t in faithful.values() for q in t}) +header = ["target_name", "target_len", "all_one_hit"] +for q in all_q: + header.extend([f"{q}_len", f"{q}_cov", f"{q}_num_hits", f"{q}_hit_coordinates"]) + +with open("~{out_prefix}_wide_faithful.tsv", "w", newline="") as f: + w = csv.writer(f, delimiter="\t") + w.writerow(header) + for tgt in sorted(faithful): + qdict = faithful[tgt] + num_hits = [len(qdict[q]) if q in qdict else 0 for q in all_q] + row = [tgt, tlen[tgt], "TRUE" if all(n == 1 for n in num_hits) else "FALSE"] + for q, n in zip(all_q, num_hits): + row.append(qlen[q]) + if n: + cov = _agg_cov(qdict[q], qlen[q]) + hits_str = ";".join(f"{qs}-{qe}:{ts}-{te}:{s}" + for qs, qe, ts, te, s, _ in qdict[q]) + row.extend([f"{cov:.3f}", str(n), hits_str]) + else: + row.extend(["", "", ""]) + w.writerow(row) + +print(f"Wide faithful table: {len(faithful)} reads, {len(all_q)} parts") +PYEOF + >>> + + output { + File wide_faithful_table = "~{out_prefix}_wide_faithful.tsv" + } + + runtime { + docker: "~{runtime_attributes.container_registry}/pb_wdl_base@sha256:4b889a1f21a6a7fecf18820613cf610103966a93218de772caba126ab70a8e87" + cpu: 1 + memory: "4 GiB" disk: "10 GB" preemptible: runtime_attributes.preemptible_tries maxRetries: runtime_attributes.max_retries @@ -392,3 +1585,164 @@ EOF } } +task align_consensus_msa { + meta { + description: "Align child + parent consensus sequences plus the expected HDR template into a single MAFFT alignment. Reverse-strand consensus sequences are reverse-complemented so all inputs share a common orientation. Group consensus records tagged category=partial are omitted (partial-part signatures do not represent a complete observation of an allele)." + } + + parameter_meta { + child_consensus_fasta: "Consensus FASTA from child sample (build_group_consensus); headers must contain strand=+/- and category=HDR|WT|partial|misintegration" + parent_consensus_fasta: "Consensus FASTA from parent sample (build_group_consensus); headers must contain strand=+/- and category=..." 
+ child_sample_id: "Sample ID used to prefix child sequence names in the MSA" + parent_sample_id: "Sample ID used to prefix parent sequence names in the MSA" + crispr_edit_json: "CRISPR edit JSON; left_ha + payload + right_ha are concatenated and included in the MSA as the expected HDR template" + } + + input { + File child_consensus_fasta + File parent_consensus_fasta + String child_sample_id + String parent_sample_id + File crispr_edit_json + RuntimeAttributes runtime_attributes + } + + String out_prefix = "~{child_sample_id}_vs_~{parent_sample_id}_consensus" + + command <<< + set -euxo pipefail + + OUT="~{out_prefix}" + INPUT="${OUT}_input.fasta" + + # Extract left_ha/payload/right_ha from crispr_edit_json (DNA strings, no embedded quotes) + extract_field() { + grep -oE "\"$1\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" "~{crispr_edit_json}" \ + | head -1 \ + | sed -E 's/^[^:]*:[[:space:]]*"([^"]*)"$/\1/' + } + LEFT_HA=$(extract_field left_ha) + PAYLOAD=$(extract_field payload) + RIGHT_HA=$(extract_field right_ha) + HDR_TEMPLATE=$(printf '%s%s%s' "$LEFT_HA" "$PAYLOAD" "$RIGHT_HA" | tr 'acgtn' 'ACGTN') + + : > "$INPUT" + if [[ -n "$HDR_TEMPLATE" ]]; then + printf ">HDR_expected\n%s\n" "$HDR_TEMPLATE" >> "$INPUT" + fi + echo "HDR template: ${#HDR_TEMPLATE} bp" >&2 + + # Parse a consensus FASTA: skip category=partial, RC strand=-, prefix names with label. + parse_consensus() { + local fasta=$1 label=$2 + # Use first token of label (e.g. czML910 from czML910_Parental_Ngn2-KOLF) as + # name prefix so CLUSTAL 30-char name column fits group/reads/category info. + local prefix + prefix=$(echo "$label" | cut -d_ -f1) + awk -v label="$label" -v prefix="$prefix" ' + function rc_char(c) { + if (c=="A") return "T"; if (c=="T") return "A" + if (c=="C") return "G"; if (c=="G") return "C" + return "N" + } + function rc(s, i,r) { + r="" + for (i=length(s); i>=1; i--) r = r rc_char(substr(s,i,1)) + return r + } + function flush( out, short_name) { + if (name == "" || seq == "") return + if (cat == "partial") { + print " skip partial: " label "_" name > "/dev/stderr" + return + } + out = toupper(seq) + if (strand == "-") out = rc(out) + # Name: {prefix}_{cat}{groupnum}[{hap}] [n] + # e.g. czML1098_HDR1A [3], czML910_WT2 [17] + # misintegration abbreviated to mis; singleton appended as s + short_name = prefix "_" cat_short groupnum hap_suffix singleton_suffix "x" nreads "" + print ">" short_name + print out + kept++ + } + /^>/ { + flush() + name=""; strand=""; cat=""; cat_short=""; nreads="?"; seq="" + groupnum=""; hap_suffix=""; singleton_suffix="" + header=substr($0, 2) + if (match(header, /group_[0-9]+(_hap_[A-Za-z0-9_]+)?(_singleton)?/)) + name = substr(header, RSTART, RLENGTH) + if (match(header, /strand=[+-]/)) + strand = substr(header, RSTART+7, 1) + if (match(header, /category=[A-Za-z_]+/)) + cat = substr(header, RSTART+9, RLENGTH-9) + if (match(header, /n_reads=[0-9]+/)) + nreads = substr(header, RSTART+8, RLENGTH-8) + # Extract group number + groupnum = name + gsub(/^.*group_/, "", groupnum) + gsub(/[^0-9].*$/, "", groupnum) + # Extract hap letter(s): _hap_A -> A, absent -> "" + hap_suffix = "" + if (match(name, /_hap_[A-Za-z0-9]+/)) + hap_suffix = substr(name, RSTART+5, RLENGTH-5) + # Singleton marker + singleton_suffix = (name ~ /_singleton/) ? 
"s" : "" + # Category abbreviation + cat_short = cat + if (cat == "misintegration") cat_short = "m" + if (cat == "partial") cat_short = "p" + if (cat == "HDR") cat_short = "h" + if (cat == "WT") cat_short = "w" + if (name == "" || strand == "") name = "" + next + } + { seq = seq $0 } + END { + flush() + print label ": " kept+0 " non-partial records" > "/dev/stderr" + } + ' "$fasta" + } + + parse_consensus "~{parent_consensus_fasta}" "~{parent_sample_id}" >> "$INPUT" + parse_consensus "~{child_consensus_fasta}" "~{child_sample_id}" >> "$INPUT" + + OUTPUT_ALN="${OUT}.aln" + + nseq=$(grep -c '^>' "$INPUT" || true) + + if [[ "$nseq" -eq 0 ]]; then + echo "No sequences to align" >&2 + printf "CLUSTAL W\n\n" > "$OUTPUT_ALN" + elif [[ "$nseq" -eq 1 ]]; then + name=$(grep '^>' "$INPUT" | head -1 | sed 's/^>//') + seq=$(grep -v '^>' "$INPUT" | tr -d '\n') + stars=$(printf '%*s' "${#seq}" '' | tr ' ' '*') + printf "CLUSTAL W\n\n%-30s %s\n%-30s %s\n" \ + "${name:0:30}" "$seq" "" "$stars" > "$OUTPUT_ALN" + else + # E-INS-i: generalized affine gap cost (--genafpair) with --ep 0 allows + # single large internal gaps cleanly, which is needed when parent WT + # sequences lack the ~1kb payload and must gap over it as one block. + mafft --ep 0 --genafpair --maxiterate 1000 --quiet --nuc --clustalout "$INPUT" > "$OUTPUT_ALN" + fi + echo "${nseq} sequences → ${OUTPUT_ALN}" + >>> + + output { + File? consensus_msa_clustal = "~{out_prefix}.aln" + } + + runtime { + docker: "quay.io/biocontainers/mafft@sha256:b8ccc402a9156868fefda5caa8693f847b47dcc69aae848557f7115850e60790" + cpu: 4 + memory: "16 GiB" + disk: "20 GB" + preemptible: runtime_attributes.preemptible_tries + maxRetries: runtime_attributes.max_retries + awsBatchRetryAttempts: runtime_attributes.max_retries # !UnknownRuntimeKey + zones: runtime_attributes.zones + } +} diff --git a/workflows/edit-qc/knock_knock.wdl b/workflows/edit-qc/knock_knock.wdl index b9134091..b40c4697 100644 --- a/workflows/edit-qc/knock_knock.wdl +++ b/workflows/edit-qc/knock_knock.wdl @@ -56,7 +56,9 @@ guide_seq = edit_info['guide'] left_ha = edit_info.get('left_ha', '') payload = edit_info.get('payload', '') right_ha = edit_info.get('right_ha', '') -donor_seq = left_ha + payload + right_ha +ctx_before = edit_info.get('donor_context_before', '') +ctx_after = edit_info.get('donor_context_after', '') +donor_seq = ctx_before + left_ha + payload + right_ha + ctx_after donor_name = f"{symbol}_donor" for fname, val in [ @@ -80,7 +82,7 @@ with open('donor.fasta', 'w') as f: print(f"Target: {symbol} at {chrom}:{t_start}-{t_end}", file=sys.stderr) print(f"Guide: {guide_seq}", file=sys.stderr) -print(f"Donor: {len(donor_seq)} bp ({len(left_ha)} HA_L + {len(payload)} payload + {len(right_ha)} HA_R)", file=sys.stderr) +print(f"Donor: {len(donor_seq)} bp ({len(ctx_before)} ctx_5p + {len(left_ha)} HA_L + {len(payload)} payload + {len(right_ha)} HA_R + {len(ctx_after)} ctx_3p)", file=sys.stderr) PYEOF SYMBOL=$(cat symbol.txt) @@ -146,6 +148,88 @@ CSVEOF cat "${BASE}/data/${BATCH}/sample_sheet.csv" >&2 + # ── 3b. Patch knock-knock: guard against reads with zero alignments ──────── + # Upstream bug: ReadDiagram.draw_reference() in visualize/architecture.py + # indexes alignment_coordinates[0] in the 'centered on primers' branch + # without a length check, crashing with IndexError on unaligned reads. + # The conda site-packages dir is read-only in the container, so patch at + # runtime via a sitecustomize.py on PYTHONPATH that monkey-patches the + # method when the module is imported. 
+ KK_PATCH_DIR="$(pwd)/kk_patch" + mkdir -p "${KK_PATCH_DIR}" + cat > "${KK_PATCH_DIR}/sitecustomize.py" <<'PYEOF' +# Runtime monkey-patch for knock_knock.visualize.architecture.ReadDiagram. +# Wraps draw_reference so that an IndexError from alignment_coordinates[0] +# on a read with no alignments is caught and that ref_name is simply skipped. +import sys + +def _install(): + try: + from knock_knock.visualize import architecture as _arch + except Exception: + return + RD = getattr(_arch, "ReadDiagram", None) + if RD is None or getattr(RD.draw_reference, "_kk_patched", False): + return + _orig = RD.draw_reference + def draw_reference(self, ref_name, ref_y, *args, **kwargs): + try: + return _orig(self, ref_name, ref_y, *args, **kwargs) + except IndexError as e: + print(f"[kk_patch] skipping draw_reference for {ref_name!r}: {e}", + file=sys.stderr) + return None + draw_reference._kk_patched = True + RD.draw_reference = draw_reference + +def _install_num_examples(): + # Raise default num_examples for generate_all_outcome_example_figures + # from 10 → 100 so more per-category example diagrams are emitted. + try: + from knock_knock import experiment as _exp + except Exception: + return + Exp = getattr(_exp, "Experiment", None) + if Exp is None: + return + fn = getattr(Exp, "generate_all_outcome_example_figures", None) + if fn is None or getattr(fn, "_kk_patched", False): + return + def generate_all_outcome_example_figures(self, num_examples=100, **kwargs): + return fn(self, num_examples=num_examples, **kwargs) + generate_all_outcome_example_figures._kk_patched = True + Exp.generate_all_outcome_example_figures = generate_all_outcome_example_figures + +def _hook(modname, installer): + if modname in sys.modules: + installer() + return + import importlib.abc, importlib.util + class _Hook(importlib.abc.MetaPathFinder): + def find_spec(self, name, path, target=None): + if name == modname: + sys.meta_path.remove(self) + spec = importlib.util.find_spec(name) + if spec is not None: + loader = spec.loader + _exec = loader.exec_module + def exec_module(module): + _exec(module) + installer() + loader.exec_module = exec_module + return None + return None + sys.meta_path.insert(0, _Hook()) + +# Install now if already imported; otherwise hook import. +_install() +_hook("knock_knock.visualize.architecture", _install) +_install_num_examples() +_hook("knock_knock.experiment", _install_num_examples) +PYEOF + + export PYTHONPATH="${KK_PATCH_DIR}${PYTHONPATH:+:${PYTHONPATH}}" + # ── 4. Build strategies ──────────────────────────────────────────────────── # Reads sample sheet, aligns protospacer into ctx.fa, extracts the target # window, and writes strategies/{genome}_{symbol}/target.gb + parameters.yaml. diff --git a/workflows/edit-qc/loh.wdl b/workflows/edit-qc/loh.wdl index 75ab3ccf..13c37e54 100644 --- a/workflows/edit-qc/loh.wdl +++ b/workflows/edit-qc/loh.wdl @@ -17,6 +17,7 @@ task check_loh { expected_cut_site: "Expected cut site in chr:pos format (e.g. 
chr15:65869582)" cut_strand: "Strand of the guide at the cut site (+ or -)" window_size: "Window size in bp upstream and downstream of the cut site (default 200000)" + chrom_window_size: "Window size in bp for chromosome-wide het bedgraph (default 1000000)" } input { @@ -28,6 +29,7 @@ task check_loh { String expected_cut_site String cut_strand Int window_size = 200000 + Int chrom_window_size = 100000 Int mem_gb = 8 RuntimeAttributes runtime_attributes } @@ -136,6 +138,67 @@ with open(f"{sample_id}_loh_summary.tsv", "w") as fh: fh.write(f"downstream_both_het\t{dn_num}\n") fh.write(f"downstream_het_intact_ratio\t{dn_ratio_str}\n") fh.write(f"downstream_het_intact_pct\t{dn_pct:.6f}\n") + +# --------------------------------------------------------------------------- +# Chromosome-wide windowed het analysis → bedgraph +# --------------------------------------------------------------------------- +chrom_window_size = ~{chrom_window_size} + +# Get chromosome length from VCF header (for clean window ends) +import re +header_result = subprocess.run( + ["bcftools", "view", "-h", joint_vcf], + capture_output=True, check=True, +) +chrom_len = None +for line in header_result.stdout.decode().splitlines(): + if line.startswith("##contig=") and f"ID={chrom}," in line: + m = re.search(r"length=(\d+)", line) + if m: + chrom_len = int(m.group(1)) + break + +print(f"Chromosome {chrom} length: {chrom_len}", file=sys.stderr) + +# Dump all variant positions + GTs on the chromosome in one bcftools pass +query_result = subprocess.run( + ["bcftools", "query", "-r", chrom, "-f", "%POS[\t%GT]\n", joint_vcf], + capture_output=True, check=True, +) + +def is_het(gt): + alleles = gt.replace("|", "/").split("/") + return len(set(alleles)) > 1 and "." not in alleles + +from collections import defaultdict +window_parent_het = defaultdict(int) +window_both_het = defaultdict(int) + +for line in query_result.stdout.decode().splitlines(): + parts = line.strip().split("\t") + if len(parts) < 2: + continue + pos = int(parts[0]) + gts = parts[1:] + if parent_sample_idx >= len(gts) or sample_idx >= len(gts): + continue + if is_het(gts[parent_sample_idx]): + win = pos // chrom_window_size + window_parent_het[win] += 1 + if is_het(gts[sample_idx]): + window_both_het[win] += 1 + +bedgraph_file = f"{sample_id}_loh_chrom_het.bedgraph" +with open(bedgraph_file, "w") as fh: + for win in sorted(window_parent_het): + start = win * chrom_window_size + end = min((win + 1) * chrom_window_size, chrom_len) if chrom_len else (win + 1) * chrom_window_size + phet = window_parent_het[win] + bhet = window_both_het[win] + frac = bhet / phet + fh.write(f"{chrom}\t{start}\t{end}\t{frac:.6f}\n") + +print(f"Wrote bedgraph: {bedgraph_file} ({len(window_parent_het)} windows)", file=sys.stderr) PYEOF >>> @@ -145,6 +208,7 @@ PYEOF Float het_intact_upstream_pct = read_float("~{sample_id}_loh_het_intact_upstream_pct.txt") Float het_intact_downstream_pct = read_float("~{sample_id}_loh_het_intact_downstream_pct.txt") File loh_summary = "~{sample_id}_loh_summary.tsv" + File loh_chrom_bedgraph = "~{sample_id}_loh_chrom_het.bedgraph" } runtime { diff --git a/workflows/edit-qc/truvari_parent_filter.wdl b/workflows/edit-qc/truvari_parent_filter.wdl index 7f83d374..4d645ff6 100644 --- a/workflows/edit-qc/truvari_parent_filter.wdl +++ b/workflows/edit-qc/truvari_parent_filter.wdl @@ -104,7 +104,7 @@ task bcftools_split_for_truvari { bcftools +split -W \ -Oz \ -o truvari_merge_split \ - -e 'GT=".|." || GT="./." || GT="." || GT="ref"' + -e 'GT=".|." || GT="./." || GT="." 
|| GT="./0" || GT="0/." || GT="ref"' for file in truvari_merge_split/*.vcf.gz; do mv "$file" "${file%.vcf.gz}~{out_suffix}.vcf.gz" @@ -234,6 +234,12 @@ task filter_parent_variants { parent_index: { name: "Parent index in consistency table" } + family_merged_vcf: { + name: "Full multi-sample merged VCF (pre-split); when provided along with parent_sample_id, parent genotype fields are added as INFO annotations" + } + parent_sample_id: { + name: "Parent sample ID to extract from family_merged_vcf for parent genotype annotations" + } runtime_attributes: { name: "Runtime attribute structure" } @@ -251,12 +257,14 @@ task filter_parent_variants { File truvari_consistency_tsv Int sample_index Int parent_index = 999 + File? family_merged_vcf + String? parent_sample_id RuntimeAttributes runtime_attributes } Int threads = 4 Int mem_gb = 8 - Int disk_size = ceil(size(sample_vcf, "GB") * 2 + size(truvari_consistency_tsv, "GB") + 20) + Int disk_size = ceil(size(sample_vcf, "GB") * 2 + size(truvari_consistency_tsv, "GB") + size(family_merged_vcf, "GB") + 20) String filtered_consistency_tsv = sub(basename(truvari_consistency_tsv), "\\.tsv$", "") + ".parent_filtered.tsv" String filtered_vcf_file = sub(basename(sample_vcf), "\\.vcf.gz$", "") + ".parent_filtered.vcf.gz" String filtered_vcf_index_file = "~{filtered_vcf_file}.tbi" @@ -291,6 +299,32 @@ EOF -h truvari_consistency.hdr \ -Ob \ ~{sample_vcf} | bcftools filter -i "FLAG>0" -Ob | bcftools sort -Oz -Wtbi -o ~{filtered_vcf_file} + + # Annotate with parent genotype fields if family merged VCF and parent sample ID are available + if [[ -n "~{default="" parent_sample_id}" && -n "~{default="" family_merged_vcf}" ]]; then + cat > parent_info.hdr <<-'EOF' +##INFO= +##INFO= +##INFO= +##INFO= +EOF + + bcftools view -s "~{default="" parent_sample_id}" "~{default="" family_merged_vcf}" | \ + bcftools +fill-tags -- -t AN,AC | \ + bcftools sort --max-mem "~{mem_gb/2}G" | \ + bcftools query -f '%CHROM\t%POS\t%REF\t%ALT\t[%GT]\t[%AD]\t%AN\t%AC\n' | \ + bgzip > parent_annotations.tsv.gz + tabix -s1 -b2 -e2 parent_annotations.tsv.gz + + bcftools annotate \ + -a parent_annotations.tsv.gz \ + -c 'CHROM,POS,REF,ALT,INFO/parent_GT,INFO/parent_AD,INFO/parent_AN,INFO/parent_AC' \ + -h parent_info.hdr \ + ~{filtered_vcf_file} \ + -Oz -Wtbi -o tmp_parent_annotated.vcf.gz + mv tmp_parent_annotated.vcf.gz ~{filtered_vcf_file} + mv tmp_parent_annotated.vcf.gz.tbi ~{filtered_vcf_index_file} + fi else # No parent specified, copy original VCF cp ~{sample_vcf} ~{filtered_vcf_file} diff --git a/workflows/family.wdl b/workflows/family.wdl index e3b7d6fc..94b8ed9f 100755 --- a/workflows/family.wdl +++ b/workflows/family.wdl @@ -13,6 +13,7 @@ import "edit-qc/bcftools_aux.wdl" as Bcftools_aux import "edit-qc/bcftools_norm.wdl" as Bcftools_norm import "edit-qc/truvari_parent_filter.wdl" as TruvariParentFilter import "edit-qc/crispr_edit_qc.wdl" as CrisprEditQC +import "edit-qc/crispr_edit_tasks.wdl" as CrisprEditTasks import "edit-qc/crispr_edit_consensus.wdl" as CrisprEditConsensus import "edit-qc/plot_cnv.wdl" as PlotCNV import "edit-qc/offtarget.wdl" as OffTarget @@ -255,7 +256,8 @@ workflow humanwgs_family { scatter (sample_index in range(length(family.samples))) { call Bcftools_aux.bcftools_merge_assembly_align as merge_sv_vcfs_align_assembly { input: - vcfs = select_all([upstream.large_sv_filtered_vcf[sample_index], select_all(select_first([joint.split_joint_structural_variant_vcfs,upstream.sv_vcf]))[sample_index]]), + vcfs = select_all([upstream.large_sv_filtered_vcf[sample_index], 
select_all(select_first([joint.split_joint_structural_variant_vcfs,upstream.sv_vcf]))[sample_index]]), + sample_id = sample_id[sample_index], out_prefix = "~{sample_id[sample_index]}.merged_structural_variants", runtime_attributes = default_runtime_attributes } @@ -323,8 +325,6 @@ workflow humanwgs_family { ############################################################# # Lookup maps for downstream BAM access by sample ID - Map[String, File] bam_files_by_id = as_map(zip(sample_id, downstream.merged_haplotagged_bam)) - Map[String, File] bam_index_by_id = as_map(zip(sample_id, downstream.merged_haplotagged_bam_index)) Map[String, Int] sidx_by_id = as_map(zip(sample_id, range(length(sample_id)))) scatter (sample_index in range(length(family.samples))) { @@ -353,48 +353,64 @@ workflow humanwgs_family { scatter (sample_index in range(length(family.samples))) { # Consensus-based CRISPR edit annotation - runs only if expected_edit is provided if (defined(family.samples[sample_index].expected_edit)) { + # Annotate MSA group consensus sequences with edit parts call CrisprEditConsensus.crispr_edit_consensus { input: - sample_id = sample_id[sample_index], - haplotagged_bam = downstream.merged_haplotagged_bam[sample_index], - haplotagged_bam_index = downstream.merged_haplotagged_bam_index[sample_index], - ref_fasta = ref_map["fasta"], # !FileCoercion - ref_fasta_index = ref_map["fasta_index"], # !FileCoercion - small_variant_vcf = downstream.phased_small_variant_vcf[sample_index], - small_variant_vcf_index = downstream.phased_small_variant_vcf_index[sample_index], - sv_vcf = downstream.phased_sv_vcf[sample_index], - sv_vcf_index = downstream.phased_sv_vcf_index[sample_index], - crispr_edit_json = select_first([family.samples[sample_index].expected_edit]), - runtime_attributes = default_runtime_attributes + sample_id = sample_id[sample_index], + consensus_fasta = select_first([crispr_edit_qc.consensus_fasta[sample_index]]), + crispr_edit_json = select_first([family.samples[sample_index].expected_edit]), + runtime_attributes = default_runtime_attributes } - # knock-knock HDR outcome classification + # knock-knock HDR outcome classification (uses edit-QC filtered reads, no BAM dependency) call KnockKnock.knock_knock { input: sample_id = sample_id[sample_index], crispr_edit_json = select_first([family.samples[sample_index].expected_edit]), - region_reads_fastq = crispr_edit_consensus.region_reads_fastq, + region_reads_fastq = select_first([crispr_edit_qc.filtered_reads_fastq[sample_index]]), ref_fasta = ref_map["fasta"], # !FileCoercion ref_fasta_index = ref_map["fasta_index"], # !FileCoercion runtime_attributes = default_runtime_attributes } - # Parent consensus annotation - for wild type comparison at same locus + # Parent: run edit QC at same locus (child's edit JSON) then annotate consensus if (defined(resolved_parent_id[sample_index])) { Int parent_idx_for_consensus = sidx_by_id[select_first([resolved_parent_id[sample_index]])] + + # Extract parent reads overlapping the edit region from the haplotagged BAM — + # much faster than running the full minimap2 filter on the whole-genome FASTA. 
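+            # (Illustrative only, not the task's actual command; conceptually the
+            # extraction is a simple indexed region fetch, e.g. in Python:
+            #     import pysam
+            #     bam = pysam.AlignmentFile("parent_haplotagged.bam")  # hypothetical path
+            #     region_reads = bam.fetch(chrom, start, end)          # region = edit-JSON cut site +/- padding
+            # See crispr_edit_tasks.wdl for the real implementation.)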
+            call CrisprEditTasks.extract_region_reads_fasta as parent_region_reads {
+                input:
+                    haplotagged_bam = downstream.merged_haplotagged_bam[parent_idx_for_consensus],
+                    haplotagged_bam_index = downstream.merged_haplotagged_bam_index[parent_idx_for_consensus],
+                    crispr_edit_json = select_first([family.samples[sample_index].expected_edit]),
+                    sample_id = select_first([resolved_parent_id[sample_index]]),
+                    runtime_attributes = default_runtime_attributes
+            }
+
+            call CrisprEditQC.crispr_edit_qc as parent_edit_qc {
+                input:
+                    sample_id = select_first([resolved_parent_id[sample_index]]),
+                    pre_filtered_reads_fasta = parent_region_reads.region_reads_fasta,
+                    crispr_edit_json = select_first([family.samples[sample_index].expected_edit]),
+                    runtime_attributes = default_runtime_attributes
+            }

            call CrisprEditConsensus.crispr_edit_consensus as parent_consensus {
                input:
-                    sample_id = select_first([resolved_parent_id[sample_index]]),
-                    haplotagged_bam = bam_files_by_id[select_first([resolved_parent_id[sample_index]])],
-                    haplotagged_bam_index = bam_index_by_id[select_first([resolved_parent_id[sample_index]])],
-                    ref_fasta = ref_map["fasta"], # !FileCoercion
-                    ref_fasta_index = ref_map["fasta_index"], # !FileCoercion
-                    small_variant_vcf = downstream.phased_small_variant_vcf[parent_idx_for_consensus],
-                    small_variant_vcf_index = downstream.phased_small_variant_vcf_index[parent_idx_for_consensus],
-                    sv_vcf = downstream.phased_sv_vcf[parent_idx_for_consensus],
-                    sv_vcf_index = downstream.phased_sv_vcf_index[parent_idx_for_consensus],
-                    crispr_edit_json = select_first([family.samples[sample_index].expected_edit]),
-                    runtime_attributes = default_runtime_attributes
+                    sample_id = select_first([resolved_parent_id[sample_index]]),
+                    consensus_fasta = select_first([parent_edit_qc.consensus_fasta]),
+                    crispr_edit_json = select_first([family.samples[sample_index].expected_edit]),
+                    runtime_attributes = default_runtime_attributes
+            }
+
+            call CrisprEditTasks.align_consensus_msa {
+                input:
+                    child_consensus_fasta = select_first([crispr_edit_qc.consensus_fasta[sample_index]]),
+                    parent_consensus_fasta = select_first([parent_edit_qc.consensus_fasta]),
+                    child_sample_id = sample_id[sample_index],
+                    parent_sample_id = select_first([resolved_parent_id[sample_index]]),
+                    crispr_edit_json = select_first([family.samples[sample_index].expected_edit]),
+                    runtime_attributes = default_runtime_attributes
+            }
        }
    }
@@ -555,6 +571,8 @@ workflow humanwgs_family {
                truvari_consistency_tsv = truvari_consistency_small_variants.truvari_consistency_tsv,
                sample_index = sample_index,
                parent_index = parent_index,
+                family_merged_vcf = if parent_index != 999 then truvari_collapse_small_variants.truvari_merge_vcf else None,
+                parent_sample_id = if parent_index != 999 then resolved_parent_id[sample_index] else None,
                runtime_attributes = default_runtime_attributes
        }
@@ -565,21 +583,38 @@
                truvari_consistency_tsv = truvari_consistency_sv.truvari_consistency_tsv,
                sample_index = sample_index,
                parent_index = parent_index,
+                family_merged_vcf = if parent_index != 999 then truvari_collapse_sv.truvari_merge_vcf else None,
+                parent_sample_id = if parent_index != 999 then resolved_parent_id[sample_index] else None,
                runtime_attributes = default_runtime_attributes
        }

+        # Quality filtering: FILTER=PASS plus GQ>=20 and VAF>0.1 (PAV assembly SVs use COV_PROP instead), applied per-sample after parent filtering
+        call Bcftools_aux.bcftools_qual_filter as qual_filter_small_variants {
+            input:
+                vcf = parent_filter_small_variants.filtered_vcf,
+                out_prefix = 
"~{sample_id[sample_index]}.small_variants.qual_filtered", + runtime_attributes = default_runtime_attributes + } + + call Bcftools_aux.bcftools_qual_filter as qual_filter_sv { + input: + vcf = parent_filter_sv.filtered_vcf, + out_prefix = "~{sample_id[sample_index]}.sv.qual_filtered", + runtime_attributes = default_runtime_attributes + } + # AnnotSV after split because we need its tsv conversion call Somatic_annotation.annotsv as annotate_parent_filter_sv { input: - sv_vcf = parent_filter_sv.filtered_vcf, - sv_vcf_index = parent_filter_sv.filtered_vcf_index, + sv_vcf = qual_filter_sv.filtered_vcf, + sv_vcf_index = qual_filter_sv.filtered_vcf_index, annotsv_cache = somatic_map["annotsv_cache"], # !FileCoercion threads = 2 } call Somatic_annotation.prioritize_small_variants as prioritizeSomatic { input: - vep_annotated_vcf = parent_filter_small_variants.filtered_vcf, + vep_annotated_vcf = qual_filter_small_variants.filtered_vcf, threads = 2, sample = family.samples[sample_index].sample_id } @@ -601,6 +636,18 @@ workflow humanwgs_family { runtime_attributes = default_runtime_attributes } + # Count variants after GQ/VAF filtering + call Bcftools_aux.count_vcf_variants as count_qual_filtered_small_variants { + input: + vcf = qual_filter_small_variants.filtered_vcf, + runtime_attributes = default_runtime_attributes + } + call Bcftools_aux.count_vcf_variants as count_qual_filtered_sv { + input: + vcf = qual_filter_sv.filtered_vcf, + runtime_attributes = default_runtime_attributes + } + # Count variants after concern annotation call Bcftools_aux.count_tsv_rows as count_concerning_snv { input: @@ -653,6 +700,8 @@ workflow humanwgs_family { flatten([['quality_filtered_SV_count'], select_first([count_tertiary_sv.variant_count, []])]), flatten([['parent_filtered_SNV_count'], select_first([count_parent_filtered_snv.variant_count, []])]), flatten([['parent_filtered_SV_count'], select_first([count_parent_filtered_sv.variant_count, []])]), + flatten([['qual_filtered_small_variant_count'], select_first([count_qual_filtered_small_variants.variant_count, []])]), + flatten([['qual_filtered_SV_count'], select_first([count_qual_filtered_sv.variant_count, []])]), flatten([['concerning_SNV_count'], select_first([count_concerning_snv.row_count, []])]), flatten([['concerning_SV_count'], select_first([count_concerning_sv.row_count, []])]) ] @@ -835,13 +884,20 @@ workflow humanwgs_family { Array[File]? parent_filtered_sv_vcf = parent_filter_sv.filtered_vcf Array[File]? parent_filtered_sv_vcf_index = parent_filter_sv.filtered_vcf_index + Array[File]? qual_filtered_small_variant_vcf = qual_filter_small_variants.filtered_vcf + Array[File]? qual_filtered_small_variant_vcf_index = qual_filter_small_variants.filtered_vcf_index + Array[File]? qual_filtered_sv_vcf = qual_filter_sv.filtered_vcf + Array[File]? qual_filtered_sv_vcf_index = qual_filter_sv.filtered_vcf_index + # Variant count summary stats at each filtering stage - Array[String]? stat_quality_filtered_SNV_count = count_tertiary_snv.variant_count - Array[String]? stat_quality_filtered_SV_count = count_tertiary_sv.variant_count - Array[String]? stat_parent_filtered_SNV_count = count_parent_filtered_snv.variant_count - Array[String]? stat_parent_filtered_SV_count = count_parent_filtered_sv.variant_count - Array[String]? stat_concerning_SNV_count = count_concerning_snv.row_count - Array[String]? stat_concerning_SV_count = count_concerning_sv.row_count + Array[String]? stat_quality_filtered_SNV_count = count_tertiary_snv.variant_count + Array[String]? 
stat_quality_filtered_SV_count = count_tertiary_sv.variant_count + Array[String]? stat_parent_filtered_SNV_count = count_parent_filtered_snv.variant_count + Array[String]? stat_parent_filtered_SV_count = count_parent_filtered_sv.variant_count + Array[String]? stat_qual_filtered_small_variant_count = count_qual_filtered_small_variants.variant_count + Array[String]? stat_qual_filtered_SV_count = count_qual_filtered_sv.variant_count + Array[String]? stat_concerning_SNV_count = count_concerning_snv.row_count + Array[String]? stat_concerning_SV_count = count_concerning_sv.row_count # Somatic SV calling # Array[File] Severus_somatic_vcf = select_all(select_first([phased_severus.output_vcf])) @@ -885,14 +941,21 @@ workflow humanwgs_family { # CRISPR edit QC outputs (read-based) Array[File?] edit_qc_filtered_reads_fasta = crispr_edit_qc.filtered_reads_fasta - Array[File?] edit_qc_parts_alignment_paf = crispr_edit_qc.parts_alignment_paf + Array[File?] edit_qc_parts_alignment_tsv = crispr_edit_qc.parts_alignment_tsv Array[File?] edit_qc_wide_faithful_tsv = crispr_edit_qc.wide_faithful_table + Array[File?] edit_qc_consensus_fasta = crispr_edit_qc.consensus_fasta + Array[File?] edit_qc_categories_tsv = crispr_edit_qc.categories_tsv # CRISPR edit consensus annotation outputs Array[File?] edit_consensus_genbank = crispr_edit_consensus.genbank_annotations Array[File?] edit_consensus_summary = crispr_edit_consensus.annotation_summary - Array[File?] parent_consensus_genbank = parent_consensus.genbank_annotations - Array[File?] parent_consensus_summary = parent_consensus.annotation_summary + + # Parent edit QC outputs (WT at same locus, using child's edit JSON) + Array[File?] parent_edit_qc_categories_tsv = parent_edit_qc.categories_tsv + Array[File?] parent_edit_qc_consensus_fasta = parent_edit_qc.consensus_fasta + Array[File?] edit_consensus_msa_clustal = align_consensus_msa.consensus_msa_clustal + Array[File?] parent_consensus_genbank = parent_consensus.genbank_annotations + Array[File?] parent_consensus_summary = parent_consensus.annotation_summary # knock-knock HDR outcome classification outputs Array[File?] knock_knock_outcome_list_txt = knock_knock.outcome_list_txt @@ -907,6 +970,7 @@ workflow humanwgs_family { Array[Float?] loh_het_intact_upstream_pct = check_loh.het_intact_upstream_pct Array[Float?] loh_het_intact_downstream_pct = check_loh.het_intact_downstream_pct Array[File?] loh_summary = check_loh.loh_summary + Array[File?] loh_chrom_bedgraph = check_loh.loh_chrom_bedgraph # Off-target analysis outputs Array[String?] predicted_cut_site = convert_offtarget_to_bed.expected_cut_site
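Note on the check_loh chromosome-wide bedgraph added above: each window's value is
the fraction of sites that are het in the parent and remain het in the edited
sample (both_het / parent_het), so runs of windows near 0.0 flag candidate LOH
tracts; windows with no parent-het sites are omitted entirely. A minimal
standalone sketch of that windowing logic (illustrative only; the toy GT records,
chromosome name, and window size are assumptions, not workflow code):

    from collections import defaultdict

    WINDOW = 100_000  # bp, the task's chrom_window_size default

    # (pos, parent_GT, child_GT) toy records standing in for bcftools query output
    records = [(1_210_000, "0/1", "0/1"), (1_250_000, "0/1", "1/1"),
               (2_510_000, "0|1", "0/0"), (2_550_000, "1/1", "1/1")]

    def is_het(gt):
        alleles = gt.replace("|", "/").split("/")
        return len(set(alleles)) > 1 and "." not in alleles

    parent_het = defaultdict(int)  # windows with >=1 parent-het site
    both_het = defaultdict(int)    # how many of those sites stayed het in the child

    for pos, parent_gt, child_gt in records:
        if is_het(parent_gt):
            win = pos // WINDOW
            parent_het[win] += 1
            if is_het(child_gt):
                both_het[win] += 1

    for win in sorted(parent_het):
        frac = both_het[win] / parent_het[win]  # parent_het[win] >= 1 by construction
        print(f"chr15\t{win * WINDOW}\t{(win + 1) * WINDOW}\t{frac:.6f}")
    # -> 0.5 for the window at 1.2 Mb (one of two parent hets lost)
    #    0.0 for the window at 2.5 Mb (candidate LOH)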