Skip to content

Commit 2726315

Browse files
dpark01 and claude committed
Update expected test outputs for FreeBayes+minimap2; skip novoalign when not needed
Generate new expected FASTA files from FreeBayes+minimap2 pipeline:
- refine1: 6bp insertion at pos 1539 (82% of reads support it, QUAL=300)
- refine2: 49bp longer consensus (minimap2 maps more reads to ends)

Make novoalign instantiation conditional on already_realigned_bam so refine_assembly works on ARM64 when using pre-aligned BAMs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 27c0415 commit 2726315

File tree

3 files changed

+38
-27
lines changed

3 files changed

+38
-27
lines changed

src/viral_ngs/assembly.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -699,8 +699,10 @@ def refine_assembly(
699699
picard_index = viral_ngs.core.picard.CreateSequenceDictionaryTool()
700700
picard_mkdup = viral_ngs.core.picard.MarkDuplicatesTool()
701701
samtools = viral_ngs.core.samtools.SamtoolsTool()
702-
novoalign = viral_ngs.core.novoalign.NovoalignTool(license_path=novoalign_license_path)
703702
fb = viral_ngs.assemble.freebayes.FreeBayesTool()
703+
novoalign = None
704+
if not already_realigned_bam:
705+
novoalign = viral_ngs.core.novoalign.NovoalignTool(license_path=novoalign_license_path)
704706

705707
# Sanitize fasta header & create deambiguated genome for GATK
706708
deambigFasta = viral_ngs.core.file.mkstempfname('.deambig.fasta')
@@ -771,7 +773,8 @@ def refine_assembly(
771773
# has a non-zero size
772774
if (os.path.getsize(outFasta) > 0):
773775
samtools.faidx(outFasta, overwrite=True)
774-
novoalign.index_fasta(outFasta)
776+
if already_realigned_bam is None:
777+
novoalign.index_fasta(outFasta)
775778

776779
return 0
777780

tests/input/TestRefineAssembly/expected.ebov.refine1.freebayes.fasta

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ GCCAGCAGCGGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTAGCAGGACTGATC
2424
ACAGGCGGGAGAAGGACTCGAAGAGAAGTAATTGTCAATGCTCAACCCAAATGCAACCCC
2525
AATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGATTGGCCTGGATACCA
2626
TATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGGCTAATGCACAACCAAGATGGT
27-
TTAATCTGTGGGTTGAGGCAGCTGGCCAACGAACAAGCTCTCCAACTGTTCCTGAGAGCC
28-
ACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCTGCTGCAG
29-
CGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAACCACATGATTGG
30-
ACCAAGAACAT
27+
TTAATCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCCAACTGTTCCTG
28+
AGAGCCACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCTG
29+
CTGCAGCGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAACCACAT
30+
GATTGGACCAAGAACAT
Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
11
>test_sub-EBOV.genome-1
2-
ATTGAGATCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGACGTGCCATCTGTGACTAAAAGATGGGGCTTC
3-
AGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCTTGAAATCAAAA
4-
AACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGGTGCCGGTATGTGCACAAAGT
5-
ATCAGGAACGGGACCATGTGCCGGAGACTTTGCCTTCCACAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCC
6-
ACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCT
7-
TCAGCTCACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCGAGTGGCTATTATTCTACCACAATTAGATA
8-
TCAGGCTACCGGTTTTGGAACTAATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCA
9-
AGATTCACACCACAGTTTCTGCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAGGAGCAACACCACGGGAAAAC
10-
TAATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCACTAGA
11-
AAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAACGGACCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAA
12-
CTTCTTCCGACCCAGAGACCAACACAACAAATGAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCA
13-
AGTGCACAGTCAAGGAAGGAAAGCTGCAGTGTCGCATCTGACAACCCTTGCCACAATCTCCACGAGTCCTCAACCTCCC
14-
ACAACCAAAACAGGTCCGGACAACAGCACCCGTAATACACCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTG
15-
GACAACATCACCGTAGAGCNGACAACGACNNNNNNGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCTTAAA
16-
AGCAGAGAACACCAACACGAGTAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCAAAACTACAGCGAG
17-
ACTGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCA
18-
ATACTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGAGAAGGACTCGAAGAGAAGTAATTGTCAATGCTCAACCCAA
19-
ATGCAACCCCAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGATTGGCCTGGATACCATATTTCGGG
20-
CCAGCAGCCGAAGGAATTTACACAGAGGGGCTAATGCACAACCAAGATGGTTTAATCTGTGGGTTGAGGCAGCTGGCCA
21-
ACGAAACGACTCAAGCTCTCCAACTGTTCCTGAGAGCCACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGC
22-
AATTGACTTCCTGCTGCAGCGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAAC
2+
TCGTGACAAACTGTCATCCACAAATCAATTGAGATCAGTTGGACTGAATCTCGAGGGGAA
3+
TGGAGTGGCAACTGACGTGCCATCTGTGACTAAAAGATGGGGCTTCAGGTCCGGTGTCCC
4+
ACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCTTGAAAT
5+
CAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCC
6+
CCGGTGCCGGTATGTGCACAAAGTATCAGGAACGGGACCATGTGCCGGAGACTTTGCCTT
7+
CCACAAAGAGGGTGCTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGG
8+
AACGACTTTCGCTGAAGGTGTCGTTGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTT
9+
CTTCAGCTCACACCCCTTGAGAGAGCCGGTCAATGCAACGGAGGACCCGTCGAGTGGCTA
10+
TTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACTAATGAGACAGAGTACTT
11+
GTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACAGTTTCT
12+
GCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAGGAGCAACACCACGGGAAAACT
13+
AATTTGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAAC
14+
TAAAAAAACCTCACTAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAAC
15+
GGACCCAAAAACATCAGTGGTCAGAGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAAC
16+
ACAACAAATGAAGACCACAAAATCATGGCTTCAGAAAATTCCTCTGCAATGGTTCAAGTG
17+
CACAGTCAAGGAAGGAAAGCTGCAGTGTCGCATCTGACAACCCTTGCCACAATCTCCACG
18+
AGTCCTCAACCTCCCACAACCAAAACAGGTCCGGACAACAGCACCCGTAATACACCCGTG
19+
TATAAACTTGACATCTCTGAGGCAACTCAAGTTGGACAACATCACCGTAGAGCNGACAAC
20+
GNCNNNNNNGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCTTAAAAGCAGAG
21+
AACACCAACACGAGTAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCAA
22+
AACTACAGCGAGACTGCTGGCAACAACAACACTCATCACCAAGATACCGGAGAAGAGAGT
23+
GCCAGCAGCGGGAAGCTAGGCTTAATTACCAATACTATTGCTGGAGTAGCAGGACTGATC
24+
ACAGGCGGGAGAAGGACTCGAAGAGAAGTAATTGTCAATGCTCAACCCAAATGCAACCCC
25+
AATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGATTGGCCTGGATACCA
26+
TATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGGCTAATGCACAACCAAGATGGT
27+
TTAATCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCCAACTGTTCCTG
28+
AGAGCCACAACTGAGCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCTG
29+
CTGCAGCGATGGGGTGGCACATGCCACATTTTGGGACCGGACTGCTGTATCGAACCACAT
30+
GATTGGACCAAGAACAT

0 commit comments

Comments
 (0)