Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ingest/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ gene_coverage:
id_field: "seqName"
coverage:
cdsCoverage_field: "cdsCoverage"
genes: "p48,NTPase,p22,VPg,3CLpro,RdRp,VP1,VP2"
genes: "p48,NTPase,p22,VPg,3CLpro,RdRp,VP1,VP2,ORF1,ORF2,ORF3"
round_digits: 3

# Nextclade parameters to include if you are running Nextclade as a part of your ingest workflow
Expand Down
2 changes: 1 addition & 1 deletion phylogenetic/build-configs/ci/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# for the CI workflow to run with the example data.

# {group} represents different genotypes to be analyzed
groups: ['all']
groups: ['GII.2']
# {gene} represents the norovirus genes to focus on in each build
genes: ['genome', 'VP1']

Expand Down
13 changes: 13 additions & 0 deletions phylogenetic/defaults/GII.4/exclude.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
KX158286
JX846924
MK073894
KJ407072
NC_039475
KC597139
KJ196286
MH218591
KT589391
MG557657 # Low quality, may break all/VPg
KX158285 # Perhaps too diverged, may break GII.4/genome
OR951134 # False positive GII.4, remove after nextclade tuning
MF373609 # False positive GII.4, remove after nextclade tuning
6 changes: 5 additions & 1 deletion phylogenetic/defaults/all/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@
# {group} represents different genotypes to be analyzed
groups: ['all']
# {gene} represents the norovirus genes to focus on in each build
genes: ['3CLpro', 'NTPase', 'p22', 'p48', 'RdRp', 'VP1', 'VP2', 'VPg']
genes: ['genome', '3CLpro', 'NTPase', 'p22', 'p48', 'RdRp', 'VP1', 'VP2', 'VPg']

# Timetree still performs better than outgroup
# refine:
# outgroup: NC_027026_outgroup
4 changes: 4 additions & 0 deletions phylogenetic/defaults/all/reference.gff3
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ NC_039477_REF Genbank gene 3029 3571 . + . ID=gene-3CLpro;Name=3CLpro
NC_039477_REF Genbank CDS 3029 3571 . + . ID=cds-3CLpro;Parent=gene-3CLpro;Name=3CLpro
NC_039477_REF Genbank gene 3572 5101 . + . ID=gene-RdRp;Name=RdRp
NC_039477_REF Genbank CDS 3572 5101 . + . ID=cds-RdRp;Parent=gene-RdRp;Name=RdRp
NC_039477_REF Genbank gene 5085 6707 . + . ID=gene-ORF2;Name=ORF2
NC_039477_REF Genbank CDS 5085 6707 . + . ID=cds-ORF2;Parent=gene-ORF2;Name=ORF2
NC_039477_REF Genbank gene 5085 6707 . + . ID=gene-VP1;Name=VP1
NC_039477_REF Genbank CDS 5085 6707 . + . ID=cds-VP1;Parent=gene-VP1;Name=VP1
NC_039477_REF Genbank gene 6707 7513 . + . ID=gene-ORF3;Name=ORF3
NC_039477_REF Genbank CDS 6707 7513 . + . ID=cds-ORF3;Parent=gene-ORF3;Name=ORF3
NC_039477_REF Genbank gene 6707 7513 . + . ID=gene-VP2;Name=VP2
NC_039477_REF Genbank CDS 6707 7513 . + . ID=cds-VP2;Parent=gene-VP2;Name=VP2
23 changes: 12 additions & 11 deletions phylogenetic/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

# Define wildcards used for building trees in the workflow, also used in the intermediate files or auxiliary default file names.
# {group} represents different genotypes to be analyzed
groups: ['GII.6', 'GII.4', 'GII.2', 'GII.3', 'GII.17', 'all']
groups: ['GII.6', 'GII.4', 'GII.2', 'GII.3', 'GII.17']
# {gene} represents the norovirus genes to focus on in each build
genes: ['genome']
genes: ['ORF1', 'ORF2', 'ORF3', 'RdRp', 'VP1']

inputs:
- name: ncbi
Expand All @@ -20,20 +20,20 @@ strain_id_field: "accession"
reference: defaults/{group}/reference.gb

filter:
exclude: defaults/exclude.txt
exclude:
default: defaults/exclude.txt
GII.4: defaults/GII.4/exclude.txt
min_coverage: 0.8
filter_params: --group-by year VP1_nextclade RdRp_nextclade --sequences-per-group 30 --min-date 1950 --exclude-ambiguous-dates-by year --query-columns is_lab_host:str --exclude-where is_lab_host='true'

refine:
root:
default: best --timetree --date-confidence --clock-filter-iqd 4 --date-inference marginal
# Estimated from auspice build https://github.com/nextstrain/norovirus/issues/22#issuecomment-3221851561
all:
p48: DQ366347 --timetree --date-confidence --clock-filter-iqd 4 --date-inference marginal
VP2: best --timetree --date-confidence
genome: best --timetree --date-confidence
GII.4:
genome: mid_point --timetree --date-confidence
# Estimated from auspice build https://github.com/nextstrain/norovirus/issues/22#issuecomment-3221851561
clock_rate:
all:
genome: 0.00328
Expand All @@ -46,15 +46,16 @@ refine:
VP1: 0.00136
VP2: 0.00228
GII.2:
genome: 0.000211
RdRp: 0.000346
GII.3:
genome: 0.000688
RdRp: 0.00156
GII.4:
genome: 0.000874
RdRp: 0.00177
GII.6:
genome: 0.000135
RdRp: 0.00199
GII.17:
genome: 0.00142
RdRp: 0.00133


traits:
default: region country ORF1_type ORF2_type VP1_nextclade VP1_group VP1_type VP1_variant RdRp_nextclade RdRp_group RdRp_type RdRp_variant host
Expand Down
19 changes: 13 additions & 6 deletions phylogenetic/defaults/description.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
We gratefully acknowledge the authors, originating and submitting laboratories of the genetic sequences and metadata for sharing their work. Please note that although data generators have generously shared data in an open fashion, that does not mean there should be free license to publish on this data. Data generators should be cited where possible and collaborations should be sought in some circumstances. Please try to avoid scooping someone else's work. Reach out if uncertain.

**We maintain 14 views of norovirus evolution:**
**We provide divergence tree views of norovirus evolution across all samples:**

| group | genome | p48 | NTPase | p22 | VPg | 3CLpro | RdRp | VP1 | VP2 |
|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|
| all | [genome](https://nextstrain.org/norovirus/all/genome) | [p48](https://nextstrain.org/norovirus/all/p48) | [NTPase](https://nextstrain.org/norovirus/all/NTPase) | [p22](https://nextstrain.org/norovirus/all/p22) | [VPg](https://nextstrain.org/norovirus/all/VPg) | [3CLpro](https://nextstrain.org/norovirus/all/3CLpro) | [RdRp](https://nextstrain.org/norovirus/all/RdRp/) | [VP1](https://nextstrain.org/norovirus/all/VP1) | [VP2](https://nextstrain.org/norovirus/all/VP2) |
| GII.2 | [genome](https://nextstrain.org/norovirus/GII.2/genome) | | | | | | | | |
| GII.3 | [genome](https://nextstrain.org/norovirus/GII.3/genome) | | | | | | | | |
| GII.4 | [genome](https://nextstrain.org/norovirus/GII.4/genome) | | | | | | | | |
| GII.6 | [genome](https://nextstrain.org/norovirus/GII.6/genome) | | | | | | | | |
| GII.17 | [genome](https://nextstrain.org/norovirus/GII.17/genome) | | | | | | | | |

**We provide timetree views by genotype:**

Since norovirus is known to have recombination breakpoints between ORF1-ORF2 (between the RdRp and VP1 genes) and less frequently between ORF2-ORF3, the timetree views are by breakpoint region.

| group | ORF1 | ORF2 | ORF3 | RdRp | VP1 |
|:--|:-- |:--|:--|:--|:--|
| GII.2 | [ORF1](https://nextstrain.org/norovirus/GII.2/ORF1/) | [ORF2](https://nextstrain.org/norovirus/GII.2/ORF2/) | [ORF3](https://nextstrain.org/norovirus/GII.2/ORF3/) | [RdRp](https://nextstrain.org/norovirus/GII.2/RdRp/) | [VP1](https://nextstrain.org/norovirus/GII.2/VP1/) |
| GII.3 | [ORF1](https://nextstrain.org/norovirus/GII.3/ORF1/) | [ORF2](https://nextstrain.org/norovirus/GII.3/ORF2/) | [ORF3](https://nextstrain.org/norovirus/GII.3/ORF3/) | [RdRp](https://nextstrain.org/norovirus/GII.3/RdRp/) | [VP1](https://nextstrain.org/norovirus/GII.3/VP1/) |
| GII.4 | [ORF1](https://nextstrain.org/norovirus/GII.4/ORF1/) | [ORF2](https://nextstrain.org/norovirus/GII.4/ORF2/) | [ORF3](https://nextstrain.org/norovirus/GII.4/ORF3/) | [RdRp](https://nextstrain.org/norovirus/GII.4/RdRp/) | [VP1](https://nextstrain.org/norovirus/GII.4/VP1/) |
| GII.6 | [ORF1](https://nextstrain.org/norovirus/GII.6/ORF1/) | [ORF2](https://nextstrain.org/norovirus/GII.6/ORF2/) | [ORF3](https://nextstrain.org/norovirus/GII.6/ORF3/) | [RdRp](https://nextstrain.org/norovirus/GII.6/RdRp/) | [VP1](https://nextstrain.org/norovirus/GII.6/VP1/) |
| GII.17 | [ORF1](https://nextstrain.org/norovirus/GII.17/ORF1/) | [ORF2](https://nextstrain.org/norovirus/GII.17/ORF2/) | [ORF3](https://nextstrain.org/norovirus/GII.17/ORF3/) | [RdRp](https://nextstrain.org/norovirus/GII.17/RdRp/) | [VP1](https://nextstrain.org/norovirus/GII.17/VP1/) |

#### Nextclade group, type, and variant assignment

Expand Down
2 changes: 2 additions & 0 deletions phylogenetic/defaults/outgroup.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>NC_027026_outgroup
gtgatcaccttgggatggcttccaagccattccaagttgagtcttgcgacttcatgtttgaagtccatgttctccacatgtgctacctcagggtagcaccgagagaacagtttcttgatgactgtactataagtgcagttaccaaatattatttgcaccacgtgcccctacataatagaaatggcatggactgcgagtttgaggctgaggggttgctgtcccggttattcgggacggccggttccccctcactggattcgcagtccgctttcaaggagcttttcggctttgacaccgatgagcaaatgccattgtcccttgaagaattggccaaattgcaaggcgaaatcaccgcctcgcttcaaatgaacaataaccaattcgttgctaaacatggtaaggccaaggtgcaggcccttttggaccagctcaacacactggtcccccgtgacatcagtgaggctgaaagacgccgtcgagaattctttgagaggcagactgctgctgccttcgctgaactcccaaatgatgacacatttaccgaacaagattggaagtcctattggtatgcaatgtggcggcgagttgtgggtggttgcaaatcatactaccatggtctgccccgttggagctctttcaagacacggctctcgcgtgccacagagcccctccgccaggtgttagcagttgctgcacagacctttgatcaatgtgtacaaactgaccctcgcatcttggctatgaattgtgtgactgccttgaaacccactgttttaaccatgatataccagcaacatcacaacacacccagtgggtggcttgcaacccttacggccctctgggaggttttccaaccttccctccctgcattaggcactttaggtactggtgttgccacaaccttggggctcgtcgtcaacaccttgtcacggttcttccagaaactttgtgcttacatttctgaaacatttcttcccgaatctccaacaaccccaggctgggttgccattgtggctggtgttctgctgttacttttgaaactctcctgcatccccaaagtgttttcccactggagcacgcttctcaagttggccagtggtatcacaacagtcattggagcaacacgagccgtggactggatcatggggaaaattcgcgacgcgcgccattcttccatgtgtaaacagttccttagccgtgtgtctgcactgttggagctccactactcaaagactgtcacaggtgttgcagagaacacagagctcctcaagtgtttcgaccagctcattgatgaaggagaagaattggtctctgaaataggaggtggttctttagccgcaataatacgcagtggtgttgacacactccagagggtttccacagaaatcaaagccacaatccaactggacaacccgcgtccagtgcccgtatgtgtgatattctccggacctccaggtattggaaaaacctctttagcataccacatggccaagggcattggtctcacctccaacttttctttggcaaacgatcatcatgatggatatacaggtaaccctgtagctatatgggatgaatatgatactgacaaggatggcaaatttgtggagcagatgatatccctggtcaacacacagccttgtgtcctcaattgtgaccgaccagaaaataagggcaagctttttacatccaagttcattttctgcactaccaattacacgaccagtgtgctcccagacaatccacgtgctggcgcattctaccgtcgagtcatcactgtagatgtgcgatctcctgaaattgaggattggatggcagcacaccctggtaggtcccctccgaaaactctcttcaaaagcgattgctcccacttgaaattaatggttaggccatatatggggtacaaccctgacggggatactctagctggcaaacgcgtaaagcccaccccaatcaccatagctgggcttcatgacatgattgacaaaaagtttgaggag
caatcaggggaggtccggggcatttggatcactgtgccccggcgtagtgtgcaaacagcccttgttgctgtcaagaagttttgtgtagcccatcaagcgttgtgccacgtcacatccactccttctcctgaaattctacagtgtgccactttctcgtgtgttgttgtgtcagatgcgcacccgccacctggtgctccacttttgcacatcaagaatgcacatcttgaagtagaccacactggccaagctgtcaccagcatttcagagagcctgcttggtatgtttattacagagcaacgtgtttcatccaagttgcaaagggatatcatgtacaaagtgtggtcaccttttacattaatgcagactgagccattgaatacacagtccctacccccagttaggcgcataatttatgctgacacgcccatggatttcattgggggtttaaggcaccaccttggctttagttcaattcctggtctgtggcgtgcccttaagcatctcccagacacgccctcaatgatagagtggatcactgaccacctttctcaagtgcagtttcccaacaaccccgaatcaaccctatttagaacaggcaatggtgatgtcatattctatacttatggttctttttatgctctaggcacttgtgcccgtgtgcctgtagttagtggtgacacagttagccctctcccaaatgtccccctaaaaatgacttggtttgaaacactgaaggccctttgctcgtcagcattgcgcttgttcacggccatctcaccatttgccatagccgttgctaatgtcacttatctcaccacccgaggatcacgtgaagaacaagccaaaggcaagacaaaacatggtcgtggcgcccgccatgcccgtgggcgatccacagcgcttaatgatgatgagtacaatgagtggatggacttgcgccgcgactggcgtgaagaaatgactgctgacgagttcttacacctaagggatgaagcttatgaaggtatcatcaatgaccgcacccagcggtacaacacctggcttaacctgcgcaacatgcgcttaggcgctggagcgtaccaacatgcaacaatcataggcaagggtggtgtgcgcaatgagatcatacgcacccaagtgctgaaggctcccaggaaaggaaaatggtcacacattgattcttctggccccatgaattattttgatgaggcaccgaccccactagtggaatttgagtgtgatggtgcccatgttgggtggggggtgcaccttgggaacgggcgcgtggtgacagtcacccatgtggccacatcatctaacactgtgaacgggctgccatttaaggtcaaggacaccgacggggagacctgccaggtttatgccactctaggtaacttgccacactaccagcttggggatggtgcgcctgtttactacacaaccaggtaccaccctgtgcttgtgattggtgaagggcaatttgacacacctacaacgactgtcaacggtttccacgttcgcattaccaattcatatccaacaaagaaaggtgattgtggactaccatacttcaatgcactgcgccaggtggttgcccttcatgctgctggcagtacagatgggtcaaccaagttggcgcaacgagttgctgagaaaccgcaaacaggggatgcattcgtctggaagggattgcccgttgttcgtggtagtgatgtcggcggactccccactggcactcgctaccatcgctccccagcttggccagagatgcgatctgacgagactcattcaccagctccttttggttctggtgacaagcggtatgaattctcacaagtggagatgcttgttaacaacttgagaccataccttgaggaagttccaggggtgcctccagcattacttaatagagctattgttcacacccgcaattacctacaatcaataattggcactgaacaaagtgaaccacttacttatgccatggcatcatcaatgtt
ggaaaaaggcacctcctgtggtccccacattactggccttaagggtgattactgggatgatgaaacacaacagtacacagggtcacttcgagaacatctggaagcggtctggaacaaggctatgttagggacaccaccaagccacgattacaaattggcactcaaagatgagttaagaccaaatgagaaaaatgaacagggtaagaggcgcctgctgtggggtgctgatgctgggcttactctcgtgtgttgtgctgctctcaaaccagcagccgcacgactgcaaactgttgtgcctatgacacctgttgcagtcggcatcaacatggattctgcacacatagaagttatgaatgagtcccttaaggggcgtgtgctttatgcattagattatagcaagtgggacagcacccaatctgcagctgtgacagcagcctcacttgagatccttgcatccttcatgacaccaactccaattgtctcatcagctattgaggccctgaaggcacccgctagaggcatggtcaacgatgccatcttcatagctcgatccggtttgccatctggcatgccgttcacaagtgtggtcaactccatcaaccacatgctttacatctctgcagctattcttcaggcatacgaagcacataatctcccatatagtgacaatgtttttaacattgaaaccattcacacctatggtgatgattgcctgtacgggttcacccccgcaacagcctctctcatgcaggtaatcatagaaaacctacgctcatatggcctgaaacccacagcagctgataagggcgaaaccattgccccagtgcaaactcctgtttttctcaagcgcacgttcgcaacaacaccacatggactgcgcgctttgcttgacacctcttccattcttaggcagttcttctgggtgaaagcccaacgcacatgtgatgtctattctgctcctacaattgacaccaaatctcgggctgcgcagctcgaggtggcattggcctatgctagtcaacatggtcatgagttcttcaacaaagcacgtgaaatcgctgagaaaacaagtgcagctgaggggtatgttctggtgaataccaactacgaacaagcaactgcatgttacaacagctggtacataggaggcactacaccagaaatgcccgctaccaatgaaggctgcgggctaatagtgtttgagatggagggcaatggctccccacgaggaggtaaccagcctcaatcccacaatggtggcacttcacctgctcaggctgcaccgcctggcacgactggccctgcggaagcaccccttgtacctgttaatcctgaacagcccaattccattgcccaacgcatggagctggctgttgccacaggagcaacaacctcaaatgtccctgaatgtgtgcggaactgctttgctctccttcgtacgattccttggaattctcgacagccccagggatctcttctcacagctgtttctttacaccctgacatcaacccgtacacaaaacatcttgctcaaatgtttgccggatggggaggggcaatggatgtccgtgtcacgatctcaggctcgggcttatttgcagggaagcttgtgtgtgggatacttcctcctggagttaatcccactcttgtcagcgatccgggggttctgccacacgctttggtagatgcccgcgtgactgaaccagcatgcttcaacgtgcctgatgtccgcgccgtcgactaccatcgcactgacggcgatgaggcaactgcaacacttggcatttgggtgctccaaccactaatcaaccccttctccactgaagctgtttcaaccgcttggatttcaattgagaccaaaccaggtggtgattttgatctttgcttgatgaaacctcctaaccaagccatggataacggcacatctccatcgttcctcttgccacgccgtttacaacgttcaagaggcaaccgcgctggtggctatgcagttggaatgg
tcattgtgggctcagcacaccaggtgaaccgccacttcactgctcttggcacaacatttggttggtctaccgcaccttacgaacccatgcgatgtgcttttggtggagtacatcaagggcgtgacaccaacccaaaaattggctactactgggaggttggtgctgaccagcgtggcccgctttttccaaacattgttaatcattggcctgattttgcagtcaacaccaaatacacatggccagatgccgactatataccccatagtgcggttgttggaaccttggtttccttccaggacaatggagatgtttctgaggatcaagtggctactgcatttgcaatttcaatgaacactccaagtgggagcacaactggacgcggcacagtcagagaagcatttgatccatccacaatgcatttggtccgcaccaatggtaccactcaaccatctgggtggccaaccggctcaaacactggaaatggttacttcacaccaatgtggggtcatggtcagggtaatgcaattaatgataagatcaccaacatggagggggctaattatacatttggaggctctggccaaaacaacatagtcttgtgggttgaaagaatcttctcagaccatcctggtaaaaccacactttactcatcacaattggacagcactgcggccattttccagtcaggcccagtcaatatacctgagaacatgatggctgtctacaatgtcaccactaatggggctgactttcaagttggcatccgtcgtgatggctacatggtcacatctggaacaattggtactcagcaggagcttgaccctgacaccactttcacttatgttggacttttccccctttctgcctcattggttggcccacatgggaattctggacgggcccagatagcatggtcatgagctggttggtaggcactttgcaatctcttggtgggctcactgacgttgcgtccaccatttctggcatggtttatcaacatagacatcttgaccaactgaaaagacagaatgatctacaagaacagtggatggcccgcaatgaacaactgcagcgagatgcaatgcaattaactcaggatttggcagtcaatgccccagcgatgagagtgcaggccgctctcaatgctgggtttgatgtggttagtgcgcgccagcttgccggttcaactgagcgaaggatcaacggctatttggatcagccaattcgcactattgatcaggcaatggcagtgcagtctaggggcaatttgacttcgttgtcgaacgcccttgccacttaccaaaaaggcactcaatttggactcaaacaacccaaaggatttaagagcccaattgctgctgaacaatctcgtggtccaaccattacacttggccctcccccaccatctactaatctataaatcaatcttttataaatttgtgcaaatttctttttcttcctcatggtcgcacacgcgttcgggtgcgttgcagtcaattaagcgattgacgccatctttgg
48 changes: 42 additions & 6 deletions phylogenetic/rules/construct_phylogeny.smk
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,41 @@ This part of the workflow usually includes the following steps:
See Augur's usage docs for these commands for more details.
"""

rule add_outgroup:
    """
    Add the outgroup sequence to an existing alignment.

    Runs `augur align` with the per-{group}/{gene} alignment passed as
    `--existing-alignment` and `defaults/outgroup.fasta` passed as the new
    sequences, writing the combined alignment next to the original.
    """
    input:
        alignment = "results/{group}/{gene}/aligned.fasta",
        outgroup = "defaults/outgroup.fasta",
    output:
        alignment_with_outgroup = "results/{group}/{gene}/aligned_with_outgroup.fasta",
    log:
        "logs/{group}/{gene}/add-outgroup.txt",
    benchmark:
        "benchmarks/{group}/{gene}/add-outgroup.txt",
    shell:
        # Paths are shell-quoted with `:q`, matching the quoting used for the
        # outputs of the `refine` rule in this file.
        """
        augur align \
            --sequences {input.outgroup:q} \
            --existing-alignment {input.alignment:q} \
            --output {output.alignment_with_outgroup:q} \
            2>&1 | tee {log:q}
        """

def _alignment(wildcards):
    """
    Pick the alignment file that downstream rules should read.

    If `refine.outgroup` is set to a non-empty value in the config, the
    alignment that includes the outgroup sequence is returned; otherwise the
    plain alignment is returned.

    `wildcards` is accepted so the function can be used as an input function,
    but it is not read here: the returned string keeps its `{group}` and
    `{gene}` placeholders.
    """
    # A missing key, an empty string and a blank YAML value (parsed as None)
    # all mean "no outgroup configured".
    if config['refine'].get('outgroup'):
        return "results/{group}/{gene}/aligned_with_outgroup.fasta"
    return "results/{group}/{gene}/aligned.fasta"


rule tree:
"""Building tree"""
input:
alignment = "results/{group}/{gene}/aligned.fasta"
alignment = lambda wildcards: _alignment(wildcards),
output:
tree = "results/{group}/{gene}/tree_raw.nwk"
benchmark:
Expand Down Expand Up @@ -52,20 +82,26 @@ def _clock_rate_params(wildcards):

else leave blank
"""
clock_rate = config['refine']['clock_rate'].get(wildcards.group, {}).get(wildcards.gene, "")
clock_rate = config['refine'].get('clock_rate', {}).get(wildcards.group, {}).get(wildcards.gene, "")
if clock_rate !="":
return f' --clock-rate {clock_rate} '
else:
return ""

def _root_params(wildcards):
    """
    Build the rooting argument string for the refine step.

    If `refine.outgroup` is set to a non-empty value in the config, the result
    is that outgroup name followed by ` --remove-outgroup`. Otherwise the
    value is looked up in `refine.root` by `wildcards.group` and then
    `wildcards.gene`, falling back to `refine.root.default`.

    Always returns a plain string.
    """
    # A missing key, an empty string and a blank YAML value (parsed as None)
    # all mean "no outgroup configured".
    outgroup = config['refine'].get('outgroup')
    if outgroup:
        return f'{outgroup} --remove-outgroup'

    root_config = config['refine']['root']
    # No trailing comma here: both branches must return a str, not a tuple.
    return root_config.get(wildcards.group, {}).get(wildcards.gene, root_config['default'])

rule refine:
"""
Refining tree
"""
input:
tree = "results/{group}/{gene}/tree_raw.nwk",
alignment = "results/{group}/{gene}/aligned.fasta",
alignment = lambda wildcards: _alignment(wildcards),
metadata = "results/{group}/{gene}/filtered.tsv"
output:
tree = "results/{group}/{gene}/tree.nwk",
Expand All @@ -75,7 +111,7 @@ rule refine:
log:
"logs/{group}/{gene}/refine.txt",
params:
root = lambda wildcards: config['refine']['root'].get(wildcards.group, {}).get(wildcards.gene, config['refine']['root']['default']),
root = lambda wildcards: _root_params(wildcards),
clock_rate_params = lambda wildcards: _clock_rate_params(wildcards),
id_field = config['strain_id_field'],
shell:
Expand All @@ -90,6 +126,6 @@ rule refine:
--metadata-id-columns {params.id_field} \
--output-tree {output.tree:q} \
--output-node-data {output.node_data:q} \
--stochastic-resolve \
{params.clock_rate_params}
{params.clock_rate_params} \
--stochastic-resolve
"""
2 changes: 1 addition & 1 deletion phylogenetic/rules/prepare_sequences.smk
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ rule filter:
input:
sequences = "results/sequences.fasta",
metadata = "results/metadata.tsv",
exclude = config['filter']['exclude']
exclude = lambda wildcards: config['filter']['exclude'].get(wildcards.group, config['filter']['exclude']['default']),
output:
sequences = "results/{group}/{gene}/filtered.fasta",
metadata = "results/{group}/{gene}/filtered.tsv",
Expand Down