-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathconfig.yaml
More file actions
67 lines (54 loc) · 1.55 KB
/
config.yaml
File metadata and controls
67 lines (54 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Input files for this build. Some paths carry a `clade-i/` prefix and some
# do not; the values are reproduced exactly as configured.
reference: 'clade-i/reference.fasta'
genome_annotation: 'clade-i/genome_annotation.gff3'
genbank_reference: 'clade-i/reference.gb'
include: 'clade-i/include.txt'
exclude: 'exclude.txt'
clades: 'clades.tsv'
lat_longs: 'lat_longs.tsv'
color_ordering: 'color_ordering.tsv'
color_scheme: 'color_schemes.tsv'
auspice_config: 'clade-i/auspice_config.json'
description: 'description.md'
tree_mask: 'clade-i/tree_mask.tsv'

# `strain` currently contains duplicates, so `accession` is used as the ID
# column instead. Background: https://github.com/nextstrain/mpox/issues/33
strain_id_field: 'accession'

# Build identifiers.
build_name: 'clade-i'
auspice_name: 'mpox_clade-I'
# Sequence filtering criteria; all three settings are children of `filter`.
filter:
  # NOTE(review): presumably the earliest accepted collection year - confirm.
  min_date: 1900
  # NOTE(review): presumably a minimum sequence length in bases - confirm.
  min_length: 170000
  # Keep only records whose QC_rare_mutations value is 'good' or 'mediocre'.
  query: "(QC_rare_mutations == 'good' | QC_rare_mutations == 'mediocre')"
### Filter to only Clade I sequences
# One entry named `everything`. The folded block scalar (`>-`) produces the
# argument text as a single line with no trailing newline; its content must be
# indented deeper than the `everything` key to belong to the scalar.
subsample:
  everything: >-
    --query 'clade in ["I", "Ia", "Ib"]'
## align
max_indel: 10000
seed_spacing: 1000

## treefix
fix_tree: true
# Left empty on purpose: without a root we'll midpoint root, which should
# work great for clade I.
treefix_root: ''

## refine
# Disabled, as rates vary too much depending on clade/outbreak.
timetree: false
root: 'best'
# Clock rate chosen via treetime inference on Clade-I data excluding
# Clade-Ib seqs (n=73).
# TODO: update this once more public data is available.
clock_rate: 1.465e-06
clock_std_dev: 6.7e-07
divergence_units: 'mutations'
# Trait settings: the metadata columns listed under `columns` are `region`
# and `country`.
traits:
  columns:
    - region
    - country
  # NOTE(review): placed under `traits` as a sibling of `columns`; confirm
  # the workflow reads it from here rather than from the top level.
  sampling_bias_correction: 3

## recency
recency: true
# Masking settings; all three values are children of `mask`.
# NOTE(review): `from_beginning` / `from_end` are presumably counts of sites
# masked at each end of the alignment - confirm against the masking step.
mask:
  from_beginning: 800
  from_end: 6422
  maskfile: "clade-i/mask.bed"
# Colour settings: `ignore_categories` lists `division` and `location`.
colors:
  ignore_categories:
    - division
    - location