|
| 1 | +""" |
| 2 | +This part of the workflow handles running Nextclade on the curated metadata |
| 3 | +and sequences. |
| 4 | +
|
| 5 | +See the Nextclade docs for more details on usage, inputs, and outputs if you |
| 6 | +would like to customize the rules. |
| 7 | +""" |
# Nextclade dataset name, taken from the `nextclade.dataset_name` config entry.
# It is used below both as the `--name` argument of `nextclade3 dataset get`
# and as the stem of the downloaded dataset zip path.
DATASET_NAME = config["nextclade"]["dataset_name"]
| 9 | + |
| 10 | + |
rule get_nextclade_dataset:
    """
    Download the Nextclade dataset named by DATASET_NAME as a zip archive.

    Output:
        dataset: data/nextclade_data/<DATASET_NAME>.zip
    """
    output:
        dataset=f"data/nextclade_data/{DATASET_NAME}.zip",
    params:
        dataset_name=DATASET_NAME,
    # Both arguments derive from the same config value, so both are
    # shell-quoted with `:q` (previously only --name was quoted).
    shell:
        """
        nextclade3 dataset get \
            --name={params.dataset_name:q} \
            --output-zip={output.dataset:q} \
            --verbose
        """
| 24 | + |
| 25 | + |
rule run_nextclade:
    """
    Run Nextclade on the sequences using the downloaded dataset.

    Inputs:
        dataset:   the dataset zip (data/nextclade_data/<DATASET_NAME>.zip)
        sequences: results/sequences.fasta
    Outputs:
        nextclade:    results/nextclade.tsv (Nextclade TSV output)
        alignment:    results/alignment.fasta
        translations: results/translations.zip, a flat zip of the files
                      that Nextclade writes under results/translations/
    """
    input:
        dataset=f"data/nextclade_data/{DATASET_NAME}.zip",
        sequences="results/sequences.fasta",
    output:
        nextclade="results/nextclade.tsv",
        alignment="results/alignment.fasta",
        translations="results/translations.zip",
    params:
        # Wrapped in a lambda so Snakemake passes the string through as-is
        # instead of treating `{cds}` as a rule wildcard; the `{cds}`
        # placeholder is filled in by Nextclade itself.
        translations=lambda w: "results/translations/{cds}.fasta",
    # results/translations is an undeclared scratch directory, so Snakemake
    # does not clean it between runs. Remove it first so the zip never picks
    # up stale translation files left over from a previous run.
    # The template is passed with `:q` so the shell sees the braces literally.
    shell:
        """
        rm -rf results/translations

        nextclade3 run \
            {input.sequences:q} \
            --input-dataset {input.dataset:q} \
            --output-tsv {output.nextclade:q} \
            --output-fasta {output.alignment:q} \
            --output-translations {params.translations:q}

        zip -rj {output.translations:q} results/translations
        """
| 47 | + |
| 48 | + |
rule join_metadata_and_nextclade:
    """
    Append selected, renamed Nextclade columns to the metadata TSV.

    Inputs:
        nextclade:           results/nextclade.tsv
        metadata:            data/subset_metadata.tsv
        nextclade_field_map: path from config["nextclade"]["field_map"]; the
                             first whitespace-separated column of each
                             non-comment line is taken as a Nextclade column
                             to keep, and the file is also given to csvtk as
                             the key-value file used to rename those columns
    Output:
        metadata: results/metadata.tsv
    Params:
        metadata_id_field:  id column in the metadata
                            (config["curate"]["output_id_field"])
        nextclade_id_field: id column in the (renamed) Nextclade table
                            (config["nextclade"]["id_field"])

    Metadata rows without a matching Nextclade record are kept, with the
    appended columns filled with "?". The Nextclade id column is dropped from
    the final output.
    """
    input:
        nextclade="results/nextclade.tsv",
        metadata="data/subset_metadata.tsv",
        nextclade_field_map=config["nextclade"]["field_map"],
    output:
        metadata="results/metadata.tsv",
    params:
        metadata_id_field=config["curate"]["output_id_field"],
        nextclade_id_field=config["nextclade"]["id_field"],
    # Notes on the shell command:
    # - SUBSET_FIELDS is a plain assignment (not `export VAR=...`) so that a
    #   failure while reading the field map is not masked by `export`'s own
    #   exit status.
    # - '?' is quoted: unquoted, the shell treats it as a glob and would
    #   replace it with any single-character filename in the working directory.
    # - `{{kv}}` is escaped so Snakemake emits the literal `{kv}` placeholder
    #   that csvtk rename2 substitutes from the key-value file.
    shell:
        """
        SUBSET_FIELDS="$(grep -v '^#' {input.nextclade_field_map:q} | awk '{{print $1}}' | tr '\n' ',' | sed 's/,$//g')"

        csvtk -tl cut -f "$SUBSET_FIELDS" \
            {input.nextclade:q} \
        | csvtk -tl rename2 \
            -F \
            -f '*' \
            -p '(.+)' \
            -r '{{kv}}' \
            -k {input.nextclade_field_map:q} \
        | tsv-join -H \
            --filter-file - \
            --key-fields {params.nextclade_id_field:q} \
            --data-fields {params.metadata_id_field:q} \
            --append-fields '*' \
            --write-all '?' \
            {input.metadata:q} \
        | tsv-select -H --exclude {params.nextclade_id_field:q} \
        > {output.metadata:q}
        """
0 commit comments