From 0b235be363401bc1faf690de22042ecf711fcf77 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 20 Feb 2026 18:55:20 -0800 Subject: [PATCH] Set threads for augur subsample Take advantage of built-in parallelism. --- CHANGELOG.md | 2 ++ phylogenetic/rules/prepare_sequences.smk | 4 +++- phylogenetic/rules/prepare_sequences_N450.smk | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b86913..5c17629 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Changes for this project _do not_ currently follow the [Semantic Versioning rule Instead, changes appear below grouped by the date they were added to the workflow. ## 2026 +* TBD: Optimized thread usage for `augur subsample`. [#109][] * 25 February 2026: Changes to files referenced in `subsample` config will trigger a re-run of the rule. [#90][] * 25 February 2026: Changes to non-subsampling config will no longer trigger a re-run starting from subsampling. [#91][] * 09 February 2026: *MAJOR CHANGES* Switched ingest outputs to separate OPEN vs RESTRICTED files. @@ -38,6 +39,7 @@ Instead, changes appear below grouped by the date they were added to the workflo [#105]: https://github.com/nextstrain/measles/pull/105 [#107]: https://github.com/nextstrain/measles/pull/107 [#108]: https://github.com/nextstrain/measles/pull/108 +[#109]: https://github.com/nextstrain/measles/pull/109 [530da56]: https://github.com/nextstrain/measles/commit/530da568d8014c08e73f31065a8fa96e5c2d2f20 [1cf1299...0313508]: https://github.com/nextstrain/measles/compare/1cf1299e1658140d9317fc9063f1e06ef04a6ee1...03135085aed310f1cb0d3ecb2dca342e6ec8f51d [Pathoplexus]: https://pathoplexus.org diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 66d897b..7df84c2 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -3,7 +3,7 @@ This part of the workflow prepares sequences for constructing the phylogenetic t See Augur's usage docs for these commands for more details. """ -from augur.subsample import get_referenced_files +from augur.subsample import get_referenced_files, get_parallelism rule subsample: input: @@ -15,6 +15,7 @@ rule subsample: sequences = "results/genome/subsampled.fasta" params: strain_id = config["strain_id_field"] + threads: get_parallelism("results/genome/subsample_config.yaml", limit=workflow.cores) shell: """ augur subsample \ @@ -22,6 +23,7 @@ rule subsample: --sequences {input.sequences} \ --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ + --nthreads {threads} \ --output-sequences {output.sequences} \ """ diff --git a/phylogenetic/rules/prepare_sequences_N450.smk b/phylogenetic/rules/prepare_sequences_N450.smk index 3629983..ff33afd 100644 --- a/phylogenetic/rules/prepare_sequences_N450.smk +++ b/phylogenetic/rules/prepare_sequences_N450.smk @@ -3,7 +3,7 @@ This part of the workflow prepares sequences for constructing the phylogenetic t See Augur's usage docs for these commands for more details. """ -from augur.subsample import get_referenced_files +from augur.subsample import get_referenced_files, get_parallelism rule align_and_extract_N450: input: @@ -35,6 +35,7 @@ rule subsample_N450: sequences = "results/N450/aligned.fasta" params: strain_id = config["strain_id_field"] + threads: get_parallelism("results/N450/subsample_config.yaml", limit=workflow.cores) shell: """ augur subsample \ @@ -42,5 +43,6 @@ rule subsample_N450: --sequences {input.sequences} \ --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ + --nthreads {threads} \ --output-sequences {output.sequences} \ """