From 0b235be363401bc1faf690de22042ecf711fcf77 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Fri, 20 Feb 2026 18:55:20 -0800
Subject: [PATCH] Set threads for augur subsample

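Take advantage of the parallelism built into `augur subsample`: set each
subsample rule's `threads` from `get_parallelism()` on its subsample config
(with `limit=workflow.cores`) and pass that value to the command via
`--nthreads`.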
---
 CHANGELOG.md                                  | 2 ++
 phylogenetic/rules/prepare_sequences.smk      | 4 +++-
 phylogenetic/rules/prepare_sequences_N450.smk | 4 +++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b86913..5c17629 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ Changes for this project _do not_ currently follow the [Semantic Versioning rule
 Instead, changes appear below grouped by the date they were added to the workflow.
 
 ## 2026
+* TBD: Set the thread count for the `augur subsample` rules so they can take advantage of its built-in parallelism. [#109][]
 * 25 February 2026: Changes to files referenced in `subsample` config will trigger a re-run of the rule. [#90][]
 * 25 February 2026: Changes to non-subsampling config will no longer trigger a re-run starting from subsampling. [#91][]
 * 09 February 2026: *MAJOR CHANGES* Switched ingest outputs to separate OPEN vs RESTRICTED files.
@@ -38,6 +39,7 @@ Instead, changes appear below grouped by the date they were added to the workflo
 [#105]: https://github.com/nextstrain/measles/pull/105
 [#107]: https://github.com/nextstrain/measles/pull/107
 [#108]: https://github.com/nextstrain/measles/pull/108
+[#109]: https://github.com/nextstrain/measles/pull/109
 [530da56]: https://github.com/nextstrain/measles/commit/530da568d8014c08e73f31065a8fa96e5c2d2f20
 [1cf1299...0313508]: https://github.com/nextstrain/measles/compare/1cf1299e1658140d9317fc9063f1e06ef04a6ee1...03135085aed310f1cb0d3ecb2dca342e6ec8f51d
 [Pathoplexus]: https://pathoplexus.org
diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk
index 66d897b..7df84c2 100644
--- a/phylogenetic/rules/prepare_sequences.smk
+++ b/phylogenetic/rules/prepare_sequences.smk
@@ -3,7 +3,7 @@ This part of the workflow prepares sequences for constructing the phylogenetic t
 
 See Augur's usage docs for these commands for more details.
 """
-from augur.subsample import get_referenced_files
+from augur.subsample import get_referenced_files, get_parallelism
 
 rule subsample:
     input:
@@ -15,6 +15,7 @@ rule subsample:
         sequences = "results/genome/subsampled.fasta"
     params:
         strain_id = config["strain_id_field"]
+    threads: get_parallelism("results/genome/subsample_config.yaml", limit=workflow.cores)
     shell:
         """
         augur subsample \
@@ -22,6 +23,7 @@ rule subsample:
             --sequences {input.sequences} \
             --metadata {input.metadata} \
             --metadata-id-columns {params.strain_id} \
+            --nthreads {threads} \
             --output-sequences {output.sequences} \
         """
 
diff --git a/phylogenetic/rules/prepare_sequences_N450.smk b/phylogenetic/rules/prepare_sequences_N450.smk
index 3629983..ff33afd 100644
--- a/phylogenetic/rules/prepare_sequences_N450.smk
+++ b/phylogenetic/rules/prepare_sequences_N450.smk
@@ -3,7 +3,7 @@ This part of the workflow prepares sequences for constructing the phylogenetic t
 
 See Augur's usage docs for these commands for more details.
 """
-from augur.subsample import get_referenced_files
+from augur.subsample import get_referenced_files, get_parallelism
 
 rule align_and_extract_N450:
     input:
@@ -35,6 +35,7 @@ rule subsample_N450:
         sequences = "results/N450/aligned.fasta"
     params:
         strain_id = config["strain_id_field"]
+    threads: get_parallelism("results/N450/subsample_config.yaml", limit=workflow.cores)
     shell:
         """
         augur subsample \
@@ -42,5 +43,6 @@ rule subsample_N450:
             --sequences {input.sequences} \
             --metadata {input.metadata} \
             --metadata-id-columns {params.strain_id} \
+            --nthreads {threads} \
             --output-sequences {output.sequences} \
         """