From 6ea24447281adbdc09106c9a96a17ed44e090ddb Mon Sep 17 00:00:00 2001
From: Jennifer Chang <jennifer.chang.bioinform@gmail.com>
Date: Wed, 19 Nov 2025 14:16:27 -0800
Subject: [PATCH 1/3] Register workflows in nextstrain-pathogen.yaml

---
 nextstrain-pathogen.yaml | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/nextstrain-pathogen.yaml b/nextstrain-pathogen.yaml
index b74c50d3..5186e369 100644
--- a/nextstrain-pathogen.yaml
+++ b/nextstrain-pathogen.yaml
@@ -1,5 +1,16 @@
-# This is currently an empty file to indicate the top level pathogen repo.
-# The inclusion of this file allows the Nextstrain CLI to run the
-# `nextstrain build` from any directory regardless of runtime.
+# This file's *existence* marks the top level of a Nextstrain pathogen repo,
+# which allows `nextstrain build` to be run from any subdirectory of the repo
+# regardless of runtime.  For more details, see
+# <https://github.com/nextstrain/cli/releases/tag/8.2.0>.
 #
-# See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details.
+# This file's *contents* are the "registration metadata" for the pathogen repo,
+# used by `nextstrain setup` and `nextstrain run`.
+---
+$schema: https://nextstrain.org/schemas/pathogen/v0
+workflows:
+  ingest:
+    compatibility:
+      nextstrain run: True
+  phylogenetic:
+    compatibility:
+      nextstrain run: True

From 66b36ae49b0ef23ea1670189b72d65b87c1abc7d Mon Sep 17 00:00:00 2001
From: Jennifer Chang <jennifer.chang.bioinform@gmail.com>
Date: Wed, 19 Nov 2025 14:26:25 -0800
Subject: [PATCH 2/3] Ingest: Support workflows as programs

---
 ingest/Snakefile            | 15 ++++++++++++---
 ingest/defaults/config.yaml |  4 ++--
 ingest/rules/curate.smk     | 10 +++++-----
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/ingest/Snakefile b/ingest/Snakefile
index ad7e0f7c..90bead2c 100644
--- a/ingest/Snakefile
+++ b/ingest/Snakefile
@@ -4,10 +4,13 @@ min_version(
     "7.7.0"
 )  # Snakemake 7.7.0 introduced `retries` directive used in fetch-sequences
 
-configfile: "defaults/config.yaml"
+# Utility functions shared across all workflows.
+include: "../shared/vendored/snakemake/config.smk"
 
-serotypes = ['all', 'denv1', 'denv2', 'denv3', 'denv4']
+# Use default configuration values. Extend with Snakemake's --configfile/--config options.
+configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
 
+serotypes = ['all', 'denv1', 'denv2', 'denv3', 'denv4']
 
 rule all:
     input:
@@ -23,4 +26,10 @@ include: "rules/nextclade.smk"
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
+        # Relative custom rule paths in the config are relative to the analysis
+        # directory (i.e. the current working directory, or workdir, usually
+        # given by --directory), but the "include" directive treats relative
+        # paths as relative to the workflow (e.g. workflow.current_basedir).
+        # Convert to an absolute path based on the analysis/current directory
+        # to avoid this mismatch of expectations.
+        include: os.path.join(os.getcwd(), rule_file)
diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml
index a80563c7..1bb08b2d 100644
--- a/ingest/defaults/config.yaml
+++ b/ingest/defaults/config.yaml
@@ -33,7 +33,7 @@ ncbi_datasets_fields:
 curate:
   # The path to the local geolocation rules within the pathogen repo
   # The path should be relative to the ingest directory.
-  local_geolocation_rules: "defaults/geolocation-rules.tsv"
+  local_geolocation_rules: "geolocation-rules.tsv"
   # List of field names to change where the key is the original field name and the value is the new field name
   # The original field names should match the ncbi_datasets_fields provided above.
   # This is the first step in the pipeline, so any references to field names in the configs below should use the new field names
@@ -86,7 +86,7 @@ curate:
   abbr_authors_field: "authors"
   # Path to the manual annotations file
   # The path should be relative to the ingest directory
-  annotations: "defaults/annotations.tsv"
+  annotations: "annotations.tsv"
   # Serotype field name inferred from NCBI Genbank annotation
   serotype_field: "serotype_genbank"
   # The ID field in the metadata to use to merge the manual annotations
diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk
index 7754de84..07af4995 100644
--- a/ingest/rules/curate.smk
+++ b/ingest/rules/curate.smk
@@ -29,9 +29,9 @@ def format_field_map(field_map: dict[str, str]) -> str:
 rule curate:
     input:
         sequences_ndjson="data/ncbi.ndjson",
-        geolocation_rules=config["curate"]["local_geolocation_rules"],
-        annotations=config["curate"]["annotations"],
-        manual_mapping="defaults/host_hostgenus_hosttype_map.tsv",
+        geolocation_rules=resolve_config_path(config["curate"]["local_geolocation_rules"]),
+        annotations=resolve_config_path(config["curate"]["annotations"]),
+        manual_mapping=resolve_config_path("host_hostgenus_hosttype_map.tsv"),
     output:
         metadata="data/all_metadata_curated.tsv",
         sequences="results/sequences_all.fasta",
@@ -80,9 +80,9 @@ rule curate:
                 --abbr-authors-field {params.abbr_authors_field} \
             | augur curate apply-geolocation-rules \
                 --geolocation-rules {input.geolocation_rules} \
-            | ./scripts/infer-dengue-serotype.py \
+            | {workflow.basedir}/scripts/infer-dengue-serotype.py \
                 --out-col {params.serotype_field} \
-            | ./scripts/transform-new-fields \
+            | {workflow.basedir}/scripts/transform-new-fields \
                 --map-tsv {input.manual_mapping} \
                 --map-id host \
                 --metadata-id host \

From e43838f27a0d1fc5894911ac86d77005de72c1a0 Mon Sep 17 00:00:00 2001
From: Jennifer Chang <jennifer.chang.bioinform@gmail.com>
Date: Wed, 19 Nov 2025 14:37:08 -0800
Subject: [PATCH 3/3] Phylogenetic: Support workflows as programs

---
 phylogenetic/Snakefile                     |  5 +++--
 phylogenetic/defaults/config_dengue.yaml   | 16 ++++++++--------
 phylogenetic/rules/annotate_phylogeny.smk  |  4 ++--
 phylogenetic/rules/export.smk              | 10 +++++-----
 phylogenetic/rules/prepare_sequences.smk   |  4 ++--
 phylogenetic/rules/prepare_sequences_E.smk |  4 ++--
 6 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
index 8b3743f2..f88ef4f1 100644
--- a/phylogenetic/Snakefile
+++ b/phylogenetic/Snakefile
@@ -1,4 +1,5 @@
-configfile: "defaults/config_dengue.yaml"
+configfile: os.path.join(workflow.basedir, "defaults/config_dengue.yaml")
 
+# Utility functions shared across all workflows.
 include: "../shared/vendored/snakemake/config.smk"
 include: "rules/config.smk"
@@ -29,7 +30,7 @@ include: "rules/export.smk"
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
+        include: os.path.join(os.getcwd(), rule_file)
 
 rule clean:
     """Removing directories: {params}"""
diff --git a/phylogenetic/defaults/config_dengue.yaml b/phylogenetic/defaults/config_dengue.yaml
index f4f84717..d89f4832 100644
--- a/phylogenetic/defaults/config_dengue.yaml
+++ b/phylogenetic/defaults/config_dengue.yaml
@@ -14,8 +14,8 @@ strain_id_field: "accession"
 display_strain_field: "strain"
 
 filter:
-  exclude: "defaults/exclude.txt"
-  include: "defaults/{serotype}/include.txt"
+  exclude: "exclude.txt"
+  include: "{serotype}/include.txt"
   group_by: "year region"
   min_length:
     genome: 5000
@@ -33,11 +33,11 @@ traits:
 
 clades:
   clade_definitions:
-    all: 'defaults/clades_serotypes.tsv'
-    denv1: 'defaults/clades_genotypes.tsv'
-    denv2: 'defaults/clades_genotypes.tsv'
-    denv3: 'defaults/clades_genotypes.tsv'
-    denv4: 'defaults/clades_genotypes.tsv'
+    all: 'clades_serotypes.tsv'
+    denv1: 'clades_genotypes.tsv'
+    denv2: 'clades_genotypes.tsv'
+    denv3: 'clades_genotypes.tsv'
+    denv4: 'clades_genotypes.tsv'
 
 tip_frequencies:
     min_date: "1980-01-01"
@@ -46,4 +46,4 @@ tip_frequencies:
     wide_bandwidth: 0.6
 
 export:
-  description: "defaults/description.md"
+  description: "description.md"
diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk
index f21e0dc5..88405708 100644
--- a/phylogenetic/rules/annotate_phylogeny.smk
+++ b/phylogenetic/rules/annotate_phylogeny.smk
@@ -46,7 +46,7 @@ rule translate:
     input:
         tree = "results/{serotype}/{gene}/tree.nwk",
         node_data = "results/{serotype}/{gene}/nt-muts.json",
-        reference = lambda wildcard: "defaults/{serotype}/reference.gb" if wildcard.gene in ['genome'] else "results/defaults/reference_{serotype}_{gene}.gb"
+        reference = lambda wildcard: resolve_config_path("{serotype}/reference.gb")(wildcard) if wildcard.gene in ['genome'] else "results/defaults/reference_{serotype}_{gene}.gb"
     output:
         node_data = "results/{serotype}/{gene}/aa-muts.json"
     benchmark:
@@ -94,7 +94,7 @@ rule clades:
         tree = "results/{serotype}/genome/tree.nwk",
         nt_muts = "results/{serotype}/genome/nt-muts.json",
         aa_muts = "results/{serotype}/genome/aa-muts.json",
-        clade_defs = lambda wildcards: config['clades']['clade_definitions'][wildcards.serotype],
+        clade_defs = lambda wildcards: resolve_config_path(config['clades']['clade_definitions'][wildcards.serotype])(wildcards),
     output:
         clades = "results/{serotype}/genome/clades.json"
     benchmark:
diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk
index d262d9b3..5109ab15 100644
--- a/phylogenetic/rules/export.smk
+++ b/phylogenetic/rules/export.smk
@@ -23,17 +23,17 @@ import json
 
 rule colors:
     input:
-        color_schemes = "defaults/color_schemes.tsv",
-        color_orderings = "defaults/color_orderings.tsv",
+        color_schemes = resolve_config_path("color_schemes.tsv"),
+        color_orderings = resolve_config_path("color_orderings.tsv"),
         metadata = "results/{serotype}/metadata.tsv",
-        manual_colors = "defaults/colors.tsv"
+        manual_colors = resolve_config_path("colors.tsv")
     output:
         colors = "results/{serotype}/colors.tsv"
     benchmark:
         "benchmarks/{serotype}/colors.txt"
     shell:
         """
-        python3 scripts/assign-colors.py \
+        python3 {workflow.basedir}/scripts/assign-colors.py \
             --color-schemes {input.color_schemes} \
             --ordering {input.color_orderings} \
             --metadata {input.metadata} \
@@ -180,7 +180,7 @@ rule export:
         clades = lambda wildcard: "results/{serotype}/{gene}/clades.json" if wildcard.gene in ['genome'] else [],
         nt_muts = "results/{serotype}/{gene}/nt-muts.json",
         aa_muts = "results/{serotype}/{gene}/aa-muts.json",
-        description = config["export"]["description"],
+        description = resolve_config_path(config["export"]["description"]),
         auspice_config = "results/defaults/{serotype}/{gene}/auspice_config.json",
         colors = "results/{serotype}/colors.tsv",
     output:
diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk
index 58966f18..0b5f5792 100644
--- a/phylogenetic/rules/prepare_sequences.smk
+++ b/phylogenetic/rules/prepare_sequences.smk
@@ -31,8 +31,8 @@ rule filter:
             if wildcard.gene in ['genome']
             else "results/{serotype}/{gene}/sequences.fasta"),
         metadata = "results/{serotype}/metadata.tsv",
-        exclude = config["filter"]["exclude"],
-        include = config["filter"]["include"],
+        exclude = resolve_config_path(config["filter"]["exclude"]),
+        include = resolve_config_path(config["filter"]["include"]),
     output:
         sequences = "results/{serotype}/{gene}/filtered.fasta"
     benchmark:
diff --git a/phylogenetic/rules/prepare_sequences_E.smk b/phylogenetic/rules/prepare_sequences_E.smk
index 0473d1e9..069d324e 100644
--- a/phylogenetic/rules/prepare_sequences_E.smk
+++ b/phylogenetic/rules/prepare_sequences_E.smk
@@ -20,7 +20,7 @@ rule generate_E_reference_files:
     Generating reference files for the E gene
     """
     input:
-        reference = "defaults/{serotype}/reference.gb",
+        reference = resolve_config_path("{serotype}/reference.gb"),
     output:
         fasta = "results/defaults/reference_{serotype}_E.fasta",
         genbank = "results/defaults/reference_{serotype}_E.gb",
@@ -30,7 +30,7 @@ rule generate_E_reference_files:
         gene = "E",
     shell:
         """
-        python3 scripts/newreference.py \
+        python3 {workflow.basedir}/scripts/newreference.py \
             --reference {input.reference} \
             --output-fasta {output.fasta} \
             --output-genbank {output.genbank} \