nextstrain · j23414 · Nov 19, 2025 · Nov 19, 2025 · Nov 19, 2025
diff --git a/ingest/Snakefile b/ingest/Snakefile
@@ -4,10 +4,13 @@ min_version(
     "7.7.0"
 )  # Snakemake 7.7.0 introduced `retries` directive used in fetch-sequences
 
-configfile: "defaults/config.yaml"
+# Utility functions shared across all workflows.
+include: "../shared/vendored/snakemake/config.smk"
 
-serotypes = ['all', 'denv1', 'denv2', 'denv3', 'denv4']
+# Use default configuration values. Extend with Snakemake's --configfile/--config options.
+configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
 
+serotypes = ['all', 'denv1', 'denv2', 'denv3', 'denv4']
 
 rule all:
     input:
@@ -23,4 +26,10 @@ include: "rules/nextclade.smk"
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
+        # Relative custom rule paths in the config are relative to the analysis
+        # directory (i.e. the current working directory, or workdir, usually
+        # given by --directory), but the "include" directive treats relative
+        # paths as relative to the workflow (e.g. workflow.current_basedir).
+        # Convert to an absolute path based on the analysis/current directory
+        # to avoid this mismatch of expectations.
+        include: os.path.join(os.getcwd(), rule_file)
diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml
@@ -33,7 +33,7 @@ ncbi_datasets_fields:
 curate:
   # The path to the local geolocation rules within the pathogen repo
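-  # The path should be relative to the ingest directory.
+  # The path is resolved by resolve_config_path() in rules/curate.smk, not taken relative to the ingest directory.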
-  local_geolocation_rules: "defaults/geolocation-rules.tsv"
+  local_geolocation_rules: "geolocation-rules.tsv"
   # List of field names to change where the key is the original field name and the value is the new field name
   # The original field names should match the ncbi_datasets_fields provided above.
   # This is the first step in the pipeline, so any references to field names in the configs below should use the new field names
@@ -86,7 +86,7 @@ curate:
   abbr_authors_field: "authors"
   # Path to the manual annotations file
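-  # The path should be relative to the ingest directory
+  # The path is resolved by resolve_config_path() in rules/curate.smk, not taken relative to the ingest directory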
-  annotations: "defaults/annotations.tsv"
+  annotations: "annotations.tsv"
   # Serotype field name inferred from NCBI Genbank annotation
   serotype_field: "serotype_genbank"
   # The ID field in the metadata to use to merge the manual annotations

diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk
@@ -29,9 +29,9 @@ def format_field_map(field_map: dict[str, str]) -> str:
 rule curate:
     input:
         sequences_ndjson="data/ncbi.ndjson",
-        geolocation_rules=config["curate"]["local_geolocation_rules"],
-        annotations=config["curate"]["annotations"],
-        manual_mapping="defaults/host_hostgenus_hosttype_map.tsv",
+        geolocation_rules=resolve_config_path(config["curate"]["local_geolocation_rules"]),
+        annotations=resolve_config_path(config["curate"]["annotations"]),
+        manual_mapping=resolve_config_path("host_hostgenus_hosttype_map.tsv"),
     output:
         metadata="data/all_metadata_curated.tsv",
         sequences="results/sequences_all.fasta",
@@ -80,9 +80,9 @@ rule curate:
                 --abbr-authors-field {params.abbr_authors_field} \
             | augur curate apply-geolocation-rules \
                 --geolocation-rules {input.geolocation_rules} \
-            | ./scripts/infer-dengue-serotype.py \
+            | {workflow.basedir}/scripts/infer-dengue-serotype.py \
                 --out-col {params.serotype_field} \
-            | ./scripts/transform-new-fields \
+            | {workflow.basedir}/scripts/transform-new-fields \
                 --map-tsv {input.manual_mapping} \
                 --map-id host \
                 --metadata-id host \

diff --git a/nextstrain-pathogen.yaml b/nextstrain-pathogen.yaml
@@ -1,5 +1,16 @@
-# This is currently an empty file to indicate the top level pathogen repo.
-# The inclusion of this file allows the Nextstrain CLI to run the
-# `nextstrain build` from any directory regardless of runtime.
+# This file's *existence* marks the top level of a Nextstrain pathogen repo,
+# which allows `nextstrain build` to be run from any subdirectory of the repo
+# regardless of runtime.  For more details, see
+# <https://github.com/nextstrain/cli/releases/tag/8.2.0>.
 #
-# See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details.
+# This file's *contents* are the "registration metadata" for the pathogen repo,
+# used by `nextstrain setup` and `nextstrain run`.
+---
+$schema: https://nextstrain.org/schemas/pathogen/v0
+workflows:
+  ingest:
+    compatibility:
+      nextstrain run: True
+  phylogenetic:
+    compatibility:
+      nextstrain run: True
diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
@@ -1,4 +1,6 @@
-configfile: "defaults/config_dengue.yaml"
+# Utility functions shared across all workflows.
+include: "../shared/vendored/snakemake/config.smk"
+
+configfile: os.path.join(workflow.basedir, "defaults/config_dengue.yaml")
 
-include: "../shared/vendored/snakemake/config.smk"
 include: "rules/config.smk"
@@ -29,7 +31,7 @@ include: "rules/export.smk"
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
+        include: os.path.join(os.getcwd(), rule_file)
 
 rule clean:
     """Removing directories: {params}"""

diff --git a/phylogenetic/defaults/config_dengue.yaml b/phylogenetic/defaults/config_dengue.yaml
@@ -14,8 +14,8 @@ strain_id_field: "accession"
 display_strain_field: "strain"
 
 filter:
-  exclude: "defaults/exclude.txt"
-  include: "defaults/{serotype}/include.txt"
+  exclude: "exclude.txt"
+  include: "{serotype}/include.txt"
   group_by: "year region"
   min_length:
     genome: 5000
@@ -33,11 +33,11 @@ traits:
 
 clades:
   clade_definitions:
-    all: 'defaults/clades_serotypes.tsv'
-    denv1: 'defaults/clades_genotypes.tsv'
-    denv2: 'defaults/clades_genotypes.tsv'
-    denv3: 'defaults/clades_genotypes.tsv'
-    denv4: 'defaults/clades_genotypes.tsv'
+    all: 'clades_serotypes.tsv'
+    denv1: 'clades_genotypes.tsv'
+    denv2: 'clades_genotypes.tsv'
+    denv3: 'clades_genotypes.tsv'
+    denv4: 'clades_genotypes.tsv'
 
 tip_frequencies:
     min_date: "1980-01-01"
@@ -46,4 +46,4 @@ tip_frequencies:
     wide_bandwidth: 0.6
 
 export:
-  description: "defaults/description.md"
+  description: "description.md"
diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk
@@ -46,7 +46,7 @@ rule translate:
     input:
         tree = "results/{serotype}/{gene}/tree.nwk",
         node_data = "results/{serotype}/{gene}/nt-muts.json",
-        reference = lambda wildcard: "defaults/{serotype}/reference.gb" if wildcard.gene in ['genome'] else "results/defaults/reference_{serotype}_{gene}.gb"
+        reference = lambda wildcard: resolve_config_path("{serotype}/reference.gb")(wildcard) if wildcard.gene in ['genome'] else "results/defaults/reference_{serotype}_{gene}.gb"
     output:
         node_data = "results/{serotype}/{gene}/aa-muts.json"
     benchmark:
@@ -94,7 +94,7 @@ rule clades:
         tree = "results/{serotype}/genome/tree.nwk",
         nt_muts = "results/{serotype}/genome/nt-muts.json",
         aa_muts = "results/{serotype}/genome/aa-muts.json",
-        clade_defs = lambda wildcards: config['clades']['clade_definitions'][wildcards.serotype],
+        clade_defs = lambda wildcards: resolve_config_path(config['clades']['clade_definitions'][wildcards.serotype])(wildcards),
     output:
         clades = "results/{serotype}/genome/clades.json"
     benchmark:

diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk
@@ -23,17 +23,17 @@ import json
 
 rule colors:
     input:
-        color_schemes = "defaults/color_schemes.tsv",
-        color_orderings = "defaults/color_orderings.tsv",
+        color_schemes = resolve_config_path("color_schemes.tsv"),
+        color_orderings = resolve_config_path("color_orderings.tsv"),
         metadata = "results/{serotype}/metadata.tsv",
-        manual_colors = "defaults/colors.tsv"
+        manual_colors = resolve_config_path("colors.tsv")
     output:
         colors = "results/{serotype}/colors.tsv"
     benchmark:
         "benchmarks/{serotype}/colors.txt"
     shell:
         """
-        python3 scripts/assign-colors.py \
+        python3 {workflow.basedir}/scripts/assign-colors.py \
             --color-schemes {input.color_schemes} \
             --ordering {input.color_orderings} \
             --metadata {input.metadata} \
@@ -180,7 +180,7 @@ rule export:
         clades = lambda wildcard: "results/{serotype}/{gene}/clades.json" if wildcard.gene in ['genome'] else [],
         nt_muts = "results/{serotype}/{gene}/nt-muts.json",
         aa_muts = "results/{serotype}/{gene}/aa-muts.json",
-        description = config["export"]["description"],
+        description = resolve_config_path(config["export"]["description"]),
         auspice_config = "results/defaults/{serotype}/{gene}/auspice_config.json",
         colors = "results/{serotype}/colors.tsv",
     output:

diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk
@@ -31,8 +31,8 @@ rule filter:
             if wildcard.gene in ['genome']
             else "results/{serotype}/{gene}/sequences.fasta"),
         metadata = "results/{serotype}/metadata.tsv",
-        exclude = config["filter"]["exclude"],
-        include = config["filter"]["include"],
+        exclude = resolve_config_path(config["filter"]["exclude"]),
+        include = resolve_config_path(config["filter"]["include"]),
     output:
         sequences = "results/{serotype}/{gene}/filtered.fasta"
     benchmark:

diff --git a/phylogenetic/rules/prepare_sequences_E.smk b/phylogenetic/rules/prepare_sequences_E.smk
@@ -20,7 +20,7 @@ rule generate_E_reference_files:
     Generating reference files for the E gene
     """
     input:
-        reference = "defaults/{serotype}/reference.gb",
+        reference = resolve_config_path("{serotype}/reference.gb"),
     output:
         fasta = "results/defaults/reference_{serotype}_E.fasta",
         genbank = "results/defaults/reference_{serotype}_E.gb",
@@ -30,7 +30,7 @@ rule generate_E_reference_files:
         gene = "E",
     shell:
         """
-        python3 scripts/newreference.py \
+        python3 {workflow.basedir}/scripts/newreference.py \
             --reference {input.reference} \
             --output-fasta {output.fasta} \
             --output-genbank {output.genbank} \