Skip to content

Commit 1ab9e80

Browse files
authored
Merge pull request #56 from nextstrain/fix-species-ingest
Add species to ingest/nextstrain-automation
2 parents 87524d4 + f077ccb commit 1ab9e80

5 files changed

Lines changed: 30 additions & 22 deletions

File tree

.github/workflows/ingest-to-phylogenetic.yaml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -41,22 +41,22 @@ jobs:
4141
permissions:
4242
id-token: write
4343
uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
44-
secrets: inherit
44+
secrets: inherit
4545
with:
4646
runtime: docker
4747
env: |
4848
NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }}
4949
run: |
5050
nextstrain build ingest \
5151
upload_all \
52-
--configfile build-configs/nextstrain-automation/config.yaml
52+
--configfile build-configs/nextstrain-automation/config.yaml
5353
# Specifying artifact name to differentiate ingest build outputs from the phylogenetic build outputs
5454
artifact-name: ingest-output
5555
artifact-paths: |
5656
ingest/benchmarks/
5757
ingest/logs/
5858
ingest/.snakemake/log/
59-
59+
6060
6161
# Check if ingest results include new data by checking for the cache
6262
# of the file with the results' Metadata.sha256sum (which should have been added within upload-to-s3)
@@ -74,7 +74,7 @@ jobs:
7474
AWS_DEFAULT_REGION: ${{ vars.AWS_DEFAULT_REGION }}
7575
run: |
7676
s3_urls=(
77-
"s3://nextstrain-data/files/workflows/ebola/metadata.tsv.zst"
77+
"s3://nextstrain-data/files/workflows/ebola/ebov/metadata.tsv.zst"
7878
)
7979
8080
# Code below is modified from ingest/upload-to-s3
@@ -102,7 +102,7 @@ jobs:
102102

103103
phylogenetic:
104104
needs: [check-new-data]
105-
if: ${{ needs.check-new-data.outputs.cache-hit != 'true' }}
105+
if: ${{ github.event_name == 'workflow_dispatch' || needs.check-new-data.outputs.cache-hit != 'true' }}
106106
permissions:
107107
id-token: write
108108
uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
@@ -120,4 +120,4 @@ jobs:
120120
artifact-paths: |
121121
phylogenetic/benchmarks/
122122
phylogenetic/logs/
123-
phylogenetic/.snakemake/log/
123+
phylogenetic/.snakemake/log/

ingest/Snakefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ workdir: workflow.current_basedir
88
# Use default configuration values. Override with Snakemake's --configfile/--config options.
99
configfile: "defaults/config.yaml"
1010

11-
SPECIES = ['ebov', 'bdbv', 'sudv']
11+
SPECIES = config["species"]
1212

1313
# This is the default rule that Snakemake will run when there are no specified targets.
1414
# The default output of the ingest workflow is usually the curated metadata and sequences.
ingest/build-configs/nextstrain-automation/config.yaml

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
11
# This configuration file should contain all required configuration parameters
22
# for the ingest workflow to run with additional Nextstrain automation rules.
33

4+
# Only run ebov because the bdbv/sudv workflows are not set up to pull data from S3
5+
species:
6+
- ebov
7+
48
# Custom rules to run as part of the Nextstrain automated workflow
59
# The paths should be relative to the ingest directory.
610
custom_rules:
@@ -15,10 +19,10 @@ s3_dst: "s3://nextstrain-data/files/workflows/ebola"
1519

1620
# Mapping of files to upload
1721
files_to_upload:
18-
metadata.tsv.zst: results/metadata.tsv
19-
metadata_open.tsv.zst: results/metadata_open.tsv
20-
sequences.fasta.zst: results/sequences.fasta
21-
sequences_open.fasta.zst: results/sequences_open.fasta
22-
alignment.fasta.zst: results/alignment.fasta
23-
alignment_open.fasta.zst: results/alignment_open.fasta
24-
translations.zip: results/translations.zip # note - no 'open' translations yet
22+
ebov/metadata.tsv.zst: results/ebov/metadata.tsv
23+
ebov/metadata_open.tsv.zst: results/ebov/metadata_open.tsv
24+
ebov/sequences.fasta.zst: results/ebov/sequences.fasta
25+
ebov/sequences_open.fasta.zst: results/ebov/sequences_open.fasta
26+
ebov/alignment.fasta.zst: results/ebov/alignment.fasta
27+
ebov/alignment_open.fasta.zst: results/ebov/alignment_open.fasta
28+
ebov/translations.zip: results/ebov/translations.zip # note - no 'open' translations yet

ingest/defaults/config.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@
33
#
44
# Define optional config parameters with their default values here so that users
55
# do not have to dig through the workflows to figure out the default values
6+
species:
7+
- ebov
8+
- bdbv
9+
- sudv
610

711
ppx_fetch:
812
ebov: # zaire

phylogenetic/all-outbreaks/Snakefile

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,13 @@ Note: This does not conform to our modern config-based workflow.
55
"""
66

77
inputs = {
8-
"metadata": "s3://nextstrain-data/files/workflows/ebola/metadata.tsv.zst",
9-
"alignment": "s3://nextstrain-data/files/workflows/ebola/alignment.fasta.zst",
10-
"translations": "s3://nextstrain-data/files/workflows/ebola/translations.zip",
8+
"metadata": "s3://nextstrain-data/files/workflows/ebola/ebov/metadata.tsv.zst",
9+
"alignment": "s3://nextstrain-data/files/workflows/ebola/ebov/alignment.fasta.zst",
10+
"translations": "s3://nextstrain-data/files/workflows/ebola/ebov/translations.zip",
1111
} if config.get('s3', False) is True else {
12-
"metadata": "../ingest/results/metadata.tsv",
13-
"alignment": "../ingest/results/alignment.fasta",
14-
"translations": "../ingest/results/translations.zip",
12+
"metadata": "../ingest/results/ebov/metadata.tsv",
13+
"alignment": "../ingest/results/ebov/alignment.fasta",
14+
"translations": "../ingest/results/ebov/translations.zip",
1515
}
1616

1717
include: "../../shared/vendored/snakemake/remote_files.smk"
@@ -100,7 +100,7 @@ rule refine:
100100
shell:
101101
r"""
102102
exec &> >(tee {log:q})
103-
103+
104104
augur refine \
105105
--tree {input.tree:q} \
106106
--keep-root \
@@ -119,7 +119,7 @@ rule unzip_translations:
119119
exec &> >(tee {log:q})
120120
121121
mkdir -p results/translations
122-
122+
123123
unzip -o {input[0]} -d results/translations
124124
"""
125125

0 commit comments

Comments
 (0)