Skip to content

Run GenoFLU on curated GISAID data #43

Run GenoFLU on curated GISAID data

Run GenoFLU on curated GISAID data #43

name: Run GenoFLU on curated GISAID data

defaults:
  run:
    # Spell out the full shell invocation rather than using the `bash`
    # shorthand. As of 20 June 2023 this is exactly what GitHub Actions
    # expands `shell: bash` to:
    # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell
    #
    # Pinning it here means GitHub cannot change the behavior out from under
    # us, and readers do not need to consult the docs to know what to expect.
    shell: bash --noprofile --norc -eo pipefail {0}
on:
  # Lets other workflows invoke this one as a reusable workflow.
  # Only the container image can be overridden on this path.
  workflow_call:
    inputs:
      image:
        description: 'Specific container image to use for ingest workflow (will override the default of "nextstrain build")'
        required: false
        type: string

  # The workflow_dispatch will be triggered by seasonal-flu when a new curated
  # dataset is available:
  # <https://github.com/nextstrain/seasonal-flu/blob/master/.github/workflows/upload.yaml>
  workflow_dispatch:
    inputs:
      image:
        description: 'Specific container image to use for ingest workflow (will override the default of "nextstrain build")'
        required: false
        type: string
      trial_name:
        # The trial destination below mirrors the S3_DST built in the
        # `ingest` job's run script (private bucket).
        # NOTE(review): the non-trial default destination is not defined in
        # this file — presumably it comes from the ingest workflow config;
        # confirm the bucket named in the "If not set" line.
        description: |
          Trial name for outputs.
          If not set, outputs will overwrite files at s3://nextstrain-data-private/files/workflows/avian-flu/
          If set, outputs will be uploaded to s3://nextstrain-data-private/files/workflows/avian-flu/trial/<trial_name>/
        required: false
        type: string
jobs:
  # Runs the GISAID ingest Snakefile's `upload_all` target through the shared
  # Nextstrain reusable build workflow.
  ingest:
    permissions:
      # NOTE(review): presumably required so the reusable workflow can request
      # an OIDC token for cloud credentials — confirm against
      # pathogen-repo-build.yaml.
      id-token: write
    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
    secrets: inherit
    with:
      # Starting with the default docker runtime.
      # We can migrate to AWS Batch when/if we need to for more resources or if
      # the job runs longer than the GH Action limit of 6 hours.
      runtime: docker
      run: |
        declare -a config;
        if [[ "$TRIAL_NAME" ]]; then
          # Trial run: redirect uploads to a per-trial prefix instead of the
          # default destination.
          # Create JSON string for the nested upload config
          S3_DST="s3://nextstrain-data-private/files/workflows/avian-flu/trial/$TRIAL_NAME"
          # Quoted so the JSON stays a single array element even if the trial
          # name contains whitespace or glob characters.
          config+=(
            "s3_dst=$(jq -cn --arg S3_DST "$S3_DST" '{"gisaid": $S3_DST}')"
          )
        fi;
        nextstrain build \
          ingest \
          --snakefile gisaid/Snakefile \
          upload_all \
          --config "${config[@]}"
      # TRIAL_NAME is empty when the `trial_name` input is not provided (e.g.
      # when invoked via workflow_call, which does not declare it); the run
      # script then skips the trial override.
      env: |
        NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }}
        TRIAL_NAME: ${{ inputs.trial_name }}
      # Explicitly excluding `ingest/gisaid/results` and `ingest/gisaid/data`
      # since this is private data and should not be available through the
      # public artifacts.
      artifact-name: genoflu-gisaid
      artifact-paths: |
        ingest/.snakemake/log/
        ingest/gisaid/logs/
        ingest/gisaid/benchmarks/
        !ingest/gisaid/results
        !ingest/gisaid/data