diff --git a/ingest/README.md b/ingest/README.md
index 883d299..ae60a59 100644
--- a/ingest/README.md
+++ b/ingest/README.md
@@ -1,7 +1,7 @@
 # Ingest
 
-This workflow ingests public data from NCBI and outputs curated metadata and
-sequences that can be used as input for the phylogenetic workflow.
+This workflow ingests public data from Pathoplexus and outputs curated metadata
+and sequences that can be used as input for the phylogenetic workflow.
 
 If you have another data source or private data that needs to be formatted for
 the phylogenetic workflow, then you can use a similar workflow to curate your
@@ -25,18 +25,6 @@ This produces the default outputs of the ingest workflow:
 - metadata      = results/metadata_all.tsv
 - sequences     = results/sequences_all.fasta
 
-### Dumping the full raw metadata from NCBI Datasets
-
-The workflow has a target for dumping the full raw metadata from NCBI Datasets.
-
-```
-nextstrain build ingest dump_ncbi_dataset_report
-```
-
-This will produce the file `ingest/data/ncbi_dataset_report_raw.tsv`,
-which you can inspect to determine what fields and data to use if you want to
-configure the workflow for your pathogen.
-
 ## Defaults
 
 The defaults directory contains all of the default configurations for the ingest workflow.
diff --git a/ingest/Snakefile b/ingest/Snakefile
index 090f793..95216fa 100644
--- a/ingest/Snakefile
+++ b/ingest/Snakefile
@@ -10,6 +10,8 @@ rule all:
     input:
         sequences="results/sequences.fasta",
         metadata="results/metadata.tsv",
+        sequences_open="results/sequences_open.fasta",
+        metadata_open="results/metadata_open.tsv",
 
 # Shared Snakemake files with generic functions are shared across pathogens
 include: "../shared/vendored/snakemake/config.smk"
@@ -18,7 +20,7 @@ include: "../shared/vendored/snakemake/config.smk"
 # If there are build-specific customizations, they should be added with the
 # custom_rules imported below to ensure that the core workflow is not complicated
 # by build-specific rules.
-include: "rules/fetch_from_ncbi.smk"
+include: "rules/fetch.smk"
 include: "rules/curate.smk"
 include: "rules/nextclade.smk"
 
@@ -35,4 +37,4 @@ include: "rules/nextclade.smk"
 if "custom_rules" in config:
     for rule_file in config["custom_rules"]:
 
-        include: rule_file
\ No newline at end of file
+        include: rule_file
diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv
index 45ab88a..d454abf 100644
--- a/ingest/defaults/annotations.tsv
+++ b/ingest/defaults/annotations.tsv
@@ -272,25 +272,25 @@ ON694341	institution	Centre for Biological Threats, Highly Pathogenic Viruses, R
 ON694342	institution	Centre for Biological Threats, Highly Pathogenic Viruses, Robert Koch Institute, Germany
 ON720848	institution	Microbial Genomics, Hospital General Universitario Gregorio Marañón, Madrid, Spain
 ON720849	institution	Microbial Genomics, Hospital General Universitario Gregorio Marañón, Madrid, Spain
-KT163243	date	1968-XX-XX
-AF260968	date	1951-XX-XX
-AF260968	region	Africa
-AF260968	country	Egypt
-AF260968	host	Homo sapians
-AF196835	host	Phoenicopterus chilensis
-AF196835	date	1999-XX-XX
-AY765264	date	1997-XX-XX
-AY765264	country	Czech Republic
-AY765264	region	Europe
-DQ318020	date	1972-XX-XX
-DQ318020	host	Culex tigripes
-D00246	country	Australia
-D00246	date	1960-XX-XX
-EF631122	date	XXXX-XX-XX
-EF631123	date	XXXX-XX-XX
-DQ116961	date	2004-XX-XX
-AY603654	date	1976-XX-XX
-AM404308	date	1971-XX-XX
-AF260968	date	1951-XX-XX
-AY660002	date	2003-XX-XX
-AY268132	date	2000-XX-XX
+PP_0001F2D	date	1968-XX-XX
+PP_000HJBT	date	1951-XX-XX
+PP_000HJBT	region	Africa
+PP_000HJBT	country	Egypt
+PP_000HJBT	host	Homo sapiens
+PP_000HHL9	host	Phoenicopterus chilensis
+PP_000HHL9	date	1999-XX-XX
+PP_000HY01	date	1997-XX-XX
+PP_000HY01	country	Czech Republic
+PP_000HY01	region	Europe
+PP_000JBDU	date	1972-XX-XX
+PP_000JBDU	host	Culex tigripes
+PP_000HZ4S	country	Australia
+PP_000HZ4S	date	1960-XX-XX
+PP_000JSDD	date	XXXX-XX-XX
+PP_000JSEB	date	XXXX-XX-XX
+PP_000J96A	date	2004-XX-XX
+PP_000HXJZ	date	1976-XX-XX
+PP_000HQ6X	date	1971-XX-XX
+PP_000HJBT	date	1951-XX-XX
+PP_000HXRK	date	2003-XX-XX
+PP_000HRSP	date	2000-XX-XX
diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml
index 486503b..158b96a 100644
--- a/ingest/defaults/config.yaml
+++ b/ingest/defaults/config.yaml
@@ -4,70 +4,38 @@
 # Define optional config parameters with their default values here so that users
 # do not have to dig through the workflows to figure out the default values
 
-# Required to fetch from NCBI Datasets
-ncbi_taxon_id: "11082"
-
-# The list of NCBI Datasets fields to include from NCBI Datasets output
-# These need to be the "mnemonics" of the NCBI Datasets fields, see docs for full list of fields
-# https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/command-line/dataformat/tsv/dataformat_tsv_virus-genome/#fields
-# Note: the "accession" field MUST be provided to match with the sequences
-ncbi_datasets_fields:
-  - accession
-  - sourcedb
-  - isolate-lineage
-  - geo-region
-  - geo-location
-  - isolate-collection-date
-  - release-date
-  - update-date
-  - length
-  - host-name
-  - is-lab-host
-  - isolate-lineage-source
-  - bioprojects
-  - biosample-acc
-  - sra-accs
-  - submitter-names
-  - submitter-affiliation
+ppx_fetch:
+  seqs: 'https://lapis.pathoplexus.org/west-nile/sample/unalignedNucleotideSequences?versionStatus=LATEST_VERSION'
+  meta: 'https://lapis.pathoplexus.org/west-nile/sample/details?dataFormat=csv&versionStatus=LATEST_VERSION'
 
 # Config parameters related to the curate pipeline
 curate:
   # The path to the local geolocation rules within the pathogen repo
   # The path should be relative to the ingest directory.
   local_geolocation_rules: "defaults/geolocation-rules.tsv"
-  # The original field names should match the ncbi_datasets_fields provided above.
   # This is the first step in the pipeline, so any references to field names in the configs below should use the new field names
   field_map:
-    accession: accession
-    accession_version: accession_version
-    sourcedb: database
-    isolate-lineage: strain
-    geo-region: region
-    geo-location: location
-    isolate-collection-date: date
-    release-date: date_released
-    update-date: date_updated
-    length: length
-    host-name: host
-    is-lab-host: is_lab_host
-    isolate-lineage-source: sample_type
-    biosample-acc: biosample_accessions
-    sra-accs: sra_accessions
-    submitter-names: full_authors
-    submitter-affiliation: institution
-  # Standardized strain name regex
-  # Currently accepts any characters because we do not have a clear standard for strain names across pathogens
-  strain_regex: "^.+$"
-  # Back up strain name field to use if "strain" doesn"t match regex above
-  strain_backup_fields: ["accession"]
+    accessionVersion: PPX_accession
+    insdcAccessionFull: INSDC_accession
+    insdcRawReadsAccession: sra_accession
+    displayName: strain
+    geoLocCountry: country
+    geoLocAdmin1: division
+    geoLocAdmin2: location
+    sampleCollectionDate: date
+    earliestReleaseDate: date_submitted
+    hostNameCommon: host
+    isLabHost: is_lab_host
+    dataUseTermsRestrictedUntil: restrictedUntil
+    dataUseTermsUrl: dataUseTerms__url
+    authors: full_authors
+    authorAffiliations: institution
   # List of date fields to standardize to ISO format YYYY-MM-DD
-  date_fields: ["date", "date_released", "date_updated"]
+  date_fields: ["date", "date_submitted"]
   # List of expected date formats that are present in the date fields provided above
   # These date formats should use directives expected by datetime
   # See https://docs.python.org/3.9/library/datetime.html#strftime-and-strptime-format-codes
   expected_date_formats: ["%Y", "%Y-%m", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ"]
-  # The expected field that contains the GenBank geo_loc_name
-  genbank_location_field: location
   titlecase:
     # List of string fields to titlecase
     fields: ["region", "country", "division", "location"]
@@ -93,16 +61,19 @@ curate:
   output_id_field: "accession"
   # The field in the NDJSON record that contains the actual genomic sequence
   output_sequence_field: "sequence"
-  # The field in the NDJSON record that contains the actual GenBank accession
-  genbank_accession: 'accession'
+  # The field in the NDJSON record that contains the actual Pathoplexus accession
+  pathoplexus_accession: 'PPX_accession'
+  # The field in the NDJSON record that contains the actual INSDC accession
+  insdc_accession: 'INSDC_accession'
 
   # The list of metadata columns to keep in the final output of the curation pipeline.
   metadata_columns: [
     'accession',
-    #'genbank_accession_rev',
+    'PPX_accession',
+    'PPX_accession__url',
+    'INSDC_accession',
+    'INSDC_accession__url',
     #'strain',
-    #'strain_s',
-    #'viruslineage_ids',
     'date',
     #'updated',
     'region',
@@ -116,15 +87,11 @@ curate:
     'is_lab_host',
     #'date_submitted',
     #'sra_accession',
-    #'full_authors',
-    #'reverse',
     'authors',
-    #'institution',
-    #'title',
-    #'journal',
-    #'publications',
-    #'paper_url',
-    'url',
+    'institution',
+    'dataUseTerms',
+    'dataUseTerms__url',
+    'restrictedUntil',
     'length',
   ]
 
@@ -135,5 +102,72 @@ nextclade:
 
 pathoplexus:
   URL: 'https://lapis.pathoplexus.org/west-nile/sample/details'
-  fields: 'insdcAccessionBase,lineage'
-  accession_field: 'insdcAccessionBase'
+  fields: 'accession,lineage'
+  accession_field: 'accession'
+
+ppx_metadata_fields:
+  - "accessionVersion"
+  - "accession"
+  - "version"
+  - "submitter"
+  - "groupName"
+  - "submittedDate"
+  - "releasedDate"
+  - "dataUseTerms"
+  - "dataUseTermsRestrictedUntil"
+  - "dataUseTermsUrl"
+  - "assemblyReferenceGenomeAccession"
+  - "authorAffiliations"
+  - "authors"
+  - "bioprojectAccession"
+  - "biosampleAccession"
+  - "completeness"
+  - "displayName"
+  - "earliestReleaseDate"
+  - "frameShifts"
+  - "geoLocAdmin1"
+  - "geoLocAdmin2"
+  - "geoLocCity"
+  - "geoLocCountry"
+  - "geoLocLatitude"
+  - "geoLocLongitude"
+  - "geoLocSite"
+  - "hostAge"
+  - "hostAgeBin"
+  - "hostDisease"
+  - "hostGender"
+  - "hostHealthOutcome"
+  - "hostHealthState"
+  - "hostNameCommon"
+  - "hostOriginCountry"
+  - "hostVaccinationStatus"
+  - "insdcAccessionBase"
+  - "insdcAccessionFull"
+  - "insdcRawReadsAccession"
+  - "insdcVersion"
+  - "isLabHost"
+  - "length"
+  - "ncbiReleaseDate"
+  - "ncbiSourceDb"
+  - "ncbiSubmitterCountry"
+  - "ncbiUpdateDate"
+  - "ncbiVirusName"
+  - "ncbiVirusTaxId"
+  - "purposeOfSampling"
+  - "purposeOfSequencing"
+  - "qualityControlDetails"
+  - "qualityControlDetermination"
+  - "qualityControlIssues"
+  - "qualityControlMethodName"
+  - "qualityControlMethodVersion"
+  - "sampleCollectionDate"
+  - "sampleCollectionDateRangeLower"
+  - "sampleCollectionDateRangeUpper"
+  - "sampleType"
+  - "totalAmbiguousNucs"
+  - "totalDeletedNucs"
+  - "totalFrameShifts"
+  - "totalInsertedNucs"
+  - "totalSnps"
+  - "totalUnknownNucs"
+  - "travelHistory"
diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk
index a3f6ba3..361771b 100644
--- a/ingest/rules/curate.smk
+++ b/ingest/rules/curate.smk
@@ -1,15 +1,15 @@
 """
-This part of the workflow handles transforming the data into standardized
-formats and expects input file
+This part of the workflow handles the curation of data from Pathoplexus
 
-    sequences_ndjson = "data/sequences_{serotype}.ndjson"
+REQUIRED INPUTS:
 
-This will produce output files as
+    sequences_ndjson = data/sequences.ndjson
 
-    metadata = "results/metadata_{serotype}.tsv"
-    sequences = "results/sequences_{serotype}.fasta"
+OUTPUTS:
+
+    metadata         = data/subset_metadata.tsv
+    sequences        = results/sequences.fasta
 
-Parameters are expected to be defined in `config.curate`.
 """
 
 
@@ -21,7 +21,7 @@ def format_field_map(field_map: dict[str, str]) -> str:
 
 rule curate:
     input:
-        sequences_ndjson="data/genbank.ndjson",
+        sequences_ndjson="data/sequences.ndjson",
         geolocation_rules=config["curate"]["local_geolocation_rules"],
         annotations=config["curate"]["annotations"],
         manual_mapping="defaults/host_hostgenus_hosttype_map.tsv",
@@ -34,11 +34,8 @@ rule curate:
         "benchmarks/curate.txt",
     params:
         field_map=format_field_map(config["curate"]["field_map"]),
-        strain_regex=config["curate"]["strain_regex"],
-        strain_backup_fields=config["curate"]["strain_backup_fields"],
         date_fields=config["curate"]["date_fields"],
         expected_date_formats=config["curate"]["expected_date_formats"],
-        genbank_location_field=config["curate"]["genbank_location_field"],
         articles=config["curate"]["titlecase"]["articles"],
         abbreviations=config["curate"]["titlecase"]["abbreviations"],
         titlecase_fields=config["curate"]["titlecase"]["fields"],
@@ -54,14 +51,9 @@ rule curate:
             | augur curate rename \
                 --field-map {params.field_map} \
             | augur curate normalize-strings \
-            | augur curate transform-strain-name \
-                --strain-regex {params.strain_regex} \
-                --backup-fields {params.strain_backup_fields} \
             | augur curate format-dates \
                 --date-fields {params.date_fields} \
                 --expected-date-formats {params.expected_date_formats} \
-            | augur curate parse-genbank-location \
-                --location-field {params.genbank_location_field} \
             | augur curate titlecase \
                 --titlecase-fields {params.titlecase_fields} \
                 --articles {params.articles} \
@@ -88,23 +80,34 @@ rule curate:
                 --output-id-field {params.id_field} \
                 --output-seq-field {params.sequence_field} ) 2>> {log}
         """
-rule add_metadata_columns:
+rule add_accession_urls:
     """Add columns to metadata
     Notable columns:
-    - [NEW] url: URL linking to the NCBI GenBank record ('https://www.ncbi.nlm.nih.gov/nuccore/*').
+    - PPX_accession__url: URL linking to the Pathoplexus record.
+    - INSDC_accession__url: URL linking to the NCBI GenBank record (left empty when the INSDC accession is empty).
+    - url: Same value as INSDC_accession__url (kept for backwards compatibility).
     """
     input:
         metadata = "data/all_metadata.tsv"
     output:
         metadata = temp("data/all_metadata_added.tsv")
     params:
-        accession=config['curate']['genbank_accession']
+        pathoplexus_accession=config['curate']['pathoplexus_accession'],
+        pathoplexus_accession_url=config['curate']['pathoplexus_accession'] + "__url",
+        insdc_accession=config['curate']['insdc_accession'],
+        insdc_accession_url=config['curate']['insdc_accession'] + "__url",
     shell:
         """
-        csvtk mutate2 -t \
-          -n url \
-          -e '"https://www.ncbi.nlm.nih.gov/nuccore/" + ${params.accession}' \
-          {input.metadata} \
+        cat {input.metadata} \
+            | csvtk mutate2 -t \
+                -n {params.pathoplexus_accession_url} \
+                -e '"https://pathoplexus.org/seq/" + ${params.pathoplexus_accession}' \
+            | csvtk mutate2 -t \
+                -n {params.insdc_accession_url} \
+                -e '${params.insdc_accession} == "" ? "" : "https://www.ncbi.nlm.nih.gov/nuccore/" + ${params.insdc_accession}' \
+            | csvtk mutate2 -t \
+                -n url \
+                -e '${params.insdc_accession} == "" ? "" : "https://www.ncbi.nlm.nih.gov/nuccore/" + ${params.insdc_accession}' \
         > {output.metadata}
         """
 
@@ -121,6 +124,30 @@ rule subset_metadata:
             {input.metadata} > {output.metadata}
         """
 
+rule extract_open_data:
+    """Write copies of the metadata and sequences without records whose dataUseTerms is RESTRICTED."""
+    input:
+        sequences = "results/sequences.fasta",
+        metadata = "results/metadata.tsv",
+    output:
+        sequences = "results/sequences_open.fasta",
+        metadata = "results/metadata_open.tsv",
+    log:
+        "logs/extract_open_data.txt"
+    benchmark:
+        "benchmarks/extract_open_data.txt"
+    shell:
+        r"""
+        exec &> >(tee {log:q})
+        augur filter \
+            --sequences {input.sequences:q} \
+            --metadata {input.metadata:q} \
+            --metadata-id-columns accession \
+            --exclude-where "dataUseTerms=RESTRICTED" \
+            --output-sequences {output.sequences:q} \
+            --output-metadata {output.metadata:q}
+        """
+
 rule compress:
     input:
         file="{a_file}",
diff --git a/ingest/rules/fetch.smk b/ingest/rules/fetch.smk
new file mode 100644
index 0000000..5d9ce5a
--- /dev/null
+++ b/ingest/rules/fetch.smk
@@ -0,0 +1,68 @@
+"""
+This part of the workflow handles fetching sequences and metadata from Pathoplexus.
+
+REQUIRED INPUTS:
+
+    None
+
+OUTPUTS:
+
+    ndjson = data/sequences.ndjson
+
+"""
+workflow.global_resources.setdefault("concurrent_deploys", 2)
+
+rule download_ppx_seqs:
+    output:
+        sequences="data/ppx_sequences.fasta",
+    params:
+        sequences_url=config["ppx_fetch"]["seqs"],
+    # Retry failed downloads; `curl --fail` below makes HTTP errors exit non-zero so they are retried too
+    retries: 5
+    benchmark:
+        "benchmarks/download_ppx_seqs.txt"
+    log:
+        "logs/download_ppx_seqs.txt"
+    shell:
+        """
+        curl --fail --silent --show-error {params.sequences_url:q} --output {output.sequences:q} 2> {log:q}
+        """
+
+rule download_ppx_meta:
+    output:
+        metadata="data/ppx_metadata.csv",
+    params:
+        metadata_url=config["ppx_fetch"]["meta"],
+        fields=",".join(config["ppx_metadata_fields"]),
+    # Retry failed downloads; `curl --fail` below makes HTTP errors exit non-zero so they are retried too
+    retries: 5
+    benchmark:
+        "benchmarks/download_ppx_meta.txt"
+    log:
+        "logs/download_ppx_meta.txt"
+    shell:
+        """
+        curl --fail --silent --show-error '{params.metadata_url}&fields={params.fields}' --output {output.metadata:q} 2> {log:q}
+        """
+
+rule format_ppx_ndjson:
+    input:
+        metadata="data/ppx_metadata.csv",
+        sequences="data/ppx_sequences.fasta",
+    output:
+        ndjson="data/sequences.ndjson",
+    benchmark:
+        "benchmarks/format_ppx_ndjson.txt"
+    log:
+        "logs/format_ppx_ndjson.txt"
+    shell:
+        """
+        augur curate passthru \
+            --fasta {input.sequences:q} \
+            --metadata {input.metadata:q} \
+            --seq-id-column accessionVersion \
+            --seq-field sequence \
+            --duplicate-reporting warn \
+            --unmatched-reporting warn \
+            > {output.ndjson:q} 2> {log:q}
+        """
diff --git a/ingest/rules/fetch_from_ncbi.smk b/ingest/rules/fetch_from_ncbi.smk
deleted file mode 100644
index d1d4b90..0000000
--- a/ingest/rules/fetch_from_ncbi.smk
+++ /dev/null
@@ -1,104 +0,0 @@
-"""
-This part of the workflow handles fetching sequences from various sources.
-Uses `config.sources` to determine which sequences to include in final output.
-
-Currently only fetches sequences from GenBank, but other sources can be
-defined in the config. If adding other sources, add a new rule upstream
-of rule `fetch_all_sequences` to create the file `data/{source}.ndjson` or the
-file must exist as a static file in the repo.
-
-Produces final output as
-
-    sequences_ndjson = "data/sequences.ndjson"
-
-"""
-workflow.global_resources.setdefault("concurrent_deploys", 2)
-
-rule fetch_ncbi_dataset_package:
-    output:
-        dataset_package = temp("data/ncbi_dataset.zip")
-    retries: 5 # Requires snakemake 7.7.0 or later
-    log:
-        "logs/fetch_ncbi_dataset_package.txt"
-    benchmark:
-        "benchmarks/fetch_ncbi_dataset_package.txt"
-    params:
-        ncbi_taxon_id = config["ncbi_taxon_id"]
-    shell:
-        """
-        datasets download virus genome taxon {params.ncbi_taxon_id} \
-            --no-progressbar \
-            --filename {output.dataset_package} 2>&1 | tee {log}
-        """
-
-# Note: This rule is not part of the default workflow!
-# It is intended to be used as a specific target for users to be able
-# to inspect and explore the full raw metadata from NCBI Datasets.
-rule dump_ncbi_dataset_report:
-    input:
-        dataset_package="data/ncbi_dataset.zip",
-    output:
-        ncbi_dataset_tsv="data/ncbi_dataset_report_raw.tsv",
-    shell:
-        """
-        dataformat tsv virus-genome \
-            --package {input.dataset_package} > {output.ncbi_dataset_tsv}
-        """
-
-rule extract_ncbi_dataset_sequences:
-    input:
-        dataset_package = "data/ncbi_dataset.zip"
-    output:
-        ncbi_dataset_sequences = temp("data/ncbi_dataset_sequences.fasta")
-    benchmark:
-        "benchmarks/extract_ncbi_dataset_sequences.txt"
-    shell:
-        """
-        unzip -jp {input.dataset_package} \
-            ncbi_dataset/data/genomic.fna > {output.ncbi_dataset_sequences}
-        """
-
-rule format_ncbi_dataset_report:
-    input:
-        dataset_package = "data/ncbi_dataset.zip",
-    output:
-        ncbi_dataset_tsv = temp("data/ncbi_dataset_report.tsv")
-    params:
-        ncbi_dataset_fields = ",".join(config["ncbi_datasets_fields"]),
-    benchmark:
-        "benchmarks/format_ncbi_dataset_report.txt"
-    shell:
-        """
-        dataformat tsv virus-genome \
-            --package {input.dataset_package} \
-            --fields {params.ncbi_dataset_fields:q} \
-            --elide-header \
-            | csvtk fix-quotes -Ht \
-            | csvtk add-header -t -n {params.ncbi_dataset_fields} \
-            | csvtk rename -t -f accession -n accession_version \
-            | csvtk -t mutate -f accession_version -n accession -p "^(.+?)\." --at 1 \
-            > {output.ncbi_dataset_tsv}
-        """
-
-
-rule format_ncbi_datasets_ndjson:
-    input:
-        ncbi_dataset_sequences = "data/ncbi_dataset_sequences.fasta",
-        ncbi_dataset_tsv = "data/ncbi_dataset_report.tsv",
-    output:
-        ndjson = "data/genbank.ndjson",
-    log:
-        "logs/format_ncbi_datasets_ndjson.txt"
-    benchmark:
-        "benchmarks/format_ncbi_datasets_ndjson.txt"
-    shell:
-        """
-        augur curate passthru \
-            --metadata {input.ncbi_dataset_tsv} \
-            --fasta {input.ncbi_dataset_sequences} \
-            --seq-id-column accession_version \
-            --seq-field sequence \
-            --unmatched-reporting warn \
-            --duplicate-reporting warn \
-            2> {log} > {output.ndjson}
-        """
diff --git a/ingest/rules/nextclade.smk b/ingest/rules/nextclade.smk
index 982e32c..7f6a308 100644
--- a/ingest/rules/nextclade.smk
+++ b/ingest/rules/nextclade.smk
@@ -3,16 +3,17 @@ This part of the workflow handles running Nextclade on the curated metadata
 and sequences.
 REQUIRED INPUTS:
     metadata    = data/subset_metadata.tsv
-    sequences   = data/sequences_all.fasta
-    nextclade_datasets = ../nextclade/dataset
+    sequences   = results/sequences.fasta
+    dataset     = (from config)
 OUTPUTS:
-    metadata        = data/metadata_all.tsv
-    nextclade       = data/nextclade_clades.tsv
+    metadata    = results/metadata.tsv
 See Nextclade docs for more details on usage, inputs, and outputs if you would
 like to customize the rules:
 https://docs.nextstrain.org/projects/nextclade/page/user/nextclade-cli.html
 """
 
+# TODO: This separate fetch should not be necessary - 'lineage' can be added
+# to data/subset_metadata.tsv.
 rule pathoplexus_classify:
     """
     Pulls global lineage calls from Pathoplexus API
@@ -26,7 +27,7 @@ rule pathoplexus_classify:
         id_field=config["curate"]["output_id_field"],
     shell:
         r"""
-        curl "{params.URL}?dataFormat=TSV&downloadAsFile=false&fields={params.fields}" \
+        curl "{params.URL}?versionStatus=LATEST_VERSION&dataFormat=TSV&downloadAsFile=false&fields={params.fields}" \
         | tsv-filter -H --not-empty {params.accession_field} \
         | uniq \
         | csvtk -t rename -f {params.accession_field} -n {params.id_field} \
diff --git a/phylogenetic/defaults/all-lineages/auspice_config.json b/phylogenetic/defaults/all-lineages/auspice_config.json
index d406200..269c6e4 100644
--- a/phylogenetic/defaults/all-lineages/auspice_config.json
+++ b/phylogenetic/defaults/all-lineages/auspice_config.json
@@ -1,6 +1,10 @@
 {
   "title": "Genomic epidemiology of West Nile Virus",
   "data_provenance": [
+    {
+      "name": "Pathoplexus",
+      "url": "https://pathoplexus.org"
+    },
     {
       "name": "GenBank",
       "url": "https://www.ncbi.nlm.nih.gov/genbank/"
@@ -16,6 +20,7 @@
     {"key": "lineage", "title": "Lineage", "type": "categorical"},
     {"key": "clade_membership", "title": "Clade", "type": "categorical"},
     {"key": "author", "title": "Authors", "type": "categorical"},
+    {"key": "dataUseTerms", "title": "Data use terms", "type": "categorical"},
     {"key": "host", "title": "Host Species", "type": "categorical"},
     {"key": "host_genus", "title": "Host Genus", "type": "categorical"},
     {"key": "host_type", "title": "Host Type", "type": "categorical"}
@@ -52,9 +57,10 @@
     "geo_resolution": "country"
   },
   "metadata_columns": [
-    "accession",
+    "PPX_accession",
+    "INSDC_accession",
     "division",
-    "url"
+    "restrictedUntil"
   ],
   "extensions": {
     "nextclade": {
diff --git a/phylogenetic/defaults/all-lineages/include.txt b/phylogenetic/defaults/all-lineages/include.txt
index 1e92593..6471a7f 100644
--- a/phylogenetic/defaults/all-lineages/include.txt
+++ b/phylogenetic/defaults/all-lineages/include.txt
@@ -1,91 +1,91 @@
-AF260968 # Egypt 1951 all-lineages reference
-NC_001563 # Lineage 2 reference
-NC_009942 # Lineage 1 reference
-HM051416 # Isreal 1953
-GQ851607 # Nigeria 1965
-GQ851606 # Senegal 1979
-AF481864 # pre-NY
-MH166901 # NY99
-MH166903 # NY99
-MH166904 # NY99
-KX547395 # NY99
-KX547519 # NY99
-KX547602 # NY99
-HM488130 # NY99
-HM488132 # NY99
-HQ671707 # NY99
-AF202541 # NY99
-AF206518 # NY99
-HM488127 # NY99
-HM488126 # NY99
-KX547410 # WN02
-KJ501434 # WN02
-KX547456 # WN02
-KY216155 # WN02
-KX547460 # WN02
-MF175829 # WN02
-KX547482 # WN02
-MF175827 # WN02
-MF175839 # WN02
-KT020853 # WN02
-KX547548 # WN02
-MF175863 # WN02
-KX547286 # WN02
-MF175873 # WN02
-MF175865 # WN02
-MF175831 # WN02
-MF175858 # WN02
-KJ501117 # SW03
-KJ501120 # SW03
-MF175815 # SW03
-MG004533 # SW03
-KF704147 # SW03
-KF704153 # SW03
-KR348940 # SW03
-KR348937 # SW03
-KX547361 # SW03
-JX015523 # SW03
-KR348944 # SW03
-KJ501124 # SW03
-KX547552 # SW03
-KJ145829 # SW03
-KR348981 # SW03
-KJ501118 # SW03
-KR348938 # SW03
-KR348976 # SW03
-KJ501170 # SW03
-KR348993 # SW03
-JQ700438 # SW03
-KR348977 # SW03
-KR348942 # SW03
-KR348941 # SW03
-KJ501121 # SW03
-KJ501122 # SW03
-KX547375 # SW03
-KM012172 # SW03
-KC333375 # SW03
-KJ501222 # SW03
-MG004537 # SW03
-MF175866 # SW03
-MG004540 # SW03
-MW383507 # Lineage 2
-HM147822 # Lineage 2
-GQ903680 # Lineage 2
-DQ176636 # Lineage 2
-KU978767 # Lineage 2
-HM147823 # Lineage 2
-PP445212 # Lineage 3
-AY765264 # Lineage 3
-AY277251 # Lineage 4
-FJ159131 # Lineage 4
-FJ159129 # Lineage 4
-FJ159130 # Lineage 4
-KJ831223 # Lineage 4
-KU978770 # Lineage 5
-DQ256376 # Lineage 5
-JX041632 # Lineage 5
-GQ851604 # Lineage 5
-GQ851605 # Lineage 5
-KY703855 # Lineage 7
-OP846972 # Lineage 7
-KY703856 # Lineage 8
+PP_000HJBT # Egypt 1951 all-lineages reference
+PP_0003ASZ # Lineage 2 reference
+PP_0003ATX # Lineage 1 reference
+PP_0008AWF # Israel 1953
+PP_000K976 # Nigeria 1965
+PP_000K968 # Senegal 1979
+PP_000HP18 # pre-NY
+PP_0002EDQ # NY99
+PP_0002EFL # NY99
+PP_0002EGJ # NY99
+PP_0001RJ4 # NY99
+PP_0001V6R # NY99
+PP_0001XMS # NY99
+PP_0008D7R # NY99
+PP_0008D9M # NY99
+PP_0008M3R # NY99
+PP_000HHM7 # NY99
+PP_000HHXM # NY99
+PP_0008D4X # NY99
+PP_0008D3Z # NY99
+PP_0001RZ8 # WN02
+PP_00012ZX # WN02
+PP_0001TBH # WN02
+PP_0001Z6M # WN02
+PP_0001TF9 # WN02
+PP_0002AES # WN02
+PP_0001U3Y # WN02
+PP_0002ACW # WN02
+PP_0002AQ5 # WN02
+PP_0001F1F # WN02
+PP_0001W10 # WN02
+PP_0002BER # WN02
+PP_0001NBN # WN02
+PP_0002BQ4 # WN02
+PP_0002BGM # WN02
+PP_0002AGN # WN02
+PP_0002B91 # WN02
+PP_0000T23 # SW03
+PP_0000T6V # SW03
+PP_0002A0L # SW03
+PP_0002DFM # SW03
+PP_0000Q26 # SW03
+PP_0000Q8U # SW03
+PP_0001C3E # SW03
+PP_0001BZN # SW03
+PP_0001QJ5 # SW03
+PP_0000FKE # SW03
+PP_0001C76 # SW03
+PP_0000TAM # SW03
+PP_0001W5S # SW03
+PP_0000RRS # SW03
+PP_0001DFP # SW03
+PP_0000T31 # SW03
+PP_0001C0L # SW03
+PP_0001DAZ # SW03
+PP_0000UWD # SW03
+PP_0001DVU # SW03
+PP_0000DT0 # SW03
+PP_0001DBX # SW03
+PP_0001C5A # SW03
+PP_0001C4C # SW03
+PP_0000T7T # SW03
+PP_0000T8R # SW03
+PP_0001QYB # SW03
+PP_00017WY # SW03
+PP_0000HXN # SW03
+PP_0000WPR # SW03
+PP_0002DLA # SW03
+PP_0002BHJ # SW03
+PP_0002DP4 # SW03
+PP_000370M # Lineage 2
+PP_0008CDE # Lineage 2
+PP_000K9BY # Lineage 2
+PP_000JB76 # Lineage 2
+PP_0001H9X # Lineage 2
+PP_0008CEC # Lineage 2
+PP_000RH4S # Lineage 3
+PP_000HY01 # Lineage 3
+PP_000HRWF # Lineage 4
+PP_000JWG3 # Lineage 4
+PP_000JWE7 # Lineage 4
+PP_000JWF5 # Lineage 4
+PP_00017EX # Lineage 4
+PP_0001HCR # Lineage 5
+PP_000JBA0 # Lineage 5
+PP_0000FR2 # Lineage 5
+PP_000K94C # Lineage 5
+PP_000K95A # Lineage 5
+PP_0001ZMQ # Lineage 7
+PP_0003L7U # Lineage 7
+PP_0001ZNN # Lineage 8
diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml
index b50e603..0d08cd4 100644
--- a/phylogenetic/defaults/config.yaml
+++ b/phylogenetic/defaults/config.yaml
@@ -56,7 +56,7 @@ build_params:
 
   lineage-1A:
     reference: "defaults/lineage-1A/reference.gb"
-    root: "KX394399"
+    root: "PP_0001JCQ"
 
     subsample:
       samples:
diff --git a/phylogenetic/defaults/lineage-1A/auspice_config.json b/phylogenetic/defaults/lineage-1A/auspice_config.json
index 945cdff..a0f3d12 100644
--- a/phylogenetic/defaults/lineage-1A/auspice_config.json
+++ b/phylogenetic/defaults/lineage-1A/auspice_config.json
@@ -1,6 +1,10 @@
 {
   "title": "Genomic epidemiology of West Nile Virus lineage 1A",
   "data_provenance": [
+    {
+      "name": "Pathoplexus",
+      "url": "https://pathoplexus.org"
+    },
     {
       "name": "GenBank",
       "url": "https://www.ncbi.nlm.nih.gov/genbank/"
@@ -16,6 +20,7 @@
     {"key": "lineage", "title": "Lineage", "type": "categorical"},
     {"key": "clade_membership", "title": "Clade", "type": "categorical"},
     {"key": "author", "title": "Authors", "type": "categorical"},
+    {"key": "dataUseTerms", "title": "Data use terms", "type": "categorical"},
     {"key": "host", "title": "Host Species", "type": "categorical"},
     {"key": "host_genus", "title": "Host Genus", "type": "categorical"},
     {"key": "host_type", "title": "Host Type", "type": "categorical"}
@@ -53,9 +58,10 @@
     "distance_measure": "div"
   },
   "metadata_columns": [
-    "accession",
+    "PPX_accession",
+    "INSDC_accession",
     "division",
-    "url"
+    "restrictedUntil"
   ],
   "extensions": {
     "nextclade": {
diff --git a/phylogenetic/defaults/lineage-1A/include.txt b/phylogenetic/defaults/lineage-1A/include.txt
index 2bc4e0b..f4634a9 100644
--- a/phylogenetic/defaults/lineage-1A/include.txt
+++ b/phylogenetic/defaults/lineage-1A/include.txt
@@ -1,66 +1,66 @@
-KX394399 # Lineage 1B outgroup
-NC_009942 # Lineage 1 reference
-AF481864 # pre-NY
-MH166901 # NY99
-MH166903 # NY99
-MH166904 # NY99
-KX547395 # NY99
-KX547519 # NY99
-KX547602 # NY99
-HM488130 # NY99
-HM488132 # NY99
-HQ671707 # NY99
-AF202541 # NY99
-AF206518 # NY99
-HM488127 # NY99
-HM488126 # NY99
-KX547410 # WN02
-KJ501434 # WN02
-KX547456 # WN02
-KY216155 # WN02
-KX547460 # WN02
-MF175829 # WN02
-KX547482 # WN02
-MF175827 # WN02
-MF175839 # WN02
-KT020853 # WN02
-KX547548 # WN02
-MF175863 # WN02
-KX547286 # WN02
-MF175873 # WN02
-MF175865 # WN02
-MF175831 # WN02
-MF175858 # WN02
-KJ501117 # SW03
-KJ501120 # SW03
-MF175815 # SW03
-MG004533 # SW03
-KF704147 # SW03
-KF704153 # SW03
-KR348940 # SW03
-KR348937 # SW03
-KX547361 # SW03
-JX015523 # SW03
-KR348944 # SW03
-KJ501124 # SW03
-KX547552 # SW03
-KJ145829 # SW03
-KR348981 # SW03
-KJ501118 # SW03
-KR348938 # SW03
-KR348976 # SW03
-KJ501170 # SW03
-KR348993 # SW03
-JQ700438 # SW03
-KR348977 # SW03
-KR348942 # SW03
-KR348941 # SW03
-KJ501121 # SW03
-KJ501122 # SW03
-KX547375 # SW03
-KM012172 # SW03
-KC333375 # SW03
-KJ501222 # SW03
-MG004537 # SW03
-MF175866 # SW03
-MG004540 # SW03
+PP_0001JCQ # Lineage 1B outgroup
+PP_0003ATX # Lineage 1 reference
+PP_000HP18 # pre-NY
+PP_0002EDQ # NY99
+PP_0002EFL # NY99
+PP_0002EGJ # NY99
+PP_0001RJ4 # NY99
+PP_0001V6R # NY99
+PP_0001XMS # NY99
+PP_0008D7R # NY99
+PP_0008D9M # NY99
+PP_0008M3R # NY99
+PP_000HHM7 # NY99
+PP_000HHXM # NY99
+PP_0008D4X # NY99
+PP_0008D3Z # NY99
+PP_0001RZ8 # WN02
+PP_00012ZX # WN02
+PP_0001TBH # WN02
+PP_0001Z6M # WN02
+PP_0001TF9 # WN02
+PP_0002AES # WN02
+PP_0001U3Y # WN02
+PP_0002ACW # WN02
+PP_0002AQ5 # WN02
+PP_0001F1F # WN02
+PP_0001W10 # WN02
+PP_0002BER # WN02
+PP_0001NBN # WN02
+PP_0002BQ4 # WN02
+PP_0002BGM # WN02
+PP_0002AGN # WN02
+PP_0002B91 # WN02
+PP_0000T23 # SW03
+PP_0000T6V # SW03
+PP_0002A0L # SW03
+PP_0002DFM # SW03
+PP_0000Q26 # SW03
+PP_0000Q8U # SW03
+PP_0001C3E # SW03
+PP_0001BZN # SW03
+PP_0001QJ5 # SW03
+PP_0000FKE # SW03
+PP_0001C76 # SW03
+PP_0000TAM # SW03
+PP_0001W5S # SW03
+PP_0000RRS # SW03
+PP_0001DFP # SW03
+PP_0000T31 # SW03
+PP_0001C0L # SW03
+PP_0001DAZ # SW03
+PP_0000UWD # SW03
+PP_0001DVU # SW03
+PP_0000DT0 # SW03
+PP_0001DBX # SW03
+PP_0001C5A # SW03
+PP_0001C4C # SW03
+PP_0000T7T # SW03
+PP_0000T8R # SW03
+PP_0001QYB # SW03
+PP_00017WY # SW03
+PP_0000HXN # SW03
+PP_0000WPR # SW03
+PP_0002DLA # SW03
+PP_0002BHJ # SW03
+PP_0002DP4 # SW03
diff --git a/phylogenetic/defaults/lineage-2/auspice_config.json b/phylogenetic/defaults/lineage-2/auspice_config.json
index 3453014..4d2e362 100644
--- a/phylogenetic/defaults/lineage-2/auspice_config.json
+++ b/phylogenetic/defaults/lineage-2/auspice_config.json
@@ -1,6 +1,10 @@
 {
   "title": "Genomic epidemiology of West Nile Virus lineage 2",
   "data_provenance": [
+    {
+      "name": "Pathoplexus",
+      "url": "https://pathoplexus.org"
+    },
     {
       "name": "GenBank",
       "url": "https://www.ncbi.nlm.nih.gov/genbank/"
@@ -16,6 +20,7 @@
     {"key": "lineage", "title": "Lineage", "type": "categorical"},
     {"key": "clade_membership", "title": "Clade", "type": "categorical"},
     {"key": "author", "title": "Authors", "type": "categorical"},
+    {"key": "dataUseTerms", "title": "Data use terms", "type": "categorical"},
     {"key": "host", "title": "Host Species", "type": "categorical"},
     {"key": "host_genus", "title": "Host Genus", "type": "categorical"},
     {"key": "host_type", "title": "Host Type", "type": "categorical"}
@@ -53,9 +58,10 @@
     "distance_measure": "div"
   },
   "metadata_columns": [
-    "accession",
+    "PPX_accession",
+    "INSDC_accession",
     "division",
-    "url"
+    "restrictedUntil"
   ],
   "extensions": {
     "nextclade": {
diff --git a/phylogenetic/defaults/lineage-2/include.txt b/phylogenetic/defaults/lineage-2/include.txt
index e33db5b..23924ce 100644
--- a/phylogenetic/defaults/lineage-2/include.txt
+++ b/phylogenetic/defaults/lineage-2/include.txt
@@ -1,7 +1,7 @@
-NC_001563 # Lineage 2 reference
-MW383507 # Lineage 2
-HM147822 # Lineage 2
-GQ903680 # Lineage 2
-DQ176636 # Lineage 2
-KU978767 # Lineage 2
-HM147823 # Lineage 2
+PP_0003ASZ # Lineage 2 reference
+PP_000370M # Lineage 2
+PP_0008CDE # Lineage 2
+PP_000K9BY # Lineage 2
+PP_000JB76 # Lineage 2
+PP_0001H9X # Lineage 2
+PP_0008CEC # Lineage 2