diff --git a/DESCRIPTION b/DESCRIPTION
index 1af74463..9cefb6b5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -20,7 +20,7 @@ License: Apache License (>= 2)
 Encoding: UTF-8
 Remotes:
     github::epiforecasts/EpiNow2@9b8cd4fcceca41ac34545a38989f6f295ddeeaf7,
-    github::stan-dev/cmdstanr
+    github::stan-dev/cmdstanr@da99e2ba954658bdad63bffb738c4444c33a4e0e
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
 Suggests:
@@ -51,7 +51,9 @@ Imports:
     tidyr,
     tidybayes,
     optparse,
-    Microsoft365R
+    Microsoft365R,
+    stringr,
+    glue
 Additional_repositories:
 	https://stan-dev.r-universe.dev
 URL: https://cdcgov.github.io/cfa-epinow2-pipeline/
diff --git a/Makefile b/Makefile
index 7252cd40..9b183768 100644
--- a/Makefile
+++ b/Makefile
@@ -3,12 +3,21 @@ IMAGE_NAME=cfa-epinow2-pipeline
 BRANCH=$(shell git branch --show-current)
 CONFIG_CONTAINER=rt-epinow2-config
 CNTR_MGR=docker
+DATA_API=v1
 ifeq ($(BRANCH), main)
 TAG=latest
 else
 TAG=$(BRANCH)
 endif
 
+ifeq ($(DATA_API),v1)
+API_CONTAINER := nssp-etl
+else ifeq ($(DATA_API),v2)
+API_CONTAINER := nssp-etl-api-v2
+else
+$(error Unknown DATA_API '$(DATA_API)'. Expected v1 or v2)
+endif
+
 CONFIG=test.json
 POOL="cfa-epinow2-$(TAG)"
 TIMESTAMP:=$(shell  date -u +"%Y%m%d_%H%M%S")
@@ -38,12 +47,14 @@ config: ## Generates a configuration file for running the model
 		--disease="COVID-19,Influenza,RSV" \
 		--state=all \
 		--output-container=nssp-rt-v2 \
+		--input-container=$(API_CONTAINER) \
 		--job-id=$(JOB) \
 		--report-date-str=$(REPORT_DATE)
 
 rerun-config: ## Generate a configuration file to rerun a previous model
 	uv run azure/generate_rerun_configs.py \
 		--output-container=nssp-rt-v2 \
+		--input-container=$(API_CONTAINER) \
 		--job-id=$(JOB) \
 		--report-date-str=$(REPORT_DATE)
 
@@ -84,6 +95,7 @@ test-batch: ## Run GitHub Actions workflow and then job.py for testing on Azure
 		--disease="COVID-19,Influenza,RSV" \
 		--state=NY \
 		--output-container=nssp-rt-testing \
+		--input-container=$(API_CONTAINER) \
 		--job-id=$(JOB) \
 		--report-date-str=$(REPORT_DATE)
 	uv run --env-file .env \
diff --git a/NEWS.md b/NEWS.md
index 87e3c43d..beef39a1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,8 @@
 # CFAEpiNow2Pipeline v0.2.0
 
 ## Features
+
+* Add the ability to read data from API v2 as well as v1
 * dependabot updating package docker/login-action from version 3 to version 4 new
 * Adding image tag validation when dependabot PRs are opened and automatic update to NEWs md
 * Add a utility script for reading, preparing, and uploading Rt review decisions
diff --git a/R/config.R b/R/config.R
index ac55276e..07ee4bdb 100644
--- a/R/config.R
+++ b/R/config.R
@@ -158,6 +158,12 @@ Data <- S7::new_class(
 #' "YYYY-MM-DD".
 #' @param output_container An optional string specifying the output blob storage
 #' container.
+#' @param facility_active_proportion A numeric value between 0 and 1 specifying
+#' the proportion of days during the modeling period that facilities must have
+#' reported at least one informative discharge diagnosis (DDI) to be included in
+#' the analysis. Default is 0.94 (require
+#' active reporting for >=53 of 56 days in the training period).
+#' Lower values allow inclusion of facilities with fewer active days.
 #' @family config
 #' @export
 Config <- S7::new_class(
@@ -190,7 +196,26 @@ Config <- S7::new_class(
     # Would add default values, but Roxygen isn't happy about them yet.
     sampler_opts = S7::class_list,
     exclusions = S7::S7_class(Exclusions()),
-    output_container = character_or_null
+    output_container = character_or_null,
+    facility_active_proportion = S7::new_property(
+      S7::class_double,
+      default = quote(0.94),
+      validator = \(value) {
+        if (
+          rlang::is_bare_numeric(value) &&
+            length(value) == 1 &&
+            value >= 0 &&
+            value <= 1
+        ) {
+          NULL
+        } else {
+          paste0(
+            "Invalid value for facility_active_proportion. ",
+            "It must be a single numeric value between 0 and 1."
+          )
+        }
+      }
+    )
   )
 )
 
diff --git a/R/pipeline.R b/R/pipeline.R
index 34d649f9..053e67ec 100644
--- a/R/pipeline.R
+++ b/R/pipeline.R
@@ -85,7 +85,7 @@ orchestrate_pipeline <- function(
       )
       read_json_into_config(
         config_path,
-        c("exclusions", "output_container")
+        c("exclusions", "output_container", "facility_active_proportion")
       )
     },
     error = function(con) {
@@ -94,6 +94,7 @@ orchestrate_pipeline <- function(
     }
   )
   if (typeof(config) == "logical") {
+    cli::cli_warn("Failed to read config", class = "Bad_config")
     return(invisible(FALSE))
   }
 
@@ -113,6 +114,7 @@ orchestrate_pipeline <- function(
   logfile_connection <- file(file.path(logfile_path, "logs.txt"), open = "wt")
   sink(
     logfile_connection,
+    # "output" means general (non-error) output
     type = "output",
     append = TRUE,
     # Send output to logs and to console
@@ -120,8 +122,11 @@ orchestrate_pipeline <- function(
   )
   sink(
     logfile_connection,
+    # "message" means error messages
     type = "message",
     append = TRUE
+    # Unfortunately, we can't split messages to both console and log file.
+    # It will error out if we try
   )
   on.exit(sink(file = NULL))
   cli::cli_alert_info("Starting run at {Sys.time()}")
@@ -189,7 +194,8 @@ execute_model_logic <- function(config, input_dir, output_dir) {
     geo_value = config@geo_value,
     report_date = config@report_date,
     max_reference_date = config@max_reference_date,
-    min_reference_date = config@min_reference_date
+    min_reference_date = config@min_reference_date,
+    facility_active_proportion = config@facility_active_proportion
   )
 
   # rlang::is_empty() checks for empty and NULL values
@@ -289,7 +295,8 @@ execute_model_logic <- function(config, input_dir, output_dir) {
       config@exclusions@blob_storage_container
     ),
     # Add the config container here when refactoring out to outer func
-    run_at = format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z")
+    run_at = format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z"),
+    facility_active_proportion = config@facility_active_proportion
   )
 
   write_model_outputs(
diff --git a/R/read_data.R b/R/read_data.R
index 0c18d28d..28ba55d2 100644
--- a/R/read_data.R
+++ b/R/read_data.R
@@ -1,8 +1,13 @@
 #' Read in the dataset of incident case counts
 #'
-#' Each row of the table corresponds to a single facilities' cases for a
-#' reference-date/report-date/disease tuple. We want to aggregate these counts
-#' to the level of geographic aggregate/report-date/reference-date/disease.
+#' Reads in data from either data API v1 or v2. Data API version is
+#' intuited by read_data by the presence of the `any_visits_this_day` column
+#' in the underlying data. Each row of the table corresponds to a single
+#' facilities' cases for a reference-date/report-date/disease tuple.
+#' We want to aggregate these counts to the level of geographic
+#' aggregate/report-date/reference-date/disease. The
+#' _facility_active_proportion_ field is used to filter facilities with data
+#' outages from the data API v2 (this field is not used for data API v1).
 #'
 #' We handle two distinct cases for geographic aggregates:
 #'
@@ -29,102 +34,171 @@ read_data <- function(
   geo_value,
   report_date,
   max_reference_date,
-  min_reference_date
+  min_reference_date,
+  facility_active_proportion = 0.94
 ) {
   rlang::arg_match(disease)
-  # NOTE: this is temporary workaround until we switch to the new API. I'm not
-  # sure if there's a better way to do this without a whole bunch of special
-  # casing -- which is its own code smell. I think this should really be handled
-  # upstream in the ETL job and standardize on "COVID-19", but that's beyond
-  # scope here and we need to do _something_ in the meantime so this runs.
-  disease_map <- c(
-    "COVID-19" = "COVID-19/Omicron",
-    "Influenza" = "Influenza",
-    "RSV" = "RSV",
-    "test" = "test"
+  check_file_exists(data_path)
+
+  con <- DBI::dbConnect(duckdb::duckdb())
+  on.exit(DBI::dbDisconnect(con), add = TRUE)
+
+  # any_visits_this_day is calculated for API v2
+  # True if a given facility, on a given reference_date had a DDI count > 0
+  # Same across all diseases and metrics for (facility, reference_date)
+  is_api_v2 <- rlang::try_fetch(
+    {
+      cols <- DBI::dbGetQuery(
+        con,
+        "SELECT * FROM read_parquet(?) LIMIT 0;",
+        params = list(data_path)
+      ) |>
+        names()
+
+      "any_visits_this_day" %in% cols
+    },
+    error = function(con) {
+      cli::cli_abort(
+        c(
+          "Error reading schema from {.path {data_path}}",
+          "Original error: {con}"
+        ),
+        class = "wrapped_schema_read_error"
+      )
+    }
   )
-  mapped_disease <- disease_map[[disease]]
 
-  check_file_exists(data_path)
+  is_us <- identical(geo_value, "US")
+
+  cli::cli_inform(
+    c(
+      "Using {if (is_api_v2) 'API v2 (facility filtered)' else 'API v1'}",
+      "query for {.val {geo_value}}"
+    )
+  )
+
+  disease_param <- if (disease == "COVID-19") paste0(disease, "%") else disease
 
-  parameters <- list(
+  base_params <- list(
     data_path = data_path,
-    disease = mapped_disease,
+    disease = disease_param,
     min_ref_date = stringify_date(min_reference_date),
     max_ref_date = stringify_date(max_reference_date),
     report_date = stringify_date(report_date)
   )
 
-  # We need different queries for the states and the US overall. For US overall
-  # we need to aggregate over all the facilities in all the states. For the
-  # states, we need to aggregate over all the facilities in that one state
-  if (geo_value == "US") {
-    query <- "
-   SELECT
-     report_date,
-     reference_date,
-     CASE
-       WHEN disease = 'COVID-19/Omicron' THEN 'COVID-19'
-       ELSE disease
-     END AS disease,
-     -- We want to inject the 'US' as our abbrevation here bc data is not agg'd
-     'US' AS geo_value,
-      sum(value) AS confirm
-    FROM read_parquet(?)
-    WHERE 1=1
-      AND disease = ?
-      AND metric = 'count_ed_visits'
-      AND reference_date >= ? :: DATE
-      AND reference_date <= ? :: DATE
-      AND report_date = ? :: DATE
-    GROUP BY reference_date, report_date, disease
-    ORDER BY reference_date
-   "
+  geo_select <- if (is_us) {
+    "'US' AS geo_value"
   } else {
-    # We want just one state so aggregate over facilites in that one state only
-    query <- "
-  SELECT
-    report_date,
-    reference_date,
-    CASE
-     WHEN disease = 'COVID-19/Omicron' THEN 'COVID-19'
-     ELSE disease
-    END AS disease,
-    geo_value AS geo_value,
-    sum(value) AS confirm,
-  FROM read_parquet(?)
-  WHERE 1=1
-    AND disease = ?
-    AND metric = 'count_ed_visits'
-    AND reference_date >= ? :: DATE
-    AND reference_date <= ? :: DATE
-    AND report_date = ? :: DATE
-    AND geo_value = ?
-  GROUP BY geo_value, reference_date, report_date, disease
-  ORDER BY reference_date
-  "
-    # Append `geo_value` to the query
-    parameters <- c(parameters, list(geo_value = geo_value))
+    "geo_value"
+  }
+
+  geo_filter <- if (is_us) {
+    ""
+  } else {
+    "AND geo_value = ?"
+  }
+
+  group_by <- if (is_us) {
+    "GROUP BY reference_date, report_date, disease"
+  } else {
+    "GROUP BY geo_value, reference_date, report_date, disease"
+  }
+
+  if (!is_api_v2) {
+    query <- glue::glue(
+      "
+      SELECT
+        report_date,
+        reference_date,
+        CASE
+          WHEN disease = 'COVID-19/Omicron' THEN 'COVID-19'
+          ELSE disease
+        END AS disease,
+        {geo_select},
+        SUM(value) AS confirm
+      FROM read_parquet(?)
+      WHERE disease LIKE ?
+        AND metric = 'count_ed_visits'
+        AND reference_date >= ?::DATE
+        AND reference_date <= ?::DATE
+        AND report_date = ?::DATE
+        {geo_filter}
+      {group_by}
+      ORDER BY reference_date
+    "
+    )
+
+    params <- base_params
+    if (!is_us) {
+      params <- c(params, list(geo_value = geo_value))
+    }
+  } else {
+    query <- glue::glue(
+      "
+      WITH facility_checks AS (
+        SELECT
+          *,
+          AVG(IF(any_visits_this_day, 1, 0)) OVER (
+            PARTITION BY facility
+          ) AS proportion_true
+        FROM read_parquet(?)
+        WHERE disease LIKE ?
+          AND metric = 'count_ed_visits'
+          AND reference_date >= ?::DATE
+          AND reference_date <= ?::DATE
+          AND report_date = ?::DATE
+          {geo_filter}
+      )
+      SELECT
+        report_date,
+        reference_date,
+        CASE
+          WHEN disease = 'COVID-19/Omicron' THEN 'COVID-19'
+          ELSE disease
+        END AS disease,
+        {geo_select},
+        SUM(value) AS confirm
+      FROM facility_checks
+      WHERE proportion_true >= ?
+      -- `WHERE` filters before the GROUP BY, so this filter excludes
+      -- from the agg all facilities with insufficient reporting
+      {group_by}
+      ORDER BY reference_date
+    "
+    )
+
+    params <- base_params
+    if (!is_us) {
+      params <- c(params, list(geo_value = geo_value))
+    }
+    params <- c(
+      params,
+      list(facility_active_proportion = facility_active_proportion)
+    )
   }
 
-  con <- DBI::dbConnect(duckdb::duckdb())
-  on.exit(expr = DBI::dbDisconnect(con))
   df <- rlang::try_fetch(
     DBI::dbGetQuery(
       con,
       statement = query,
-      params = unname(parameters)
+      params = unname(params)
     ),
     error = function(con) {
       cli::cli_abort(
         c(
           "Error fetching data from {.path {data_path}}",
           "Using parameters:",
-          "*" = "data_path: {.path {parameters[['data_path']]}}",
-          "*" = "mapped_disease: {.val {parameters[['disease']]}}",
-          "*" = "min_reference_date: {.val {parameters[['min_ref_date']]}}",
-          "*" = "max_reference_date: {.val {parameters[['max_ref_date']]}}",
-          "*" = "report_date: {.val {parameters[['report_date']]}}",
+          "*" = "data_path: {.path {base_params[['data_path']]}}",
+          "*" = "disease: {.val {base_params[['disease']]}}",
+          "*" = "min_reference_date: {.val {base_params[['min_ref_date']]}}",
+          "*" = "max_reference_date: {.val {base_params[['max_ref_date']]}}",
+          "*" = "report_date: {.val {base_params[['report_date']]}}",
+          "*" = "geo_value: {.val {geo_value}}",
+          "*" = paste0(
+            "facility_active_proportion: ",
+            "{.val {facility_active_proportion}}"
+          ),
           "Original error: {con}"
         ),
         class = "wrapped_invalid_query"
@@ -132,20 +206,20 @@ read_data <- function(
     }
   )
 
-  # Guard against empty return
   if (nrow(df) == 0) {
     cli::cli_abort(
       c(
         "No data matching returned from {.path {data_path}}",
-        "Using parameters {parameters}"
+        "Using parameters {base_params}"
       ),
       class = "empty_return"
     )
   }
-  # Warn for incomplete return
+
   n_rows_expected <- as.Date(max_reference_date) -
     as.Date(min_reference_date) +
     1
+
   if (nrow(df) != n_rows_expected) {
     expected_dates <- seq.Date(
       from = as.Date(min_reference_date),
@@ -153,12 +227,9 @@ read_data <- function(
       by = "day"
     )
     missing_dates <- stringify_date(
-      # Setdiff strips the date attribute from the objects; re-add it so that we
-      # can pretty-format the date for printing
-      as.Date(
-        setdiff(expected_dates, df[["reference_date"]])
-      )
+      as.Date(setdiff(expected_dates, df[["reference_date"]]))
     )
+
     cli::cli_warn(
       c(
         "Incomplete number of rows returned",
@@ -171,5 +242,5 @@ read_data <- function(
   }
 
   cli::cli_alert_success("Read {nrow(df)} rows from {.path {data_path}}")
-  return(df)
+  df
 }
diff --git a/azure/run_container_app_job.py b/azure/run_container_app_job.py
index 25ec2d5e..92118926 100644
--- a/azure/run_container_app_job.py
+++ b/azure/run_container_app_job.py
@@ -98,7 +98,7 @@ def main(image_name: str, config_container: str, job_id: str):
         state_config = config_path.split("/").pop()
         job_execution_id = job_execution.id.split("/").pop()
         print(
-            f"Started Container App Job #{i+1}/{len(task_configs)} for {state_config} with execution ID: {job_execution_id}"
+            f"Started Container App Job #{i + 1}/{len(task_configs)} for {state_config} with execution ID: {job_execution_id}"
         )
 
 
@@ -118,7 +118,7 @@ def main(image_name: str, config_container: str, job_id: str):
         "--config_container",
         type=str,
         help="The name of the storage container where config files are located",
-        default="rt-epinow2-config"
+        default="rt-epinow2-config",
     )
     parser.add_argument(
         "--job_id",
diff --git a/man/Config.Rd b/man/Config.Rd
index 4c451e2a..b53b18c0 100644
--- a/man/Config.Rd
+++ b/man/Config.Rd
@@ -25,7 +25,8 @@ Config(
   parameters = class_missing,
   sampler_opts = class_missing,
   exclusions = class_missing,
-  output_container = class_missing
+  output_container = class_missing,
+  facility_active_proportion = class_missing
 )
 }
 \arguments{
@@ -85,6 +86,13 @@ criteria.}
 
 \item{output_container}{An optional string specifying the output blob storage
 container.}
+
+\item{facility_active_proportion}{A numeric value between 0 and 1 specifying
+the proportion of days during the modeling period that facilities must have
+reported at least one informative discharge diagnosis (DDI) to be included in
+the analysis. Default is 0.94 (require
+active reporting for >=53 of 56 days in the training period).
+Lower values allow inclusion of facilities with fewer active days.}
 }
 \description{
 Represents the complete configuration for the pipeline.
diff --git a/man/read_data.Rd b/man/read_data.Rd
index 8e9875a2..15fa0bd0 100644
--- a/man/read_data.Rd
+++ b/man/read_data.Rd
@@ -10,7 +10,8 @@ read_data(
   geo_value,
   report_date,
   max_reference_date,
-  min_reference_date
+  min_reference_date,
+  facility_active_proportion = 0.94
 )
 }
 \arguments{
@@ -31,15 +32,27 @@ date. Formatted as "YYYY-MM-DD".}
 
 \item{min_reference_date}{A string representing the minimum reference
 date. Formatted as "YYYY-MM-DD".}
+
+\item{facility_active_proportion}{A numeric value between 0 and 1 specifying
+the proportion of days during the modeling period that facilities must have
+reported at least one informative discharge diagnosis (DDI) to be included in
+the analysis. Default is 0.94 (require
+active reporting for >=53 of 56 days in the training period).
+Lower values allow inclusion of facilities with fewer active days.}
 }
 \value{
 A dataframe with one or more rows and columns \code{report_date},
 \code{reference_date}, \code{geo_value}, \code{confirm}
 }
 \description{
-Each row of the table corresponds to a single facilities' cases for a
-reference-date/report-date/disease tuple. We want to aggregate these counts
-to the level of geographic aggregate/report-date/reference-date/disease.
+Reads in data from either data API v1 or v2. Data API version is
+intuited by read_data by the presence of the \code{any_visits_this_day} column
+in the underlying data. Each row of the table corresponds to a single
+facilities' cases for a reference-date/report-date/disease tuple.
+We want to aggregate these counts to the level of geographic
+aggregate/report-date/reference-date/disease. The
+\emph{facility_active_proportion} field is used to filter facilities with data
+outages from the data API v2 (this field is not used for data API v1).
 }
 \details{
 We handle two distinct cases for geographic aggregates:
diff --git a/tests/testthat/data/CA_COVID-19.json b/tests/testthat/data/CA_COVID-19.json
index ae4c2bc2..7c3c2122 100644
--- a/tests/testthat/data/CA_COVID-19.json
+++ b/tests/testthat/data/CA_COVID-19.json
@@ -13,6 +13,7 @@
   "geo_type": "state",
   "report_date": "2024-11-26",
   "production_date": "2024-11-26",
+  "output_container": "testing",
   "parameters": {
     "as_of_date": "2024-11-26",
     "generation_interval": {
@@ -59,5 +60,6 @@
     0.5,
     0.95
   ],
-  "model": "EpiNow2"
+  "model": "EpiNow2",
+  "facility_active_proportion": 0.94
 }
diff --git a/tests/testthat/data/CA_apiv2_test.parquet b/tests/testthat/data/CA_apiv2_test.parquet
new file mode 100644
index 00000000..f38e712d
Binary files /dev/null and b/tests/testthat/data/CA_apiv2_test.parquet differ
diff --git a/tests/testthat/data/README.md b/tests/testthat/data/README.md
new file mode 100644
index 00000000..9d8905ca
--- /dev/null
+++ b/tests/testthat/data/README.md
@@ -0,0 +1,17 @@
+
+- **Parquet test fixtures**
+  - `CA_test.parquet`: Synthetic test data containing metric 'count_ed_visits' and 'COVID-19/Omicron' as disease (data in format of API v1). `read_data` converts 'COVID-19/Omicron' disease to 'COVID-19'. Does _not_ contain column `any_visits_this_day`. This data was created in Fall 2024 by taking the format of a production DATA API v1 .parquet file and adding synthetic data to it.
+  - `CA_apiv2_test.parquet`: Synthetic test data containing metric 'count_ed_visits' and 'COVID-19' as disease (data in format of API v2). Contains column `any_visits_this_day`. This data was created in Fall 2024 by taking the format of a production DATA API v2 .parquet file and adding synthetic data to it.
+  - `test_data.parquet`/`us_overall_test_data.parquet`: Package data. See `?gostic_toy_rt` and `data-raw/convert_gostic_toy_rt_to_test_dataset.R`
+  - `test_parameters.parquet`: Package data. See `?sir_gt_pmf` and `data-raw/sir_gt_pmf.R`
+
+- **JSON test configs**
+  - `CA_COVID-19.json`: EpiNow2 task config for a CA/COVID-19 run (dates, model/sampler settings) pointing to `CA_test.parquet`, with no exclusions (`exclusions.path: null`).
+  - `bad_config.json`: Intentionally invalid config value to test validation/error handling
+  - `sample_config_no_exclusion.json`: Example valid config for no exclusions.
+  - `sample_config_with_exclusion.json`: Example config that includes exclusions
+  - `v_bad_config.json`: Intentionally incomplete/invalid config (only `job_id` and `task_id`) to test required fields are enforced.
+
+- **CSV exclusions fixtures**
+  - `test_exclusions.csv`: Minimal exclusions fixture (columns `reference_date, report_date, state, disease`) with one row for `state=test, disease=test`.
+  - `test_big_exclusions.csv`: Larger exclusions fixture (same columns) to test impact of many exclusions
diff --git a/tests/testthat/test-pipeline.R b/tests/testthat/test-pipeline.R
index d5c8146b..b3f63a39 100644
--- a/tests/testthat/test-pipeline.R
+++ b/tests/testthat/test-pipeline.R
@@ -77,7 +77,7 @@ test_that("Process pipeline produces expected outputs and returns success", {
   config_path <- file.path(input_dir, "sample_config_with_exclusion.json")
   config <- read_json_into_config(
     config_path,
-    c("exclusions", "output_container")
+    c("exclusions", "output_container", "facility_active_proportion")
   )
   # Read from locally
   output_dir <- "pipeline_test"
@@ -110,7 +110,7 @@ test_that("Runs on config from generator as of 2024-11-26", {
   input_dir <- test_path("data")
   config <- read_json_into_config(
     file.path(input_dir, config_path),
-    c("exclusions", "output_container")
+    c("exclusions", "output_container", "facility_active_proportion")
   )
   # Read from locally
   output_dir <- test_path("pipeline_test")
diff --git a/tests/testthat/test-read_data.R b/tests/testthat/test-read_data.R
index b54e4b04..c5d9b108 100644
--- a/tests/testthat/test-read_data.R
+++ b/tests/testthat/test-read_data.R
@@ -100,7 +100,7 @@ test_that("An invalid query throws a wrapped error", {
       min_reference_date = "2023-01-02",
       max_reference_date = "2023-01-22"
     ),
-    class = "wrapped_invalid_query"
+    class = "wrapped_schema_read_error"
   )
 })
 
@@ -121,7 +121,7 @@ test_that("Incomplete return throws warning", {
   )
 })
 
-test_that("Replace COVID-19/Omicron with COVID-19, one state", {
+test_that("Replace COVID-19/Omicron with COVID-19, one state (API v1)", {
   data_path <- test_path("data/CA_test.parquet")
 
   actual <- read_data(
@@ -140,7 +140,7 @@ test_that("Replace COVID-19/Omicron with COVID-19, one state", {
 })
 
 
-test_that("Replace COVID-19/Omicron with COVID-19, US", {
+test_that("Replace COVID-19/Omicron with COVID-19, US (API v1)", {
   data_path <- test_path("data/CA_test.parquet")
 
   actual <- read_data(
@@ -157,3 +157,95 @@ test_that("Replace COVID-19/Omicron with COVID-19, US", {
   expect_false("COVID-19/Omicron" %in% actual$disease)
   expect_true(all(actual$disease == "COVID-19"))
 })
+
+test_that("API v2 with COVID-19, one state", {
+  data_path <- test_path("data/CA_apiv2_test.parquet")
+
+  actual <- read_data(
+    data_path,
+    disease = "COVID-19",
+    geo_value = "CA",
+    report_date = "2024-11-26",
+    min_reference_date = as.Date("2024-06-01"),
+    max_reference_date = "2024-11-25",
+    facility_active_proportion = 1.0
+  )
+
+  # Expect that there should be no "COVID-19/Omicron" in the data,
+  # only "COVID-19"
+  expect_false("COVID-19/Omicron" %in% actual$disease)
+  expect_true(all(actual$disease == "COVID-19"))
+})
+
+test_that("API v2 with COVID-19, US (default facility_active_proportion)", {
+  data_path <- test_path("data/CA_apiv2_test.parquet")
+
+  actual <- read_data(
+    data_path,
+    disease = "COVID-19",
+    geo_value = "US",
+    report_date = "2024-11-26",
+    min_reference_date = as.Date("2024-06-01"),
+    max_reference_date = "2024-11-25"
+  )
+
+  # Expect that there should be no "COVID-19/Omicron" in the data,
+  # only "COVID-19"
+  expect_false("COVID-19/Omicron" %in% actual$disease)
+  expect_true(all(actual$disease == "COVID-19"))
+})
+
+test_that("facility_active_proportion affects counts (API v2)", {
+  data_path <- test_path("data/CA_apiv2_test.parquet")
+
+  # Read data with facility_active_proportion = 1.0
+  # (stricter - only facilities active all days)
+  data_strict <- read_data(
+    data_path,
+    disease = "COVID-19",
+    geo_value = "CA",
+    report_date = "2024-11-26",
+    min_reference_date = as.Date("2024-06-01"),
+    max_reference_date = "2024-11-25",
+    facility_active_proportion = 1.0
+  )
+
+  # Read data with facility_active_proportion = 0.5
+  # (less strict - facilities active >=50% of days)
+  data_lenient <- read_data(
+    data_path,
+    disease = "COVID-19",
+    geo_value = "CA",
+    report_date = "2024-11-26",
+    min_reference_date = as.Date("2024-06-01"),
+    max_reference_date = "2024-11-25",
+    facility_active_proportion = 0.5
+  )
+
+  # Both should have the same structure
+  expect_equal(names(data_strict), names(data_lenient))
+  expect_equal(nrow(data_strict), nrow(data_lenient))
+
+  expect_true(
+    all(data_lenient$confirm >= data_strict$confirm),
+    info = "Lenient data should have equal or more counts than strict data"
+  )
+})
+
+test_that("facility_active_proportion doesn't affects counts (API v1)", {
+  # facility_active_proportion is ignored for API v1
+  data_path <- test_path("data/CA_test.parquet")
+
+  actual <- read_data(
+    data_path,
+    disease = "COVID-19",
+    geo_value = "US",
+    report_date = "2024-11-26",
+    min_reference_date = as.Date("2024-06-01"),
+    max_reference_date = "2024-11-25"
+  )
+
+  expected_rows <- 178
+  actual_rows <- actual |> nrow()
+  expect_equal(expected_rows, actual_rows)
+})