From 3039d03afe7497f861b87e5aedc5c0bd6e5eb6fc Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 00:55:49 -0500
Subject: [PATCH 01/16] Create col_anomaly_check.R

---
 R/col_anomaly_check.R | 94 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 R/col_anomaly_check.R

diff --git a/R/col_anomaly_check.R b/R/col_anomaly_check.R
new file mode 100644
index 000000000..27f476375
--- /dev/null
+++ b/R/col_anomaly_check.R
@@ -0,0 +1,94 @@
+#
+#                _         _    _      _                _    
+#               (_)       | |  | |    | |              | |   
+#  _ __    ___   _  _ __  | |_ | |__  | |  __ _  _ __  | | __
+# | '_ \  / _ \ | || '_ \ | __|| '_ \ | | / _` || '_ \ | |/ /
+# | |_) || (_) || || | | || |_ | |_) || || (_| || | | ||   < 
+# | .__/  \___/ |_||_| |_| \__||_.__/ |_| \__,_||_| |_||_|\_\
+# | |                                                        
+# |_|                                                        
+# 
+# This file is part of the 'rich-iannone/pointblank' package.
+# 
+# (c) Richard Iannone <riannone@me.com>
+# 
+# For full copyright and license information, please look at
+# https://rich-iannone.github.io/pointblank/LICENSE.html
+#
+
+#' Do columns in the table contain anomalous values?
+#'
+#' @name col_anomaly_check
+NULL
+
+#' @rdname col_anomaly_check
+#' @import rlang
+#' @export
+col_anomaly_check <- function(x,
+                              x_column,
+                              y_column,
+                              preconditions = NULL,
+                              actions = NULL,
+                              step_id = NULL,
+                              label = NULL,
+                              brief = NULL,
+                              active = TRUE) {
+  
+  x_column <- rlang::enquo(x_column)
+  y_column <- rlang::enquo(y_column)
+  
+  # Resolve the columns based on the expression
+  x_column <- resolve_columns(x = x, var_expr = x_column, preconditions)
+  y_column <- resolve_columns(x = x, var_expr = y_column, preconditions)
+  
+  if (is_a_table_object(x)) {
+    
+    secret_agent <-
+      create_agent(x, label = "::QUIET::") %>%
+      col_anomaly_check(
+        x_column = x_column,
+        y_column = y_column,
+        preconditions = preconditions,
+        label = label,
+        brief = brief,
+        actions = prime_actions(actions),
+        active = active
+      ) %>%
+      interrogate()
+    
+    return(x)
+  }
+  
+  agent <- x
+  
+  if (is.null(brief)) {
+    
+    brief <-
+      create_autobrief(
+        agent = agent,
+        assertion_type = "col_anomaly_check"
+      )
+  }
+  
+  # Normalize any provided `step_id` value(s)
+  step_id <- normalize_step_id(step_id, columns, agent)
+  
+  # Check `step_id` value(s) against all other `step_id`
+  # values in earlier validation steps
+  check_step_id_duplicates(step_id, agent)
+  
+  # Add a validation step
+  agent <-
+    create_validation_step(
+      agent = agent,
+      assertion_type = "col_anomaly_check",
+      column = x_column,
+      values = y_column,
+      preconditions = preconditions,
+      actions = covert_actions(actions, agent),
+      step_id = step_id,
+      label = label,
+      brief = brief,
+      active = active
+    )
+}

From e28385ec21b031781bf7515a5af2e7dc9e612c6c Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 00:55:54 -0500
Subject: [PATCH 02/16] Create col_anomaly_check.Rd

---
 man/col_anomaly_check.Rd | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 man/col_anomaly_check.Rd

diff --git a/man/col_anomaly_check.Rd b/man/col_anomaly_check.Rd
new file mode 100644
index 000000000..b1f8a78da
--- /dev/null
+++ b/man/col_anomaly_check.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/col_anomaly_check.R
+\name{col_anomaly_check}
+\alias{col_anomaly_check}
+\title{Do columns in the table contain anomalous values?}
+\usage{
+col_anomaly_check(
+  x,
+  x_column,
+  y_column,
+  preconditions = NULL,
+  actions = NULL,
+  step_id = NULL,
+  label = NULL,
+  brief = NULL,
+  active = TRUE
+)
+}
+\description{
+Do columns in the table contain anomalous values?
+}

From 96f0f8b35d0342c0a93797c6bb6ce675bdfa5516 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 00:55:57 -0500
Subject: [PATCH 03/16] Update NAMESPACE

---
 NAMESPACE | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NAMESPACE b/NAMESPACE
index 7ba1ab726..e439551dd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,7 @@ export(affix_datetime)
 export(all_passed)
 export(between)
 export(case_when)
+export(col_anomaly_check)
 export(col_exists)
 export(col_is_character)
 export(col_is_date)

From c5977f05f96a96cb9b12ce77746c59c63fa1e22a Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 00:57:45 -0500
Subject: [PATCH 04/16] Update DESCRIPTION

---
 DESCRIPTION | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index dbf41f1d9..b03d4ace1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -41,8 +41,9 @@ Imports:
     htmltools (>= 0.4.0),
     log4r (>= 0.3.2),
     knitr (>= 1.30),
-    rlang (>= 0.4.9),
     magrittr,
+    mgcv (>= 1.8-33),
+    rlang (>= 0.4.9),
     scales (>= 1.1.1),
     testthat (>= 2.3.2),
     tibble (>= 3.0.4),

From b500637700d57e364dddb4dcdc4d9bddce663106 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 22:26:13 -0500
Subject: [PATCH 05/16] Add `anomaly_detection_ts()` function

---
 R/anomaly_detection.R | 419 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 419 insertions(+)
 create mode 100644 R/anomaly_detection.R

diff --git a/R/anomaly_detection.R b/R/anomaly_detection.R
new file mode 100644
index 000000000..3da881cc0
--- /dev/null
+++ b/R/anomaly_detection.R
@@ -0,0 +1,419 @@
+anomaly_detection_ts <- function(data_tbl,
+                                 time_col,
+                                 value_col) {
+  
+  data_tbl <-
+    data_tbl %>%
+    dplyr::select(dplyr::all_of(c(time_col, value_col))) %>%
+    dplyr::rename(time = 1, value = 2) %>%
+    dplyr::filter(!is.na(time)) %>%
+    dplyr::filter(!is.na(value))
+  
+  n_rows_data_tbl <-
+    data_tbl %>%
+    dplyr::count() %>%
+    dplyr::pull(n)
+  
+  mean <- get_tbl_value_mean(data_tbl)
+  sd <- get_tbl_value_sd(data_tbl)
+  
+  # Create a standardized version of the input data set; this centers the
+  # data (yielding a `z` value of approximately 0) and scales the data so that
+  # the standard deviation is 1
+  data_tbl_standardized <-
+    data_tbl %>%
+    dplyr::select(time, value) %>%
+    dplyr::mutate(z = (value - mean) / sd)
+  
+  # Sample random datapoints from the db table; this is needed for the fit
+  # model which will be used for detrending the standardized dataset
+  random_rows <-
+    c(
+      1:5,
+      sample(
+        x = seq_len(n_rows_data_tbl),
+        size = floor(0.05 * n_rows_data_tbl),
+        replace = FALSE
+      ),
+      (n_rows_data_tbl - 5):n_rows_data_tbl
+    ) %>%
+    sort() %>%
+    unique()
+  
+  # Collect random rows from the standardized dataset
+  data_tbl_standardized_sample <-
+    data_tbl_standardized %>%
+    dplyr::filter(dplyr::row_number() %in% random_rows) %>%
+    dplyr::collect()
+  
+  # Prepare a data frame suitable for spline generation via
+  # the `mgcv::gam()` function
+  data_tbl_gam_df <-
+    data_tbl_standardized_sample %>%
+    dplyr::mutate(time = as.integer(time)) %>%
+    dplyr::rename(x = time, y = z) %>%
+    dplyr::select(-value) %>%
+    as.data.frame()
+  
+  # Generate a GAM model for the sampled points
+  gam_model <- mgcv::gam(y ~ s(x), data = data_tbl_gam_df)
+  
+  # Create a table with the predicted values
+  gam_model_tbl <-
+    dplyr::tibble(
+      time = data_tbl_standardized_sample$time,
+      fit = unname(mgcv::predict.gam(gam_model)),
+    )
+  
+  # # Plot the sampled data points along with the model fit
+  # ggplot(data_tbl_standardized_sample) +
+  #   geom_point(aes(x = time, y = z)) +
+  #   geom_line(aes(x = time, y = fit), data = gam_model_tbl, color = "blue")
+  
+  # Perform an analysis on the standardized data; this focuses only on the
+  # unchanged `time` variable, giving us columns that indicate how many data
+  # points are in each time grouping
+  time_interval_n_analysis <- add_time_granularity_cols(data_tbl_standardized)
+  
+  # This provides a time interval summary and should be
+  # used to detemrine which time granularity to use
+  time_interval_summary <-
+    time_interval_n_analysis %>%
+    dplyr::select(starts_with("n_")) %>%
+    dplyr::group_by() %>%
+    dplyr::summarize_all(
+      .funs = list(
+        ~ mean(., na.rm = TRUE),
+        ~ min(., na.rm = TRUE),
+        ~ max(., na.rm = TRUE)
+      )
+    ) %>%
+    dplyr::collect() %>%
+    tidyr::pivot_longer(
+      cols = dplyr::everything(),
+      names_prefix = "n_",
+      names_to = c("timespan", "stat"),
+      values_to = "values",
+      names_sep = "_"
+    ) %>%
+    tidyr::pivot_wider(
+      names_from = "stat",
+      values_from = "values"
+    )
+  
+  # Automatically select the time granularity with the highest
+  # average number of data points above 20 per window
+  time_int_gt_20 <-
+    time_interval_summary %>% dplyr::filter(mean >= 20)
+  
+  if (nrow(time_int_gt_20) < 1) {
+    stop(
+      "There aren't enough data points in any of the possible time granularity types.",
+      call. = FALSE
+    )
+  }
+  
+  time_granularity_list <-
+    time_int_gt_20 %>% 
+    dplyr::arrange(dplyr::desc(mean)) %>%
+    utils::head(1) %>%
+    as.list()
+  
+  timespan_select <- time_granularity_list$timespan
+  
+  # This provides the min and max of `z`
+  z_interval_n_analysis <-
+    time_interval_n_analysis %>%
+    dplyr::group_by() %>%
+    dplyr::summarize(
+      min = min(z, na.rm = TRUE),
+      max = max(z, na.rm = TRUE)
+    )
+  
+  z_interval_breaks <-
+    c(
+      seq(from = 0, to =  1, by = 0.5),
+      seq(from = 0, to = -1, by = -0.5)
+    ) %>%
+    unique() %>%
+    sort()
+  
+  # Simplify the `time_interval_n_analysis` by only keeping key columns
+  data_tbl <-
+    time_interval_n_analysis %>%
+    dplyr::select(1, 2, z, dplyr::all_of(paste0("time_", timespan_select)))
+  
+  # Create a local lookup table that contains all of the time groups and
+  # an empty column with the `model_fit` values; this is important because the
+  # sampling used to create the spline fit likely won't contain values for all
+  # time groups, but, we can use this template to account for all available
+  # time groups and fill in missing values
+  time_groups <-
+    data_tbl %>%
+    dplyr::select(starts_with("time_")) %>%
+    dplyr::distinct() %>%
+    dplyr::collect() %>%
+    dplyr::mutate(model_fit = NA_real_)
+  
+  # Create a piecewise representation of the GAM model. This necessary (but
+  # also not very lossy approximation) makes it possible to join the GAM
+  # predictions to the DB data
+  gam_model_piecewise_tbl <-
+    gam_model_tbl %>%
+    add_time_granularity_cols() %>%
+    dplyr::select(1, 2, ends_with(timespan_select)) %>%
+    dplyr::group_by(time_1d) %>%
+    dplyr::summarize(
+      model_fit = mean(fit, na.rm = TRUE),
+      .groups = "drop"
+    ) %>%
+    dplyr::bind_rows(time_groups) %>%
+    dplyr::arrange(time_1d) %>%
+    tidyr::fill(model_fit) %>%
+    dplyr::distinct()
+  
+  data_tbl <-
+    data_tbl %>%
+    dplyr::left_join(
+      gam_model_piecewise_tbl,
+      by = paste0("time_", timespan_select),
+      copy = TRUE
+    ) %>%
+    dplyr::mutate(z_detrend = model_fit - z)
+  
+  encoded_windows <-
+    data_tbl %>%
+    dplyr::group_by(time_1d) %>%
+    dplyr::summarize(
+      mean = mean(z_detrend, na.rm = TRUE)
+    ) %>%
+    dplyr::mutate(code_n_w = dplyr::case_when(
+      mean < -2.0 ~ 1,
+      mean < -1.5 ~ 2,
+      mean < -1.0 ~ 3,
+      mean < -0.5 ~ 4,
+      mean < -0.0 ~ 5,
+      mean <  0.5 ~ 6,
+      mean <  1.0 ~ 7,
+      mean <  1.5 ~ 8,
+      mean <  2.0 ~ 9,
+      mean < 9999 ~ 10
+    )) %>%
+    dplyr::mutate(code_l_w = dplyr::case_when(
+      mean < -2.0 ~ "A",
+      mean < -1.5 ~ "B",
+      mean < -1.0 ~ "C",
+      mean < -0.5 ~ "D",
+      mean < -0.0 ~ "E",
+      mean <  0.5 ~ "F",
+      mean <  1.0 ~ "G",
+      mean <  1.5 ~ "H",
+      mean <  2.0 ~ "I",
+      mean < 9999 ~ "J"
+    ))
+  
+  encoded_points <-
+    data_tbl %>%
+    dplyr::mutate(code_n_p = dplyr::case_when(
+      z_detrend < -2.0 ~ 1,
+      z_detrend < -1.5 ~ 2,
+      z_detrend < -1.0 ~ 3,
+      z_detrend < -0.5 ~ 4,
+      z_detrend < -0.0 ~ 5,
+      z_detrend <  0.5 ~ 6,
+      z_detrend <  1.0 ~ 7,
+      z_detrend <  1.5 ~ 8,
+      z_detrend <  2.0 ~ 9,
+      z_detrend < 9999 ~ 10
+    )) %>%
+    dplyr::mutate(code_l_p = dplyr::case_when(
+      z_detrend < -2.0 ~ "A",
+      z_detrend < -1.5 ~ "B",
+      z_detrend < -1.0 ~ "C",
+      z_detrend < -0.5 ~ "D",
+      z_detrend < -0.0 ~ "E",
+      z_detrend <  0.5 ~ "F",
+      z_detrend <  1.0 ~ "G",
+      z_detrend <  1.5 ~ "H",
+      z_detrend <  2.0 ~ "I",
+      z_detrend < 9999 ~ "J"
+    ))
+  
+  unified_data <-
+    encoded_points %>%
+    dplyr::left_join(
+      encoded_windows, by = "time_1d"
+    ) %>%
+    dplyr::mutate(code_n_dist = abs(code_n_p - code_n_w)) %>%
+    dplyr::mutate(
+      sd_z_detrend_ul = 2,
+      sd_z_detrend_ll = -2
+    ) %>%
+    dplyr::mutate(outlier_sd_z = dplyr::case_when(
+      z_detrend >= sd_z_detrend_ul | z_detrend <= sd_z_detrend_ll ~ TRUE,
+      TRUE ~ FALSE
+    )) %>%
+    dplyr::mutate(outlier_dist = dplyr::case_when(
+      code_n_dist > 2 ~ TRUE,
+      TRUE ~ FALSE
+    )) %>%
+    dplyr::mutate(outlier = dplyr::if_else(
+      outlier_sd_z | outlier_dist, TRUE, FALSE
+    )) %>%
+    dplyr::mutate(`pb_is_good_` = !outlier)
+  
+  
+  # plot_data <-
+  #   unified_data %>%
+  #   dplyr::select(1, 2, z, z_detrend, sd_z_detrend_ul, sd_z_detrend_ll, outlier) %>%
+  #   dplyr::collect()
+  # 
+  # # Plot of actual data values with seeded values and marked outliers
+  # ggplot() +
+  #   geom_point(
+  #     data = plot_data %>% dplyr::filter(outlier),
+  #     aes(x = time, y = value), color = "red"
+  #   ) +
+  #   geom_point(
+  #     data = plot_data %>% dplyr::filter(!outlier),
+  #     aes(x = time, y = value)
+  #   ) +
+  #   scale_x_datetime(date_breaks = "1 month")
+  # 
+  # # Plot of standardized data values with seeded values and marked outliers
+  # ggplot() +
+  #   geom_point(
+  #     data = plot_data %>% dplyr::filter(outlier),
+  #     aes(x = time, y = z_detrend), color = "red"
+  #   ) +
+  #   geom_point(
+  #     data = plot_data %>% dplyr::filter(!outlier),
+  #     aes(x = time, y = z_detrend)
+  #   ) +
+  #   geom_hline(yintercept = 1, color = "gray") +
+  #   geom_hline(yintercept = -1, color = "gray") +
+  #   geom_line(data = plot_data, aes(x = time, y = sd_z_detrend_ul), color = "red") +
+  #   geom_line(data = plot_data, aes(x = time, y = sd_z_detrend_ll), color = "red") +
+  #   scale_y_continuous(breaks = seq(-5.0, 5.0, 0.5)) +
+  #   scale_x_datetime(date_breaks = "1 month")
+  
+  
+  unified_data
+}
+
+get_tbl_value_mean <- function(data_tbl) {
+  
+  data_tbl %>%
+    dplyr::summarize(value_mean = mean(value, na.rm = TRUE)) %>%
+    dplyr::pull(value_mean)
+}
+
+get_tbl_value_sd <- function(data_tbl) {
+  
+  mean <- get_tbl_value_mean(data_tbl = data_tbl)
+  
+  variance <-
+    data_tbl %>%
+    dplyr::select(value) %>%
+    dplyr::mutate(
+      "diff" = (!!mean - value)^2
+    ) %>%
+    dplyr::group_by() %>%
+    dplyr::summarize(
+      "var"  = mean(diff, na.rm = TRUE)
+    ) %>%
+    dplyr::pull(var)
+  
+  (abs(variance))^0.5
+}
+
+add_time_granularity_cols <- function(data_tbl) {
+  
+  data_tbl %>%
+    dplyr::mutate(
+      time_1d = substr(as.character(time), 1, 10),
+      time_12h =
+        dplyr::case_when(
+          substr(as.character(time), 12, 13) %in%
+            c("00", "01", "02", "03", "04", "05", "06",
+              "07", "08", "09", "10", "11") ~
+            paste0(substr(as.character(time), 1, 11), "00"),
+          substr(as.character(time), 12, 13) %in%
+            c("12", "13", "14", "15", "16", "17", "18",
+              "19", "20", "21", "22", "23") ~
+            paste0(substr(as.character(time), 1, 11), "12")
+        ),
+      time_8h =
+        dplyr::case_when(
+          substr(as.character(time), 12, 13) %in%
+            c("00", "01", "02", "03", "04", "05", "06", "07") ~
+            paste0(substr(as.character(time), 1, 11), "00"),
+          substr(as.character(time), 12, 13) %in%
+            c("08", "09", "10", "11", "12", "13", "14", "15") ~
+            paste0(substr(as.character(time), 1, 11), "08"),
+          substr(as.character(time), 12, 13) %in%
+            c("16", "17", "18", "19", "20", "21", "22", "23") ~
+            paste0(substr(as.character(time), 1, 11), "16")
+        ),
+      time_6h =
+        dplyr::case_when(
+          substr(as.character(time), 12, 13) %in%
+            c("00", "01", "02", "03", "04", "05") ~
+            paste0(substr(as.character(time), 1, 11), "00"),
+          substr(as.character(time), 12, 13) %in%
+            c("06", "07", "08", "09", "10", "11") ~
+            paste0(substr(as.character(time), 1, 11), "06"),
+          substr(as.character(time), 12, 13) %in%
+            c("12", "13", "14", "15", "16", "17") ~
+            paste0(substr(as.character(time), 1, 11), "12"),
+          substr(as.character(time), 12, 13) %in%
+            c("18", "19", "20", "21", "22", "23") ~
+            paste0(substr(as.character(time), 1, 11), "16")
+        ),
+      time_1h = substr(as.character(time), 1, 13),
+      time_30m =
+        dplyr::case_when(
+          substr(as.character(time), 15, 15) %in% c("3", "4", "5") ~
+            paste0(substr(as.character(time), 1, 14), "3"),
+          substr(as.character(time), 15, 15) %in% c("0", "1", "2") ~
+            paste0(substr(as.character(time), 1, 14), "0")
+        ),
+      time_15m =
+        dplyr::case_when(
+          substr(as.character(time), 15, 16) %in%
+            c("00", "01", "02", "03", "04", "05", "06", "07",
+              "08", "09", "10", "11", "12", "13", "14") ~
+            paste0(substr(as.character(time), 1, 14), "00"),
+          substr(as.character(time), 15, 16) %in%
+            c("15", "16", "17", "18", "19", "20", "21", "22",
+              "23", "24", "25", "26", "27", "28", "29") ~
+            paste0(substr(as.character(time), 1, 14), "15"),
+          substr(as.character(time), 15, 16) %in%
+            c("30", "31", "32", "33", "34", "35", "36", "37",
+              "38", "39", "40", "41", "42", "43", "44") ~
+            paste0(substr(as.character(time), 1, 14), "30"),
+          substr(as.character(time), 15, 16) %in%
+            c("45", "46", "47", "48", "49", "50", "51", "52",
+              "53", "54", "55", "56", "57", "58", "59") ~
+            paste0(substr(as.character(time), 1, 14), "45")
+        ),
+      time_1m = substr(as.character(time), 1, 16)
+    ) %>%
+    dplyr::group_by(time_1d) %>%
+    dplyr::mutate(n_1d = n()) %>%
+    dplyr::group_by(time_12h) %>%
+    dplyr::mutate(n_12h = n()) %>%
+    dplyr::group_by(time_8h) %>%
+    dplyr::mutate(n_8h = n()) %>%
+    dplyr::group_by(time_6h) %>%
+    dplyr::mutate(n_6h = n()) %>%
+    dplyr::group_by(time_1h) %>%
+    dplyr::mutate(n_1h = n()) %>%
+    dplyr::group_by(time_30m) %>%
+    dplyr::mutate(n_30m = n()) %>%
+    dplyr::group_by(time_15m) %>%
+    dplyr::mutate(n_15m = n()) %>%
+    dplyr::group_by(time_1m) %>%
+    dplyr::mutate(n_1m = n()) %>%
+    dplyr::ungroup()
+}

From cf212fd56c7448c84d13c0b555b3c4ec77cf60d7 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 22:26:18 -0500
Subject: [PATCH 06/16] Update col_anomaly_check.R

---
 R/col_anomaly_check.R | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/R/col_anomaly_check.R b/R/col_anomaly_check.R
index 27f476375..1a180f213 100644
--- a/R/col_anomaly_check.R
+++ b/R/col_anomaly_check.R
@@ -18,6 +18,30 @@
 
 #' Do columns in the table contain anomalous values?
 #'
+#' The `col_anomaly_check()` validation function, the
+#' `expect_col_anomaly_check()` expectation function, and the
+#' `test_col_anomaly_check()` test function all check whether column values
+#' contain anomalous values in a time series. Because this family of functions
+#' only currently supports a time-series analysis for the detection of
+#' anomalies, the `x_column` must contain date-time values and the `y_column`
+#' must contain numeric values. The validation function can be used directly on
+#' a data table or with an *agent* object (technically, a `ptblank_agent`
+#' object) whereas the expectation and test functions can only be used with a
+#' data table. The types of data tables that can be used include data frames,
+#' tibbles, database tables (`tbl_dbi`), and Spark DataFrames (`tbl_spark`).
+#' Each validation step or expectation will operate over the number of test
+#' units that is equal to the number of rows in the table (after any
+#' `preconditions` have been applied).
+#'
+#' @inheritParams col_vals_gt
+#' @param x_column The column that contains the *x* values. This should be a
+#'   date-time column because, currently, the `col_anomaly_check()` function
+#'   only supports anomaly detection as part of a time-series analysis.
+#' @param y_column The column that contains the *y* values, which must be
+#'   numeric.
+#'
+#' @family validation functions
+#'
 #' @name col_anomaly_check
 NULL
 
@@ -69,9 +93,9 @@ col_anomaly_check <- function(x,
         assertion_type = "col_anomaly_check"
       )
   }
-  
+
   # Normalize any provided `step_id` value(s)
-  step_id <- normalize_step_id(step_id, columns, agent)
+  step_id <- normalize_step_id(step_id, columns = "column", agent)
   
   # Check `step_id` value(s) against all other `step_id`
   # values in earlier validation steps

From a486558a080f962bed81d8d0e14a05fe89d3ade9 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 22:26:53 -0500
Subject: [PATCH 07/16] Update interrogate.R

---
 R/interrogate.R | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/R/interrogate.R b/R/interrogate.R
index 45e10a5de..59dae9e12 100644
--- a/R/interrogate.R
+++ b/R/interrogate.R
@@ -649,6 +649,11 @@ check_table_with_assertion <- function(agent,
         agent = agent,
         idx = idx,
         table = table
+      ),
+      "col_anomaly_check" = interrogate_col_anomaly_check(
+        agent = agent,
+        idx = idx,
+        table = table
       )
     )
   
@@ -1718,6 +1723,74 @@ interrogate_col_schema_match <- function(agent,
   )
 }
 
+interrogate_col_anomaly_check <- function(agent,
+                                          idx,
+                                          table) {
+  # Obtain the `x_column` as a symbol
+  x_column <- get_column_as_sym_at_idx(agent = agent, idx = idx)
+  
+  # Convert the symbol to a character vector
+  x_column <- as.character(x_column)
+  
+  # Obtain the `y_column` as a symbol
+  y_column <- get_values_at_idx(agent = agent, idx = idx)
+
+  # nocov start
+  
+  # Create function for validating the `col_anomaly_check()` step function
+  tbl_col_anomaly_check <- function(table,
+                                    x_column,
+                                    y_column) {
+    
+    # Stop function if `x_column` is not a time column
+    x_column_value_1 <-
+      table %>% 
+      dplyr::select({{ x_column }}) %>%
+      utils::head(1) %>%
+      dplyr::pull(1)
+    
+    if (!inherits(x_column_value_1, "POSIXct")) {
+      stop(
+        "The column provided as the `x_column` must contain date-time values.",
+        call. = FALSE
+      )
+    }
+    
+    # Stop function if `y_column` is not numeric
+    y_column_value_1 <-
+      table %>% 
+      dplyr::select({{ y_column }}) %>%
+      utils::head(1) %>%
+      dplyr::pull(1)
+    
+    if (!is.numeric(y_column_value_1)) {
+      stop(
+        "The column provided as the `y_column` must contain numeric values.",
+        call. = FALSE
+      )
+    }
+    
+    # Get the augmented table with anomalies values
+    table_anomalies <- 
+      anomaly_detection_ts(
+        data_tbl = table,
+        time_col = x_column,
+        value_col = y_column
+      )
+    
+    table_anomalies
+  }
+  
+  # Perform the validation of the table 
+  pointblank_try_catch(
+    tbl_col_anomaly_check(
+      table = table,
+      x_column = x_column,
+      y_column = y_column
+    )
+  )
+}
+
 # nolint start
 
 # Validity checks for the column and value 

From ba521f3ce0c43160f45fa9975be583ba01c37630 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 22:26:55 -0500
Subject: [PATCH 08/16] Update steps_and_briefs.R

---
 R/steps_and_briefs.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/steps_and_briefs.R b/R/steps_and_briefs.R
index 4208d31bc..6557b235f 100644
--- a/R/steps_and_briefs.R
+++ b/R/steps_and_briefs.R
@@ -282,6 +282,12 @@ create_autobrief <- function(agent,
     autobrief <- finalize_autobrief(expectation_text, precondition_text)
   }
   
+  if (assertion_type == "col_anomaly_check") {
+    
+    # TODO: replace with prepared text
+    autobrief <- "The expectation is that anomalies are not present."
+  }
+  
   if (assertion_type == "conjointly") {
 
     values_text <- values_text %>% tidy_gsub("\"", "'")

From 87c48698b2ed5be430244461c0694ff242cd33a9 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 22:26:57 -0500
Subject: [PATCH 09/16] Update zzz.R

---
 R/zzz.R | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/R/zzz.R b/R/zzz.R
index 910fbce09..d43a99e99 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -59,6 +59,8 @@ utils::globalVariables(
     "assertion_type",
     "bin_num",
     "brief",
+    "code_n_p",
+    "code_n_w",
     "color",
     "column",
     "columns",
@@ -81,6 +83,7 @@ utils::globalVariables(
     "f_failed",
     "f_pass",
     "f_passed",
+    "fit",
     "Freq",
     "frequency",
     "Frequency",
@@ -88,6 +91,7 @@ utils::globalVariables(
     "item",
     "label",
     "::labels::",
+    "model_fit",
     "n",
     "n_fail",
     "n_pass",
@@ -96,6 +100,9 @@ utils::globalVariables(
     "N_pts",
     "N_val",
     "notify",
+    "outlier",
+    "outlier_dist",
+    "outlier_sd_z",
     ".panel_x",
     ".panel_y",
     "pb_is_good_",
@@ -117,10 +124,21 @@ utils::globalVariables(
     "status_color",
     "step_id",
     "tbl_checked",
+    "time",
+    "time_12h",
+    "time_15m",
+    "time_1d",
+    "time_1h",
+    "time_1m",
+    "time_30m",
+    "time_6h",
+    "time_8h",
     "total_pts",
     "type",
     "value",
     "values",
+    "value_mean",
+    "var",
     "Var1",
     "Var2",
     "view",
@@ -128,7 +146,9 @@ utils::globalVariables(
     "W_pts",
     "W_val",
     "warn",
-    "x"
+    "x",
+    "z",
+    "z_detrend"
   )
 )
 

From 31e50f9d8700cbab84653e4ae837eb264cbd5ca3 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 22:28:46 -0500
Subject: [PATCH 10/16] Update help files using roxygen

---
 man/col_anomaly_check.Rd    | 97 ++++++++++++++++++++++++++++++++++++-
 man/col_exists.Rd           |  1 +
 man/col_is_character.Rd     |  1 +
 man/col_is_date.Rd          |  1 +
 man/col_is_factor.Rd        |  1 +
 man/col_is_integer.Rd       |  1 +
 man/col_is_logical.Rd       |  1 +
 man/col_is_numeric.Rd       |  1 +
 man/col_is_posix.Rd         |  1 +
 man/col_schema_match.Rd     |  1 +
 man/col_vals_between.Rd     |  1 +
 man/col_vals_decreasing.Rd  |  1 +
 man/col_vals_equal.Rd       |  1 +
 man/col_vals_expr.Rd        |  1 +
 man/col_vals_gt.Rd          |  1 +
 man/col_vals_gte.Rd         |  1 +
 man/col_vals_in_set.Rd      |  1 +
 man/col_vals_increasing.Rd  |  1 +
 man/col_vals_lt.Rd          |  1 +
 man/col_vals_lte.Rd         |  1 +
 man/col_vals_not_between.Rd |  1 +
 man/col_vals_not_equal.Rd   |  1 +
 man/col_vals_not_in_set.Rd  |  1 +
 man/col_vals_not_null.Rd    |  1 +
 man/col_vals_null.Rd        |  1 +
 man/col_vals_regex.Rd       |  1 +
 man/conjointly.Rd           |  1 +
 man/rows_distinct.Rd        |  1 +
 28 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/man/col_anomaly_check.Rd b/man/col_anomaly_check.Rd
index b1f8a78da..55c860422 100644
--- a/man/col_anomaly_check.Rd
+++ b/man/col_anomaly_check.Rd
@@ -16,6 +16,101 @@ col_anomaly_check(
   active = TRUE
 )
 }
+\arguments{
+\item{x}{A data frame, tibble (\code{tbl_df} or \code{tbl_dbi}), Spark DataFrame
+(\code{tbl_spark}), or, an agent object of class \code{ptblank_agent} that is created
+with \code{\link[=create_agent]{create_agent()}}.}
+
+\item{x_column}{The column that contains the \emph{x} values. This should be a
+date-time column because, currently, the \code{col_anomaly_check()} function
+only supports anomaly detection as part of a time-series analysis.}
+
+\item{y_column}{The column that contains the \emph{y} values, which must be
+numeric.}
+
+\item{preconditions}{An optional expression for mutating the input table
+before proceeding with the validation. This is ideally as a one-sided R
+formula using a leading \code{~}. In the formula representation, the \code{.} serves
+as the input data table to be transformed (e.g.,
+\code{~ . \%>\% dplyr::mutate(col = col + 10)}.}
+
+\item{actions}{A list containing threshold levels so that the validation step
+can react accordingly when exceeding the set levels. This is to be created
+with the \code{\link[=action_levels]{action_levels()}} helper function.}
+
+\item{step_id}{One or more optional identifiers for the single or multiple
+validation steps generated from calling a validation function. The use of
+step IDs serves to distinguish validation steps from each other and provide
+an opportunity for supplying a more meaningful label compared to the step
+index. By default this is \code{NULL}, and \strong{pointblank} will automatically
+generate the step ID value (based on the step index) in this case. One or
+more values can be provided, and the exact number of ID values should (1)
+match the number of validation steps that the validation function call will
+produce (influenced by the number of \code{columns} provided), (2) be an ID
+string not used in any previous validation step, and (3) be a vector with
+unique values.}
+
+\item{label}{An optional label for the validation step. This label appears in
+the agent report and for the best appearance it should be kept short.}
+
+\item{brief}{An optional, text-based description for the validation step. If
+nothing is provided here then an \emph{autobrief} is generated by the agent,
+using the language provided in \code{\link[=create_agent]{create_agent()}}'s \code{lang} argument (which
+defaults to \code{"en"} or English). The \emph{autobrief} incorporates details of the
+validation step so it's often the preferred option in most cases (where a
+\code{label} might be better suited to succinctly describe the validation).}
+
+\item{active}{A logical value indicating whether the validation step should
+be active. If the step function is working with an agent, \code{FALSE} will make
+the validation step inactive (still reporting its presence and keeping
+indexes for the steps unchanged). If the step function will be operating
+directly on data, then any step with \code{active = FALSE} will simply pass the
+data through with no validation whatsoever. The default for this is \code{TRUE}.}
+}
 \description{
-Do columns in the table contain anomalous values?
+The \code{col_anomaly_check()} validation function, the
+\code{expect_col_anomaly_check()} expectation function, and the
+\code{test_col_anomaly_check()} test function all check whether column values
+contain anomalous values in a time series. Because this family of functions
+only currently supports a time-series analysis for the detection of
+anomalies, the \code{x_column} must contain date-time values and the \code{y_column}
+must contain numeric values. The validation function can be used directly on
+a data table or with an \emph{agent} object (technically, a \code{ptblank_agent}
+object) whereas the expectation and test functions can only be used with a
+data table. The types of data tables that can be used include data frames,
+tibbles, database tables (\code{tbl_dbi}), and Spark DataFrames (\code{tbl_spark}).
+Each validation step or expectation will operate over the number of test
+units that is equal to the number of rows in the table (after any
+\code{preconditions} have been applied).
+}
+\seealso{
+Other validation functions: 
+\code{\link{col_exists}()},
+\code{\link{col_is_character}()},
+\code{\link{col_is_date}()},
+\code{\link{col_is_factor}()},
+\code{\link{col_is_integer}()},
+\code{\link{col_is_logical}()},
+\code{\link{col_is_numeric}()},
+\code{\link{col_is_posix}()},
+\code{\link{col_schema_match}()},
+\code{\link{col_vals_between}()},
+\code{\link{col_vals_decreasing}()},
+\code{\link{col_vals_equal}()},
+\code{\link{col_vals_expr}()},
+\code{\link{col_vals_gte}()},
+\code{\link{col_vals_gt}()},
+\code{\link{col_vals_in_set}()},
+\code{\link{col_vals_increasing}()},
+\code{\link{col_vals_lte}()},
+\code{\link{col_vals_lt}()},
+\code{\link{col_vals_not_between}()},
+\code{\link{col_vals_not_equal}()},
+\code{\link{col_vals_not_in_set}()},
+\code{\link{col_vals_not_null}()},
+\code{\link{col_vals_null}()},
+\code{\link{col_vals_regex}()},
+\code{\link{conjointly}()},
+\code{\link{rows_distinct}()}
 }
+\concept{validation functions}
diff --git a/man/col_exists.Rd b/man/col_exists.Rd
index a3505d6e9..224ea651a 100644
--- a/man/col_exists.Rd
+++ b/man/col_exists.Rd
@@ -189,6 +189,7 @@ tbl \%>\% test_col_exists(vars(a, b))
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
 \code{\link{col_is_factor}()},
diff --git a/man/col_is_character.Rd b/man/col_is_character.Rd
index ed79f8eba..2b8b29121 100644
--- a/man/col_is_character.Rd
+++ b/man/col_is_character.Rd
@@ -184,6 +184,7 @@ tbl \%>\% test_col_is_character(vars(b))
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_date}()},
 \code{\link{col_is_factor}()},
diff --git a/man/col_is_date.Rd b/man/col_is_date.Rd
index 836d458ce..08eb434f6 100644
--- a/man/col_is_date.Rd
+++ b/man/col_is_date.Rd
@@ -188,6 +188,7 @@ small_table \%>\%
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_factor}()},
diff --git a/man/col_is_factor.Rd b/man/col_is_factor.Rd
index ac6794856..66335e417 100644
--- a/man/col_is_factor.Rd
+++ b/man/col_is_factor.Rd
@@ -190,6 +190,7 @@ tbl \%>\% test_col_is_factor(vars(f))
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_is_integer.Rd b/man/col_is_integer.Rd
index e8601ff52..f13e010e3 100644
--- a/man/col_is_integer.Rd
+++ b/man/col_is_integer.Rd
@@ -185,6 +185,7 @@ tbl \%>\% test_col_is_integer(vars(b))
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_is_logical.Rd b/man/col_is_logical.Rd
index 5507e993c..3be5512b0 100644
--- a/man/col_is_logical.Rd
+++ b/man/col_is_logical.Rd
@@ -189,6 +189,7 @@ small_table \%>\%
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_is_numeric.Rd b/man/col_is_numeric.Rd
index 0bc32e5c1..a98efd417 100644
--- a/man/col_is_numeric.Rd
+++ b/man/col_is_numeric.Rd
@@ -189,6 +189,7 @@ small_table \%>\%
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_is_posix.Rd b/man/col_is_posix.Rd
index dd02e487d..4aee212a8 100644
--- a/man/col_is_posix.Rd
+++ b/man/col_is_posix.Rd
@@ -191,6 +191,7 @@ small_table \%>\%
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_schema_match.Rd b/man/col_schema_match.Rd
index 2b11aca2a..3c12d679b 100644
--- a/man/col_schema_match.Rd
+++ b/man/col_schema_match.Rd
@@ -252,6 +252,7 @@ tbl \%>\% test_col_schema_match(schema_obj)
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_between.Rd b/man/col_vals_between.Rd
index 863368b9e..a79f6f6d7 100644
--- a/man/col_vals_between.Rd
+++ b/man/col_vals_between.Rd
@@ -295,6 +295,7 @@ small_table \%>\%
 The analogue to this function: \code{\link[=col_vals_not_between]{col_vals_not_between()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_decreasing.Rd b/man/col_vals_decreasing.Rd
index dd8c3bca9..7b9ff6c02 100644
--- a/man/col_vals_decreasing.Rd
+++ b/man/col_vals_decreasing.Rd
@@ -181,6 +181,7 @@ The analogous function that moves in the opposite direction:
 \code{\link[=col_vals_increasing]{col_vals_increasing()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_equal.Rd b/man/col_vals_equal.Rd
index 2074811d8..840a7a5a3 100644
--- a/man/col_vals_equal.Rd
+++ b/man/col_vals_equal.Rd
@@ -245,6 +245,7 @@ test_col_vals_equal(tbl, vars(a), 5)
 The analogue to this function: \code{\link[=col_vals_not_equal]{col_vals_not_equal()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_expr.Rd b/man/col_vals_expr.Rd
index e06648683..fe9d27c38 100644
--- a/man/col_vals_expr.Rd
+++ b/man/col_vals_expr.Rd
@@ -232,6 +232,7 @@ nicely within \code{col_vals_expr()} and its variants: \code{\link[rlang:nse-def
 \code{\link[dplyr:between]{dplyr::between()}}, and \code{\link[dplyr:case_when]{dplyr::case_when()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_gt.Rd b/man/col_vals_gt.Rd
index 7499bcd75..051e87fae 100644
--- a/man/col_vals_gt.Rd
+++ b/man/col_vals_gt.Rd
@@ -246,6 +246,7 @@ test_col_vals_gt(tbl, vars(a), 4)
 The analogous function with a left-closed bound: \code{\link[=col_vals_gte]{col_vals_gte()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_gte.Rd b/man/col_vals_gte.Rd
index c6705d860..c66767063 100644
--- a/man/col_vals_gte.Rd
+++ b/man/col_vals_gte.Rd
@@ -247,6 +247,7 @@ test_col_vals_gte(tbl, vars(a), 5)
 The analogous function with a left-open bound: \code{\link[=col_vals_gt]{col_vals_gt()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_in_set.Rd b/man/col_vals_in_set.Rd
index 6a5d9bdc5..331b9d742 100644
--- a/man/col_vals_in_set.Rd
+++ b/man/col_vals_in_set.Rd
@@ -226,6 +226,7 @@ small_table \%>\%
 The analogue to this function: \code{\link[=col_vals_not_in_set]{col_vals_not_in_set()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_increasing.Rd b/man/col_vals_increasing.Rd
index 7540678b1..ba8dacbdb 100644
--- a/man/col_vals_increasing.Rd
+++ b/man/col_vals_increasing.Rd
@@ -181,6 +181,7 @@ The analogous function that moves in the opposite direction:
 \code{\link[=col_vals_decreasing]{col_vals_decreasing()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_lt.Rd b/man/col_vals_lt.Rd
index 32b7080b7..42f8b60b4 100644
--- a/man/col_vals_lt.Rd
+++ b/man/col_vals_lt.Rd
@@ -246,6 +246,7 @@ test_col_vals_lt(tbl, vars(c), 5)
 The analogous function with a right-closed bound: \code{\link[=col_vals_lte]{col_vals_lte()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_lte.Rd b/man/col_vals_lte.Rd
index ed37343f5..6344da5fc 100644
--- a/man/col_vals_lte.Rd
+++ b/man/col_vals_lte.Rd
@@ -249,6 +249,7 @@ test_col_vals_lte(tbl, vars(c), 4)
 The analogous function with a right-open bound: \code{\link[=col_vals_lt]{col_vals_lt()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_not_between.Rd b/man/col_vals_not_between.Rd
index b96b24019..e72f125d4 100644
--- a/man/col_vals_not_between.Rd
+++ b/man/col_vals_not_between.Rd
@@ -292,6 +292,7 @@ small_table \%>\%
 The analogue to this function: \code{\link[=col_vals_between]{col_vals_between()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_not_equal.Rd b/man/col_vals_not_equal.Rd
index 2a52827a5..1a0389ab0 100644
--- a/man/col_vals_not_equal.Rd
+++ b/man/col_vals_not_equal.Rd
@@ -246,6 +246,7 @@ test_col_vals_not_equal(tbl, vars(a), 6)
 The analogue to this function: \code{\link[=col_vals_equal]{col_vals_equal()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_not_in_set.Rd b/man/col_vals_not_in_set.Rd
index 87f64305d..fd76dbffe 100644
--- a/man/col_vals_not_in_set.Rd
+++ b/man/col_vals_not_in_set.Rd
@@ -233,6 +233,7 @@ small_table \%>\%
 The analogue to this function: \code{\link[=col_vals_in_set]{col_vals_in_set()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_not_null.Rd b/man/col_vals_not_null.Rd
index 71e4d9de9..bd700e875 100644
--- a/man/col_vals_not_null.Rd
+++ b/man/col_vals_not_null.Rd
@@ -216,6 +216,7 @@ tbl \%>\% test_col_vals_not_null(vars(b))
 The analogue to this function: \code{\link[=col_vals_null]{col_vals_null()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_null.Rd b/man/col_vals_null.Rd
index 7504353a8..59061fac6 100644
--- a/man/col_vals_null.Rd
+++ b/man/col_vals_null.Rd
@@ -215,6 +215,7 @@ tbl \%>\% test_col_vals_null(vars(c))
 The analogue to this function: \code{\link[=col_vals_not_null]{col_vals_not_null()}}.
 
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/col_vals_regex.Rd b/man/col_vals_regex.Rd
index 35002195d..8ffe52d9e 100644
--- a/man/col_vals_regex.Rd
+++ b/man/col_vals_regex.Rd
@@ -245,6 +245,7 @@ small_table \%>\%
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/conjointly.Rd b/man/conjointly.Rd
index 9690c78b7..f9d34ae79 100644
--- a/man/conjointly.Rd
+++ b/man/conjointly.Rd
@@ -272,6 +272,7 @@ tbl \%>\%
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},
diff --git a/man/rows_distinct.Rd b/man/rows_distinct.Rd
index baa9e5648..4606d684a 100644
--- a/man/rows_distinct.Rd
+++ b/man/rows_distinct.Rd
@@ -177,6 +177,7 @@ all_passed(agent)
 }
 \seealso{
 Other validation functions: 
+\code{\link{col_anomaly_check}()},
 \code{\link{col_exists}()},
 \code{\link{col_is_character}()},
 \code{\link{col_is_date}()},

From 5474f3cc1371a066421399d8bd5814f260cd7b3d Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sat, 19 Dec 2020 22:40:03 -0500
Subject: [PATCH 11/16] Remove min version requirement for mgcv

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b03d4ace1..49f3f5818 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -42,7 +42,7 @@ Imports:
     log4r (>= 0.3.2),
     knitr (>= 1.30),
     magrittr,
-    mgcv (>= 1.8-33),
+    mgcv,
     rlang (>= 0.4.9),
     scales (>= 1.1.1),
     testthat (>= 2.3.2),

From e80b1fa302b0570796ab7744cd0ff1d13fcc434b Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sun, 20 Dec 2020 21:36:01 -0500
Subject: [PATCH 12/16] Create col_anomaly_check.png

---
 inst/img/function_icons/col_anomaly_check.png | Bin 0 -> 2948 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 inst/img/function_icons/col_anomaly_check.png

diff --git a/inst/img/function_icons/col_anomaly_check.png b/inst/img/function_icons/col_anomaly_check.png
new file mode 100644
index 0000000000000000000000000000000000000000..07d01cb265492817f105342ff13b586e05c79794
GIT binary patch
literal 2948
zcmV-~3w!j5P)<h;3K|Lk000e1NJLTq002V(002V>1^@s6$I7^K00004XF*Lt006O%
z3;baP0000uWmrjOO-%qQ0000800D<-00aO40096102%-Q00002paK8{000010000(
zpaTE|000010000(00000aic-r000X2Nkl<Zcmd^H1&kEU60XA?f)kwJ4#8c51_A`P
zAR$12put{n3z8SyJ-Cw}3p_j!+}$NWaMwfn)mLGLp1GZwU4HiN*sG*(dZuN%Ys#zl
z!pIxqq)eGIQFwTGf`kbZ#`5kSy?TU&g{d!JzJ$Md^X3z;?DOxeE{q4Y<nli*;apUR
zl&idtF<eq|i2&HN9rE&n6<ms=A#U8bls9i)%8(%g#fujYkNzt7@#6=*c=3X6-n=Q-
zpNoIP*Zy4QyKSZ#H@@bgvSi7k=Fgw6UcGvyLIJ*i|E>-kIH2m*t*cY{R3~KQ;|5?%
zwP?{oef;<_6mZ~k4=YxzPze$wFpx6Xju5cRX&|Cy%a%ST2(^dXwrx|fV#NYNzH<?g
zz*m9R{8weonl+>;!u~<~_wR2|$F84vkjq{mW6`2T_WmJ#4|$k0X;K3rX@H63T)wiV
zP^^nXdh@<E*SvXiK?WnGK8&awT;dccP=JypOUBjc0f!D9qGH90(W+Ieg1VrxWy{L^
zi4rBs24rNwUHbIt<zZz0Vat$8mMkeY3>J0d$Px4POr>=8>{)vL{JHro7!}x00M;()
zfDEyu;>V9K4<qXj>wPL;zC4vEQG$*hJxaZL^`dg+%F(@h_hbn$mH;gA<r=I%h9SxC
zi5DkWFY%fe2>AN-E6tugTe`9)qiff$(YSHrXztv(6j{j7*BM;2>u<8v+3}+|ajf5h
ztzH7_0N?_Q963_h$Tl>nw+(rA)v#ehU&1zM&pv$kAX1z+Ebq8`_paP;-n=<z@ANhC
zrcImVz1g#8pBQD7!O~mg1GGiDbm=Htwrn(I$`oqQpn-h(J$v?0nKET)%a$!cr38I)
zE1l{skx&O)T)1#SG)9snNeIP&w{PFl)vH$tS|zd|N|Aw0LT>m{>x~*UB9zF4C}Wez
z+$7$0`0!zxG-;CSYeOCx2<kLx(u68hs6b=JjG>n=UpgWrWRZdBR`1@uDS!U_#2Sh^
zckV29499aFkrCxG!*ON5*#J9b+O%n6yuE(?nmDp0>;a^BK}b=?+O=!NSQ{{4fT-%-
zyLS@@afCX&5P|kcr;PLG&(p<=7d_Y3xN&3AG;7wZ@$8!~Uq110_wV0N-@bjLB}<l2
z+qP|g%dZKl1<IbNs#&w9di?mY7=S~D3=xZucj$V3`t+$XtW>YQef##6xtT2_Qnz>S
zUWF*uym|8!`#-)6PxF~>fd-8-Zr{F5g9i^LkS?sv6DLkky?XViQ>RYE$npHzDN>}M
z*s){N!Gi}q`)10NN%|yDo?PM|ty;A*$ADG*p)O;<AEgXdT?$_ckuwBbJ9g}-9zJ|%
zeu-dLDpsr*f#NtO;`rL2PzJovnKNh7=+UEzV~bR)RxOd@g$flikRB=5#EBDI-UI6L
zH&*jU*aW29p+g4(YZNYAm{zY|ZD9rGf8vA`0b%Xiw<q=xEerO#*|%>W?bxw{a^=b;
z@3Fi7`uL$khY~`D@87?d$GF#h9=)%x`fszz=g*&o{Mgx2nKNfr3l}a_$Xw|Fp@ueo
zu@vTO1$W(FSh;egITmv?Yu3zj=z|9j6nn$!<jIqseRVbgaso!ilP6D9w{G23o;-Pw
z;g%OVN$us!ms>W24RJKf=OQ`e7eT%RDg%5@?D9}YMnFc_r%#{6vdft>C(W8QOAH*t
z#dacHXc9Zv`5N(1_zwvEWz3k7x^(Gc4zc$QL<X3X1W@s$430;trAwD8R$=m$Iif8|
zTo;6!O`sV4>#~6qAwRQh*|G@KvvcQ8nS;Ge#XN7ej~Fq+oZFuY$6D3Efdkd_>C+W|
z%m_nt$^gz>k--rs#rX?a4APHC+5$O&01&c$`*wvF`$}lDH)(E`*CTZpOa?f~0AF(%
zV{qNIYnPEvl{u?ctr}tNej}qtj~+4!gx<u76O|z_^%wq$I_F0evzdtyW-WE+&K<|8
zVVzd1R!!kl4vtQ+w(;4QO?q=q1Id>!U$*qsk>RRZYt*PA_WiJ7!zfj%RG=b>UW0F)
zWQ24I@K2E<MMQNnDa*WuqvbRWEL%3YrTPB-`@|_idi3a#SgiUOySzc0uV?xgS2oen
z*t2I(#V2{>Y=e_VK!0|Y)Z)d9)u2Ix9KV`AZ=iL^EZ%^nfR*fU>Cz>|@e8wG_wL>8
zU4(Tp7oy`pieJcBw{D#|sj(0^VZNYEn>LodP_H_0;6=@uGe_)_ckkZmO@7rO5REn9
z3Kj1JU@0~N6&qqcyXb7fbNPpi;lqbp7Jx<In3Ek~WPz8_ZWK-%n%&?h7+MMK&@S*!
z0|96SP${c;Ri{oJ#hJK3hFkjT$ne|FV1dPJ{{=YsR;f~@Uk`rPkh_FT(SeUlnzyV*
ziAmbLzjEaY@q)~Ie=3|Lrm0h>(v2H8h_w{u&YjyF#)viRfmZxR1~$%efQl9^N~KDb
zqKz9j$|A;(A1|SP<Ur7ljYHrX0HX{3K_I}Nj#%DMnlxz$1|Ht!1@M2WRH-6<2wZwY
zz<Q0dxBDeCEVT9W2DSrFW)`^YP?Lb;$B!!*Q!uf0c=qg>g-tl8s@SR0yC{xSnR8-(
z_BtcXV;R6V2o>mSVMK}`xA(K(>ulnxTJ2b~-y@T*12X3b;y_j6+y#mqQm0N$=gysz
zpbykDpDc`^?(Fj-$cwWYpgt!+W=;o2I&pMO%zOR0TitIq@ij^5(xokrahAdt0xX^Q
z2I2ayUAs!u%a?m?pBM26u%{bTuU_5Kj4VF<90P#R)2DHu(H(x9O+a$6H`1m}E2Iw>
z{!akReKx$rM#zyPhg!dWz2~&hFn#*;5jD%<Ma9BZ#$YbE^PJdLY&a@tI~{a30h<05
zGW3a~Tb+YWiiN$%5~y>?B73|=N5;Qm6L*4(y4LI0uS?Pz8W5@*X+=D1*RGvw`oL3w
zB7BBpMM&2{*deSJT>*#TaN38hTUjFH67HFe{a&&on{doh6)#@gR9Wy#SX%_d4KTNn
zD>3{6wm}pdWu65p7?!C&ef#z6Cu^!*ySA*ge*OB&?wvX<T4xhi$~boHm_-6}T!I!X
zSRg0#yikqk75cInPaGzs%9SfiwhL!FL9Si^D-){MpT33$!{IpgTg3SCn%}A2kupx5
zI%QG?50*M4wij&Ox>bA*jA2t*JVUQBQPr+fhN}Q&7Lizifkhl3MVj9U&YU?z(D%rk
z!RSGJg57B1!*fnUqTO>x$}qkba+h%7-F}Dza~R$#e#Ze9+pf=zxuV-urwmt_VSF}@
z9U4{36-glgJXJVo$Z>*i-OOgo1kvoql}+5%%2uJAdzd_VvUCGWa1iIOMFchxFfwr1
z9cR9qHf>5AohG=;0A>-n{>^`Y3}^{rUUuEbty{N<Q+smQ9bP9?CW0%iTjNJep#hK?
z@*4)HpEBTd9L!l{*kL>1e8s3yqvU51p@D*5y8tpC;1*vQ)S;6*!tijNam0YFJ2_%d
zuwcPoqeH9)Ho3>;Ho|7GICX$lfQI1Al;kx+84=jGAwbvTX2OICxL9Duj2VPvQcM5?
z8UN+YzQf_s4I4JZgnj$mqk)X4-1;B%fMbM1VKEU5WMJ<zxXeS|0m2gDeRjWN6i)1x
z67Gj{LE<9}#}DCj;YTok>wih``kIKuj~zP}=gb^`a)7Y+elFs+$Al{q&vwe-eOn+s
uhw{sDRxCR^V(HihiDzjIp&Jfd{qui&oo;vg8tm)<0000<MNUMnLSTXwjFC71

literal 0
HcmV?d00001


From b5218f81a6e12fa5de5ed25e32ca4dd98845a9f0 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sun, 20 Dec 2020 21:36:03 -0500
Subject: [PATCH 13/16] Create col_anomaly_check.svg

---
 inst/img/function_icons/col_anomaly_check.svg | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 inst/img/function_icons/col_anomaly_check.svg

diff --git a/inst/img/function_icons/col_anomaly_check.svg b/inst/img/function_icons/col_anomaly_check.svg
new file mode 100644
index 000000000..823fa3cab
--- /dev/null
+++ b/inst/img/function_icons/col_anomaly_check.svg
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>col_anomaly_check()</title>
+    <defs>
+        <rect id="path-1" x="0" y="0" width="20" height="20"></rect>
+    </defs>
+    <g id="Function-Icons---original" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="col_anomaly_check()" transform="translate(-0.025844, 0.702042)">
+            <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+            <polygon id="l_bound_5" fill="#000000" transform="translate(55.683984, 45.885685) rotate(-299.000000) translate(-55.683984, -45.885685) " points="54.8839836 41.0856849 54.8839836 50.6856849 56.4839836 50.6856849 56.4839836 41.0856849"></polygon>
+            <polygon id="l_bound_4" fill="#000000" transform="translate(44.781783, 48.112467) rotate(-270.000000) translate(-44.781783, -48.112467) " points="43.9817831 43.3124669 43.9817831 52.9124669 45.5817831 52.9124669 45.5817831 43.3124669"></polygon>
+            <polygon id="l_bound_3" fill="#000000" transform="translate(33.310597, 48.112467) rotate(-270.000000) translate(-33.310597, -48.112467) " points="32.5105968 43.3124669 32.5105968 52.9124669 34.1105968 52.9124669 34.1105968 43.3124669"></polygon>
+            <polygon id="l_bound_2" fill="#000000" transform="translate(22.565830, 50.339249) rotate(-299.000000) translate(-22.565830, -50.339249) " points="21.7658296 45.5392488 21.7658296 55.1392488 23.3658296 55.1392488 23.3658296 45.5392488"></polygon>
+            <polygon id="l_bound_1" fill="#000000" transform="translate(12.291585, 56.120915) rotate(-299.000000) translate(-12.291585, -56.120915) " points="11.4915846 51.3209149 11.4915846 60.9209149 13.0915846 60.9209149 13.0915846 51.3209149"></polygon>
+            <polygon id="u_bound_5" fill="#000000" transform="translate(55.683984, 20.558276) rotate(-299.000000) translate(-55.683984, -20.558276) " points="54.8839836 15.7582763 54.8839836 25.3582763 56.4839836 25.3582763 56.4839836 15.7582763"></polygon>
+            <polygon id="u_bound_4" fill="#000000" transform="translate(44.781783, 23.741792) rotate(-270.000000) translate(-44.781783, -23.741792) " points="43.9817831 18.941792 43.9817831 28.541792 45.5817831 28.541792 45.5817831 18.941792"></polygon>
+            <polygon id="u_bound_3" fill="#000000" transform="translate(33.310597, 23.741792) rotate(-270.000000) translate(-33.310597, -23.741792) " points="32.5105968 18.941792 32.5105968 28.541792 34.1105968 28.541792 34.1105968 18.941792"></polygon>
+            <polygon id="u_bound_2" fill="#000000" transform="translate(22.014729, 26.388457) rotate(-299.000000) translate(-22.014729, -26.388457) " points="21.2147294 21.5884568 21.2147294 31.1884568 22.8147294 31.1884568 22.8147294 21.5884568"></polygon>
+            <polygon id="u_bound_1" fill="#000000" transform="translate(11.740484, 32.170123) rotate(-299.000000) translate(-11.740484, -32.170123) " points="10.9404844 27.3701229 10.9404844 36.9701229 12.5404844 36.9701229 12.5404844 27.3701229"></polygon>
+            <path d="M57.7700059,31.7005321 C56.3916576,31.7005321 55.2700059,32.8221839 55.2700059,34.2005321 C55.2700059,35.5788803 56.3916576,36.7005321 57.7700059,36.7005321 C59.1483541,36.7005321 60.2700059,35.5788803 60.2700059,34.2005321 C60.2700059,32.8221839 59.1483541,31.7005321 57.7700059,31.7005321 Z" id="data_point_solid_13" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M54.6839836,23.7005321 C53.3056354,23.7005321 52.1839836,24.8221839 52.1839836,26.2005321 C52.1839836,27.5788803 53.3056354,28.7005321 54.6839836,28.7005321 C56.0623318,28.7005321 57.1839836,27.5788803 57.1839836,26.2005321 C57.1839836,24.8221839 56.0623318,23.7005321 54.6839836,23.7005321 Z" id="data_point_solid_12" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M52.6839836,39.2005321 C51.3056354,39.2005321 50.1839836,40.3221839 50.1839836,41.7005321 C50.1839836,43.0788803 51.3056354,44.2005321 52.6839836,44.2005321 C54.0623318,44.2005321 55.1839836,43.0788803 55.1839836,41.7005321 C55.1839836,40.3221839 54.0623318,39.2005321 52.6839836,39.2005321 Z" id="data_point_solid_11" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M47.6839836,33.2124669 C46.3056354,33.2124669 45.1839836,34.3341186 45.1839836,35.7124669 C45.1839836,37.0908151 46.3056354,38.2124669 47.6839836,38.2124669 C49.0623318,38.2124669 50.1839836,37.0908151 50.1839836,35.7124669 C50.1839836,34.3341186 49.0623318,33.2124669 47.6839836,33.2124669 Z" id="data_point_solid_10" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M42.6839836,27.7005321 C41.3056354,27.7005321 40.1839836,28.8221839 40.1839836,30.2005321 C40.1839836,31.5788803 41.3056354,32.7005321 42.6839836,32.7005321 C44.0623318,32.7005321 45.1839836,31.5788803 45.1839836,30.2005321 C45.1839836,28.8221839 44.0623318,27.7005321 42.6839836,27.7005321 Z" id="data_point_solid_09" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M39.8839836,39.3005321 C38.5056354,39.3005321 37.3839836,40.4221839 37.3839836,41.8005321 C37.3839836,43.1788803 38.5056354,44.3005321 39.8839836,44.3005321 C41.2623318,44.3005321 42.3839836,43.1788803 42.3839836,41.8005321 C42.3839836,40.4221839 41.2623318,39.3005321 39.8839836,39.3005321 Z" id="data_point_solid_08" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M35.9839836,30.0850582 C34.6056354,30.0850582 33.4839836,31.20671 33.4839836,32.5850582 C33.4839836,33.9634064 34.6056354,35.0850582 35.9839836,35.0850582 C37.3623318,35.0850582 38.4839836,33.9634064 38.4839836,32.5850582 C38.4839836,31.20671 37.3623318,30.0850582 35.9839836,30.0850582 Z" id="data_point_solid_07" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M30.9839836,36.8005321 C29.6056354,36.8005321 28.4839836,37.9221839 28.4839836,39.3005321 C28.4839836,40.6788803 29.6056354,41.8005321 30.9839836,41.8005321 C32.3623318,41.8005321 33.4839836,40.6788803 33.4839836,39.3005321 C33.4839836,37.9221839 32.3623318,36.8005321 30.9839836,36.8005321 Z" id="data_point_solid_06" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M27.0839836,27.7005321 C25.7056354,27.7005321 24.5839836,28.8221839 24.5839836,30.2005321 C24.5839836,31.5788803 25.7056354,32.7005321 27.0839836,32.7005321 C28.4623318,32.7005321 29.5839836,31.5788803 29.5839836,30.2005321 C29.5839836,28.8221839 28.4623318,27.7005321 27.0839836,27.7005321 Z" id="data_point_solid_05" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M23.3839836,35.7124669 C22.0056354,35.7124669 20.8839836,36.8341186 20.8839836,38.2124669 C20.8839836,39.5908151 22.0056354,40.7124669 23.3839836,40.7124669 C24.7623318,40.7124669 25.8839836,39.5908151 25.8839836,38.2124669 C25.8839836,36.8341186 24.7623318,35.7124669 23.3839836,35.7124669 Z" id="data_point_solid_04" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M17.9700059,39.3005321 C16.5916576,39.3005321 15.4700059,40.4221839 15.4700059,41.8005321 C15.4700059,43.1788803 16.5916576,44.3005321 17.9700059,44.3005321 C19.3483541,44.3005321 20.4700059,43.1788803 20.4700059,41.8005321 C20.4700059,40.4221839 19.3483541,39.3005321 17.9700059,39.3005321 Z" id="data_point_solid_03" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M12.9700059,35.2005321 C11.5916576,35.2005321 10.4700059,36.3221839 10.4700059,37.7005321 C10.4700059,39.0788803 11.5916576,40.2005321 12.9700059,40.2005321 C14.3483541,40.2005321 15.4700059,39.0788803 15.4700059,37.7005321 C15.4700059,36.3221839 14.3483541,35.2005321 12.9700059,35.2005321 Z" id="data_point_solid_02" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M10.2055623,46.7005321 C8.82721408,46.7005321 7.7055623,47.8221839 7.7055623,49.2005321 C7.7055623,50.5788803 8.82721408,51.7005321 10.2055623,51.7005321 C11.5839105,51.7005321 12.7055623,50.5788803 12.7055623,49.2005321 C12.7055623,47.8221839 11.5839105,46.7005321 10.2055623,46.7005321 Z" id="data_point_solid_01" fill="#000000" fill-rule="nonzero"></path>
+            <path d="M33.712234,9.90417906 C32.3338858,9.90417906 31.212234,11.0258308 31.212234,12.4041791 C31.212234,13.7825273 32.3338858,14.9041791 33.712234,14.9041791 C35.0905822,14.9041791 36.212234,13.7825273 36.212234,12.4041791 C36.212234,11.0258308 35.0905822,9.90417906 33.712234,9.90417906 Z" id="data_point_anomaly" stroke="#000000" fill="#FFFFFF" fill-rule="nonzero"></path>
+            <g id="warning_sign" transform="translate(29.582638, 1.109050)">
+                <mask id="mask-2" fill="white">
+                    <use xlink:href="#path-1"></use>
+                </mask>
+                <g id="Rectangle"></g>
+                <g id="Group" mask="url(#mask-2)">
+                    <g transform="translate(8.833333, 5.666667)">
+                        <g transform="translate(1.293667, 5.850846) rotate(0.430848) translate(-1.293667, -5.850846) translate(0.293667, 0.850846)">
+                            <g transform="translate(0.000000, 0.430653)" fill="#000000" fill-rule="nonzero" id="Shape">
+                                <path d="M1.72261062,8.18240044 C1.72261062,8.18240044 0,8.18240044 0,8.18240044 C0,8.18240044 0,6.45978982 0,6.45978982 C0,6.45978982 1.72261062,6.45978982 1.72261062,6.45978982 C1.72261062,6.45978982 1.72261062,8.18240044 1.72261062,8.18240044 Z M1.72261062,4.7371792 C1.72261062,4.7371792 0,4.7371792 0,4.7371792 C0,4.7371792 0,0.430652655 0,0.430652655 C0,0.430652655 1.72261062,0.430652655 1.72261062,0.430652655 C1.72261062,0.430652655 1.72261062,4.7371792 1.72261062,4.7371792 Z"></path>
+                            </g>
+                        </g>
+                    </g>
+                </g>
+            </g>
+        </g>
+    </g>
+</svg>
\ No newline at end of file

From 9d356bc8ac4177e42964866b9a64c75c391632a0 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sun, 20 Dec 2020 21:51:19 -0500
Subject: [PATCH 14/16] Update interrogate.R

---
 R/interrogate.R | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/R/interrogate.R b/R/interrogate.R
index 59dae9e12..c3b80486a 100644
--- a/R/interrogate.R
+++ b/R/interrogate.R
@@ -346,6 +346,26 @@ interrogate <- function(agent,
         )
     }
     
+    # Add extracts of rows for anomalous values
+    if (assertion_type == "col_anomaly_check") {
+
+      tbl_checked$value <-
+        tbl_checked$value %>%
+        dplyr::select(time, value, pb_is_good_)
+      
+      agent <- 
+        add_table_extract(
+          agent = agent,
+          idx = i,
+          tbl_checked = tbl_checked,
+          extract_failed = extract_failed,
+          get_first_n = get_first_n,
+          sample_n = sample_n,
+          sample_frac = sample_frac,
+          sample_limit = sample_limit
+        )
+    }
+    
     # Get the ending time for the validation step
     validation_end_time <- Sys.time()
     

From 9d9a30b6dfaa482cf8c5c8f42521c61061975d52 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Sun, 20 Dec 2020 22:52:15 -0500
Subject: [PATCH 15/16] Update interrogate.R

---
 R/interrogate.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/interrogate.R b/R/interrogate.R
index c3b80486a..0326a31a6 100644
--- a/R/interrogate.R
+++ b/R/interrogate.R
@@ -1801,6 +1801,8 @@ interrogate_col_anomaly_check <- function(agent,
     table_anomalies
   }
   
+  # nocov end
+  
   # Perform the validation of the table 
   pointblank_try_catch(
     tbl_col_anomaly_check(

From 9b87c1500f550a0925b7e6d363d76d713522cb42 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Mon, 21 Dec 2020 16:50:15 -0500
Subject: [PATCH 16/16] Update anomaly_detection.R

---
 R/anomaly_detection.R | 100 ++++++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 43 deletions(-)

diff --git a/R/anomaly_detection.R b/R/anomaly_detection.R
index 3da881cc0..79eb37b43 100644
--- a/R/anomaly_detection.R
+++ b/R/anomaly_detection.R
@@ -65,11 +65,6 @@ anomaly_detection_ts <- function(data_tbl,
       fit = unname(mgcv::predict.gam(gam_model)),
     )
   
-  # # Plot the sampled data points along with the model fit
-  # ggplot(data_tbl_standardized_sample) +
-  #   geom_point(aes(x = time, y = z)) +
-  #   geom_line(aes(x = time, y = fit), data = gam_model_tbl, color = "blue")
-  
   # Perform an analysis on the standardized data; this focuses only on the
   # unchanged `time` variable, giving us columns that indicate how many data
   # points are in each time grouping
@@ -108,7 +103,8 @@ anomaly_detection_ts <- function(data_tbl,
   
   if (nrow(time_int_gt_20) < 1) {
     stop(
-      "There aren't enough data points in any of the possible time granularity types.",
+      "There aren't enough data points in any of the available time ",
+      "granularity types.",
       call. = FALSE
     )
   }
@@ -242,7 +238,7 @@ anomaly_detection_ts <- function(data_tbl,
   unified_data <-
     encoded_points %>%
     dplyr::left_join(
-      encoded_windows, by = "time_1d"
+      encoded_windows, by = paste0("time_", timespan_select)
     ) %>%
     dplyr::mutate(code_n_dist = abs(code_n_p - code_n_w)) %>%
     dplyr::mutate(
@@ -262,42 +258,6 @@ anomaly_detection_ts <- function(data_tbl,
     )) %>%
     dplyr::mutate(`pb_is_good_` = !outlier)
   
-  
-  # plot_data <-
-  #   unified_data %>%
-  #   dplyr::select(1, 2, z, z_detrend, sd_z_detrend_ul, sd_z_detrend_ll, outlier) %>%
-  #   dplyr::collect()
-  # 
-  # # Plot of actual data values with seeded values and marked outliers
-  # ggplot() +
-  #   geom_point(
-  #     data = plot_data %>% dplyr::filter(outlier),
-  #     aes(x = time, y = value), color = "red"
-  #   ) +
-  #   geom_point(
-  #     data = plot_data %>% dplyr::filter(!outlier),
-  #     aes(x = time, y = value)
-  #   ) +
-  #   scale_x_datetime(date_breaks = "1 month")
-  # 
-  # # Plot of standardized data values with seeded values and marked outliers
-  # ggplot() +
-  #   geom_point(
-  #     data = plot_data %>% dplyr::filter(outlier),
-  #     aes(x = time, y = z_detrend), color = "red"
-  #   ) +
-  #   geom_point(
-  #     data = plot_data %>% dplyr::filter(!outlier),
-  #     aes(x = time, y = z_detrend)
-  #   ) +
-  #   geom_hline(yintercept = 1, color = "gray") +
-  #   geom_hline(yintercept = -1, color = "gray") +
-  #   geom_line(data = plot_data, aes(x = time, y = sd_z_detrend_ul), color = "red") +
-  #   geom_line(data = plot_data, aes(x = time, y = sd_z_detrend_ll), color = "red") +
-  #   scale_y_continuous(breaks = seq(-5.0, 5.0, 0.5)) +
-  #   scale_x_datetime(date_breaks = "1 month")
-  
-  
   unified_data
 }
 
@@ -417,3 +377,57 @@ add_time_granularity_cols <- function(data_tbl) {
     dplyr::mutate(n_1m = n()) %>%
     dplyr::ungroup()
 }
+
+plot_fit_standardized_sample <- function(data_tbl_standardized_sample) {
+  
+  # Plot the sampled data points along with the model fit
+  ggplot(data_tbl_standardized_sample) +
+    geom_point(aes(x = time, y = z)) +
+    geom_line(aes(x = time, y = fit), data = gam_model_tbl, color = "blue")
+}
+
+plot_anomalies <- function(unified_data) {
+  
+  plot_data <-
+    unified_data %>%
+    dplyr::select(
+      1, 2, z, z_detrend,
+      sd_z_detrend_ul, sd_z_detrend_ll, outlier
+    ) %>%
+    dplyr::collect()
+
+  # Plot of actual data values with seeded values and marked outliers
+  ggplot() +
+    geom_point(
+      data = plot_data %>% dplyr::filter(outlier),
+      aes(x = time, y = value), color = "red"
+    ) +
+    geom_point(
+      data = plot_data %>% dplyr::filter(!outlier),
+      aes(x = time, y = value)
+    ) +
+    scale_x_datetime(date_breaks = "1 month")
+
+  # Plot of standardized data values with seeded values and marked outliers
+  ggplot() +
+    geom_point(
+      data = plot_data %>% dplyr::filter(outlier),
+      aes(x = time, y = z_detrend), color = "red"
+    ) +
+    geom_point(
+      data = plot_data %>% dplyr::filter(!outlier),
+      aes(x = time, y = z_detrend)
+    ) +
+    geom_hline(yintercept = 1, color = "gray") +
+    geom_hline(yintercept = -1, color = "gray") +
+    geom_line(
+      data = plot_data,
+      aes(x = time, y = sd_z_detrend_ul), color = "red"
+    ) +
+    geom_line(
+      data = plot_data, aes(x = time, y = sd_z_detrend_ll),
+      color = "red"
+    ) +
+    scale_y_continuous(breaks = seq(-5.0, 5.0, 0.5)) +
+    scale_x_datetime(date_breaks = "1 month")
+}