Skip to content

Commit 33ef8f6

Browse files
authored
Add weighting to kap() (#216)
* Add `weighting` argument to `kap()`
* Add sklearn comparison tests for linear/quadratic weighting
* NEWS bullet
* Resave all comparison objects with version 2 for R < 3.5 support
1 parent 9ae1066 commit 33ef8f6

File tree

12 files changed

+160
-34
lines changed

12 files changed

+160
-34
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,9 @@
1515
package are now supported, and are automatically converted to factors before
1616
computing any metric. Note that this means that any equivocal values are
1717
materialized as `NA` (#198).
18+
19+
* The `kap()` metric has a new `weighting` argument to apply linear or
20+
quadratic weightings before computing the kappa value (#2, #125, @jonthegeek).
1821

1922
* When `sens()` is undefined when computing `ppv()`, `npv()`, `j_index()`, or
2023
`bal_accuracy()`, a sensitivity warning is now correctly thrown, rather than

R/class-kap.R

Lines changed: 101 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,26 @@
1515
#'
1616
#' @inheritParams sens
1717
#'
18+
#' @param weighting A weighting to apply when computing the scores. One of:
19+
#' `"none"`, `"linear"`, or `"quadratic"`. Linear and quadratic weighting
20+
#' penalizes mis-predictions that are "far away" from the true value. Note
21+
#' that distance is judged based on the ordering of the levels in `truth` and
22+
#' `estimate`. It is recommended to provide ordered factors for `truth` and
23+
#' `estimate` to explicitly code the ordering, but this is not required.
24+
#'
25+
#' In the binary case, all 3 weightings produce the same value, since it is
26+
#' only ever possible to be 1 unit away from the true value.
27+
#'
1828
#' @author Max Kuhn
29+
#' @author Jon Harmon
30+
#'
31+
#' @references
32+
#' Cohen, J. (1960). "A coefficient of agreement for nominal
33+
#' scales". _Educational and Psychological Measurement_. 20 (1): 37-46.
1934
#'
20-
#' @references Cohen, J. (1960). "A coefficient of agreement for nominal
21-
#' scales". _Educational and Psychological Measurement_. 20 (1): 37-46.
35+
#' Cohen, J. (1968). "Weighted kappa: Nominal scale agreement provision for
36+
#' scaled disagreement or partial credit". _Psychological
37+
#' Bulletin_. 70 (4): 213-220.
2238
#'
2339
#' @export
2440
#' @examples
@@ -49,51 +65,61 @@ kap <- new_class_metric(
4965

5066
#' @export
5167
#' @rdname kap
52-
kap.data.frame <- function(data, truth, estimate,
53-
na_rm = TRUE, ...) {
68+
kap.data.frame <- function(data,
69+
truth,
70+
estimate,
71+
weighting = "none",
72+
na_rm = TRUE,
73+
...) {
5474

5575
metric_summarizer(
5676
metric_nm = "kap",
5777
metric_fn = kap_vec,
5878
data = data,
5979
truth = !!enquo(truth),
6080
estimate = !!enquo(estimate),
61-
na_rm = na_rm
81+
na_rm = na_rm,
82+
metric_fn_options = list(weighting = weighting)
6283
)
6384

6485
}
6586

6687
#' @export
67-
kap.table <- function(data, ...) {
88+
kap.table <- function(data,
89+
weighting = "none",
90+
...) {
6891
check_table(data)
6992
metric_tibbler(
7093
.metric = "kap",
7194
.estimator = finalize_estimator(data, metric_class = "kap"),
72-
.estimate = kap_table_impl(data)
95+
.estimate = kap_table_impl(data, weighting = weighting)
7396
)
7497
}
7598

7699
#' @export
77-
kap.matrix <- function(data, ...) {
100+
kap.matrix <- function(data,
101+
weighting = "none",
102+
...) {
78103
data <- as.table(data)
79-
kap.table(data)
104+
kap.table(data, weighting = weighting)
80105
}
81106

82107
#' @export
83108
#' @rdname kap
84-
kap_vec <- function(truth, estimate, na_rm = TRUE, ...) {
85-
109+
kap_vec <- function(truth,
110+
estimate,
111+
weighting = "none",
112+
na_rm = TRUE,
113+
...) {
86114
estimator <- finalize_estimator(truth, metric_class = "kap")
87115

88-
kap_impl <- function(truth, estimate) {
89-
116+
kap_impl <- function(truth, estimate, weighting) {
90117
xtab <- vec2table(
91118
truth = truth,
92119
estimate = estimate
93120
)
94121

95-
kap_table_impl(xtab)
96-
122+
kap_table_impl(xtab, weighting = weighting)
97123
}
98124

99125
metric_vec_template(
@@ -102,25 +128,74 @@ kap_vec <- function(truth, estimate, na_rm = TRUE, ...) {
102128
estimate = estimate,
103129
na_rm = na_rm,
104130
estimator = estimator,
105-
cls = "factor"
131+
cls = "factor",
132+
weighting = weighting
106133
)
107-
108134
}
109135

110-
kap_table_impl <- function(data) {
111-
kap_binary(data)
136+
kap_table_impl <- function(data, weighting) {
137+
full_sum <- sum(data)
138+
row_sum <- rowSums(data)
139+
col_sum <- colSums(data)
140+
expected <- outer(row_sum, col_sum) / full_sum
141+
142+
n_levels <- nrow(data)
143+
w <- make_weighting_matrix(weighting, n_levels)
144+
145+
n_disagree <- sum(w * data)
146+
n_chance <- sum(w * expected)
147+
148+
1 - n_disagree / n_chance
112149
}
113150

114-
kap_binary <- function(data) {
151+
make_weighting_matrix <- function(weighting, n_levels) {
152+
validate_weighting(weighting)
115153

116-
n <- sum(data)
154+
if (is_no_weighting(weighting)) {
155+
# [n_levels x n_levels], 0 on diagonal, 1 on off-diagonal
156+
w <- matrix(1L, nrow = n_levels, ncol = n_levels)
157+
diag(w) <- 0L
158+
return(w)
159+
}
117160

118-
.row_sums <- rowSums(data)
119-
.col_sums <- colSums(data)
161+
if (is_linear_weighting(weighting)) {
162+
power <- 1L
163+
} else {
164+
# quadratic
165+
power <- 2L
166+
}
167+
168+
# [n_levels x n_levels], 0 on diagonal, increasing weighting on off-diagonal
169+
w <- rlang::seq2(0L, n_levels - 1L)
170+
w <- matrix(w, nrow = n_levels, ncol = n_levels)
171+
w <- abs(w - t(w)) ^ power
172+
173+
w
174+
}
175+
176+
# ------------------------------------------------------------------------------
120177

121-
expected_acc <- sum( (.row_sums * .col_sums) / n ) / n
178+
validate_weighting <- function(x) {
179+
if (!rlang::is_string(x)) {
180+
abort("`weighting` must be a string.")
181+
}
122182

123-
obs_acc <- accuracy_binary(data)
183+
ok <- is_no_weighting(x) ||
184+
is_linear_weighting(x) ||
185+
is_quadratic_weighting(x)
124186

125-
(obs_acc - expected_acc) / (1 - expected_acc)
187+
if (!ok) {
188+
abort("`weighting` must be 'none', 'linear', or 'quadratic'.")
189+
}
190+
191+
invisible(x)
192+
}
193+
is_no_weighting <- function(x) {
194+
identical(x, "none")
195+
}
196+
is_linear_weighting <- function(x) {
197+
identical(x, "linear")
198+
}
199+
is_quadratic_weighting <- function(x) {
200+
identical(x, "quadratic")
126201
}

man/kap.Rd

Lines changed: 18 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/pycompare/generate-pycompare.R

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ save_metric_results <- function(nm, fn, ..., average = c("macro", "micro", "weig
1616
res <- c(res, res2)
1717

1818
names(res) <- c("binary", average)
19-
saveRDS(res, paste0("tests/pycompare/py-", nm))
19+
saveRDS(res, paste0("tests/pycompare/py-", nm), version = 2)
2020
}
2121

2222
# ------------------------------------------------------------------------------
@@ -32,19 +32,23 @@ py_mcc <- list(
3232
binary = skmetrics$matthews_corrcoef(two_class_example$truth, two_class_example$predicted),
3333
multiclass = skmetrics$matthews_corrcoef(hpc_cv$obs, hpc_cv$pred)
3434
)
35-
saveRDS(py_mcc, "tests/pycompare/py-mcc")
35+
saveRDS(py_mcc, "tests/pycompare/py-mcc", version = 2)
3636

3737
# Accuracy
3838
py_accuracy <- list(
3939
binary = skmetrics$accuracy_score(two_class_example$truth, two_class_example$predicted),
4040
multiclass = skmetrics$accuracy_score(hpc_cv$obs, hpc_cv$pred)
4141
)
42-
saveRDS(py_accuracy, "tests/pycompare/py-accuracy")
42+
saveRDS(py_accuracy, "tests/pycompare/py-accuracy", version = 2)
4343

4444
# Kappa
4545
py_kap <- list(
46-
binary = skmetrics$cohen_kappa_score(two_class_example$truth, two_class_example$predicted),
47-
multiclass = skmetrics$cohen_kappa_score(hpc_cv$obs, hpc_cv$pred)
46+
binary = skmetrics$cohen_kappa_score(two_class_example$truth, two_class_example$predicted, levels(two_class_example$truth)),
47+
multiclass = skmetrics$cohen_kappa_score(hpc_cv$obs, hpc_cv$pred, labels = levels(hpc_cv$obs)),
48+
linear_binary = skmetrics$cohen_kappa_score(two_class_example$truth, two_class_example$predicted, levels(two_class_example$truth), weights = "linear"),
49+
linear_multiclass = skmetrics$cohen_kappa_score(hpc_cv$obs, hpc_cv$pred, labels = levels(hpc_cv$obs), weights = "linear"),
50+
quadratic_binary = skmetrics$cohen_kappa_score(two_class_example$truth, two_class_example$predicted, levels(two_class_example$truth), weights = "quadratic"),
51+
quadratic_multiclass = skmetrics$cohen_kappa_score(hpc_cv$obs, hpc_cv$pred, labels = levels(hpc_cv$obs), weights = "quadratic")
4852
)
49-
saveRDS(py_kap, "tests/pycompare/py-kap")
53+
saveRDS(py_kap, "tests/pycompare/py-kap", version = 2)
5054

tests/pycompare/py-accuracy

-3 Bytes
Binary file not shown.

tests/pycompare/py-f_meas

-3 Bytes
Binary file not shown.

tests/pycompare/py-f_meas_beta_.5

-3 Bytes
Binary file not shown.

tests/pycompare/py-kap

50 Bytes
Binary file not shown.

tests/pycompare/py-mcc

-3 Bytes
Binary file not shown.

tests/pycompare/py-precision

-3 Bytes
Binary file not shown.

0 commit comments

Comments (0)