Merge pull request #141 from epiforecasts/model-classification

kathsherratt · web-flow · commit 1fbae27de4df · 2026-01-05T18:53:20.000Z
Better report model classification
diff --git a/R/process-data.R b/R/process-data.R
@@ -8,29 +8,32 @@ source(here("R", "utils-variants.R"))
 
 # Metadata ----------------------------------------------------------------
 # Get classification of model types
-classify_models <- function(file = here("data", "model-classification.csv")) {
+classify_models <- function(file = here("data", "model-classification.csv"),
+                            return_majority = TRUE) {
   methods <- read_csv(file) |>
     pivot_longer(
       -model,
       names_to = "classifier", values_to = "classification"
     ) |>
     filter(!(is.na(classification) | classification == "#N/A")) |>
     group_by(model) |>
-    summarise(
-      agreement = (n_distinct(classification) == 1),
-      classification = names(
-        sort(table(classification), decreasing = TRUE)[1]
-      ),
-      .groups = "drop"
-    ) |>
-    mutate(classification = factor(
-      classification,
-      levels = c(
-        "Agent-based", "Mechanistic",
-        "Semi-mechanistic", "Statistical",
-        "Machine learning", "Other"
-      )
-    ))
+    mutate(raters = n()) |>
+    group_by(model, classification) |>
+    mutate(votes = n(),
+           agreement = votes == raters)
+
+  if (return_majority) {
+    methods <- methods |>
+      group_by(model) |>
+      slice_max(order_by = votes, with_ties = FALSE)
+  }
+
+  methods <- methods |>
+    mutate(classification = ifelse(grepl("EpiExpert", model), "Judgement",
+                                   classification),
+           classification = factor(classification)) |>
+    ungroup()
+
   return(methods)
 }
 
diff --git a/report/results.Rmd b/report/results.Rmd
@@ -64,12 +64,16 @@ print_table1(scores)
 structures <- scores |>
   select(Model, Method, agreement) |>
   distinct()
-
-structure_count <- table(structures$Method)
-structure_count <- structure_count[structure_count > 0]
+structure_count <- structures |> 
+  group_by(Method) |> 
+  summarise(agreement = sum(agreement),
+            n = n(),
+            p = agreement / n)
 ```
 
-We categorised `r structure_count[["Statistical"]]` models as statistical, `r structure_count[["Semi-mechanistic"]]` as semi-mechanistic, `r structure_count[["Mechanistic"]]` as mechanistic, `r structure_count[["Agent-based"]]` as agent-based and `r structure_count[["Other"]]` models that used human judgement forecasting as "other" (Supplementary Table). For `r sum(!structures$agreement)` (`r round(sum(!structures$agreement) / nrow(structures) * 100)`%) models, investigators disagreed on model classification. The majority of 2/3 was used as the final classification, with additional manual review which in all cases retained the majority decision. In the volume of forecasts provided, mechanistic, semi-mechanistic, and statistical models each contributed similar numbers of forecasts with approximately one-third each. Agent-based and "other" models provided fewer forecasts, representing only 1-2% of forecasts. 
+Among 47 models, we categorised 17 as using a mechanistic structure, 12 as statistical, 12 as semi-mechanistic, 3 as agent-based and 3 as using human judgement forecasting (Supplementary Table). Raters disagreed on the classification of 17 (36%) models. Where raters disagreed, the majority (2/3) classification was used as the final classification; additional manual review retained the majority decision in all cases. Raters most often disagreed when classifying semi-mechanistic models, with 8 out of 12 receiving one or more votes for a statistical model structure.
+
+In terms of the volume of forecasts provided, mechanistic, semi-mechanistic, and statistical models each contributed approximately one-third of forecasts. Agent-based and judgement models provided fewer forecasts, representing only 1-2% of forecasts. 
 
 ```{r target-description}
 targets <- scores |>
diff --git a/report/supplement/Supplement.Rmd b/report/supplement/Supplement.Rmd
@@ -78,6 +78,21 @@ table_metadata(scores) |>
   kable(caption = "Model characteristics contributing to the European COVID-19 Forecast Hub, by method used, number of countries targeted, and number of forecasts contributed.")
 ```
 
+```{r classification}
+model_classifier <- classify_models(return_majority = FALSE) |> 
+  distinct(model, classification, raters, votes, agreement) |> 
+  group_by(model) |>
+  mutate(final = classification[which.max(votes)]) |> 
+  pivot_wider(names_from = classification, values_from = votes, values_fill = 0) |> 
+  filter(model %in% unique(scores$Model)) |>
+  select(Model = model, `Final classification` = final,
+         Agreement = agreement, `Total raters` = raters, everything()) |>
+  arrange(Agreement, Model)
+
+model_classifier |> 
+    kable(caption = "Classification of models by number of raters in total and agreement on model structure")
+```
+
 \newpage
 
 # Statistical methods