microsoft
diff --git a/..Rcheck/00check.log b/..Rcheck/00check.log
Lines changed: 14 additions & 0 deletions
diff --git a/.Rbuildignore b/.Rbuildignore
Lines changed: 3 additions & 0 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
Lines changed: 3 additions & 3 deletions
diff --git a/NEWS.md b/NEWS.md
Lines changed: 3 additions & 1 deletion
diff --git a/R/calculate_IV.R b/R/calculate_IV.R
Lines changed: 67 additions & 34 deletions
diff --git a/R/create_IV.R b/R/create_IV.R
Lines changed: 4 additions & 4 deletions
diff --git a/R/globals.R b/R/globals.R
Lines changed: 2 additions & 1 deletion
diff --git a/R/tm_wordcloud.R b/R/tm_wordcloud.R
Lines changed: 4 additions & 2 deletions
diff --git a/man/create_IV.Rd b/man/create_IV.Rd
Lines changed: 3 additions & 3 deletions
@@ -0,0 +1,14 @@
+* using log directory ‘/home/runner/work/wpa/wpa/..Rcheck’
+* using R version 4.3.3 (2024-02-29)
+* using platform: x86_64-pc-linux-gnu (64-bit)
+* R was compiled by
+    gcc (Ubuntu 13.2.0-23ubuntu3) 13.2.0
+    GNU Fortran (Ubuntu 13.2.0-23ubuntu3) 13.2.0
+* running under: Ubuntu 24.04.2 LTS
+* using session charset: UTF-8
+* using options ‘--no-examples --no-manual --no-vignettes’
+* checking for file ‘./DESCRIPTION’ ... ERROR
+Required fields missing or empty:
+  ‘Author’ ‘Maintainer’
+* DONE
+Status: 1 ERROR
@@ -31,3 +31,6 @@ lastMiKTeXException
 ^cran-comments\.md$
 ^CRAN-RELEASE$
 ^CRAN-SUBMISSION$
+
+# Reverse dependency checks
+^revdep/
@@ -8,3 +8,4 @@ Meta
 .RDataTmp
 SQ-overview.html
 wpa export 20200427_131327.png
+revdep/
@@ -1,3 +1,3 @@
-Version: 1.9.2
-Date: 2025-05-28 14:01:14 UTC
-SHA: d2bbd2a998182433f85e93dc51d36460b43e2ef7
+Version: 1.10.0
+Date: 2025-08-26 11:43:05 UTC
+SHA: a4d4ea06c5de4ed451fcd8ab05d94a248ec8bda8
@@ -1,8 +1,10 @@
 # wpa 1.10.0
 
 - Refactored codebase to use latest dplyr syntax
-- Added support for logical outcome variables in `create_IV()`
+- Added support for logical outcome and categorical predictor variables in `create_IV()`
 - Added detection of text missing values in `validation_report()`
+- Enhanced flexibility in display control for `create_dt()`
+- Improved test coverage
 
 # wpa 1.9.2
 
 
@@ -45,23 +45,34 @@ calculate_IV <- function(data,
       )
   }
 
-  # Compute q
-  q <- stats::quantile(
-    pred_var,
-    probs = c(1:(bins - 1) / bins),
-    na.rm = TRUE,
-    type = 3
-    )
+  # Check if predictor is categorical (character or factor)
+  if(is.character(pred_var) || is.factor(pred_var)){
+    
+    # For categorical variables, use the categories themselves as intervals
+    unique_vals <- unique(pred_var[!is.na(pred_var)])
+    intervals <- as.numeric(as.factor(pred_var))
+    
+  } else {
+    
+    # For numeric variables, use quantile-based binning (original logic)
+    # Compute q
+    q <- stats::quantile(
+      pred_var,
+      probs = c(1:(bins - 1) / bins),
+      na.rm = TRUE,
+      type = 3
+      )
 
-  # Compute cuts
-  cuts <- unique(q)
+    # Compute cuts
+    cuts <- unique(q)
 
-  # Compute intervals
-  intervals <-
-    findInterval(
-      pred_var,
-      vec = cuts,
-      rightmost.closed = FALSE)
+    # Compute intervals
+    intervals <-
+      findInterval(
+        pred_var,
+        vec = cuts,
+        rightmost.closed = FALSE)
+  }
 
   # Compute cut_table
   cut_table <-
@@ -70,23 +81,45 @@ calculate_IV <- function(data,
       outc_var) %>%
     as.data.frame.matrix()
 
-  ## get min/max
-  cut_table_2 <-
-    data.frame(
-    var = pred_var,
-    intervals
-  ) %>%
-    group_by(intervals) %>%
-    summarise(
-      min = min(var, na.rm = TRUE) %>% round(digits = 1),
-      max = max(var, na.rm = TRUE) %>% round(digits = 1),
-      n = n(),
-      .groups = "drop"
+  ## get min/max or category labels
+  if(is.character(pred_var) || is.factor(pred_var)){
+    
+    # For categorical variables, use the actual category names
+    cut_table_2 <-
+      data.frame(
+        var = pred_var,
+        intervals
+      ) %>%
+      group_by(intervals) %>%
+      summarise(
+        category = first(var),  # Get the actual category name
+        n = n(),
+        .groups = "drop"
+      ) %>%
+      mutate(!!sym(predictor) := category) %>%
+      mutate(percentage = n / sum(n)) %>%
+      select(!!sym(predictor), intervals, n, percentage)
+      
+  } else {
+    
+    # For numeric variables, use min/max ranges (original logic)
+    cut_table_2 <-
+      data.frame(
+      var = pred_var,
+      intervals
     ) %>%
-    mutate(!!sym(predictor) :=
-    glue::glue("[{round(min, digits = 1)},{round(max, digits = 1)}]")) %>%
-    mutate(percentage = n / sum(n)) %>%
-    select(!!sym(predictor), intervals, n, percentage)
+      group_by(intervals) %>%
+      summarise(
+        min = min(var, na.rm = TRUE) %>% round(digits = 1),
+        max = max(var, na.rm = TRUE) %>% round(digits = 1),
+        n = n(),
+        .groups = "drop"
+      ) %>%
+      mutate(!!sym(predictor) :=
+      glue::glue("[{round(min, digits = 1)},{round(max, digits = 1)}]")) %>%
+      mutate(percentage = n / sum(n)) %>%
+      select(!!sym(predictor), intervals, n, percentage)
+  }
 
   # Create variables that are double
   cut_table_1 <- as.numeric(cut_table$`1`)
@@ -138,8 +171,8 @@ calculate_IV <- function(data,
 #' @param data Data frame containing the data.
 #' @param outcome String containing the name of the outcome variable.
 #' @param predictors Character vector containing the names of the predictor
-#'   variables. If `NULL` (default) is supplied, all numeric variables in the
-#'   data will be used.
+#'   variables. If `NULL` (default) is supplied, all numeric, character, and
+#'   factor variables in the data will be used.
 #' @param bins Numeric value representing the number of bins to use. Defaults to
 #'   10.
 #'
@@ -162,7 +195,7 @@ map_IV <- function(data,
       data %>%
       select(-!!sym(outcome)) %>%
       select(
-        where(is.numeric)
+        where(function(x) is.numeric(x) || is.character(x) || is.factor(x))
       ) %>%
       names()
   }
 
@@ -7,12 +7,12 @@
 #'
 #' @description
 #' Specify an outcome variable and return IV outputs.
-#' All numeric variables in the dataset are used as predictor variables.
+#' All numeric, character, and factor variables in the dataset are used as predictor variables.
 #'
 #' @param data A Person Query dataset in the form of a data frame.
 #' @param predictors A character vector specifying the columns to be used as
-#'   predictors. Defaults to NULL, where all numeric vectors in the data will be
-#'   used as predictors.
+#'   predictors. Defaults to NULL, where all numeric, character, and factor
+#'   vectors in the data will be used as predictors.
 #' @param outcome A string specifying a binary variable, i.e. can only contain
 #' the values 1 or 0, or a logical variable (TRUE/FALSE). Logical variables will
 #' be automatically converted to binary (TRUE to 1, FALSE to 0).
@@ -109,7 +109,7 @@ create_IV <- function(data,
     train <-
       data %>%
       rename(outcome = outcome) %>%
-      select(where(is.numeric)) %>%
+      select(where(function(x) is.numeric(x) || is.character(x) || is.factor(x))) %>%
       tidyr::drop_na()
 
   } else {
 
@@ -266,6 +266,7 @@ utils::globalVariables(
     "degree",
     "eigenvector",
     "node_size",
-    "pagerank"
+    "pagerank",
+    "category"
     )
 )
@@ -42,10 +42,12 @@
 #'
 #' @import dplyr
 #' @examples
-#' tm_wordcloud(mt_data, keep = 30)
+#' mt_data_mini <- mt_data[sample(1:nrow(mt_data), 500), ]
+#'
+#' tm_wordcloud(mt_data_mini, keep = 30)
 #'
 #' # Removing stopwords
-#' tm_wordcloud(mt_data, keep = 30, stopwords = c("weekly", "update"))
+#' tm_wordcloud(mt_data_mini, keep = 30, stopwords = c("weekly", "update"))
 #'
 #' @family Text-mining
 #'
Original file line number	Diff line number	Diff line change
`@@ -266,6 +266,7 @@ utils::globalVariables(`
`266`	`266`	`"degree",`
`267`	`267`	`"eigenvector",`
`268`	`268`	`"node_size",`
`269`		`- "pagerank"`
	`269`	`+ "pagerank",`
	`270`	`+ "category"`
`270`	`271`	`)`
`271`	`272`	`)`