| 
32 | 32 | #' * Factorize the features that are categorical.  | 
33 | 33 | #' * Add length of stay (c_jail_out - c_jail_in) in the dataset.  | 
34 | 34 | #' * `Pre-processing Resource:` @url https://github.com/propublica/compas-analysis/blob/master/Compas%20Analysis.ipynb  | 
 | 35 | +#'   | 
 | 36 | +#' Note: The 'is_recid' column is excluded from the task's features, as it is a target column.  | 
35 | 37 | #'  | 
36 | 38 | #' @section Metadata:  | 
37 |  | -#' * (integer) age : The age of defendants.  | 
 | 39 | +#' * (integer) age: The age of defendants.  | 
38 | 40 | #' * (factor) c_charge_degree: The charge degree of defendants (F: Felony, M: Misdemeanor).  | 
39 | 41 | #' * (factor) race: The race of defendants.  | 
40 | 42 | #' * (factor) age_cat: The age category of defendants.  | 
 | 
45 | 47 | #'   If they are too far apart, that may indicate an error. If the value is negative,  | 
46 | 48 | #'   that indicates the screening date happened before the arrest date.  | 
47 | 49 | #' * (integer) decile_score: Indicates the risk of recidivism (Min=1, Max=10)  | 
48 |  | -#' * (integer) is_recid: Binary variable indicate whether defendant is rearrested at any time.  | 
49 | 50 | #' * (factor) two_year_recid: Binary variable indicating whether the defendant is rearrested within two years.  | 
50 | 51 | #' * (numeric) length_of_stay: The number of days spent in jail.  | 
51 | 52 | #'  | 
@@ -80,6 +81,7 @@ get_compas_task = function() { # nocov start  | 
80 | 81 |   b = as_data_backend(mlr3fairness::compas)  | 
81 | 82 |   task = mlr3::TaskClassif$new("compas", b, target = "two_year_recid")  | 
82 | 83 |   task$col_roles$pta = "sex"  | 
 | 84 | +  task$col_roles$feature = setdiff(task$feature_names, "is_recid")  | 
83 | 85 |   b$hash = task$man = "mlr3fairness::mlr_tasks_compas"  | 
84 | 86 |   task  | 
85 | 87 | } # nocov end  | 
 | 
0 commit comments