Skip to content

Commit 4ca6395

Browse files
gmbeckergithub-actions[bot]shajoezhu
authored
1041 formats var (#1064)
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Joe Zhu <sha.joe.zhu@gmail.com>
1 parent 466fffb commit 4ca6395

File tree

11 files changed

+1451
-435
lines changed

11 files changed

+1451
-435
lines changed

R/00tabletrees.R

Lines changed: 86 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ setClassUnion("SubsetDef", c("expression", "logical", "integer", "numeric"))
4646

4747
setClassUnion("integerOrNULL", c("NULL", "integer"))
4848
setClassUnion("characterOrNULL", c("NULL", "character"))
49+
setClassUnion("characterOrList", c("list", "character"))
4950

5051
## should XXX [splits, s_values, sval_labels, subset(?)] be a data.frame?
5152
setClass("TreePos", representation(
@@ -60,10 +61,29 @@ validity = function(object) {
6061
}
6162
)
6263

64+
setOldClass(c("FormatList", "list"))
65+
66+
FormatList <- function(..., .list = list(...)) {
67+
if (!is.list(.list)) {
68+
.list <- list(.list)
69+
}
70+
valid <- vapply(.list, is, class2 = "FormatSpec", TRUE)
71+
if (!are(.list, "FormatSpec")) {
72+
stop(
73+
"Attempted to construct FormatList with elements that are not ",
74+
"FormatSpec compatible. This should not happen, please contact ",
75+
"the maintainers."
76+
)
77+
}
78+
79+
class(.list) <- c("FormatList", "list")
80+
.list
81+
}
82+
6383
setClassUnion("functionOrNULL", c("NULL", "function"))
6484
setClassUnion("listOrNULL", c("NULL", "list"))
6585
## TODO (?) make "list" more specific, e.g FormatList, or FunctionList?
66-
setClassUnion("FormatSpec", c("NULL", "character", "function", "list"))
86+
setClassUnion("FormatSpec", c("NULL", "character", "function", "list", "FormatList"))
6787
setClassUnion("ExprOrNULL", c("NULL", "expression"))
6888

6989
setClass("ValueWrapper", representation(
@@ -133,7 +153,7 @@ setClass("Split",
133153
name = "character",
134154
split_label = "character",
135155
split_format = "FormatSpec",
136-
split_na_str = "character",
156+
split_na_str = "characterOrList",
137157
split_label_position = "character",
138158
## NB this is the function which is applied to
139159
## get the content rows for the CHILDREN of this
@@ -633,7 +653,9 @@ setClass("VAnalyzeSplit",
633653
representation(
634654
default_rowlabel = "character",
635655
include_NAs = "logical",
636-
var_label_position = "character"
656+
var_label_position = "character",
657+
row_formats_var = "characterOrNULL",
658+
row_na_strs_var = "characterOrNULL"
637659
)
638660
)
639661

@@ -672,7 +694,9 @@ AnalyzeVarSplit <- function(var,
672694
indent_mod = 0L,
673695
label_pos = "default",
674696
cvar = "",
675-
section_div = NA_character_) {
697+
section_div = NA_character_,
698+
formats_var = NULL,
699+
na_strs_var = NULL) {
676700
check_ok_label(split_label)
677701
label_pos <- match.arg(label_pos, c("default", label_pos_values))
678702
if (!any(nzchar(defrowlab))) {
@@ -701,7 +725,9 @@ AnalyzeVarSplit <- function(var,
701725
page_title_prefix = NA_character_,
702726
child_section_div = section_div,
703727
child_show_colcounts = FALSE,
704-
child_colcount_format = NA_character_
728+
child_colcount_format = NA_character_,
729+
row_formats_var = formats_var,
730+
row_na_strs_var = na_strs_var
705731
) ## no content_extra_args
706732
}
707733

@@ -823,7 +849,9 @@ AnalyzeMultiVars <- function(var,
823849
child_labels = c("default", "topleft", "visible", "hidden"),
824850
child_names = var,
825851
cvar = "",
826-
section_div = NA_character_) {
852+
section_div = NA_character_,
853+
formats_var = NULL,
854+
na_strs_var = NULL) {
827855
## NB we used to resolve to strict TRUE/FALSE for label visibillity
828856
## in this function but that was too greedy for repeated
829857
## analyze calls, so that now occurs in the tabulation machinery
@@ -842,26 +870,59 @@ AnalyzeMultiVars <- function(var,
842870
## split_format = .repoutlst(split_format, nv)
843871
inclNAs <- .repoutlst(inclNAs, nv)
844872
section_div_if_multivar <- if (length(var) > 1) NA_character_ else section_div
845-
pld <- mapply(AnalyzeVarSplit,
846-
var = var,
847-
split_name = child_names,
848-
split_label = split_label,
849-
afun = afun,
850-
defrowlab = defrowlab,
851-
cfun = cfun,
852-
cformat = cformat,
853-
## split_format = split_format,
854-
inclNAs = inclNAs,
855-
MoreArgs = list(
856-
extra_args = extra_args,
857-
indent_mod = indent_mod,
858-
label_pos = show_kidlabs,
859-
split_format = split_format,
860-
split_na_str = split_na_str,
861-
section_div = section_div_if_multivar
862-
), ## rvis),
863-
SIMPLIFY = FALSE
873+
874+
moreargs <- list(
875+
extra_args = extra_args,
876+
indent_mod = indent_mod,
877+
label_pos = show_kidlabs,
878+
section_div = section_div_if_multivar,
879+
formats_var = formats_var,
880+
na_strs_var = na_strs_var
864881
)
882+
mv_list_case <- is.list(split_format) &&
883+
all(var %in% names(split_format)) &&
884+
all(vapply(split_format, is, class2 = "FormatList", TRUE))
885+
if (mv_list_case) { # diff format list for each var
886+
stopifnot(all(var %in% names(split_na_str)))
887+
## split_value does *not* go in more args, not constant across vars
888+
pld <- mapply(
889+
AnalyzeVarSplit,
890+
var = var,
891+
split_name = child_names,
892+
split_label = split_label,
893+
afun = afun,
894+
defrowlab = defrowlab,
895+
cfun = cfun,
896+
cformat = cformat,
897+
## in case they're in the wrong order for some insane reason
898+
split_format = split_format[var],
899+
split_na_str = split_na_str[var],
900+
inclNAs = inclNAs,
901+
MoreArgs = moreargs, ## rvis),
902+
SIMPLIFY = FALSE
903+
)
904+
} else { # not diff lists for each var
905+
## split format goes in more args because its constant across vars
906+
pld <- mapply(
907+
AnalyzeVarSplit,
908+
var = var,
909+
split_name = child_names,
910+
split_label = split_label,
911+
afun = afun,
912+
defrowlab = defrowlab,
913+
cfun = cfun,
914+
cformat = cformat,
915+
inclNAs = inclNAs,
916+
MoreArgs = c(
917+
moreargs,
918+
list(
919+
split_format = split_format,
920+
split_na_str = split_na_str
921+
)
922+
), ## rvis),
923+
SIMPLIFY = FALSE
924+
)
925+
}
865926
} else {
866927
## we're combining existing splits here
867928
pld <- unlist(lapply(.payload, .uncompound))

R/argument_conventions.R

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ gen_args <- function(df, alt_counts_df, spl, pos, tt, tr, verbose, colwidths, ob
9898
#' functions. See [formatters::list_valid_format_labels()] for a list of all available format strings.
9999
#' @param format_na_str (`string`)\cr string which should be displayed when formatted if this cell's value(s)
100100
#' are all `NA`.
101+
#' @param formats_var (`string` or `NULL`)\cr `NULL` (the default) or the name of the list column containing named
102+
#' lists of default formats to use. These will be applied with the same precedence as the `format` argument; i.e.,
103+
#' they will not override formats (other than `"default"`) set within the afun.
104+
#' Cannot be used simultaneously with `format`.
105+
#' @param na_strs_var (`string` or `NULL`)\cr `NULL` (the default) or the name of the list column containing named
106+
#' lists of default NA strings to use. These will be applied with the same precedence as the `format` argument; i.e.,
107+
#' they will not override formats (other than `"default"`) set within the afun.
108+
#' Cannot be used simultaneously with `format`. Cannot be used if `formats_var` is `NULL`.
101109
#' @param indent_mod (`numeric`)\cr modifier for the default indent position for the structure created by this
102110
#' function (subtable, content table, or row) *and all of that structure's children*. Defaults to 0, which
103111
#' corresponds to the unmodified default behavior.
@@ -157,7 +165,7 @@ lyt_args <- function(lyt, var, vars, label, labels_var, varlabels, varnames, spl
157165
var_labels, cvar,
158166
table_names, topleft, align, page_by, page_prefix,
159167
format_na_str, section_div, na_str, show_colcounts,
160-
colcount_format, parent_name) {
168+
colcount_format, parent_name, formats_var, na_strs_var) {
161169
NULL
162170
}
163171

R/colby_constructors.R

Lines changed: 106 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,12 +1067,81 @@ NULL
10671067
#' divider will be overridden by a split-level section divider when
10681068
#' both apply to the same position in the rendered output.
10691069
#'
1070-
#' @inherit split_cols_by return
1071-
#'
1072-
#' @details
1073-
#' When non-`NULL`, `format` is used to specify formats for all generated rows, and can be a character vector, a
1074-
#' function, or a list of functions. It will be repped out to the number of rows once this is calculated during the
1075-
#' tabulation process, but will be overridden by formats specified within `rcell` calls in `afun`.
1070+
#' @details When `length(vars) > 1` and when two calls to `analyze`
1071+
#' are done in sequence (the second with the default `nested =
1072+
#' TRUE`), the analyses will be combined into a multi-variable
1073+
#' analysis that will be reflected in the row structure of the
1074+
#' resulting table. In these cases, the default is to show the
1075+
#' label describing the variable analyzed for each of the
1076+
#' resulting subtables, while that is hidden by default in
1077+
#' one-variable cases.
1078+
#'
1079+
#' # Specifying Default Formatting Behavior
1080+
#'
1081+
#' *Default* formatting behavior for rows generated by `afun` can be
1082+
#' specified by one of `format` or `formats_var`. In both cases, these
1083+
#' default formatting instructions *will not* supersede formatting
1084+
#' specified from within `afun` at either the `rcell` or `in_rows`
1085+
#' call levels; They will only apply to rows/cells whose formatting as
1086+
#' returned by `afun` is either `NULL` or `"default"`. When
1087+
#' non-`NULL`, `format` is used to specify formats for all generated
1088+
#' rows, and can be a character vector, a function, or a list of
1089+
#' functions. It will be repped out to the number of rows once this is
1090+
#' calculated during the tabulation process, but will be overridden by
1091+
#' formats specified within `rcell` calls in `afun`.
1092+
#'
1093+
#' `format` can accept a format label string (see
1094+
#' [formatters::list_valid_format_labels()]), a formatting function, an
1095+
#' unnamed list, or a named list.
1096+
#'
1097+
#' When `format` is an unnamed list - or a named list where not all
1098+
#' values of `vars` appear in the names - its elements will be repped
1099+
#' out to the number of rows generated by `afun` (separately) within
1100+
#' each row facet `afun` is applied within. **This includes recycling
1101+
#' behavior, even in the case where the number of rows is not cleanly
1102+
#' divisible by the number of specified formats**. This behavior is
1103+
#' retained largely for legacy reasons and switching to the new
1104+
#' named-list behavior is advised where applicable.
1105+
#'
1106+
#' When `format` is a named list whose names contain all values in
1107+
#' `vars`, the elements of `format` are taken to be specific to the
1108+
#' analysis of the corresponding variable; this allows us to specify a
1109+
#' multi-variable analysis where e.g., the different variables are
1110+
#' analyzed by the same `afun` but have different levels of
1111+
#' measurement precision (and thus different formatting needs). In
1112+
#' this case the var-specific formatting can be a single format (label
1113+
#' string or function) or can be a named list whose names will be
1114+
#' matched up to those of the rows generated by applying `afun` in
1115+
#' each row facet. Matching of formats to rows is performed the same
1116+
#' as in the `formats_var` case and is described below.
1117+
#'
1118+
#' When `formats_var` is non-`NULL`, it specifies the name of a list
1119+
#' column containing formatting instructions for one or more rows
1120+
#' `afun` will generate when applied within a row facet. This can be
1121+
#' used when the analysis results for a single variable (e.g., `value`
1122+
#' or `AVAL` in long-form data) should be formatted differently within
1123+
#' different row facets (e.g., when faceting on `statistic` or
1124+
#' `PARAMCD`). The value of `df[[formats_var]]` is assumed without
1125+
#' verification to be constant within each row facet `afun` is applied
1126+
#' within, and the first (list) value of the column within the row
1127+
#' facet data will be used.
1128+
#'
1129+
#' In the `formats_var` case as well as the case of `format` being a
1130+
#' named list containing the values of `vars`, after rows are created
1131+
#' during tabulation, the default formats are matched and applied to
1132+
#' them as follows:
1133+
#'
1134+
#' 1. When the generated row's name (as given by `obj_name`) matches
1135+
#' a name in the list, the corresponding default format is applied,
1136+
#' 2. for those without exact matches, the default format whose name
1137+
#' provides *the best partial match* to each row name is applied,
1138+
#' 3. For those without default format names that partially match
1139+
#' the row name, no default format is applied.
1140+
#'
1141+
#' Note carefully that in (2), it is the names of the list of formats
1142+
#' that are partially matching the row names not the other way around.
1143+
#'
1144+
#' # The Analysis Function
10761145
#'
10771146
#' The analysis function (`afun`) should take as its first parameter either `x` or `df`. Whichever of these the
10781147
#' function accepts will change the behavior when tabulation is performed as follows:
@@ -1086,6 +1155,8 @@ NULL
10861155
#' other parameters which, *if and only if* present in the formals, will be passed to the function by the tabulation
10871156
#' machinery. These are listed and described in [additional_fun_params].
10881157
#'
1158+
#' @inherit split_cols_by return
1159+
#'
10891160
#' @note None of the arguments described in [additional_fun_params] can be overridden via `extra_args` or when calling
10901161
#' [make_afun()]. `.N_col` and `.N_total` can be overridden via the `col_counts` argument to [build_table()].
10911162
#' Alternative values for the others must be calculated within `afun` based on a combination of extra arguments and
@@ -1124,7 +1195,9 @@ analyze <- function(lyt,
11241195
table_names = vars,
11251196
parent_name = NULL,
11261197
format = NULL,
1198+
formats_var = NULL,
11271199
na_str = NA_character_,
1200+
na_strs_var = NULL,
11281201
nested = TRUE,
11291202
## can't name this na_rm symbol conflict with possible afuns!!
11301203
inclNAs = FALSE,
@@ -1134,6 +1207,19 @@ analyze <- function(lyt,
11341207
section_div = NA_character_) {
11351208
show_labels <- match.arg(show_labels)
11361209
subafun <- substitute(afun)
1210+
if (!is.null(format) && !is.null(formats_var)) {
1211+
stop(
1212+
"Cannot use 'format' and 'formats_var' arguments at ",
1213+
"the same time. Please choose one method for specifying ",
1214+
"default formatting."
1215+
)
1216+
} else if (is.null(formats_var) && !is.null(na_strs_var)) {
1217+
stop(
1218+
"Cannot use 'na_strs_var' (got ",
1219+
na_strs_var,
1220+
") without using 'formats_var'."
1221+
)
1222+
}
11371223
# R treats a single NA value as a logical atomic. The below
11381224
# maps all the NAs in `var_labels` to NA_character_ required by `Split`
11391225
# and avoids the error when `var_labels` is just c(NA).
@@ -1159,6 +1245,17 @@ analyze <- function(lyt,
11591245
defrowlab <- var_labels
11601246
}
11611247

1248+
## hook up the new hotness
1249+
var_format_lists <- length(vars) > 1 &&
1250+
is.list(format) &&
1251+
all(vars %in% names(format))
1252+
1253+
if (var_format_lists) {
1254+
format <- lapply(format, function(x) FormatList(.list = x))
1255+
if (is.character(na_str)) {
1256+
na_str <- lapply(format, function(x) na_str)
1257+
}
1258+
}
11621259
spl <- AnalyzeMultiVars(vars, var_labels,
11631260
afun = afun,
11641261
split_format = format,
@@ -1170,7 +1267,9 @@ analyze <- function(lyt,
11701267
child_names = table_names,
11711268
child_labels = show_labels,
11721269
section_div = section_div,
1173-
split_name = parent_name
1270+
split_name = parent_name,
1271+
formats_var = formats_var,
1272+
na_strs_var = na_strs_var
11741273
)
11751274

11761275
if (nested && (is(last_rowsplit(lyt), "VAnalyzeSplit") || is(last_rowsplit(lyt), "AnalyzeMultiVars"))) {

0 commit comments

Comments
 (0)