gglite/R/utils.R at e63e39d793c5145e9745622a92a0bc7b760a0349 · yihui/gglite · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#' Classify a Variable's Type
#'
#' Returns `'date'`, `'categorical'`, or `'numeric'` for a given vector, or
#' `'none'` when `NULL`.
#'
#' @param x A vector (or `NULL`).
#' @return A single character string.
#' @noRd
var_type = function(x) {
  if (is.null(x)) return('none')
  if (inherits(x, 'Date') || inherits(x, 'POSIXt')) return('date')
  if (is.character(x) || is.factor(x) || is.logical(x)) return('categorical')
  'numeric'
}

#' Convert a Time Series to a Data Frame
#'
#' Converts a `ts` (or `mts`) object into a data frame suitable for plotting.
#' Univariate series produce columns `time` and `value`; multivariate series
#' are reshaped to long format with columns `time`, `series`, and `value`.
#'
#' @param x A `ts` or `mts` object.
#' @return A list with elements `data` (a data frame) and `aesthetics` (a named
#'   list of default aesthetic mappings).
#' @noRd
ts_to_df = function(x) {
  t = as.numeric(time(x))
  if (NCOL(x) > 1) {
    nms = colnames(x)
    d = data.frame(
      time = rep(t, length(nms)),
      series = factor(rep(nms, each = length(t)), levels = nms),
      value = as.numeric(x)
    )
    list(data = d, aesthetics = list(x = 'time', y = 'value', color = 'series'))
  } else {
    list(
      data = data.frame(time = t, value = as.numeric(x)),
      aesthetics = list(x = 'time', y = 'value')
    )
  }
}

#' Remove NULL Elements from a List
#' @noRd
dropNulls = function(x) x[!vapply(x, is.null, logical(1))]

#' Null-coalescing operator
#' @noRd
`%||%` = function(x, y) if (is.null(x)) y else x

#' Process a Layout Argument (padding, margin, or inset)
#'
#' Convert a scalar or length-4 vector into named G2 layout options.
#' A scalar sets the property directly (e.g., `padding = 20`). A length-4
#' vector sets `Top`, `Right`, `Bottom`, `Left` variants; `NA` values are
#' omitted.
#'
#' @param name Base name: `'padding'`, `'margin'`, or `'inset'`.
#' @param value `NULL`, a scalar, or a length-4 numeric vector.
#' @return A named list of layout options.
#' @noRd
process_layout = function(name, value) {
  if (is.null(value)) return(list())
  if (length(value) == 1) {
    res = list(value)
    names(res) = name
    return(res)
  }
  if (length(value) != 4) stop(
    "'", name, "' must be a scalar or a length-4 vector (top, right, bottom, left)"
  )
  sides = c('Top', 'Right', 'Bottom', 'Left')
  res = setNames(as.list(value), paste0(name, sides))
  dropNulls(lapply(res, function(v) if (is.na(v)) NULL else v))
}

#' Extract Column References from a G2 Inline Data Transform Spec
#'
#' When a layer's `data` is a list with a `transform` key (rather than a data
#' frame), the transform entries can reference data columns via `field`,
#' `fields`, `groupBy`, or `by`. This helper extracts those names so they can
#' be included in the trimming allowlist.
#'
#' @param data A layer `data` value.
#' @return A list of referenced column names, or `NULL`.
#' @noRd
data_transform_vars = function(data) {
  if (!is.list(data) || is.data.frame(data) || is.null(data$transform)) return()
  lapply(data$transform, `[`, c('field', 'fields', 'groupBy', 'by'))
}

#' Collect Variable Names Used in a Chart
#'
#' Gathers all column names referenced by the chart's aesthetic mappings,
#' facet encodings, and the encode, label text, and inline transform fields of
#' any layer that reads from the chart-level data (i.e., layers that do not
#' supply their own data frame).
#'
#' @param chart A `g2` object.
#' @return A list of variable names.
#' @noRd
collect_vars = function(chart) {
  vars = c(chart$aesthetics, chart$facet$encode)
  # Include vars for layers that use the chart-level data. Layers with their
  # own data *frame* bring their own source; layers with a G2 transform spec
  # (a list) or no data at all still read from the chart-level data frame.
  vars2 = lapply(chart$layers, function(layer) {
    if (!is.data.frame(layer$data)) c(
      layer$encode, lapply(layer$labels, `[[`, 'text'),
      data_transform_vars(layer$data)
    )
  })
  unlist(c(vars, vars2))
}

#' Trim a Data Frame to Used Columns
#'
#' Restricts a data frame to only the columns listed in `vars`. If `data` is
#' wrapped in [I()], the `AsIs` class is stripped and the data is returned
#' untrimmed (all columns preserved) — this lets callers opt out of trimming
#' when variables are referenced outside the visible spec, e.g. inside inline
#' JavaScript functions.
#'
#' @param data A data frame (possibly with class `AsIs`), or any other value.
#' @param vars Character vector of column names to keep.
#' @return The (possibly trimmed) data frame, or `data` unchanged when it is
#'   not a data frame or when trimming is not applicable.
#' @noRd
trim_data = function(data, vars) {
  if (inherits(data, 'AsIs'))
    return(structure(data, class = setdiff(class(data), 'AsIs')))
  vars = unlist(vars)
  if (!is.data.frame(data) || !length(vars)) return(data)
  keep = intersect(vars, names(data))
  if (!length(keep) || length(keep) == ncol(data)) return(data)
  data[, keep, drop = FALSE]
}

#' Annotate Data Frames for Column-Major JSON
#'
#' Recursively walks a nested list and wraps any data frame found in a `data`
#' field with `list(type = 'column', value = df)` so that the G2 column-major
#' helper script can convert it client-side.
#'
#' @param x A nested list.
#' @return The annotated list.
#' @noRd
annotate_df = function(x) {
  if (is.data.frame(x) || !is.list(x)) return(x)
  nms = names(x)
  if ('data' %in% nms) {
    if (is.data.frame(d <- x$data)) {
      # Convert Date/POSIXt columns to millisecond timestamps for G2
      for (col in names(d)) {
        if (inherits(d[[col]], 'Date'))
          d[[col]] = as.numeric(d[[col]]) * 86400000
        else if (inherits(d[[col]], 'POSIXt'))
          d[[col]] = as.numeric(d[[col]]) * 1000
      }
      x$data = list(type = 'column', value = d)
    } else if (is.null(d)) x$data = NULL
  }
  idx = setdiff(nms, '')
  idx = if (length(idx)) setdiff(idx, 'data') else seq_along(x)
  for (i in idx) {
    if (is.list(xi <- x[[i]])) x[[i]] = annotate_df(xi)
  }
  x
}

#' Extract Terms from a Formula Expression
#'
#' Recursively extracts variable names from a formula expression. The `+`
#' operator separates terms (e.g., `x1 + x2` yields `c('x1', 'x2')`);
#' any other expression is deparsed as-is.
#'
#' @param expr A language object.
#' @return A character vector of term names.
#' @noRd
extract_terms = function(expr) {
  if (is.name(expr)) return(as.character(expr))
  if (is.call(expr) && identical(expr[[1]], as.name('+')))
    return(c(extract_terms(expr[[2]]), extract_terms(expr[[3]])))
  deparse(expr)
}

#' Convert a One-Sided Formula to a Variable Name
#'
#' Converts a one-sided formula of the form `~ var` to the character string
#' `"var"`. All other values are returned unchanged. This allows users to write
#' aesthetic mappings as `color = ~ species` instead of `color = 'species'`.
#'
#' @param x A value (formula or otherwise).
#' @return A character string if `x` is a one-sided single-term formula;
#'   otherwise `x` unchanged.
#' @noRd
as_var = function(x) {
  if (!inherits(x, 'formula') || length(x) != 2) return(x)
  terms = extract_terms(x[[2]])
  if (length(terms) != 1) stop(
    "Formula '", deparse(x), "' must contain exactly one variable name ",
    "(e.g., `~ var`)"
  )
  terms
}

#' Apply as_var() to Each Element of a List
#' @noRd
as_vars = function(x) lapply(x, as_var)

#' Parse a Formula into Aesthetic and Facet Mappings
#'
#' Interprets an R formula as chart aesthetic mappings:
#' - `y ~ x` maps to `list(x = 'x', y = 'y')`
#' - `~ x` maps to `list(x = 'x')`
#' - `~ x1 + x2 + x3` maps to `list(position = c('x1', 'x2', 'x3'))`
#' - `y ~ x | z` adds faceting by `z` (columns)
#' - `y ~ x | 0 + z` adds faceting by `z` (rows)
#' - `y ~ x | z1 + z2` adds faceting by `z1` (columns) and `z2` (rows)
#'
#' @param f A formula object.
#' @return A list with `aesthetics` (named list) and `facet` (a facet list or
#'   `NULL`).
#' @noRd
parse_formula = function(f) {
  lhs = if (length(f) == 3) f[[2]]
  rhs = if (length(f) == 3) f[[3]] else f[[2]]

  # Extract conditioning (facet) variables from |
  facet_terms = NULL
  if (is.call(rhs) && identical(rhs[[1]], as.name('|'))) {
    facet_terms = extract_terms(rhs[[3]])
    rhs = rhs[[2]]
  }

  rhs_terms = extract_terms(rhs)

  # Build aesthetics
  aesthetics = list()
  if (!is.null(lhs)) aesthetics$y = deparse(lhs)
  if (length(rhs_terms) == 1) {
    aesthetics$x = rhs_terms
  } else if (length(rhs_terms) > 1) {
    aesthetics$position = rhs_terms
  }

  # Build facet: | var → column; | 0 + var → row; | var1 + var2 → both
  facet = NULL
  if (length(facet_terms)) {
    # A leading 0 shifts terms to row (y) instead of column (x)
    nms = if (facet_terms[1] == '0') 'y' else c('x', 'y')
    facet_terms = facet_terms[facet_terms != '0']
    enc = setNames(as.list(facet_terms), head(nms, length(facet_terms)))
    if (length(enc)) facet = list(type = 'facetRect', encode = enc)
  }

  list(aesthetics = aesthetics, facet = facet)
}