Skip to content

Commit edfecda

Browse files
Document fct_lump() in its own help topic and clarify superseded status (#388)
Fixes #355. --------- Co-authored-by: Hadley Wickham <[email protected]>
1 parent 97ace68 commit edfecda

File tree

5 files changed

+154
-143
lines changed

5 files changed

+154
-143
lines changed

R/lump.R

Lines changed: 61 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,78 @@
1+
#' Superseded helper for lumping factor levels
2+
#'
3+
#' @description
4+
#' `r lifecycle::badge("superseded")`
5+
#'
6+
#' `fct_lump()` automatically chooses between [fct_lump_min()],
7+
#' [fct_lump_prop()], [fct_lump_n()], and [fct_lump_lowfreq()] based on
8+
#' its arguments. It is kept for backward compatibility, but is
9+
#' superseded and no longer recommended for new code.
10+
#'
11+
#' For new code, prefer the more explicit helpers:
12+
#' [fct_lump_min()], [fct_lump_prop()], [fct_lump_n()], and
13+
#' [fct_lump_lowfreq()].
14+
#'
15+
#' @inheritParams fct_lump_min
16+
#' @keywords internal
17+
#' @examples
18+
#' x <- factor(letters[rpois(100, 5)])
19+
#' table(x)
20+
#' table(fct_lump(x, n = 3))
21+
#' @export
22+
fct_lump <- function(
23+
f,
24+
n,
25+
prop,
26+
w = NULL,
27+
other_level = "Other",
28+
ties.method = c("min", "average", "first", "last", "random", "max")
29+
) {
30+
if (missing(n) && missing(prop)) {
31+
fct_lump_lowfreq(f, w = w, other_level = other_level)
32+
} else if (missing(prop)) {
33+
fct_lump_n(
34+
f,
35+
n,
36+
w = w,
37+
other_level = other_level,
38+
ties.method = ties.method
39+
)
40+
} else if (missing(n)) {
41+
fct_lump_prop(f, prop, w = w, other_level = other_level)
42+
} else {
43+
cli::cli_abort("Must supply only one of {.arg n} and {.arg prop}.")
44+
}
45+
}
46+
147
#' Lump uncommon factor levels together into "other"
248
#'
349
#' @description
4-
#' A family for lumping together levels that meet some criteria.
50+
#' A family of functions to lump together levels based on different criteria:
51+
#'
552
#' * `fct_lump_min()`: lumps levels that appear fewer than `min` times.
653
#' * `fct_lump_prop()`: lumps levels that appear fewer than (or equal to)
7-
#' `prop * n` times.
54+
#' `prop * n` times.
855
#' * `fct_lump_n()`: lumps all levels except for the `n` most frequent
9-
#' (or least frequent if `n < 0`)
56+
#' (or least frequent if `n < 0`)
1057
#' * `fct_lump_lowfreq()`: lumps together the least frequent levels, ensuring
11-
#' that "other" is still the smallest level.
12-
#'
13-
#' `fct_lump()` exists primarily for historical reasons, as it automatically
14-
#' picks between these different methods depending on its arguments.
15-
#' We no longer recommend that you use it.
58+
#' that "other" is still the smallest level.
1659
#'
1760
#' @param f A factor (or character vector).
1861
#' @param n Positive `n` preserves the most common `n` values.
1962
#' Negative `n` preserves the least common `-n` values.
20-
#' It there are ties, you will get at least `abs(n)` values.
21-
#' @param prop Positive `prop` lumps values which do not appear at least
63+
#' If there are ties, you will get at least `abs(n)` values.
64+
#' @param prop Positive `prop` lumps values which do not appear at least
2265
#' `prop` of the time. Negative `prop` lumps values that
2366
#' appear more than `-prop` of the time.
2467
#' @param min Preserve levels that appear at least `min` number of times.
2568
#' @param w An optional numeric vector giving weights for frequency of
26-
#' each value (not level) in f.
69+
#' each value (not level) in `f`.
2770
#' @param other_level Value of level used for "other" values. Always
2871
#' placed at end of levels.
2972
#' @param ties.method A character string specifying how ties are
3073
#' treated. See [rank()] for details.
31-
#' @export
74+
#'
75+
#' @name fct_lump_helpers
3276
#' @seealso [fct_other()] to convert specified levels to other.
3377
#' @examples
3478
#' x <- factor(rep(LETTERS[1:9], times = c(40, 10, 5, 27, 1, 1, 1, 1, 1)))
@@ -45,58 +89,10 @@
4589
#' x |>
4690
#' fct_lump_lowfreq() |>
4791
#' table()
48-
#'
49-
#' x <- factor(letters[rpois(100, 5)])
50-
#' x
51-
#' table(x)
52-
#' table(fct_lump_lowfreq(x))
53-
#'
54-
#' # Use positive values to collapse the rarest
55-
#' fct_lump_n(x, n = 3)
56-
#' fct_lump_prop(x, prop = 0.1)
57-
#'
58-
#' # Use negative values to collapse the most common
59-
#' fct_lump_n(x, n = -3)
60-
#' fct_lump_prop(x, prop = -0.1)
61-
#'
62-
#' # Use weighted frequencies
63-
#' w <- c(rep(2, 50), rep(1, 50))
64-
#' fct_lump_n(x, n = 5, w = w)
65-
#'
66-
#' # Use ties.method to control how tied factors are collapsed
67-
#' fct_lump_n(x, n = 6)
68-
#' fct_lump_n(x, n = 6, ties.method = "max")
69-
#'
70-
#' # Use fct_lump_min() to lump together all levels with fewer than `n` values
71-
#' table(fct_lump_min(x, min = 10))
72-
#' table(fct_lump_min(x, min = 15))
73-
fct_lump <- function(
74-
f,
75-
n,
76-
prop,
77-
w = NULL,
78-
other_level = "Other",
79-
ties.method = c("min", "average", "first", "last", "random", "max")
80-
) {
81-
if (missing(n) && missing(prop)) {
82-
fct_lump_lowfreq(f, w = w, other_level = other_level)
83-
} else if (missing(prop)) {
84-
fct_lump_n(
85-
f,
86-
n,
87-
w = w,
88-
other_level = other_level,
89-
ties.method = ties.method
90-
)
91-
} else if (missing(n)) {
92-
fct_lump_prop(f, prop, w = w, other_level = other_level)
93-
} else {
94-
cli::cli_abort("Must supply only one of {.arg n} and {.arg prop}.")
95-
}
96-
}
92+
NULL
9793

9894
#' @export
99-
#' @rdname fct_lump
95+
#' @rdname fct_lump_helpers
10096
fct_lump_min <- function(f, min, w = NULL, other_level = "Other") {
10197
f <- check_factor(f)
10298
check_number_decimal(min, min = 0)
@@ -107,7 +103,7 @@ fct_lump_min <- function(f, min, w = NULL, other_level = "Other") {
107103
}
108104

109105
#' @export
110-
#' @rdname fct_lump
106+
#' @rdname fct_lump_helpers
111107
fct_lump_prop <- function(f, prop, w = NULL, other_level = "Other") {
112108
f <- check_factor(f)
113109
check_number_decimal(prop)
@@ -129,7 +125,7 @@ fct_lump_prop <- function(f, prop, w = NULL, other_level = "Other") {
129125
}
130126

131127
#' @export
132-
#' @rdname fct_lump
128+
#' @rdname fct_lump_helpers
133129
fct_lump_n <- function(
134130
f,
135131
n,
@@ -154,7 +150,7 @@ fct_lump_n <- function(
154150
}
155151

156152
#' @export
157-
#' @rdname fct_lump
153+
#' @rdname fct_lump_helpers
158154
fct_lump_lowfreq <- function(f, w = NULL, other_level = "Other") {
159155
f <- check_factor(f)
160156
check_string(other_level, allow_na = TRUE)

_pkgdown.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ reference:
3737
contents:
3838
- fct_anon
3939
- fct_collapse
40-
- fct_lump
40+
- fct_lump_min
4141
- fct_other
4242
- fct_recode
4343
- fct_relabel

man/fct_inorder.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/fct_lump.Rd

Lines changed: 14 additions & 76 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)