1+ # ' Superseded helper for lumping factor levels
2+ # '
3+ # ' @description
4+ # ' `r lifecycle::badge("superseded")`
5+ # '
6+ # ' `fct_lump()` automatically chooses between [fct_lump_lowfreq()],
7+ # ' [fct_lump_n()], and [fct_lump_prop()] based on which of `n` and `prop`
8+ # ' is supplied. It is kept for backward compatibility, but is
9+ # ' superseded and no longer recommended for new code.
10+ # '
11+ # ' For new code, prefer the more explicit helpers:
12+ # ' [fct_lump_min()], [fct_lump_prop()], [fct_lump_n()], and
13+ # ' [fct_lump_lowfreq()].
14+ # '
15+ # ' @inheritParams fct_lump_min
16+ # ' @keywords internal
17+ # ' @examples
18+ # ' x <- factor(letters[rpois(100, 5)])
19+ # ' table(x)
20+ # ' table(fct_lump(x, n = 3))
21+ # ' @export
22+ fct_lump <- function (
23+ f ,
24+ n ,
25+ prop ,
26+ w = NULL ,
27+ other_level = " Other" ,
28+ ties.method = c(" min" , " average" , " first" , " last" , " random" , " max" )
29+ ) {
# Dispatch on which of `n` / `prop` the caller actually supplied; neither has
# a default, so missing() is what distinguishes the cases.
30+ if (missing(n ) && missing(prop )) {
# Neither supplied: delegate to fct_lump_lowfreq().
31+ fct_lump_lowfreq(f , w = w , other_level = other_level )
32+ } else if (missing(prop )) {
# Only `n` supplied: delegate to fct_lump_n(). This is the only branch that
# forwards `ties.method`.
33+ fct_lump_n(
34+ f ,
35+ n ,
36+ w = w ,
37+ other_level = other_level ,
38+ ties.method = ties.method
39+ )
40+ } else if (missing(n )) {
# Only `prop` supplied: delegate to fct_lump_prop().
41+ fct_lump_prop(f , prop , w = w , other_level = other_level )
42+ } else {
# Both supplied: the request is ambiguous, so abort rather than pick one.
43+ cli :: cli_abort(" Must supply only one of {.arg n} and {.arg prop}." )
44+ }
45+ }
46+
147# ' Lump uncommon factor levels together into "other"
248# '
349# ' @description
4- # ' A family for lumping together levels that meet some criteria.
50+ # ' A family of functions to lump together levels based on different criteria:
51+ # '
552# ' * `fct_lump_min()`: lumps levels that appear fewer than `min` times.
653# ' * `fct_lump_prop()`: lumps levels that appear no more than
7- # ' `prop * n` times.
54+ # ' `prop * n` times.
855# ' * `fct_lump_n()`: lumps all levels except for the `n` most frequent
9- # ' (or least frequent if `n < 0`)
56+ # ' (or least frequent if `n < 0`)
1057# ' * `fct_lump_lowfreq()`: lumps together the least frequent levels, ensuring
11- # ' that "other" is still the smallest level.
12- # '
13- # ' `fct_lump()` exists primarily for historical reasons, as it automatically
14- # ' picks between these different methods depending on its arguments.
15- # ' We no longer recommend that you use it.
58+ # ' that "other" is still the smallest level.
1659# '
1760# ' @param f A factor (or character vector).
1861# ' @param n Positive `n` preserves the most common `n` values.
1962# ' Negative `n` preserves the least common `-n` values.
20- # ' It there are ties, you will get at least `abs(n)` values.
21- # ' @param prop Positive `prop` lumps values which do not appear at least
63+ # ' If there are ties, you will get at least `abs(n)` values.
64+ # ' @param prop Positive `prop` lumps values which do not appear at least
2265# ' `prop` of the time. Negative `prop` lumps values that
2366# ' appear more than `-prop` of the time.
2467# ' @param min Preserve levels that appear at least `min` number of times.
2568# ' @param w An optional numeric vector giving weights for frequency of
26- # ' each value (not level) in f .
69+ # ' each value (not level) in `f` .
2770# ' @param other_level Value of level used for "other" values. Always
2871# ' placed at end of levels.
2972# ' @param ties.method A character string specifying how ties are
3073# ' treated. See [rank()] for details.
31- # ' @export
74+ # '
75+ # ' @name fct_lump_helpers
3276# ' @seealso [fct_other()] to convert specified levels to other.
3377# ' @examples
3478# ' x <- factor(rep(LETTERS[1:9], times = c(40, 10, 5, 27, 1, 1, 1, 1, 1)))
4589# ' x |>
4690# ' fct_lump_lowfreq() |>
4791# ' table()
48- # '
49- # ' x <- factor(letters[rpois(100, 5)])
50- # ' x
51- # ' table(x)
52- # ' table(fct_lump_lowfreq(x))
53- # '
54- # ' # Use positive values to collapse the rarest
55- # ' fct_lump_n(x, n = 3)
56- # ' fct_lump_prop(x, prop = 0.1)
57- # '
58- # ' # Use negative values to collapse the most common
59- # ' fct_lump_n(x, n = -3)
60- # ' fct_lump_prop(x, prop = -0.1)
61- # '
62- # ' # Use weighted frequencies
63- # ' w <- c(rep(2, 50), rep(1, 50))
64- # ' fct_lump_n(x, n = 5, w = w)
65- # '
66- # ' # Use ties.method to control how tied factors are collapsed
67- # ' fct_lump_n(x, n = 6)
68- # ' fct_lump_n(x, n = 6, ties.method = "max")
69- # '
70- # ' # Use fct_lump_min() to lump together all levels with fewer than `n` values
71- # ' table(fct_lump_min(x, min = 10))
72- # ' table(fct_lump_min(x, min = 15))
73- fct_lump <- function (
74- f ,
75- n ,
76- prop ,
77- w = NULL ,
78- other_level = " Other" ,
79- ties.method = c(" min" , " average" , " first" , " last" , " random" , " max" )
80- ) {
81- if (missing(n ) && missing(prop )) {
82- fct_lump_lowfreq(f , w = w , other_level = other_level )
83- } else if (missing(prop )) {
84- fct_lump_n(
85- f ,
86- n ,
87- w = w ,
88- other_level = other_level ,
89- ties.method = ties.method
90- )
91- } else if (missing(n )) {
92- fct_lump_prop(f , prop , w = w , other_level = other_level )
93- } else {
94- cli :: cli_abort(" Must supply only one of {.arg n} and {.arg prop}." )
95- }
96- }
92+ NULL
9793
9894# ' @export
99- # ' @rdname fct_lump
95+ # ' @rdname fct_lump_helpers
10096fct_lump_min <- function (f , min , w = NULL , other_level = " Other" ) {
10197 f <- check_factor(f )
10298 check_number_decimal(min , min = 0 )
@@ -107,7 +103,7 @@ fct_lump_min <- function(f, min, w = NULL, other_level = "Other") {
107103}
108104
109105# ' @export
110- # ' @rdname fct_lump
106+ # ' @rdname fct_lump_helpers
111107fct_lump_prop <- function (f , prop , w = NULL , other_level = " Other" ) {
112108 f <- check_factor(f )
113109 check_number_decimal(prop )
@@ -129,7 +125,7 @@ fct_lump_prop <- function(f, prop, w = NULL, other_level = "Other") {
129125}
130126
131127# ' @export
132- # ' @rdname fct_lump
128+ # ' @rdname fct_lump_helpers
133129fct_lump_n <- function (
134130 f ,
135131 n ,
@@ -154,7 +150,7 @@ fct_lump_n <- function(
154150}
155151
156152# ' @export
157- # ' @rdname fct_lump
153+ # ' @rdname fct_lump_helpers
158154fct_lump_lowfreq <- function (f , w = NULL , other_level = " Other" ) {
159155 f <- check_factor(f )
160156 check_string(other_level , allow_na = TRUE )
0 commit comments