Skip to content

Commit 6ac9d79

Browse files
Changed get_cluster_fill_counts() to use tidyselect
1 parent e0f91ea commit 6ac9d79

File tree

3 files changed

+27
-33
lines changed

3 files changed

+27
-33
lines changed

DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Imports:
3333
rjags,
3434
stringr,
3535
tidyr,
36+
tidyselect
3637
Suggests:
3738
knitr,
3839
rmarkdown,

R/cluster_format.R

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -65,51 +65,40 @@ format_template_data <- function(template) {
6565
}
6666

6767

68-
#' get_cluster_fill_counts
68+
#' Get Cluster Fill Counts
6969
#'
7070
#' `get_cluster_fill_counts()` creates a data frame that shows the number of
7171
#' graphs in each cluster for each input document.
7272
#'
73-
#' @param df A data frame with columns `writer`, `doc`, and `cluster`. Each
74-
#' row corresponding to a graph and lists the writer of that graph, the document
75-
#' from which the graph was obtained, and the cluster to which that graph is assigned.
76-
#' @return A dataframe of cluster fill counts for each document in the input data frame.
77-
#'
73+
#' @param df A data frame with columns `docname` and `cluster`. Each row
74+
#' corresponds to a graph and lists the document from which the graph was
75+
#' obtained, and the cluster to which that graph is assigned. Optionally, the
76+
#' data frame might also have `writer` and `doc` columns. If present, `writer`
77+
#' lists the writer ID of each document and `doc` is an identifier to
78+
#' distinguish between different documents from the same writer.
79+
#' @return A data frame of cluster fill counts for each document in the input
80+
#' data frame.
81+
#'
7882
#' @examples
79-
#' writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
8083
#' docname <- c(rep('doc1',20), rep('doc2', 20), rep('doc3', 20))
84+
#' writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
8185
#' doc <- c(rep(1, 20), rep(2, 20), rep(3, 20))
8286
#' cluster <- sample(3, 60, replace=TRUE)
8387
#' df <- data.frame(docname, writer, doc, cluster)
8488
#' get_cluster_fill_counts(df)
85-
#'
89+
#'
8690
#' @export
8791
#' @md
8892
get_cluster_fill_counts <- function(df) {
8993
docname <- writer <- doc <- cluster <- n <- NULL
9094

91-
if (('writer' %in% colnames(df)) && ('doc' %in% colnames(df))) {
92-
# count number of graphs in each cluster for each writer
93-
cluster_fill_counts <- df %>%
94-
dplyr::group_by(docname, writer, doc, cluster) %>%
95-
dplyr::summarise(n = dplyr::n()) %>%
96-
dplyr::mutate(n = as.integer(n)) %>%
97-
tidyr::pivot_wider(names_from = cluster, values_from = n, values_fill = 0)
98-
99-
# sort columns
100-
cols <- c(colnames(cluster_fill_counts[, c(1, 2, 3)]), sort(as.numeric(colnames(cluster_fill_counts[, -c(1, 2, 3)]))))
101-
cluster_fill_counts <- cluster_fill_counts[, cols]
102-
} else {
103-
cluster_fill_counts <- df %>%
104-
dplyr::group_by(docname, cluster) %>%
95+
# count number of graphs in each cluster for each document
96+
cluster_fill_counts <- df %>%
97+
dplyr::group_by(dplyr::pick(tidyselect::any_of(c("docname", "writer", "doc", "cluster")))) %>%
10598
dplyr::summarise(n = dplyr::n()) %>%
10699
dplyr::mutate(n = as.integer(n)) %>%
107-
tidyr::pivot_wider(names_from = cluster, values_from = n, values_fill = 0)
108-
109-
# sort columns
110-
cols <- c(colnames(cluster_fill_counts[, c(1)]), sort(as.numeric(colnames(cluster_fill_counts[, -c(1)]))))
111-
cluster_fill_counts <- cluster_fill_counts[, cols]
112-
}
100+
tidyr::pivot_wider(names_from = cluster, values_from = n, values_fill = 0) %>%
101+
dplyr::select(tidyselect::any_of(c("docname", "writer", "doc")), tidyselect::everything())
113102

114103
return(cluster_fill_counts)
115104
}

man/get_cluster_fill_counts.Rd

Lines changed: 9 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)