@@ -65,51 +65,40 @@ format_template_data <- function(template) {
6565}
6666
6767
#' Get Cluster Fill Counts
#'
#' `get_cluster_fill_counts()` creates a data frame that shows the number of
#' graphs in each cluster for each input document.
#'
#' @param df A data frame with columns `docname` and `cluster`. Each row
#'   corresponds to a graph and lists the document from which the graph was
#'   obtained and the cluster to which that graph is assigned. Optionally, the
#'   data frame might also have `writer` and `doc` columns. If present, `writer`
#'   lists the writer ID of each document and `doc` is an identifier to
#'   distinguish between different documents from the same writer.
#' @return A data frame of cluster fill counts with one row for each document
#'   in the input data frame. The identifier columns that are present in `df`
#'   (`docname`, `writer`, `doc`) come first, followed by one integer column
#'   per cluster that occurs in `df`, sorted by cluster value. A count of `0`
#'   is used when a document has no graphs in a cluster.
#'
#' @examples
#' docname <- c(rep('doc1', 20), rep('doc2', 20), rep('doc3', 20))
#' writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
#' doc <- c(rep(1, 20), rep(2, 20), rep(3, 20))
#' cluster <- sample(3, 60, replace = TRUE)
#' df <- data.frame(docname, writer, doc, cluster)
#' get_cluster_fill_counts(df)
#'
#' @export
#' @md
get_cluster_fill_counts <- function(df) {
  # Fail early with a readable message instead of an obscure tidyselect error
  # from pivot_wider() further down the pipeline.
  if (!("cluster" %in% colnames(df))) {
    stop("`df` must contain a `cluster` column.", call. = FALSE)
  }

  # Identifier columns are optional; keep only those that exist, in a fixed
  # order, so the same pipeline serves inputs with and without writer/doc.
  id_cols <- intersect(c("docname", "writer", "doc"), colnames(df))

  # count number of graphs in each cluster for each document
  cluster_fill_counts <- df %>%
    dplyr::group_by(
      dplyr::pick(tidyselect::all_of(c(id_cols, "cluster")))
    ) %>%
    # "drop_last" is what summarise() already does by default here; stating it
    # explicitly keeps the result unchanged while silencing the
    # "has grouped output by" message. dplyr::n() already returns an integer,
    # so no further conversion of the counts is needed.
    dplyr::summarise(n = dplyr::n(), .groups = "drop_last") %>%
    # names_sort = TRUE orders the cluster columns by cluster value rather
    # than by first appearance, so the column order does not depend on which
    # clusters happen to occur in the first document.
    tidyr::pivot_wider(
      names_from = "cluster",
      values_from = "n",
      values_fill = 0L,
      names_sort = TRUE
    ) %>%
    dplyr::select(tidyselect::all_of(id_cols), tidyselect::everything())

  cluster_fill_counts
}
0 commit comments