22# ' @importFrom lubridate %m+%
33
44# Theme for plots
5- mytheme <- ggplot2 :: theme_bw() + ggplot2 :: theme(axis.text = ggplot2 :: element_text(size = 16 ),
6- axis.title.x = ggplot2 :: element_text(size = 18 ),
7- axis.title.y = ggplot2 :: element_text(size = 18 , angle = 90 ))
5+ mytheme <- ggplot2 :: theme_bw() + ggplot2 :: theme(axis.text = ggplot2 :: element_text(size = 16 ),
6+ axis.title.x = ggplot2 :: element_text(size = 18 ),
7+ axis.title.y = ggplot2 :: element_text(size = 18 , angle = 90 ))
88
99# queryDict <- c('downloads'='select CLIENT,NORMALIZED_METHOD_SIGNATURE,PROJECT_ID,BENEFACTOR_ID,PARENT_ID,ENTITY_ID,AR.TIMESTAMP,RESPONSE_STATUS,DATE,USER_ID,NODE_TYPE,N.NAME from ACCESS_RECORD AR, PROCESSED_ACCESS_RECORD PAR, NODE_SNAPSHOT N, (select distinct ID from NODE_SNAPSHOT where PROJECT_ID = "%s") NODE where AR.TIMESTAMP Between %s AND %s and AR.SESSION_ID = PAR.SESSION_ID and AR.TIMESTAMP = PAR.TIMESTAMP and PAR.ENTITY_ID = NODE.ID and N.ID = NODE.ID and (PAR.NORMALIZED_METHOD_SIGNATURE = "GET /entity/#/file" or PAR.NORMALIZED_METHOD_SIGNATURE = "GET /entity/#/version/#/file");',
1010# 'webAccess'='select NORMALIZED_METHOD_SIGNATURE,PROJECT_ID,BENEFACTOR_ID,PARENT_ID,ENTITY_ID,CONVERT(AR.TIMESTAMP, CHAR) AS TIMESTAMP,RESPONSE_STATUS,DATE,USER_ID,NODE_TYPE,N.NAME from ACCESS_RECORD AR, PROCESSED_ACCESS_RECORD PAR, NODE_SNAPSHOT N, (select distinct ID from NODE_SNAPSHOT where PROJECT_ID = "%s") NODE where AR.TIMESTAMP Between %s AND %s and AR.SESSION_ID = PAR.SESSION_ID and AR.TIMESTAMP = PAR.TIMESTAMP and PAR.ENTITY_ID = NODE.ID and N.ID = NODE.ID and CLIENT = "WEB" AND (PAR.NORMALIZED_METHOD_SIGNATURE = "GET /entity/#/bundle" OR PAR.NORMALIZED_METHOD_SIGNATURE = "GET /entity/#/version/#/bundle" OR PAR.NORMALIZED_METHOD_SIGNATURE = "GET /entity/#/wiki2" OR PAR.NORMALIZED_METHOD_SIGNATURE = "GET /entity/#/wiki2/#");')
@@ -13,23 +13,23 @@ mytheme <- ggplot2::theme_bw() + ggplot2::theme(axis.text=ggplot2::element_text(
1313# q.browse <- sprintf(template, projectId, beginTimestamp, endTimestamp)
1414
1515# ' @export
16- render_report <- function (project_id , team_order , data_file , reportType = " report" ) {
16+ render_report <- function (project_id , team_order , data_file , reportType = " report" ) {
1717
18- templates <- c(" report" = system.file(" templates" , " report.Rmd" ,
19- package = " synapseusagereports" ))
18+ templates <- c(" report" = system.file(" templates" , " report.Rmd" ,
19+ package = " synapseusagereports" ))
2020
21- myParams <- list (projectId = project_id ,
22- teamOrder = team_order ,
23- queryDataFile = data_file )
21+ myParams <- list (projectId = project_id ,
22+ teamOrder = team_order ,
23+ queryDataFile = data_file )
2424
2525 htmlFileName <- paste0(myParams [[' projectId' ]], " _" , reportType , " _" ,
2626 lubridate :: today(), " .html" )
2727
2828 outputFileName <- paste0(" /tmp/" , htmlFileName )
2929
30- cat(rmarkdown :: render(input = templates [[reportType ]],
31- output_file = outputFileName ,
32- params = myParams ))
30+ cat(rmarkdown :: render(input = templates [[reportType ]],
31+ output_file = outputFileName ,
32+ params = myParams ))
3333
3434}
3535
@@ -115,23 +115,23 @@ doQuery <- function(con, template, projectId, start_date, end_date) {
115115 message(sprintf(" Query: %s" , q ))
116116 message(sprintf(" Querying %s to %s" , start_date , end_date ))
117117
118- res <- DBI :: dbGetQuery(conn = con , statement = q )
118+ res <- DBI :: dbGetQuery(conn = con , statement = q )
119119 return (res )
120120}
121121
122122# ' @export
123123processQuery <- function (data ) {
124124 queryData <- data %> %
125- dplyr :: rename(userId = USER_ID , id = ENTITY_ID ) %> %
125+ dplyr :: rename(userId = USER_ID , id = ENTITY_ID ) %> %
126126 dplyr :: select(userId , id , DATE , TIMESTAMP , NODE_TYPE , NAME , recordType ) %> %
127127 # dplyr::ungroup() %>%
128128 # rename(duplicateCount=n) %>%
129- dplyr :: mutate(date = as.Date(as.character(DATE )),
130- userId = as.character(userId ),
131- id = as.character(id ),
132- dateGrouping = lubridate :: floor_date(date , unit = " month" ),
133- monthYear = paste(lubridate :: month(dateGrouping , label = TRUE ),
134- lubridate :: year(dateGrouping ))) %> %
129+ dplyr :: mutate(date = as.Date(as.character(DATE )),
130+ userId = as.character(userId ),
131+ id = as.character(id ),
132+ dateGrouping = lubridate :: floor_date(date , unit = " month" ),
133+ monthYear = paste(lubridate :: month(dateGrouping , label = TRUE ),
134+ lubridate :: year(dateGrouping ))) %> %
135135 dplyr :: group_by(id , userId , TIMESTAMP , recordType ) %> % # Get unique due to name changes, might not be most recent name!
136136 dplyr :: arrange(TIMESTAMP ) %> %
137137 dplyr :: slice(1 ) %> %
@@ -140,28 +140,37 @@ processQuery <- function(data) {
140140 queryData
141141}
142142
143+ # ' Fetch data from a data warehouse database.
144+ # ' This first performs a filtering operation to limit the query to entities within the specified project.
145+ # ' A temporary table is created and the timestamp of the most recent snapshot is added, which is used for joining.
146+ # '
147+ # ' @param con A database connection.
148+ # ' @param qTemplate A string of the query template from 'query_template_strings'.
149+ # ' @param projectId A Synapse Project ID.
150+ # ' @param timestampBreaksDf A data frame of timestamp intervals.
151+ # ' @return A data frame of the query results.
143152# ' @export
144153getData <- function (con , qTemplate , projectId , timestampBreaksDf ) {
145154 q.create_temp <- " CREATE TEMPORARY TABLE PROJECT_STATS (`TIMESTAMP` bigint(20) NOT NULL, `ID` bigint(20) NOT NULL, PRIMARY KEY (`ID`,`TIMESTAMP`)); "
146- create <- DBI :: dbSendQuery(conn = con ,
147- statement = q.create_temp )
155+ create <- DBI :: dbSendQuery(conn = con ,
156+ statement = q.create_temp )
148157 message(sprintf(" Created temporary table for entities in project %s" , projectId ))
149158
150159 q.insert_temp <- " INSERT INTO PROJECT_STATS (ID, TIMESTAMP) SELECT ID, MAX(TIMESTAMP) AS TIMESTAMP FROM NODE_SNAPSHOT WHERE PROJECT_ID = %s GROUP BY ID;"
151160 query_statement <- sprintf(q.insert_temp , projectId )
152- insert <- DBI :: dbSendQuery(conn = con ,
153- statement = query_statement )
161+ insert <- DBI :: dbSendQuery(conn = con ,
162+ statement = query_statement )
154163 message(sprintf(" Inserted rows into temporary table for entities in project %s" , projectId ))
155164
156165 res <- plyr :: ddply(timestampBreaksDf , plyr :: .(month , year ),
157- function (x ) doQuery(con = con ,
158- template = qTemplate ,
159- projectId = projectId ,
160- start_date = x $ start_date ,
161- end_date = x $ end_date
166+ function (x ) doQuery(con = con ,
167+ template = qTemplate ,
168+ projectId = projectId ,
169+ start_date = x $ start_date ,
170+ end_date = x $ end_date
162171 ))
163172
164- foo <- DBI :: dbSendQuery(conn = con , statement = ' DROP TABLE PROJECT_STATS;' )
173+ foo <- DBI :: dbSendQuery(conn = con , statement = ' DROP TABLE PROJECT_STATS;' )
165174
166175 res
167176}
@@ -173,8 +182,8 @@ getTeamMemberDF <- function(teamId) {
173182 foo <- foo $ asList()
174183
175184 foo %> % {
176- tibble(teamId = purrr :: map_chr(. , ' teamId' ),
177- userId = purrr :: map_chr(. , c(" member" , " ownerId" )))
185+ tibble(teamId = purrr :: map_chr(. , ' teamId' ),
186+ userId = purrr :: map_chr(. , c(" member" , " ownerId" )))
178187 }
179188
180189}
@@ -186,8 +195,8 @@ processTeamMemberList <- function(teamIds) {
186195 userList <- purrr :: map_df(teamIds , getTeamMemberDF )
187196
188197 userList $ teamId <- factor (userList $ teamId ,
189- levels = teamIds ,
190- ordered = TRUE )
198+ levels = teamIds ,
199+ ordered = TRUE )
191200
192201 userList <- userList %> %
193202 dplyr :: group_by(userId ) %> %
@@ -207,12 +216,12 @@ getQueryUserProfiles <- function(queryData, useTeamGrouping, userList) {
207216
208217 accessUsers <- plyr :: llply(chunk(unique(queryData $ userId ), 50 ),
209218 function (x ) synapser :: synRestGET(sprintf(" /userGroupHeaders/batch?ids=%s" ,
210- paste(x , collapse = " ," )))$ children )
219+ paste(x , collapse = " ," )))$ children )
211220
212221 accessUsersChildren <- do.call(c , accessUsers )
213222
214223 allUsersList <- plyr :: ldply(accessUsersChildren , as.data.frame ) %> %
215- dplyr :: mutate(userId = ownerId ) %> %
224+ dplyr :: mutate(userId = ownerId ) %> %
216225 dplyr :: select(userId , userName )
217226
218227 if (useTeamGrouping ) {
@@ -235,14 +244,14 @@ getQueryUserProfiles <- function(queryData, useTeamGrouping, userList) {
235244 dplyr :: select(teamId ) %> % dplyr :: distinct(.keep_all = TRUE )
236245
237246 teamInfo <- lapply(as.list(as.character(tmp_all_users $ teamId )),
238- function (x ) synapser :: synGetTeam(x )) %> %
247+ function (x ) synapser :: synGetTeam(x )) %> %
239248 {
240- tibble(teamId = tmp_all_users $ teamId ,
241- teamName = purrr :: map_chr(. , ' name' ))
249+ tibble(teamId = tmp_all_users $ teamId ,
250+ teamName = purrr :: map_chr(. , ' name' ))
242251 }
243252
244253 if (nrow(teamInfo ) > 0 ) {
245- allUsers <- dplyr :: left_join(allUsers , teamInfo , by = " teamId" )
254+ allUsers <- dplyr :: left_join(allUsers , teamInfo , by = " teamId" )
246255 } else {
247256 allUsers $ teamName <- " Registered Synapse User"
248257 }
@@ -270,7 +279,7 @@ countByMonth <- function(queryData, useTeamGrouping) {
270279 tmp <- queryData
271280
272281 if (! useTeamGrouping ) {
273- tmp <- tmp %> % dplyr :: mutate(teamName = ' All' )
282+ tmp <- tmp %> % dplyr :: mutate(teamName = ' All' )
274283 }
275284
276285 tmp %> %
@@ -284,7 +293,7 @@ countByDay <- function(queryData, useTeamGrouping) {
284293 tmp <- queryData
285294
286295 if (! useTeamGrouping ) {
287- tmp <- tmp %> % dplyr :: mutate(teamName = " All" )
296+ tmp <- tmp %> % dplyr :: mutate(teamName = " All" )
288297 }
289298
290299 tmp %> %
@@ -296,23 +305,23 @@ countByDay <- function(queryData, useTeamGrouping) {
296305# ' @export
297306plotByDay <- function (perdayCount , useTeamGrouping ) {
298307 plotdata <- perdayCount %> %
299- reshape2 :: dcast(date ~ teamName , value.var = ' n' , fill = 0 ) %> %
300- reshape2 :: melt(. , id.vars = c(" date" ),
301- variable.name = " teamName" , value.name = " n" ) %> %
302- dplyr :: rename(group = teamName )
308+ reshape2 :: dcast(date ~ teamName , value.var = ' n' , fill = 0 ) %> %
309+ reshape2 :: melt(. , id.vars = c(" date" ),
310+ variable.name = " teamName" , value.name = " n" ) %> %
311+ dplyr :: rename(group = teamName )
303312
304- p <- ggplot2 :: ggplot(plotdata , ggplot2 :: aes(x = date , y = n ))
305- p <- p + ggplot2 :: geom_line(ggplot2 :: aes(group = group , color = group ), size = 1 )
313+ p <- ggplot2 :: ggplot(plotdata , ggplot2 :: aes(x = date , y = n ))
314+ p <- p + ggplot2 :: geom_line(ggplot2 :: aes(group = group , color = group ), size = 1 )
306315
307316 if (useTeamGrouping ) {
308317 p <- p + ggplot2 :: scale_color_brewer(palette = " Set1" )
309318 } else {
310- p <- p + ggplot2 :: scale_color_manual(values = " black" )
319+ p <- p + ggplot2 :: scale_color_manual(values = " black" )
311320 }
312321
313- p <- p + mytheme + ggplot2 :: theme(axis.title.x = ggplot2 :: element_blank(),
314- axis.text.x = ggplot2 :: element_text(size = 16 , angle = 270 ),
315- legend.position = " top" )
322+ p <- p + mytheme + ggplot2 :: theme(axis.title.x = ggplot2 :: element_blank(),
323+ axis.text.x = ggplot2 :: element_text(size = 16 , angle = 270 ),
324+ legend.position = " top" )
316325 p
317326
318327}
@@ -324,8 +333,8 @@ uniqueUsersPerMonth <- function(queryData) {
324333 dplyr :: distinct() %> %
325334 dplyr :: filter(userName != " anonymous" ) %> %
326335 dplyr :: group_by(dateGrouping ) %> %
327- dplyr :: summarize(Users = n_distinct(userName )) %> %
328- dplyr :: rename(Date = dateGrouping )
336+ dplyr :: summarize(Users = n_distinct(userName )) %> %
337+ dplyr :: rename(Date = dateGrouping )
329338}
330339
331340# ' @export
@@ -338,37 +347,37 @@ firstMonthToVisit <- function(queryData) {
338347 dplyr :: arrange(dateGrouping ) %> %
339348 dplyr :: slice(1 ) %> %
340349 dplyr :: ungroup() %> %
341- dplyr :: mutate(visit = 1 ) %> %
350+ dplyr :: mutate(visit = 1 ) %> %
342351 dplyr :: count(dateGrouping )
343352
344353 missing <- unique(queryData $ dateGrouping [! (queryData $ dateGrouping %in% firstMonthVisit $ dateGrouping )])
345354
346355 if (length(missing ) > 0 ) {
347356 firstMonthVisit <- rbind(firstMonthVisit ,
348- data.frame (n = 0 ,
349- dateGrouping = missing ))
357+ data.frame (n = 0 ,
358+ dateGrouping = missing ))
350359 }
351360
352361 firstMonthVisit %> %
353362 dplyr :: arrange(dateGrouping ) %> %
354- dplyr :: rename(Date = dateGrouping , Users = n )
363+ dplyr :: rename(Date = dateGrouping , Users = n )
355364}
356365
357366# ' @export
358367multiMonthVisits <- function (queryData ) {
359368 queryData %> %
360369 dplyr :: group_by(userName ) %> %
361- dplyr :: summarize(monthsVisited = n_distinct(dateGrouping )) %> %
370+ dplyr :: summarize(monthsVisited = n_distinct(dateGrouping )) %> %
362371 dplyr :: filter(monthsVisited > = 2 , userName != ' anonymous' )
363372}
364373
365374# ' @export
366375makeDateBreaks <- function (nMonths ) {
367- thisDate <- lubridate :: floor_date(lubridate :: today(), " month" )- lubridate :: period(1 , " months" )
376+ thisDate <- lubridate :: floor_date(lubridate :: today(), " month" ) - lubridate :: period(1 , " months" )
368377
369378 beginDates <- thisDate - (lubridate :: period(1 , " months" ) * 0 : (nMonths - 1 ))
370379
371- data.frame (date = beginDates , month = lubridate :: month(beginDates ), year = lubridate :: year(beginDates ))
380+ data.frame (date = beginDates , month = lubridate :: month(beginDates ), year = lubridate :: year(beginDates ))
372381 # # endDates <- beginDates + (lubridate::days_in_month(beginDates)) - lubridate::seconds(1)
373382 # # beginDates <- beginDates + lubridate::seconds(1)
374383 #
@@ -392,20 +401,20 @@ makeDateBreaksStartEnd <- function(start_date, end_date) {
392401
393402 beginDates <- end_date_floor - (lubridate :: period(1 , " months" ) * 0 : (n_months ))
394403
395- tibble :: tibble(start_date = beginDates ) %> %
404+ tibble :: tibble(start_date = beginDates ) %> %
396405 dplyr :: mutate(end_date = dplyr :: lag(start_date ),
397406 month = lubridate :: month(start_date ),
398407 year = lubridate :: year(start_date )) %> %
399408 dplyr :: arrange(start_date )
400409}
401410
402411# ' @export
403- topNEntities <- function (queryData , allUsers , topN = 20 ) {
412+ topNEntities <- function (queryData , allUsers , topN = 20 ) {
404413 plotdata <- queryData %> %
405414 dplyr :: count(userName ) %> %
406415 dplyr :: ungroup() %> %
407416 dplyr :: left_join(allUsers ) %> %
408- dplyr :: mutate(userName = reorder(userName , n , ordered = TRUE ))
417+ dplyr :: mutate(userName = reorder(userName , n , ordered = TRUE ))
409418
410419 plotdata %> %
411420 dplyr :: top_n(topN , n ) %> %
0 commit comments