Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4666a9f

Browse files
committedJun 26, 2024·
WIP plot ccdf gs
1 parent aa80443 commit 4666a9f

File tree

2 files changed

+197
-2
lines changed

2 files changed

+197
-2
lines changed
 

Diff for: ‎R/plot.citcdf.R

+195
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
2+
#' Function for plotting the CDF of all the genes within a gene set or for one gene
3+
#'
4+
#' @param x an object of class \code{\link{cit_gsa}}
5+
#'
6+
#' @param ... further arguments to be passed
7+
#'
8+
#' @return a \code{ggplot2} of gene-wise or geneset ccdf
9+
#'
10+
#'@importFrom viridisLite viridis
11+
#'@import ggplot2
12+
#'
13+
#' @export
14+
#'
15+
#' @examples
16+
#' #TO DO
17+
18+
19+
20+
21+
plot.citcdf <- function(x, ...){
22+
23+
if(x$type == "gsa"){
24+
25+
ccdf_all <- x$ccdf
26+
27+
for (i in 1:length(ccdf_all)){
28+
ccdf <- ccdf_all[[i]]
29+
30+
genes <- names(ccdf[[1]])
31+
32+
# test_ccdf[[1]][[1]] # accède à Y1 Y2 names(ccdf_all[[1]][[1]])
33+
# test_ccdf[[2]][[1]] # accède à Y3 Y4 names(ccdf_all[[2]][[1]])
34+
35+
# Create data frame with all the ccdf ----
36+
data_gene <- do.call(rbind, lapply(genes, function(name) {
37+
x <- ccdf[[name]]
38+
df <- cbind.data.frame(x$cdf, x$ccdf, x$x, x$y)
39+
colnames(df) <- c("cdf", "ccdf", "x", "y")
40+
df$Gene <- rep(name, nrow(df))
41+
return(df)
42+
}))
43+
44+
sev <- unique(data_gene$x)
45+
46+
47+
# Complete data if no values for some y ----
48+
data_gene_sep <- split(data_gene, list(data_gene$Gene, data_gene$x)) # separate the table by genes and x
49+
50+
new_data_gene_sep <- lapply(data_gene_sep, function(df) {
51+
# max value of current y
52+
max_y_cur <- max(unique(df$y))
53+
# max value of total y
54+
max_y_all <- max(unique(data_gene$y))
55+
56+
# all the y that are between max current y value and max total y value
57+
y_differ_data <- setdiff(data_gene$y, df$y) # y values that are not in current data
58+
miss_y <- subset(y_differ_data, y_differ_data >= max_y_cur & y_differ_data <= max_y_all) # y values that are not in current data (previous) and between the 2 max
59+
n_miss_y <- length(miss_y)
60+
61+
# add the new y in the data + affect the max value of the ccdf to these y
62+
# WARNING : ccdf sorted not y !
63+
if (n_miss_y != 0){
64+
df <- data.frame(cbind(rep(max(df$cdf),n_miss_y),
65+
rep(max(df$ccdf),n_miss_y),
66+
rep(df$x[1],n_miss_y), miss_y,
67+
rep(df$Gene[1],n_miss_y)))
68+
colnames(df) <- c("cdf","ccdf","x","y","Gene")
69+
df$x <- factor(df$x, levels = rep(1:length(levels(data_gene$x))) , labels = levels(data_gene$x) )
70+
} else{
71+
df <- 0
72+
}
73+
return(df)
74+
})
75+
76+
# Combine the data set
77+
final_data <- do.call(rbind,lapply(genes, function(name){
78+
# if some genes doesn't have missing y values
79+
no_zero <- do.call(rbind,Filter(function(x) {!is.numeric(x) || !all(x == 0)}, new_data_gene_sep))
80+
# new dataset completed
81+
df <- rbind(data_gene[data_gene$Gene==name,], no_zero[no_zero$Gene==name,])
82+
df$y <- as.numeric(df$y)
83+
df$cdf <- as.numeric(df$cdf)
84+
df$ccdf <- as.numeric(df$ccdf)
85+
return(df)
86+
}))
87+
88+
# Thresholds ----
89+
Y_after <- final_data$y
90+
number_y = 20
91+
# y value for each thresholds
92+
y_after <- seq(from = ifelse(length(which(Y_after==0))==0, min(Y_after), min(Y_after[-which(Y_after==0)])),
93+
to = max(Y_after[-which.max(as.matrix(Y_after))]), length.out = number_y)
94+
95+
seuils <- c(0,y_after)
96+
97+
# Compute the median ----
98+
med <- lapply(sev, function(x){
99+
med <- rep(NA, number_y)
100+
filtre_x <- final_data$x == x
101+
filtre_row_i0 <- final_data$y >= seuils[1] & final_data$y < seuils[1+1]
102+
indices0 <- which(filtre_x & filtre_row_i0)
103+
med[1] <- median(final_data$ccdf[indices0])
104+
105+
for (i in 2:length(seuils)){
106+
107+
filtre_row_i <- final_data$y >= seuils[i] & final_data$y < seuils[i+1]
108+
indices <- which(filtre_x & filtre_row_i)
109+
110+
med[i] <- median(final_data$ccdf[indices])
111+
112+
ref_value <- med[i-1]
113+
range_value <- final_data[filtre_x & filtre_row_i, ]$ccdf
114+
115+
closest_index <- min(which(range_value > ref_value))
116+
closest_value <- range_value[closest_index]
117+
118+
if (med[i] < med[i-1] & closest_index != Inf ){ # force monotony when we can, if no values > previous median leave the current median
119+
med[i] <- closest_value
120+
}
121+
}
122+
med <- data.frame(med[1:number_y])
123+
med$y <- y_after
124+
med$x <- x
125+
names(med)[1] <- "ccdf"
126+
127+
return(med)
128+
})
129+
130+
# Replace values with the max : if some values are below the max after it
131+
replace_max <- lapply(med, function(m){
132+
ind_max <- which.max(m$ccdf)
133+
m[ind_max:nrow(m),]$ccdf <- max(m$ccdf)
134+
return(m)
135+
})
136+
137+
# Step function : add the ccdf to the y values that are not a thresholds
138+
step_function <- lapply(replace_max, function(df){
139+
# y values that are not in the data of the thresholds
140+
new_y <- data.frame(setdiff(final_data$y, df$y)) ; colnames(new_y) <- "y"
141+
# separate the values between the intervals of the thresholds
142+
intervalle_indices <- findInterval(new_y$y, df$y, left.open = TRUE)
143+
indices_int <- intervalle_indices+1
144+
145+
new_y$ccdf <- df$ccdf[indices_int]
146+
new_y$x <- unique(df$x)
147+
148+
return(new_y)
149+
})
150+
151+
# Data final -----
152+
data_med <- rbind(do.call(rbind,step_function),do.call(rbind,med))
153+
154+
# Data modified for the legend
155+
data_med$Gene <- "all"
156+
data_med$Legend <- "Gene set summary"
157+
158+
data_gene <- data_gene[-1]
159+
data_gene$Legend <- "Genes"
160+
161+
162+
comb_data <- rbind(data_gene,data_med)
163+
164+
# Plot -----
165+
p <- ggplot(data = comb_data, aes(x = y, color=x, y = ccdf,linetype = Legend, size=Legend)) +
166+
geom_line(aes(group = interaction(Gene,x))) +
167+
scale_size_manual(values = c(0.8,0.2)) + # modify row size according to the variable in aes(size)
168+
labs(x = "Gene expression") +
169+
theme_minimal()
170+
171+
return(p)
172+
}
173+
174+
175+
}
176+
177+
178+
# elseif (x$type == genewise){
179+
180+
#}
181+
182+
183+
}
184+
185+
186+
187+
188+
189+
190+
191+
192+
193+
194+
195+

Diff for: ‎man/plot.citcdf.Rd

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)
Please sign in to comment.