-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdream8-granger-full.R
More file actions
197 lines (159 loc) · 5.89 KB
/
dream8-granger-full.R
File metadata and controls
197 lines (159 loc) · 5.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
## dream8-granger-full.R - Granger-based predictor that utilizes all the data
##
## by Artem Sokolov
dyn.load( "lasso.so" )
## Solves the LASSO problem: argmin (y - Xw)^2 + \lambda |w|
optLASSO <- function( X, y, lambda, max.iter=1000, eps=1e-10 )
{
n <- nrow(X)
p <- ncol(X)
## Set the initial parameter estimates
w <- rep( 0, p )
b <- mean( y )
S <- X %*% w + b
## Call the C routine
res <- .C( "optLASSO", as.double(X), as.double(y),
as.double(lambda), as.double(S), as.integer(n), as.integer(p),
as.integer(max.iter), as.double(eps),
w = as.double( w ), b = as.double(b), as.integer(1) )
names( res$w ) <- colnames(X)
list( w = res$w, b = res$b )
}
## Solves the LASSO problem: argmin (y - Xw)^2 + \lambda |w|
## The meta-parameter \lambda is chosen such that w[ipvt] are barely zero
granger.lasso <- function( X, y, ipvt )
{
## Compute the starting limits
b <- mean(y)
ll.max <- max( abs(apply( (y-b)*X, 2, mean )) )
ll.min <- 0
colnames(X) <- NULL
## Binary search for the proper \lambda parameter
for( iter in 1:20 )
{
ll.cur <- mean( c(ll.max, ll.min) )
m <- optLASSO( X, y, ll.cur )
if( sum(m$w[ipvt]) == 0 ) ll.max <- ll.cur
else ll.min <- ll.cur
}
## Auto-regression may be slightly off zero depending on where binary
## search terminated
m$w[ipvt] <- 0
m$w
}
full.granger <- function( Zl )
{
stopifnot( length(Zl) > 1 )
## Verify consistency: number of proteins
p <- unique(unlist( lapply( Zl, ncol ) ))
stopifnot( length(p) == 1 )
## Verify consistency: the order of proteins
vv <- lapply( Zl, function(z) { all( colnames(z) == colnames(Zl[[1]]) ) } )
stopifnot( all(unlist(vv)) == TRUE )
## Reduce to a common set of timepoints
pnms <- colnames( Zl[[1]] )
rnms <- rownames( Zl[[1]] )
invisible(lapply( Zl, function(z) { rnms <<- intersect(rnms, rownames(z)) } ))
Zl <- lapply( Zl, function(z) {z[rnms,]} )
stopifnot( length( rnms ) > 1 )
## Skip predicting the "baseline" point
tstart <- 1
if( rnms[1] == "bl" ) tstart <- 2
## Causal adjacency matrices
A.past <- matrix( 0, p, p )
rownames( A.past ) <- pnms
colnames( A.past ) <- pnms
A.fut <- A.past
## Traverse the targets
for( jp in 1:p ) ## Index j over the proteins
{
cat( "Processing", colnames(Zl[[1]])[jp], "\n" )
yl <- lapply( Zl, function(z) {z[,jp]} )
Xl <- lapply( Zl, function(z) {z[,-jp]} )
## Build the input space
## use transpose of Xl[[i]]'s to keep timepoints together
X.nonAR <- do.call( rbind, lapply( Xl, function(x1) {c(t(x1))} ) )
n.nonAR <- ncol(X.nonAR)
## Keep track of (past) -> (present) and (present) -> (future) links
W.past <- matrix( 0, 0, (p-1) )
colnames( W.past ) <- colnames( Xl[[1]] )
W.fut <- W.past
## Traverse the timepoints
for( it in tstart:length(rnms) ) ## Index i over the timepoints
{
cat( "Timepoint", it, "\n" )
## Build the response
y <- unlist( lapply( yl, function(y1) {y1[it]} ) )
## Determine the index threshold, past which the dependence is
## considered to be from the future, causing a reverse in the
## causal link
fut.thresh <- it * (p-1)
## Append the auto-regression component
X.AR <- do.call( rbind, lapply( yl, function(y1) {y1[-it]} ) )
X <- cbind( X.nonAR, X.AR )
ipvt <- (n.nonAR+1):ncol(X) ## AR pivot
## Solve the regression problem using LASSO regularization
w <- granger.lasso( X, y, ipvt )
wsum <- sum( abs(w) )
## Normalize the weights to [-1, 1]
if( wsum > 0 )
w <- w / wsum
## Split the weights into past and future causality
w.past <- w[1:fut.thresh]
w.fut <- numeric(0)
if( fut.thresh < n.nonAR )
w.fut <- w[(fut.thresh+1):n.nonAR]
## Compute the average contribution of each time series
W.past <- rbind( W.past, apply( matrix(w.past, nrow=p-1),1,mean ) )
if( length( w.fut ) > 0 )
W.fut <- rbind( W.fut, apply( matrix(w.fut, nrow=p-1),1,mean ) )
}
## Entry (i, j) specifies a causal link from protein i to protein j
## So, "past" entries go into the jth column, while "future" entries
## go into the jth row
A.past[colnames(W.past),jp] <- apply( W.past, 2, mean )
A.fut[jp,colnames(W.fut)] <- apply( W.fut, 2, mean )
}
## Do a simple average between past and future
(A.past + A.fut) / 2
}
combine.reps <- function( X, fcomb=median )
{
## Retrieve the timepoint names
nms <- unlist(lapply( strsplit( rownames(X), "-" ), "[[", 1 ))
nml <- unique( nms )
## Combine the replicates
res <- matrix( 0, length(nml), ncol(X) )
rownames( res ) <- nml
colnames( res ) <- colnames(X)
for( nm in nml )
res[nm,] <- apply( X[which( nms == nm ),,drop=FALSE], 2, fcomb )
res
}
process.cell <- function( clname )
{
## Stimuli
stims <- c( "EGF", "FGF1", "HGF", "IGF1", "Insulin", "NRG1", "PBS", "Serum" )
## Load the data
fn <- paste( "data/", clname, "-main.RData", sep="" )
load( fn )
## Traverse the stimuli
res <- list()
for( s in stims )
{
cat( "Processing", clname, s, "\n" )
nm <- paste( clname, s, sep="." )
X <- lapply( get(nm), combine.reps )
res[[nm]] <- full.granger( X )
}
res
}
main.granger <- function()
{
res1 <- process.cell( "BT20" )
res2 <- process.cell( "BT549" )
res3 <- process.cell( "MCF7" )
res4 <- process.cell( "UACC812" )
res <- c( res1, res2, res3, res4 )
res
}