-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmarks_column_sort_parallel.R
154 lines (117 loc) · 3.82 KB
/
benchmarks_column_sort_parallel.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#
# microbenchmark matrix reoderings in matrix vector products
# approach: use approx. Nearest Neighbor to create a
# pseudo-FE-linear-problem-matrix with random non-zeros;
# a reordering is generated by a SFC on the mesh nodes,
# and the matrix is reordered accordingly, to show identical numerics.
#
#
# also reorder indices by sorting nn, both before and after SFC
#
#install.packages("SparseM")
#install.packages("RANN")
require(RANN)
require(SparseM)
require(Rcpp)
require(microbenchmark)
sourceCpp("resortgrid_SFC_Rcpp.cpp",cacheDir = ".cpp-cache")
# run this under WSL or Linux
#require(parallel)
#mcaffinity(3) # choose core to bind R-execution to
# parallel case:
# use mcapply to do a parallel job
#N=1500 # 2d nodes
#n=15 # entries per row, non-symmetric
par_matrices_list<- function(elem)
{
N<-elem[[1]];n<-elem[[2]]
set.seed(1234)
d=data.frame(x=rnorm(N),y=rnorm(N))
#plot(x,y,pch=".",type="l")
# create a matrix from n nearest neighbors
# typical for Differential equations linear problems
system.time(nn2(d,k=n)$nn.idx -> nn)
#for (i in sample(1:N,1000)) cat(i," ",length(intersect(order((x[i]-x)^2+(y[i]-y)^2)[1:n],nn[i,])),"\n")
#for (i in sample(1:N,1000)) cat(i," ",all(order((x[i]-x)^2+(y[i]-y)^2)[1:n]==nn[i,]),"\n")
# apply sorting for potential cache-reuse/ prefetch
nn[2,]
system.time({nn.s<-t(apply(nn,1,sort))})
system.time({for (i in 1:N) nn[i,]<-sort(nn[i,])})
nn[1,]
nn[2,]
dim(nn)
nnz=prod(dim(nn))
ja=t(nn) # row-wise assembly in a vector
ja[1:(2*n)]
length(ja)-nnz
ia=(1:(N+1))*n-(n-1)
#ia[1:10]
set.seed(10)
ra=rnorm(nnz,mean=1.0)
m=new("matrix.csr",ra,as.integer(ja),as.integer(ia),dimension=as.integer(c(N,N)))
v=rnorm(N)
#m%*%v
# resorting by p: p[i] is target index
mysfc(d$x,d$y) -> p
p[1:5]
# and the inverse
p.i<-rep(NA,N)
p.i[p]<-1:N
# check for correctness of inverse
all(p.i[p]==1:N)
p[1:3]
p.i[1:3]
# reordering by SFC:
# step 1
nnp=nn[p,] # reorder neighbor blocks (rows)
dim(nnp)
# step 2:
# reorder indices in each block by inverse p
nnp2<-t(apply(nnp,1,function(x) p.i[x]))
nnp2[1,]
nnp2.s.ind<-t(apply(nnp2,1,order)) # for sorting indices and later values
nnp2.s.ind[1,]
nnp3<-t(apply(nnp2,1,sort)) # now sort indices
nnp3[1,]
# check by stored order
nnp3[3,]
nnp2[3,nnp2.s.ind[3,]]
dim(nnp3)
jap=t(nnp3) # transpose to assemble in a vector by as.integer
jap[1:(2*n)]
# values: must be resorted by index order
rap=matrix(ra,ncol=n,byrow = TRUE) # important to assemble row-wise
rap1=rap[p,] # reorder rows of values first
# reorder in-place by sort order of col. indices
for (i in 1:N) rap1[i,]<-rap1[i,nnp2.s.ind[i,]]
rap2=t(rap1) # for correct assembly in a vector
dim(rap2)
m1<-new("matrix.csr",as.numeric(rap2),as.integer(jap),as.integer(ia),dimension=as.integer(c(N,N)))
v[]=1:n
v1<-v[p] # use same vector, permuted
print(sum(abs((m%*%v)[p,1]-(m1%*%v1)[,1])))
list(m=m,v=v,m1=m1,v1=v1) # return both matrices and vectors
}
# matrix-vector setup in parallel
require(parallel)
n.cores<-4
N<-floor(1500000/n.cores)
n<-15
L<-list()
for (i in 1:6) L[[i]]=list(N,n)
LMV<-mclapply(L,par_matrices_list,mc.cores=n.cores)
bench_matvec <- function(x)
{
(mb=microbenchmark({v.0<-(x$m%*%x$v)[,1]},{v.1<-(x$m1%*%x$v1)[,1]},times=50))
}
system.time(par_bench_matvec <- mclapply(LMV,bench_matvec,mc.cores =n.cores))
#plot(mb0)
# locality in ja index (standard dev of neighbour indices)
#summary(apply(ja,2,sd))
#summary(apply(jap,2,sd))
#plot((apply(ja,2,sd)),
# (apply(jap,2,sd)),pch=".")
par_bench_matvec[[1]]
par_bench_matvec[[2]]
par_bench_matvec[[3]]
par_bench_matvec[[4]]