Skip to content

Commit bb57093

Browse files
committed
Starting to work on masking feature
1 parent 85e01ba commit bb57093

File tree

3 files changed

+84
-9
lines changed

3 files changed

+84
-9
lines changed

src/GPUGraphsVector.jl

+14-6
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ mutable struct SparseGPUVector{
2525
nzval::Gv # Stored values, typically nonzeros
2626

2727
function SparseGPUVector(
28-
n::Int,
28+
n::Ti,
2929
nzind::Gi,
3030
nzval::Gv,
3131
backend::B,
@@ -39,19 +39,22 @@ mutable struct SparseGPUVector{
3939
if length(nzind) != length(nzval)
4040
throw(ArgumentError("length(nzind) must be equal to length(nzval)"))
4141
end
42+
4243
if get_backend(nzind) != backend
4344
nzind_gpu = allocate(backend, Ti, length(nzind))
4445
copyto!(nzind_gpu, nzind)
4546
else
4647
nzind_gpu = nzind
4748
end
49+
4850
if get_backend(nzval) != backend
4951
nzval_gpu = allocate(backend, Tv, length(nzval))
5052
copyto!(nzval_gpu, nzval)
5153
else
5254
nzval_gpu = nzval
55+
5356
end
54-
new{Tv,Ti,Gv,Gi}(n, nzind_gpu, nzval_gpu)
57+
new{Tv,Ti,typeof(nzval_gpu),typeof(nzind_gpu)}(n, nzind_gpu, nzval_gpu)
5558
end
5659
end
5760

@@ -72,22 +75,27 @@ function SparseGPUVector(::Type{Tv}, ::Type{Ti}, backend) where {Tv,Ti<:Integer}
7275
end
7376

7477
function SparseGPUVector(
75-
n::Int,
7678
::Type{Tv},
7779
::Type{Ti},
7880
backend::Backend,
7981
) where {Tv,Ti<:Integer}
8082
nzind = allocate(backend, Ti, 0)
8183
nzval = allocate(backend, Tv, 0)
82-
SparseGPUVector(n, nzind, nzval, backend)
84+
SparseGPUVector(zero(Ti), nzind, nzval, backend)
8385
end
8486

8587
#TODO : Boolean matrix that can omit the values and only store the indices
8688

8789
# Base methods for the SparseGPUVector type
88-
Base.size(V::SparseGPUVector) = (V.n)
89-
Base.size(V::SparseGPUVector, i::Int) = (i == 1) ? V.n : 1
90+
Base.size(V::SparseGPUVector) = (V.n,)
91+
Base.size(V::SparseGPUVector, i::Int) = (i == 1) ? (V.n,) : (1,)
9092
Base.length(V::SparseGPUVector) = V.n
93+
Base.show(io::IO, V::SparseGPUVector) = println(
94+
io,
95+
"SparseGPUVector{$(eltype(V.nzval))} $(size(V)) - $(nnz(V)) explicit elements",
96+
)
97+
Base.display(V::SparseGPUVector) = show(stdout, V)
98+
9199
function Base.getindex(V::SparseGPUVector, i::Int)
92100
@warn "Scalar indexing on a SparseGPUVector is slow. For better performance, vectorize the operation."
93101
if i < 1 || i > V.n

test/spmv.jl

+68-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ end
1111
A_cpu = sprand(Float32, 10, 10, 0.5)
1212
B_cpu = rand(Float32, 10)
1313
C_cpu = A_cpu * B_cpu
14-
println("res: ", C_cpu)
1514
res = zeros(Float32, 10)
1615
A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
1716
A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
@@ -65,3 +64,71 @@ end
6564
@test isapprox(res, C_cpu)
6665

6766
end
67+
68+
@testset "masked_mul!" begin
69+
70+
# Matrix-vector multiplication
71+
A_cpu = sprand(Float32, 10, 10, 0.5)
72+
B_cpu = rand(Float32, 10)
73+
74+
res = zeros(Float32, 10)
75+
A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
76+
A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
77+
A_gpu_csc = SparseGPUMatrixCSC(A_cpu, TEST_BACKEND)
78+
B_gpu = allocate(TEST_BACKEND, Float32, 10)
79+
80+
mask = rand(Bool, 10)
81+
MASK = SparseGPUVector(mask, TEST_BACKEND)
82+
83+
C_cpu = A_cpu * B_cpu .* mask
84+
85+
copyto!(B_gpu, B_cpu)
86+
C_gpu_1 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)
87+
88+
gpu_spmv!(C_gpu_1, A_gpu_csr, B_gpu; mask = MASK)
89+
KernelAbstractions.synchronize(TEST_BACKEND)
90+
@allowscalar @test C_gpu_1 == C_cpu
91+
92+
#C_gpu_2 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)
93+
#gpu_spmv!(C_gpu_2, A_gpu_ell, B_gpu; mask = MASK)
94+
#KernelAbstractions.synchronize(TEST_BACKEND)
95+
#@allowscalar @test C_gpu_2 == C_cpu
96+
#
97+
#C_gpu_3 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)
98+
#gpu_spmv!(C_gpu_3, A_gpu_csc, B_gpu; mask = MASK)
99+
#KernelAbstractions.synchronize(TEST_BACKEND)
100+
#copyto!(res, C_gpu_3)
101+
#@test isapprox(res, C_cpu)
102+
103+
# Large matrix
104+
LARGE_NB = 1000
105+
A_cpu = sprand(Float32, LARGE_NB, LARGE_NB, 0.2)
106+
B_cpu = rand(Float32, LARGE_NB)
107+
mask = rand(Bool, LARGE_NB)
108+
MASK = SparseGPUVector(mask, TEST_BACKEND)
109+
C_cpu = A_cpu * B_cpu .* mask
110+
A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
111+
A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
112+
A_gpu_csc = SparseGPUMatrixCSC(A_cpu, TEST_BACKEND)
113+
B_gpu = allocate(TEST_BACKEND, Float32, LARGE_NB)
114+
copyto!(B_gpu, B_cpu)
115+
C_gpu_1 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)
116+
C_gpu_2 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)
117+
C_gpu_3 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)
118+
119+
gpu_spmv!(C_gpu_1, A_gpu_csr, B_gpu; mask = MASK)
120+
#gpu_spmv!(C_gpu_2, A_gpu_ell, B_gpu; mask = MASK)
121+
#gpu_spmv!(C_gpu_3, A_gpu_csc, B_gpu; mask = MASK)
122+
KernelAbstractions.synchronize(TEST_BACKEND)
123+
124+
res = zeros(Float32, LARGE_NB)
125+
126+
copyto!(res, C_gpu_1)
127+
@test isapprox(res, C_cpu)
128+
#copyto!(res, C_gpu_2)
129+
#@test isapprox(res, C_cpu)
130+
#copyto!(res, C_gpu_3)
131+
#@test isapprox(res, C_cpu)
132+
133+
134+
end

test/structs.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ end
370370
@testset "Constructors" begin
371371
@testset "empty" begin
372372
A = SparseGPUVector(Float32, Int32, TEST_BACKEND)
373-
@test size(A) == 0
373+
@test size(A) == (0,)
374374
@test length(A) == 0
375375
end
376376

@@ -387,7 +387,7 @@ end
387387
i = 1
388388
for B in test_vectors
389389
i += 1
390-
@test size(B) == 10
390+
@test size(B) == (10,)
391391
@test length(B) == 10
392392
@allowscalar @test B.nzind == ref_nzind
393393
@allowscalar @test B.nzval == ref_nzval

0 commit comments

Comments
 (0)