|
11 | 11 | A_cpu = sprand(Float32, 10, 10, 0.5)
|
12 | 12 | B_cpu = rand(Float32, 10)
|
13 | 13 | C_cpu = A_cpu * B_cpu
|
14 |
| - println("res: ", C_cpu) |
15 | 14 | res = zeros(Float32, 10)
|
16 | 15 | A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
|
17 | 16 | A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
|
|
65 | 64 | @test isapprox(res, C_cpu)
|
66 | 65 |
|
67 | 66 | end
|
@testset "masked_mul!" begin

    # Masked sparse matrix-vector multiplication: only output entries where
    # the boolean mask is true should be produced; all others stay zero.
    # The CPU reference is computed as (A * B) .* mask.

    # --- Small matrix (10x10) ---
    A_cpu = sprand(Float32, 10, 10, 0.5)
    B_cpu = rand(Float32, 10)

    res = zeros(Float32, 10)
    A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
    A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
    A_gpu_csc = SparseGPUMatrixCSC(A_cpu, TEST_BACKEND)
    B_gpu = allocate(TEST_BACKEND, Float32, 10)

    mask = rand(Bool, 10)
    MASK = SparseGPUVector(mask, TEST_BACKEND)

    # CPU reference result for the masked product.
    C_cpu = A_cpu * B_cpu .* mask

    copyto!(B_gpu, B_cpu)
    C_gpu_1 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)

    gpu_spmv!(C_gpu_1, A_gpu_csr, B_gpu; mask = MASK)
    KernelAbstractions.synchronize(TEST_BACKEND)
    # Compare with isapprox rather than exact ==: Float32 accumulation order
    # on the GPU may differ from the CPU reference, so bitwise equality is
    # too strict. This also matches the large-matrix checks below.
    copyto!(res, C_gpu_1)
    @test isapprox(res, C_cpu)

    # TODO(review): masked gpu_spmv! appears not to be implemented yet for
    # the ELL and CSC formats — enable these checks once it is.
    #C_gpu_2 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)
    #gpu_spmv!(C_gpu_2, A_gpu_ell, B_gpu; mask = MASK)
    #KernelAbstractions.synchronize(TEST_BACKEND)
    #copyto!(res, C_gpu_2)
    #@test isapprox(res, C_cpu)

    #C_gpu_3 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)
    #gpu_spmv!(C_gpu_3, A_gpu_csc, B_gpu; mask = MASK)
    #KernelAbstractions.synchronize(TEST_BACKEND)
    #copyto!(res, C_gpu_3)
    #@test isapprox(res, C_cpu)

    # --- Large matrix (1000x1000) ---
    LARGE_NB = 1000
    A_cpu = sprand(Float32, LARGE_NB, LARGE_NB, 0.2)
    B_cpu = rand(Float32, LARGE_NB)
    mask = rand(Bool, LARGE_NB)
    MASK = SparseGPUVector(mask, TEST_BACKEND)
    C_cpu = A_cpu * B_cpu .* mask
    A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
    A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
    A_gpu_csc = SparseGPUMatrixCSC(A_cpu, TEST_BACKEND)
    B_gpu = allocate(TEST_BACKEND, Float32, LARGE_NB)
    copyto!(B_gpu, B_cpu)
    C_gpu_1 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)
    C_gpu_2 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)
    C_gpu_3 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)

    gpu_spmv!(C_gpu_1, A_gpu_csr, B_gpu; mask = MASK)
    # TODO(review): enable once masked spmv exists for ELL/CSC (see above).
    #gpu_spmv!(C_gpu_2, A_gpu_ell, B_gpu; mask = MASK)
    #gpu_spmv!(C_gpu_3, A_gpu_csc, B_gpu; mask = MASK)
    KernelAbstractions.synchronize(TEST_BACKEND)

    res = zeros(Float32, LARGE_NB)

    copyto!(res, C_gpu_1)
    @test isapprox(res, C_cpu)
    #copyto!(res, C_gpu_2)
    #@test isapprox(res, C_cpu)
    #copyto!(res, C_gpu_3)
    #@test isapprox(res, C_cpu)

end
0 commit comments