|
11 | 11 | A_cpu = sprand(Float32, 10, 10, 0.5)
|
12 | 12 | B_cpu = rand(Float32, 10)
|
13 | 13 | C_cpu = A_cpu * B_cpu
|
14 |
| - println("res: ", C_cpu) |
15 | 14 | res = zeros(Float32, 10)
|
16 | 15 | A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
|
17 | 16 | A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
|
|
65 | 64 | @test isapprox(res, C_cpu)
|
66 | 65 |
|
67 | 66 | end
|
@testset "masked_mul!" begin

    # Masked sparse matrix-vector multiplication: only output entries where
    # the boolean mask is true should be produced; all others stay zero.
    # The CPU reference is computed as (A * B) .* mask.

    # --- Small matrix (10x10) ---
    A_cpu = sprand(Float32, 10, 10, 0.5)
    B_cpu = rand(Float32, 10)

    res = zeros(Float32, 10)
    A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
    A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
    A_gpu_csc = SparseGPUMatrixCSC(A_cpu, TEST_BACKEND)
    B_gpu = allocate(TEST_BACKEND, Float32, 10)

    mask = rand(Bool, 10)
    MASK = SparseGPUVector(mask, TEST_BACKEND)

    # CPU reference result for the masked product.
    C_cpu = A_cpu * B_cpu .* mask

    copyto!(B_gpu, B_cpu)
    C_gpu_1 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)

    gpu_spmv!(C_gpu_1, A_gpu_csr, B_gpu; mask = MASK)
    KernelAbstractions.synchronize(TEST_BACKEND)
    # Compare with isapprox rather than exact ==: Float32 accumulation order
    # on the GPU may differ from the CPU reference, so bitwise equality is
    # too strict. This also matches the large-matrix checks below.
    copyto!(res, C_gpu_1)
    @test isapprox(res, C_cpu)

    # TODO(review): masked gpu_spmv! appears not to be implemented yet for
    # the ELL and CSC formats — enable these checks once it is.
    #C_gpu_2 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)
    #gpu_spmv!(C_gpu_2, A_gpu_ell, B_gpu; mask = MASK)
    #KernelAbstractions.synchronize(TEST_BACKEND)
    #copyto!(res, C_gpu_2)
    #@test isapprox(res, C_cpu)

    #C_gpu_3 = KernelAbstractions.zeros(TEST_BACKEND, Float32, 10)
    #gpu_spmv!(C_gpu_3, A_gpu_csc, B_gpu; mask = MASK)
    #KernelAbstractions.synchronize(TEST_BACKEND)
    #copyto!(res, C_gpu_3)
    #@test isapprox(res, C_cpu)

    # --- Large matrix (1000x1000) ---
    LARGE_NB = 1000
    A_cpu = sprand(Float32, LARGE_NB, LARGE_NB, 0.2)
    B_cpu = rand(Float32, LARGE_NB)
    mask = rand(Bool, LARGE_NB)
    MASK = SparseGPUVector(mask, TEST_BACKEND)
    C_cpu = A_cpu * B_cpu .* mask
    A_gpu_csr = SparseGPUMatrixCSR(A_cpu, TEST_BACKEND)
    A_gpu_ell = SparseGPUMatrixELL(A_cpu, TEST_BACKEND)
    A_gpu_csc = SparseGPUMatrixCSC(A_cpu, TEST_BACKEND)
    B_gpu = allocate(TEST_BACKEND, Float32, LARGE_NB)
    copyto!(B_gpu, B_cpu)
    C_gpu_1 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)
    C_gpu_2 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)
    C_gpu_3 = KernelAbstractions.zeros(TEST_BACKEND, Float32, LARGE_NB)

    gpu_spmv!(C_gpu_1, A_gpu_csr, B_gpu; mask = MASK)
    # TODO(review): enable once masked spmv exists for ELL/CSC (see above).
    #gpu_spmv!(C_gpu_2, A_gpu_ell, B_gpu; mask = MASK)
    #gpu_spmv!(C_gpu_3, A_gpu_csc, B_gpu; mask = MASK)
    KernelAbstractions.synchronize(TEST_BACKEND)

    res = zeros(Float32, LARGE_NB)

    copyto!(res, C_gpu_1)
    @test isapprox(res, C_cpu)
    #copyto!(res, C_gpu_2)
    #@test isapprox(res, C_cpu)
    #copyto!(res, C_gpu_3)
    #@test isapprox(res, C_cpu)

end
0 commit comments