-
Notifications
You must be signed in to change notification settings - Fork 3
L1 Gauss Seidel preconditioner #191
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 50 commits
2382a8f
e8991ba
a127bc0
d216e34
086ed78
99b1ab6
64a247c
343c99d
ee66f73
fbb3338
2b2dc17
5e1203a
bc1cec3
c8cc291
457c110
7f17845
7c9b474
42026cd
1b3ce6f
8a5675b
476928b
8bcca25
77f7148
aecbf67
1aa8986
552f8b8
dc8d4e4
24b72ab
a87c66f
5778fe9
b104cfb
0e93f05
51fa896
1f56bad
226f55a
292aacc
36f5754
33f1de7
3b52869
a056a67
bf2cc96
a84ab45
0794dce
2532179
8203cac
7ef9e15
869d3db
9609d59
8e62678
4a6454c
cce6547
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ FerriteGmsh = "4f95f4f8-b27c-4ae5-9a39-ea55e634e36b" | |
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" | ||
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" | ||
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" | ||
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" | ||
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" | ||
LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" | ||
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" | ||
|
@@ -47,6 +48,7 @@ FastBroadcast = "0.3.5" | |
Ferrite = "1" | ||
ForwardDiff = "0.10.38" | ||
JET = "0.9" | ||
KernelAbstractions = "0.9.34" | ||
LinearSolve = "2" | ||
Logging = "1.10" | ||
ModelingToolkit = "9" | ||
|
@@ -64,6 +66,8 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" | |
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" | ||
Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4" | ||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | ||
MatrixDepot = "b51810bb-c9f3-55da-ae3c-350fc1fbce05" | ||
ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042" | ||
|
||
[targets] | ||
test = ["Aqua", "DelimitedFiles", "JET", "Pkg", "StaticArrays", "Tensors", "Test"] | ||
test = ["Aqua", "DelimitedFiles", "JET", "Pkg", "StaticArrays", "Tensors", "Test","MatrixDepot","ThreadPinning"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need ThreadPinning for testing? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,11 @@ | ||
module CuThunderboltExt | ||
|
||
using Thunderbolt | ||
using LinearSolve | ||
using KernelAbstractions | ||
using SparseMatricesCSR | ||
|
||
import SparseArrays | ||
|
||
import CUDA: | ||
CUDA, CuArray, CuVector, CUSPARSE,blockDim,blockIdx,gridDim,threadIdx, | ||
|
@@ -10,7 +15,7 @@ import CUDA: | |
import Thunderbolt: | ||
UnPack.@unpack, | ||
SimpleMesh, | ||
SparseMatrixCSR, SparseMatrixCSC, | ||
SparseMatrixCSR, SparseMatrixCSC, AbstractSparseMatrix, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we import these types here from their respective packages instead of Thunderbolt? We might hit weird bugs otherwise. |
||
AbstractSemidiscreteFunction, AbstractPointwiseFunction, solution_size, | ||
AbstractPointwiseSolverCache,assemble_element!, | ||
LinearOperator,QuadratureRuleCollection, | ||
|
@@ -26,6 +31,9 @@ import Thunderbolt.FerriteUtils: | |
FeMemShape, KeMemShape, KeFeMemShape, DeviceCellIterator,DeviceOutOfBoundCellIterator,DeviceCellCache, | ||
FeCellMem, KeCellMem, KeFeCellMem,NoCellMem,AbstractMemShape | ||
|
||
import Thunderbolt.Preconditioners: | ||
sparsemat_format_type, CSCFormat, CSRFormat | ||
|
||
|
||
import Ferrite: | ||
AbstractDofHandler,get_grid,CellIterator,get_node_coordinate,getcoordinates,get_coordinate_eltype,getcells, | ||
|
@@ -80,9 +88,19 @@ function Thunderbolt.adapt_vector_type(::Type{<:CuVector}, v::VT) where {VT <: V | |
return CuVector(v) | ||
end | ||
|
||
const __cuda_version__ = pkgversion(CUDA) | ||
@info("CuThunderboltExt.jl: CUDA version: ", __cuda_version__) | ||
|
||
include("cuda/cuda_utils.jl") | ||
include("cuda/cuda_operator.jl") | ||
include("cuda/cuda_memalloc.jl") | ||
include("cuda/cuda_adapt.jl") | ||
include("cuda/cuda_iterator.jl") | ||
|
||
if __cuda_version__ >= v"5.7.3" #TODO: better way? support back compatibility? | ||
include("cuda/cuda_preconditioner.jl") | ||
else | ||
@warn("CuThunderboltExt.jl: CUDA version is too old <$__cuda_version__, skipping CUDA preconditioner.") | ||
end | ||
|
||
Abdelrahman912 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
end |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,35 @@ | ||||||||
######################################### | ||||||||
## CUDA L1 Gauss Seidel Preconditioner ## | ||||||||
######################################### | ||||||||
|
||||||||
# PIRACY ALERT: this code exhibit piratic nature because both `adapt` and its arguments are foreign objects. | ||||||||
# Therefore, `adapt` behavior is going to be different depending on whether `Thunderbolt` and `CuThunderboltExt` are loaded or not. | ||||||||
# Reference: https://juliatesting.github.io/Aqua.jl/stable/piracies/ | ||||||||
# Note: the problem is with `AbstractSparseMatrix` as the default behavior of `adapt` is to return the same object, whatever the backend is. | ||||||||
# Adapt.adapt(::CUDABackend, A::CUSPARSE.AbstractCuSparseMatrix) = A | ||||||||
# Adapt.adapt(::CUDABackend,A::AbstractSparseMatrix) = A |> cu | ||||||||
# Adapt.adapt(::CUDABackend, x::Vector) = x |> cu # not needed | ||||||||
# Adapt.adapt(::CUDABackend, x::CuVector) = x # not needed | ||||||||
|
||||||||
# TODO: remove this function if back compatibility is not needed | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Backward compat with what? |
||||||||
Preconditioners.convert_to_backend(::CUDABackend, A::AbstractSparseMatrix) = adapt(CUDABackend(), A) | ||||||||
|
||||||||
|
||||||||
# For some reason, these properties are not automatically defined for Device Arrays, | ||||||||
# TODO: remove the following code when https://github.com/JuliaGPU/CUDA.jl/pull/2738 is merged | ||||||||
#SparseArrays.rowvals(A::CUSPARSE.CuSparseDeviceMatrixCSC{Tv,Ti,1}) where {Tv,Ti} = A.rowVal | ||||||||
#SparseArrays.getcolptr(A::CUSPARSE.CuSparseDeviceMatrixCSC{Tv,Ti,1}) where {Tv,Ti} = A.colPtr | ||||||||
#SparseArrays.getnzval(A::CUSPARSE.CuSparseDeviceMatrixCSC{Tv,Ti,1}) where {Tv,Ti} = A.nzVal | ||||||||
#SparseMatricesCSR.getnzval(A::CUSPARSE.CuSparseDeviceMatrixCSR{Tv,Ti,1}) where {Tv,Ti} = A.nzVal | ||||||||
|
||||||||
# PIRACY ALERT: the following code is commented out to avoid piracy | ||||||||
# SparseMatricesCSR.colvals(A::CUSPARSE.CuSparseDeviceMatrixCSR{Tv,Ti,1}) where {Tv,Ti} = A.colVal | ||||||||
# SparseMatricesCSR.getrowptr(A::CUSPARSE.CuSparseDeviceMatrixCSR{Tv,Ti,1}) where {Tv,Ti} = A.rowPtr | ||||||||
Comment on lines
+6
to
+7
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these two lines should be added in JuliaGPU/CUDA.jl#2720, right ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually I am not sure about this one. Probably not, because SparseMatricesCSR is not an interface package. I would discuss this in the linked PR.
Comment on lines
+5
to
+7
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
|
||||||||
# workaround for the issue with SparseMatricesCSR | ||||||||
# TODO: find a more robust solution to dispatch the correct function | ||||||||
Preconditioners.colvals(A::CUSPARSE.CuSparseDeviceMatrixCSR{Tv,Ti,1}) where {Tv,Ti} = A.colVal | ||||||||
Preconditioners.getrowptr(A::CUSPARSE.CuSparseDeviceMatrixCSR{Tv,Ti,1}) where {Tv,Ti} = A.rowPtr | ||||||||
|
||||||||
Preconditioners.sparsemat_format_type(::CUSPARSE.CuSparseDeviceMatrixCSC{Tv,Ti,1}) where {Tv,Ti} = CSCFormat | ||||||||
Abdelrahman912 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
Preconditioners.sparsemat_format_type(::CUSPARSE.CuSparseDeviceMatrixCSR{Tv,Ti,1}) where {Tv,Ti} = CSRFormat | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we remove these here for now and add a comment to the tests that prior to 2720 we need to add these dispatches? I do not want to artificially postpone this PR here further due to missing optional stuff in the dependencies. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# remove the following code once PR (https://github.com/JuliaGPU/CUDA.jl/pull/2720) is merged ## | ||
CUDA.CUSPARSE.CuSparseMatrixCSR{T}(Mat::SparseMatrixCSR) where {T} = | ||
CUDA.CUSPARSE.CuSparseMatrixCSR{T}(CuVector{Cint}(Mat.rowptr), CuVector{Cint}(Mat.colval), | ||
CuVector{T}(Mat.nzval), size(Mat)) | ||
|
||
|
||
CUSPARSE.CuSparseMatrixCSC{T}(Mat::SparseMatrixCSR) where {T} = | ||
CUSPARSE.CuSparseMatrixCSC{T}(CUSPARSE.CuSparseMatrixCSR(Mat)) | ||
|
||
SparseMatricesCSR.SparseMatrixCSR(A::CUSPARSE.CuSparseMatrixCSR) = | ||
SparseMatrixCSR(CUSPARSE.SparseMatrixCSC(A)) # no direct conversion (gpu_CSR -> cpu_CSC -> cpu_CSR) | ||
|
||
Adapt.adapt_storage(::Type{CuArray}, xs::SparseMatrixCSR) = | ||
CUSPARSE.CuSparseMatrixCSR(xs) | ||
|
||
Adapt.adapt_storage(::Type{CuArray{T}}, xs::SparseMatrixCSR) where {T} = | ||
CUSPARSE.CuSparseMatrixCSR{T}(xs) | ||
|
||
Adapt.adapt_storage(::Type{Array}, mat::CUSPARSE.CuSparseMatrixCSR) = | ||
SparseMatrixCSR(mat) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
module Preconditioners | ||
|
||
using SparseArrays, SparseMatricesCSR | ||
using LinearSolve | ||
import LinearSolve: \ | ||
using Adapt | ||
using UnPack | ||
import KernelAbstractions: Backend, @kernel, @index, @ndrange, @groupsize, @print, functional, | ||
CPU,synchronize | ||
import SparseArrays: getcolptr,getnzval | ||
import SparseMatricesCSR: getnzval | ||
import LinearAlgebra: Symmetric | ||
|
||
## Generic Code # | ||
|
||
# CSR and CSC are exact the same in symmetric matrices,so we need to hold symmetry info | ||
# in order to be exploited in cases in which one format has better access pattern than the other. | ||
abstract type AbstractMatrixSymmetry end | ||
struct SymmetricMatrix <: AbstractMatrixSymmetry end | ||
struct NonSymmetricMatrix <: AbstractMatrixSymmetry end | ||
termi-official marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
abstract type AbstractMatrixFormat end | ||
struct CSRFormat <: AbstractMatrixFormat end | ||
struct CSCFormat <: AbstractMatrixFormat end | ||
|
||
|
||
# Why using these traits? | ||
# Since we are targeting multiple backends, but unfortunately, all the sparse matrix CSC/CSR on all | ||
# backends don't share the same supertype (e.g. AbstractSparseMatrixCSC/AbstractSparseMatrixCSR) | ||
# e.g. CUSPARSE.CuSparseDeviceMatrixCSC <:SparseArrays.AbstractSparseMatrixCSC → false | ||
# So we need to define our own traits to identify the format of the sparse matrix | ||
sparsemat_format_type(::SparseMatrixCSC) = CSCFormat | ||
sparsemat_format_type(::SparseMatrixCSR) = CSRFormat | ||
|
||
#TODO: remove once https://github.com/JuliaGPU/CUDA.jl/pull/2740 is merged | ||
convert_to_backend(backend::Backend, A::AbstractSparseMatrix) = | ||
adapt(backend, A) # fallback value, specific backends are to be extended in their corresponding extensions. | ||
|
||
# Why? because we want to circumvent piracy when extending these functions for device backend (e.g. CuSparseDeviceMatrixCSR) | ||
# TODO: find a more robust solution to dispatch the correct function | ||
colvals(A::SparseMatrixCSR) = SparseMatricesCSR.colvals(A) | ||
getrowptr(A::SparseMatrixCSR) = SparseMatricesCSR.getrowptr(A) | ||
|
||
include("l1_gauss_seidel.jl") | ||
|
||
export L1GSPrecBuilder | ||
|
||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a compat entry for the new CUDA release?