diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml index d8f505bed..31d8df00f 100644 --- a/.github/workflows/Tests.yml +++ b/.github/workflows/Tests.yml @@ -37,6 +37,7 @@ jobs: - "LinearSolvePETSc" - "LinearSolvePardiso" - "LinearSolveGinkgo" + - "LinearSolveElemental" - "NoPre" - "LinearSolveAutotune" - "LinearSolvePyAMG" @@ -59,6 +60,9 @@ jobs: # Ginkgo.jl is not available on Windows - group: LinearSolveGinkgo os: windows-latest + # Elemental_jll has no Windows binary + - group: LinearSolveElemental + os: windows-latest uses: "SciML/.github/.github/workflows/tests.yml@v1" with: group: "${{ matrix.group }}" diff --git a/Project.toml b/Project.toml index 25ddebb54..931ed6048 100644 --- a/Project.toml +++ b/Project.toml @@ -36,6 +36,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c" CliqueTrees = "60701a23-6482-424a-84db-faee86b9b1f8" +Elemental = "902c3f28-d1ec-5e7e-8399-a24c3845ee38" EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e" FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641" @@ -66,6 +67,7 @@ LinearSolveCUDAExt = "CUDA" LinearSolveCUDSSExt = "CUDSS" LinearSolveCUSOLVERRFExt = ["CUSOLVERRF", "SparseArrays"] LinearSolveCliqueTreesExt = ["CliqueTrees", "SparseArrays"] +LinearSolveElementalExt = "Elemental" LinearSolveEnzymeExt = ["EnzymeCore", "SparseArrays"] LinearSolveFastAlmostBandedMatricesExt = "FastAlmostBandedMatrices" LinearSolveFastLapackInterfaceExt = "FastLapackInterface" @@ -100,6 +102,7 @@ CliqueTrees = "1.13.1" ComponentArrays = "0.15.29" ConcreteStructs = "0.2.3" DocStringExtensions = "0.9.3" +Elemental = "0.6" EnumX = "1.0.4" EnzymeCore = "0.8.5" ExplicitImports = "1.10" @@ -159,6 +162,7 @@ BandedMatrices = "aae01518-5342-5314-be14-df237901396f" BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" CliqueTrees = "60701a23-6482-424a-84db-faee86b9b1f8" ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" +Elemental = "902c3f28-d1ec-5e7e-8399-a24c3845ee38" ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7" FastAlmostBandedMatrices = "9d29842c-ecb8-4973-b1e9-a27b1157504e" FastLapackInterface = "29a986be-02c6-4525-aec4-84b980013641" @@ -188,4 +192,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["AlgebraicMultigrid", "Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "KrylovKit", "KrylovPreconditioners", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff", "HYPRE", "MPI", "PETSc", "BlockDiagonals", "FiniteDiff", "BandedMatrices", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote", "RecursiveFactorization", "Sparspak", "CliqueTrees", "FastLapackInterface", "SparseArrays", "ExplicitImports", "ComponentArrays"] +test = ["AlgebraicMultigrid", "Aqua", "Test", "IterativeSolvers", "InteractiveUtils", "KrylovKit", "KrylovPreconditioners", "Pkg", "Random", "SafeTestsets", "MultiFloats", "ForwardDiff", "HYPRE", "MPI", "PETSc", "BlockDiagonals", "FiniteDiff", "BandedMatrices", "FastAlmostBandedMatrices", "StaticArrays", "AllocCheck", "StableRNGs", "Zygote", "RecursiveFactorization", "Sparspak", "CliqueTrees", "FastLapackInterface", "SparseArrays", "ExplicitImports", "ComponentArrays", "Elemental"] diff --git a/ext/LinearSolveElementalExt.jl b/ext/LinearSolveElementalExt.jl new file mode 100644 index 000000000..bbc5a77fd --- /dev/null +++ b/ext/LinearSolveElementalExt.jl @@ -0,0 +1,88 @@ +module LinearSolveElementalExt + +using LinearSolve, Elemental, LinearAlgebra +using LinearSolve: LinearCache, LinearVerbosity, OperatorAssumptions +using SciMLBase: SciMLBase, ReturnCode + +const ElementalNumeric = Union{Float32, Float64, ComplexF32, ComplexF64} + +function _elemental_eltype(A) + T = eltype(A) + return T <: ElementalNumeric ? T : (T <: Complex ? ComplexF64 : Float64) +end + +function _to_elemental_matrix(A::Base.AbstractVecOrMat, ::Type{T}) where {T} + return convert(Elemental.Matrix{T}, Matrix{T}(A)) +end + +# Always copy: lu!/qr!/cholesky! are in-place. Returning A directly would +# corrupt the user's matrix and cause reinit! + re-solve to factorize garbage. +function _to_elemental_matrix(A::Elemental.Matrix, ::Type{T}) where {T} + B = Elemental.Matrix(T) + Elemental.resize!(B, size(A, 1), size(A, 2)) + copyto!(B, A) + return B +end + +function _b_to_elemental(b::AbstractVector, ::Type{T}) where {T} + return convert(Elemental.Matrix{T}, reshape(Vector{T}(b), length(b), 1)) +end + +function _b_to_elemental(b::Elemental.Matrix{T}, ::Type{T}) where {T} + return b +end + +function _b_to_elemental(b::Elemental.Matrix, ::Type{T}) where {T} + return convert(Elemental.Matrix{T}, convert(Base.Matrix{T}, b)) +end + +function _elemental_factorize(alg::ElementalJL, A_el::Elemental.Matrix) + if alg.method === :LU + return LinearAlgebra.lu!(A_el) + elseif alg.method === :QR + return LinearAlgebra.qr!(A_el) + elseif alg.method === :Cholesky + # Must call cholesky!(A_el) directly, not cholesky!(Hermitian(A_el)). + # The Hermitian path returns LinearAlgebra.Cholesky whose .factors is a + # raw Elemental.Matrix with no \ defined, silently falling back to Julia's + # generic LU on already-destroyed data. The direct path returns + # CholeskyMatrix{T} which has \ → ElSolveAfterCholesky. + return LinearAlgebra.cholesky!(A_el) + else + error( + "Unknown method $(alg.method) for ElementalJL. " * + "Valid choices are :LU (default), :QR, or :Cholesky." + ) + end +end + +function LinearSolve.init_cacheval( + alg::ElementalJL, A, b, u, Pl, Pr, maxiters::Int, abstol, reltol, + verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions + ) + # Must return a properly typed factorization so that LinearCache is + # parameterized with the correct cacheval type. solve! will re-factorize + # immediately (isfresh = true on the first call) and overwrite this value. + T = _elemental_eltype(A) + A_el = _to_elemental_matrix(A, T) + return _elemental_factorize(alg, A_el) +end + +function SciMLBase.solve!(cache::LinearCache, alg::ElementalJL; kwargs...) + A = cache.A + T = _elemental_eltype(A) + + if cache.isfresh + A_el = _to_elemental_matrix(A, T) + cache.cacheval = _elemental_factorize(alg, A_el) + cache.isfresh = false + end + + fact = LinearSolve.@get_cacheval(cache, :ElementalJL) + x_jl = convert(Base.Matrix{T}, fact \ _b_to_elemental(cache.b, T)) + copyto!(cache.u, vec(x_jl)) + + return SciMLBase.build_linear_solution(alg, cache.u, nothing, cache; retcode = ReturnCode.Success) +end + +end # module diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl index 8e65f41c8..0c3c06d6c 100644 --- a/src/LinearSolve.jl +++ b/src/LinearSolve.jl @@ -523,6 +523,8 @@ export KrylovJL, KrylovJL_CG, KrylovJL_MINRES, KrylovJL_GMRES, IterativeSolversJL_BICGSTAB, IterativeSolversJL_MINRES, IterativeSolversJL_IDRS, KrylovKitJL, KrylovKitJL_CG, KrylovKitJL_GMRES, KrylovJL_MINARES, AlgebraicMultigridJL +export ElementalJL + export GinkgoJL, GinkgoJL_CG, GinkgoJL_GMRES export SimpleGMRES diff --git a/src/extension_algs.jl b/src/extension_algs.jl index 0c63bb6b4..832e9207c 100644 --- a/src/extension_algs.jl +++ b/src/extension_algs.jl @@ -1241,3 +1241,69 @@ struct ParUFactorization <: AbstractSparseFactorization return new(reuse_symbolic) end end + +""" + ElementalJL(; method = :LU) + +A wrapper for [Elemental.jl](https://github.com/JuliaParallel/Elemental.jl), +providing distributed-memory dense linear algebra solvers built on the +[Elemental](https://github.com/elemental/Elemental) C++ library by Jack Poulson. +(LLNL maintains an active GPU-focused fork of Elemental called +[Hydrogen](https://github.com/LLNL/Elemental).) + +## Keyword Arguments + + - `method`: The factorization method to use. Options: + - `:LU` (default) — LU factorization with partial pivoting. Suitable for + general square systems. + - `:QR` — QR factorization. Suitable for square or overdetermined systems. + - `:Cholesky` — Cholesky factorization. Requires the matrix to be Hermitian + positive definite. + +## Supported Element Types + +`Float32`, `Float64`, `ComplexF32`, `ComplexF64`. Matrices with other element +types are promoted to `Float64` (real) or `ComplexF64` (complex) before being +passed to Elemental. + +## Notes + + - Serial `Elemental.Matrix` values are accepted directly as the problem + matrix `A`; they are copied before factorization so the original is + never mutated. + - When `A` is a standard Julia `AbstractMatrix`, it is copied into an + `Elemental.Matrix` for the factorization. + - The factorization is cached across repeated solves with the same matrix + (i.e. when `isfresh = false`). + +!!! note + + Using this solver requires adding the package Elemental.jl: + ```julia + using Elemental + ``` + Elemental.jl automatically initialises MPI when loaded; no explicit + `MPI.Init()` call is needed for serial usage. + +## Example + +```julia +using LinearSolve, Elemental + +A = rand(100, 100); A = A + A' + 100I # well-conditioned +b = rand(100) +prob = LinearProblem(A, b) + +# LU (default) +sol = solve(prob, ElementalJL()) +# Cholesky (symmetric positive definite) +sol = solve(prob, ElementalJL(method = :Cholesky)) +``` +""" +struct ElementalJL <: AbstractDenseFactorization + method::Symbol +end + +function ElementalJL(; method::Symbol = :LU) + return ElementalJL(method) +end diff --git a/test/elemental/Project.toml b/test/elemental/Project.toml new file mode 100644 index 000000000..167c67c40 --- /dev/null +++ b/test/elemental/Project.toml @@ -0,0 +1,11 @@ +[deps] +Elemental = "902c3f28-d1ec-5e7e-8399-a24c3845ee38" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[compat] +Elemental = "0.6" +julia = "1.10" diff --git a/test/elemental/elemental.jl b/test/elemental/elemental.jl new file mode 100644 index 000000000..f4421d3ac --- /dev/null +++ b/test/elemental/elemental.jl @@ -0,0 +1,74 @@ +using LinearSolve, LinearAlgebra, Test, Random, SciMLBase +using Elemental + +@testset "ElementalJL" begin + rng = Random.MersenneTwister(42) + n = 50 + + # ── General square Float64 system ───────────────────────────────────────── + A_el = Matrix{Float64}(I, n, n) + 0.1 * rand(rng, n, n) + b_el = rand(rng, n) + prob_el = LinearProblem(A_el, b_el) + + # Default method: LU + sol_lu = solve(prob_el, ElementalJL()) + @test A_el * sol_lu.u ≈ b_el atol = 1.0e-8 + @test sol_lu.retcode == ReturnCode.Success + + # QR factorization + sol_qr = solve(prob_el, ElementalJL(method = :QR)) + @test A_el * sol_qr.u ≈ b_el atol = 1.0e-8 + + # ── Symmetric positive definite system → Cholesky ───────────────────────── + B_spd = rand(rng, n, n) + A_spd = B_spd * B_spd' + n * I # guaranteed SPD + b_spd = rand(rng, n) + sol_ch = solve(LinearProblem(A_spd, b_spd), ElementalJL(method = :Cholesky)) + @test A_spd * sol_ch.u ≈ b_spd atol = 1.0e-8 + + # ── Float32 system (all 4 eltypes are supported by Elemental_jll) ────────── + A_f32 = Matrix{Float32}(I, n, n) + 0.1f0 * rand(rng, Float32, n, n) + b_f32 = rand(rng, Float32, n) + sol_f32 = solve(LinearProblem(A_f32, b_f32), ElementalJL()) + @test A_f32 * sol_f32.u ≈ b_f32 atol = 1.0f-4 + + # ── Complex system ───────────────────────────────────────────────────────── + A_cplx = Matrix{ComplexF64}(I, n, n) + 0.1 * (rand(rng, n, n) + im * rand(rng, n, n)) + b_cplx = rand(rng, ComplexF64, n) + sol_cplx = solve(LinearProblem(A_cplx, b_cplx), ElementalJL()) + @test A_cplx * sol_cplx.u ≈ b_cplx atol = 1.0e-8 + + # ── Pass an Elemental.Matrix directly as A ──────────────────────────────── + A_emat = convert(Elemental.Matrix{Float64}, A_el) + b_emat = rand(rng, n) + sol_emat = solve(LinearProblem(A_emat, b_emat), ElementalJL()) + @test A_el * sol_emat.u ≈ b_emat atol = 1.0e-8 + + # ── Elemental.Matrix as A: re-factorization must not use corrupted data ─── + # Regression: _to_elemental_matrix must copy A. If it returned A directly, + # lu! would factorize in-place; the second reinit!(A=...) + solve! would + # re-factorize already-destroyed data and produce a wrong result. + A_emat2 = convert(Elemental.Matrix{Float64}, A_el) + b_emat2a = rand(rng, n) + b_emat2b = rand(rng, n) + cache_emat = SciMLBase.init(LinearProblem(A_emat2, b_emat2a), ElementalJL()) + solve!(cache_emat) + @test A_el * cache_emat.u ≈ b_emat2a atol = 1.0e-8 + # Pass A= to reinit! so isfresh=true, forcing _to_elemental_matrix to run again. + reinit!(cache_emat; A = A_emat2, b = b_emat2b) + solve!(cache_emat) + @test A_el * cache_emat.u ≈ b_emat2b atol = 1.0e-8 + + # ── Cache reuse (same A, different b) ───────────────────────────────────── + b_el2 = rand(rng, n) + cache = SciMLBase.init(prob_el, ElementalJL()) + solve!(cache) + @test A_el * cache.u ≈ b_el atol = 1.0e-8 + + reinit!(cache; b = b_el2) + solve!(cache) + @test A_el * cache.u ≈ b_el2 atol = 1.0e-8 + + # ── Unknown method raises an error ──────────────────────────────────────── + @test_throws ErrorException solve(prob_el, ElementalJL(method = :Bogus)) +end diff --git a/test/resolve.jl b/test/resolve.jl index c7a5e156a..2a02695e2 100644 --- a/test/resolve.jl +++ b/test/resolve.jl @@ -51,6 +51,10 @@ for alg in vcat( ( !(alg == ParUFactorization) || Base.get_extension(LinearSolve, :LinearSolveParUExt) !== nothing + ) && + ( + !(alg == ElementalJL) || + Base.get_extension(LinearSolve, :LinearSolveElementalExt) !== nothing ) A = [1.0 2.0; 3.0 4.0] alg in [ diff --git a/test/runtests.jl b/test/runtests.jl index df24888c0..f18e973c5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -98,6 +98,13 @@ if !Base.Sys.iswindows() && GROUP == "LinearSolveGinkgo" @time @safetestset "Ginkgo" include("ginkgo/ginkgo.jl") end +if !Base.Sys.iswindows() && GROUP == "LinearSolveElemental" + Pkg.activate("elemental") + Pkg.develop(PackageSpec(path = dirname(@__DIR__))) + Pkg.instantiate() + @time @safetestset "Elemental" include("elemental/elemental.jl") +end + if Base.Sys.islinux() && (GROUP == "All" || GROUP == "LinearSolveHYPRE") && HAS_EXTENSIONS @time @safetestset "LinearSolveHYPRE" include("hypretests.jl") end