Skip to content

MAGMA: update to v2.9.0 #11237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits on
May 23, 2025
76 changes: 61 additions & 15 deletions M/MAGMA/build_tarballs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,73 @@ include(joinpath(YGGDRASIL_DIR, "fancy_toys.jl"))
include(joinpath(YGGDRASIL_DIR, "platforms", "cuda.jl"))

name = "MAGMA"
version = v"2.7.0"
version = v"2.9.0"

# Note: Hopper should still build with CUDA v11.8
# on x86_64, but aarch64 requires CUDA v12.0
MIN_CUDA_VERSION = v"12"

# Collection of sources required to complete build
sources = [
ArchiveSource("http://icl.utk.edu/projectsfiles/magma/downloads/magma-$(version).tar.gz",
"fda1cbc4607e77cacd8feb1c0f633c5826ba200a018f647f1c5436975b39fd18"),
"ff77fd3726b3dfec3bfb55790b06480aa5cc384396c2db35c56fdae4a82c641c"),
DirectorySource("./bundled")
]

# Bash recipe for building across all platforms
script = raw"""
cd $WORKSPACE/srcdir/magma*
cd $WORKSPACE/srcdir

export TMPDIR=${WORKSPACE}/tmpdir # we need a lot of tmp space
mkdir -p ${TMPDIR}

PTROPT=""

# Necessary operations to cross compile CUDA from x86_64 to aarch64
if [[ "${target}" == aarch64-linux-* ]]; then

# Add /usr/lib/csl-musl-x86_64 to LD_LIBRARY_PATH to be able to use host nvcc
export LD_LIBRARY_PATH="/usr/lib/csl-musl-x86_64:/usr/lib/csl-glibc-x86_64:${LD_LIBRARY_PATH}"

# Make sure we use host CUDA executable by copying from the x86_64 CUDA redist
NVCC_DIR=(/workspace/srcdir/cuda_nvcc-*-archive)
rm -rf ${prefix}/cuda/bin
cp -r ${NVCC_DIR}/bin ${prefix}/cuda/bin

rm -rf ${prefix}/cuda/nvvm/bin
cp -r ${NVCC_DIR}/nvvm/bin ${prefix}/cuda/nvvm/bin

# Workaround failed execution of sizeptr in cross-compile builds
PTROPT="PTRSIZE=8"
fi

export CUDADIR=${prefix}/cuda
export PATH=${PATH}:${CUDADIR}/bin
export CUDACXX=${CUDADIR}/bin/nvcc

# This flag reduces the size of the compiled binaries; if
# they become over 2GB (e.g. due to targeting too many
# compute_XX), linking fails.
# See: https://github.com/NixOS/nixpkgs/pull/220402
export NVCC_PREPEND_FLAGS+=' -Xfatbin=-compress-all'
Comment on lines +54 to +58
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oooh, that sounds very interesting!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, the builds in 5614799 were failing after I expanded the gencode targets in 72a7f0c to better support the compute capabilities in several popular GPU cards (see e.g. here).

Then I found out that NixOS uses that flag for all CUDA-related builds, especially for MAGMA itself, and indeed it works pretty well 😃

Despite increasing the number of compiled targets, which would otherwise have increased the library size, the decompressed artifact sizes were actually lower than before.


export CUDADIR=${WORKSPACE}/destdir/cuda
export PATH=${PATH}:${CUDADIR}
cd magma*
cp ../make.inc .

# Patch to _64_ suffixes
atomic_patch -p1 ../0001-mangle-to-ILP64.patch
# reduce parallelism since otherwise the builder may OOM.
(( nproc=1+nproc/3 ))
make -j${nproc} sparse-shared
make install prefix=${prefix}

make ${PTROPT} -j${nproc} sparse-shared
make ${PTROPT} install prefix=${prefix}

install_license COPYRIGHT
"""

augment_platform_block = CUDA.augment
# ensure products directory is clean
rm -rf ${CUDADIR}
"""

platforms = CUDA.supported_platforms()
filter!(p -> arch(p) == "x86_64", platforms)
platforms = CUDA.supported_platforms(min_version = MIN_CUDA_VERSION)
filter!(p -> arch(p) == "x86_64" || arch(p) == "aarch64", platforms)
platforms = expand_cxxstring_abis(platforms)


Expand All @@ -56,10 +94,18 @@ for platform in platforms

cuda_deps = CUDA.required_dependencies(platform)

build_tarballs(ARGS, name, version, sources, script, [platform],
cuda_ver = platform["cuda"]

platform_sources = BinaryBuilder.AbstractSource[sources...]

if arch(platform) == "aarch64"
push!(platform_sources, CUDA.cuda_nvcc_redist_source(cuda_ver, "x86_64"))
end

build_tarballs(ARGS, name, version, platform_sources, script, [platform],
products, [dependencies; cuda_deps];
preferred_gcc_version=v"8",
julia_compat="1.8",
augment_platform_block,
augment_platform_block=CUDA.augment,
skip_audit=true, dont_dlopen=true)
end
8 changes: 4 additions & 4 deletions M/MAGMA/bundled/make.inc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#//////////////////////////////////////////////////////////////////////////////
# -- MAGMA (version 2.0) --
# -- MAGMA (version 2.x) --
# Univ. of Tennessee, Knoxville
# Univ. of California, Berkeley
# Univ. of Colorado, Denver
Expand Down Expand Up @@ -52,9 +52,9 @@ RANLIB = ranlib

# set our GPU targets
ifeq ($(BACKEND),cuda)
GPU_TARGET = Pascal Volta Turing Ampere
GPU_TARGET = Pascal sm_61 Volta Turing Ampere sm_86 Ada Hopper
else ifeq ($(BACKEND),hip)
GPU_TARGET = gfx900 gfx901
GPU_TARGET = gfx900 gfx902 gfx904 gfx906 gfx908 gfx909 gfx90a gfx940 gfx941 gfx942 gfx90c gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1033
endif

# Use -fPIC to make shared (.so) and static (.a) library;
Expand Down Expand Up @@ -112,7 +112,7 @@ ifeq ($(BACKEND),cuda)
DEVCCFLAGS += -Xcompiler "$(FPIC)" -std=c++11

# link with cuda specific libraries
LIB += -L$(CUDADIR)/lib64 -lcublas -lcusparse -lcudart -lcudadevrt
LIB += -L$(CUDADIR)/lib64 -L$(CUDADIR)/lib -lcublas -lcusparse -lcudart -lcudadevrt
INC += -I$(CUDADIR)/include

endif
Expand Down