Initial GPUArray transition (#1020)

jeremiahpslewis · web-flow · commit 96bd7f04720c · 2024-03-03T20:34:59.000+01:00
* Add LocalPreferences.toml to .gitignore

* Transition Project.toml to GPUArrays

* Support Metal in RLCore tests

* Log GPU_Backend

* Make tests work w/o CUDA

* Fix CUDA import

* Drop CUDA requirement

* Make tests GPU aware

* Drop CUDA from tests (keep in dependencies for now)

* Drop CUDA from test dependencies

* Add @info statement logging Flux.GPU_BACKEND

* Check if Apple silicon

* Apple silicon check

* Fix duplicate compat entry

* Fix CUDA compat (allow v5)

* Fix RLZoo compat

* Add missing test import

* Use julia-actions cache instead of generic Github one
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -38,16 +38,7 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v4
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
+      - uses: julia-actions/cache@v1
       - name: Get changed files
         id: RLBase-changed
         uses: tj-actions/changed-files@v42
@@ -86,16 +77,7 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v4
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
+      - uses: julia-actions/cache@v1
       - name: Get changed files
         id: RLCore-changed
         uses: tj-actions/changed-files@v42
@@ -137,16 +119,7 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v4
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
+      - uses: julia-actions/cache@v1
       - name: Get changed files
         id: RLZoo-changed
         uses: tj-actions/changed-files@v42
@@ -190,16 +163,7 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v4
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
+      - uses: julia-actions/cache@v1
       - name: Get changed files
         id: RLEnvironments-changed
         uses: tj-actions/changed-files@v42
@@ -245,17 +209,7 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v4
-        env:
-          cache-name: cache-artifacts
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
-          restore-keys: |
-            ${{ runner.os }}-test-${{ env.cache-name }}-
-            ${{ runner.os }}-test-
-            ${{ runner.os }}-
-
+      - uses: julia-actions/cache@v1
       - name: Get changed files
         id: RLExperiments-changed
         uses: tj-actions/changed-files@v42
diff --git a/.gitignore b/.gitignore
@@ -33,6 +33,7 @@ docs/experiments
 # environment.
 Manifest.toml
 **/Manifest.toml
+LocalPreferences.toml
 
 .vscode/*
 
@@ -59,3 +60,5 @@ Sessionx.vim
 tags
 # Persistent undo
 [._]*.un~
+
+
diff --git a/src/ReinforcementLearningCore/Project.toml b/src/ReinforcementLearningCore/Project.toml
@@ -39,6 +39,7 @@ FillArrays = "0.8, 0.9, 0.10, 0.11, 0.12, 0.13, 1"
 Flux = "0.13, 0.14"
 Functors = "0.1, 0.2, 0.3, 0.4"
 GPUArrays = "8, 9, 10"
+Metal = "1.0"
 Parsers = "2"
 ProgressMeter = "1"
 Reexport = "1"
@@ -54,8 +55,11 @@ julia = "1.9"
 [extras]
 CommonRLInterface = "d842c3ba-07a1-494f-bbec-f5741b0a3e98"
 DomainSets = "5b8099bc-c8ec-5219-889f-1d9e522a28bf"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [targets]
-test = ["CommonRLInterface", "DomainSets", "Test", "Random"]
+test = ["CommonRLInterface", "DomainSets", "Metal", "Preferences", "Test", "UUIDs"]
diff --git a/src/ReinforcementLearningCore/test/runtests.jl b/src/ReinforcementLearningCore/test/runtests.jl
@@ -1,12 +1,25 @@
+using Test
+using UUIDs
+using Preferences
+
+if Sys.isapple() && Sys.ARCH === :aarch64
+    flux_uuid = UUID("587475ba-b771-5e3f-ad9e-33799f191a9c")
+    set_preferences!(flux_uuid, "gpu_backend" => "Metal")
+
+    using Metal
+else
+    using CUDA, cuDNN
+end
+
 using ReinforcementLearningBase
 using ReinforcementLearningCore
 using ReinforcementLearningTrajectories
 
-using Test
-using CUDA
 using CircularArrayBuffers
 using Flux
 
+@info "Flux.GPU_BACKEND = $(Flux.GPU_BACKEND)"
+
 include("environments/randomwalk1D.jl")
 include("environments/tictactoe.jl")
 include("environments/rockpaperscissors.jl")
diff --git a/src/ReinforcementLearningCore/test/utils/device.jl b/src/ReinforcementLearningCore/test/utils/device.jl
@@ -5,7 +5,7 @@
     @test device(Conv((2, 2), 1 => 16, relu)) == Val(:cpu)
     @test device(Chain(x -> x .^ 2, Dense(2, 3))) == Val(:cpu)
 
-    if CUDA.functional()
+    if (@isdefined CUDA) && CUDA.functional()
         @test device(rand(2) |> gpu) isa CuDevice
         @test device(Dense(2, 3) |> gpu) isa CuDevice
         @test device(Conv((2, 2), 1 => 16, relu) |> gpu) isa CuDevice
diff --git a/src/ReinforcementLearningCore/test/utils/distributions.jl b/src/ReinforcementLearningCore/test/utils/distributions.jl
@@ -1,4 +1,4 @@
-using Test, LinearAlgebra, Distributions, CUDA, Flux
+using Test, LinearAlgebra, Distributions, Flux
 
 @testset "utils/distributions" begin
     @testset "logdetLorU" begin
@@ -8,7 +8,7 @@ using Test, LinearAlgebra, Distributions, CUDA, Flux
         logdetM = logdet(M)
         @test logdetM == ReinforcementLearningCore.logdetLorU(L) 
         @test logdetM == ReinforcementLearningCore.logdetLorU(U)
-        if CUDA.functional()
+        if (@isdefined CUDA) && CUDA.functional()
             L_d = cu(L)
             U_d = cu(U)
             @test logdetM == ReinforcementLearningCore.logdetLorU(L_d) 
@@ -126,7 +126,7 @@ using Test, LinearAlgebra, Distributions, CUDA, Flux
         end
     end
     @testset "CUDA" begin
-        if CUDA.functional()
+        if (@isdefined CUDA) && CUDA.functional()
             CUDA.allowscalar(false)
             #These only test GPU compatibility, exactness of results is tested above on the CPU
             @testset "Diagonal Gaussian" begin
diff --git a/src/ReinforcementLearningCore/test/utils/networks.jl b/src/ReinforcementLearningCore/test/utils/networks.jl
@@ -1,5 +1,7 @@
-using Test, Flux, CUDA, ChainRulesCore, LinearAlgebra, Distributions
-using Flux: params, gradient, unsqueeze
+using Test, Flux, ChainRulesCore, LinearAlgebra, Distributions, ReinforcementLearningCore
+using Flux: params, gradient, unsqueeze, InvDecay, gpu, cpu
+import ReinforcementLearningBase: RLBase
+
 @testset "Approximators" begin
     #= These may need to be updated due to recent changes
     @testset "TabularApproximator" begin
@@ -110,7 +112,7 @@ using Flux: params, gradient, unsqueeze
             end
         end
         @testset "CUDA" begin
-            if CUDA.functional()
+            if (@isdefined CUDA) && CUDA.functional()
                 CUDA.allowscalar(false)
                 gn = GaussianNetwork(Dense(20,15), Dense(15,10), Dense(15,10, softplus)) |> gpu
                 state = rand(20,3)  |> gpu #batch of 3 states
@@ -262,7 +264,7 @@ using Flux: params, gradient, unsqueeze
             end
         end
         @testset "CUDA" begin
-            if CUDA.functional()
+            if (@isdefined CUDA) && CUDA.functional()
                 CUDA.allowscalar(false) 
                 rng = CUDA.CURAND.RNG()
                 pre = Dense(20,15) |> gpu
diff --git a/src/ReinforcementLearningExperiments/Project.toml b/src/ReinforcementLearningExperiments/Project.toml
@@ -19,10 +19,11 @@ Weave = "44d3d7a6-8a23-5bf8-98c5-b353f8df5ec9"
 cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 
 [compat]
-CUDA = "5"
+CUDA = "4, 5"
 Distributions = "0.25"
 Flux = "0.13, 0.14"
 IntervalSets = "0.7"
+Metal = "1.0"
 Reexport = "1"
 ReinforcementLearningBase = "0.12"
 ReinforcementLearningCore = "0.13.1"
@@ -35,7 +36,11 @@ julia = "1.9"
 
 [extras]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [targets]
-test = ["CUDA", "Test"]
+test = ["Metal", "Preferences", "Test", "UUIDs"]
diff --git a/src/ReinforcementLearningExperiments/deps/experiments/experiments/CFR/JuliaRL_DeepCFR_OpenSpiel.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/CFR/JuliaRL_DeepCFR_OpenSpiel.jl
@@ -59,7 +59,7 @@ function RLCore.Experiment(
         n_training_steps_Π = 2000,
         batchsize_V = 2048,
         batchsize_Π = 2048,
-        initializer = glorot_normal(CUDA.CURAND.default_rng()),
+        initializer = glorot_normal((@isdefined CUDA) && CUDA.functional() ? CUDA.CURAND.RNG() : rng),
     )
     Experiment(p, env, StopAfterStep(500, is_show_progress=!haskey(ENV, "CI")), EmptyHook(), "# run DeepcCFR on leduc_poker")
 end
diff --git a/src/ReinforcementLearningExperiments/deps/experiments/experiments/DQN/Dopamine_IQN_Atari.jl b/src/ReinforcementLearningExperiments/deps/experiments/experiments/DQN/Dopamine_IQN_Atari.jl
@@ -182,7 +182,7 @@ function RLCore.Experiment(
 )
     rng = Random.default_rng()
     Random.seed!(rng, seed)
-    device_rng = CUDA.functional() ? CUDA.CURAND.RNG() : rng
+    device_rng = ((@isdefined CUDA) && CUDA.functional()) ? CUDA.CURAND.RNG() : rng
     Random.seed!(device_rng, isnothing(seed) ? nothing : hash(seed + 1))
 
     if isnothing(save_dir)
diff --git a/src/ReinforcementLearningExperiments/test/runtests.jl b/src/ReinforcementLearningExperiments/test/runtests.jl
@@ -1,7 +1,22 @@
-using ReinforcementLearningExperiments
-using CUDA
+using UUIDs
+using Preferences
+
+# Select the GPU backend: set Flux's "gpu_backend" preference to Metal on Apple silicon, otherwise load CUDA.
+if Sys.isapple() && Sys.ARCH === :aarch64
+    flux_uuid = UUID("587475ba-b771-5e3f-ad9e-33799f191a9c")
+    set_preferences!(flux_uuid, "gpu_backend" => "Metal")
+
+    using Metal
+else
+    using CUDA, cuDNN
+    CUDA.allowscalar(false)
+end
 
-CUDA.allowscalar(false)
+# Load Flux explicitly: nothing above brings the name `Flux` into scope for the log line below.
+using Flux
+@info "Flux.GPU_BACKEND = $(Flux.GPU_BACKEND)"
+
+using ReinforcementLearningExperiments
 
 run(E`JuliaRL_BasicDQN_CartPole`)
 run(E`JuliaRL_DQN_CartPole`)
diff --git a/src/ReinforcementLearningZoo/Project.toml b/src/ReinforcementLearningZoo/Project.toml
@@ -18,7 +18,6 @@ ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44"
 ReinforcementLearningCore = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
-cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 
 [compat]
 CUDA = "4, 5"
@@ -27,6 +26,7 @@ Distributions = "0.25"
 Flux = "0.13, 0.14"
 Functors = "0.2, 0.3, 0.4"
 LogExpFunctions = "0.3"
+
 MLUtils = "0.4"
 NNlib = "0.8, 0.9"
 Optim = "1"
@@ -38,7 +38,11 @@ cuDNN = "1"
 julia = "1.9"
 
 [extras]
+Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
+Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 
 [targets]
-test = ["Test"]
+test = ["Metal", "Preferences", "Test", "UUIDs", "cuDNN"]
diff --git a/src/ReinforcementLearningZoo/test/runtests.jl b/src/ReinforcementLearningZoo/test/runtests.jl
@@ -1,5 +1,18 @@
-using Test
+using UUIDs
+using Preferences
+
+# Only pick the Metal backend on Apple silicon (aarch64), matching the guard in the RLCore and RLExperiments test scripts.
+if Sys.isapple() && Sys.ARCH === :aarch64
+    flux_uuid = UUID("587475ba-b771-5e3f-ad9e-33799f191a9c")
+    set_preferences!(flux_uuid, "gpu_backend" => "Metal")
 
+    using Metal
+else
+    using CUDA, cuDNN
+    CUDA.allowscalar(false)
+end
+
+using Test
 @testset "ReinforcementLearningZoo.jl" begin
     # include("cfr/cfr.jl")
     # include("hooks.jl")

Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,7 @@ function RLCore.Experiment(`
`59`	`59`	`n_training_steps_Π = 2000,`
`60`	`60`	`batchsize_V = 2048,`
`61`	`61`	`batchsize_Π = 2048,`
`62`		`- initializer = glorot_normal(CUDA.CURAND.default_rng()),`
	`62`	`+ initializer = glorot_normal((@isdefined CUDA) && CUDA.functional() ? CUDA.CURAND.RNG() : rng),`
`63`	`63`	`)`
`64`	`64`	`Experiment(p, env, StopAfterStep(500, is_show_progress=!haskey(ENV, "CI")), EmptyHook(), "# run DeepcCFR on leduc_poker")`
`65`	`65`	`end`
Original file line number	Diff line number	Diff line change
`@@ -182,7 +182,7 @@ function RLCore.Experiment(`
`182`	`182`	`)`
`183`	`183`	`rng = Random.default_rng()`
`184`	`184`	`Random.seed!(rng, seed)`
`185`		`- device_rng = CUDA.functional() ? CUDA.CURAND.RNG() : rng`
	`185`	`+ device_rng = ((@isdefined CUDA) && CUDA.functional()) ? CUDA.CURAND.RNG() : rng`
`186`	`186`	`Random.seed!(device_rng, isnothing(seed) ? nothing : hash(seed + 1))`
`187`	`187`
`188`	`188`	`if isnothing(save_dir)`