
Commit 5aed0b2

Author: Jeremiah Lewis
Commit message: fixes
1 parent: b1c5c12

File tree: 2 files changed (+5 additions, -2 deletions)

src/ReinforcementLearningCore/src/policies/learners/tabular_approximator.jl

Lines changed: 2 additions & 1 deletion

@@ -16,7 +16,8 @@ For `table` of 2-d, it will serve as a state-action value approximator.
 function TabularApproximator(table::A, opt::O) where {A<:AbstractArray,O}
     n = ndims(table)
     n <= 2 || throw(ArgumentError("the dimension of table must be <= 2"))
-    TabularApproximator{A,O}(table, opt)
+    optimiser_state = Flux.setup(optimiser, table)
+    TabularApproximator{A,O}(table, optimiser_state)
 end

 TabularVApproximator(; n_state, opt, init = 0.0) =
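
For context, a minimal sketch of what the new constructor line relies on: Flux.setup builds per-parameter optimiser state for an array. The table shape, the Descent optimiser, its learning rate, and the placeholder gradient below are illustrative choices, not values from the repository.

using Flux

# Minimal sketch: Flux.setup builds the per-parameter optimiser state that the
# updated constructor now stores in place of the raw optimiser object.
table = zeros(Float64, 5, 5)        # illustrative 2-d state-action table
opt = Flux.Descent(0.1)             # illustrative optimiser and learning rate
opt_state = Flux.setup(opt, table)  # optimiser state tied to `table`

# The stored state can later drive an in-place update from a gradient:
grad = ones(size(table))            # placeholder gradient
Flux.update!(opt_state, table, grad)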

src/ReinforcementLearningCore/test/policies/q_based_policy.jl

Lines changed: 3 additions & 1 deletion

@@ -77,8 +77,10 @@
     )
     t = (state=2, action=3)
     push!(trajectory, t)
-    t = (next_state=3, reward=5.0, terminal=false)
+    next_state = 4
+    t = (action=3, state=next_state, reward=5.0, terminal=false)
     push!(trajectory, t)
+    trajectory.container[1]
     RLBase.optimise!(policy, PostActStage(), trajectory)
     # Add assertions here
 end
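
For orientation, a rough plain-Julia illustration (not the ReinforcementLearningTrajectories API) of how the two partial named tuples pushed in this test can be read as one transition; the variable names and the stitched tuple below are assumptions made for illustration only.

# The first push records a state-action pair; the second push supplies the
# reward/terminal flag for that pair plus the state that begins the next step.
first_push  = (state=2, action=3)
second_push = (action=3, state=4, reward=5.0, terminal=false)

# Conceptually, the trace container pairs consecutive pushes into a full
# state-action-reward-terminal-next_state transition:
transition = (
    state      = first_push.state,
    action     = first_push.action,
    reward     = second_push.reward,
    terminal   = second_push.terminal,
    next_state = second_push.state,
)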
