Commit 30f3105

Make QBasedPolicy general for AbstractLearner s (#1069)
1 parent 1b4d449 commit 30f3105

1 file changed: +6 -6 lines changed

src/ReinforcementLearningCore/src/policies/q_based_policy.jl (+6 -6)
@@ -10,32 +10,32 @@ action of an environment at its current state. It is typically a table or a neur
 QBasedPolicy can be queried for an action with `RLBase.plan!`, the explorer will affect the action selection
 accordingly.
 """
-struct QBasedPolicy{L<:TDLearner,E<:AbstractExplorer} <: AbstractPolicy
+struct QBasedPolicy{L<:AbstractLearner,E<:AbstractExplorer} <: AbstractPolicy
     "estimate the Q value"
     learner::L
     "select the action based on Q values calculated by the learner"
     explorer::E
 
-    function QBasedPolicy(; learner::L, explorer::E) where {L<:TDLearner, E<:AbstractExplorer}
+    function QBasedPolicy(; learner::L, explorer::E) where {L<:AbstractLearner, E<:AbstractExplorer}
         new{L,E}(learner, explorer)
     end
 
-    function QBasedPolicy(learner::L, explorer::E) where {L<:TDLearner, E<:AbstractExplorer}
+    function QBasedPolicy(learner::L, explorer::E) where {L<:AbstractLearner, E<:AbstractExplorer}
        new{L,E}(learner, explorer)
    end
 end
 
 Flux.@layer QBasedPolicy trainable=(learner,)
 
-function RLBase.plan!(policy::QBasedPolicy{L,Ex}, env::E) where {Ex<:AbstractExplorer,L<:TDLearner,E<:AbstractEnv}
+function RLBase.plan!(policy::QBasedPolicy{L,Ex}, env::E) where {Ex<:AbstractExplorer,L<:AbstractLearner,E<:AbstractEnv}
     RLBase.plan!(policy.explorer, policy.learner, env)
 end
 
-function RLBase.plan!(policy::QBasedPolicy{L,Ex}, env::E, player::Player) where {Ex<:AbstractExplorer,L<:TDLearner,E<:AbstractEnv, Player<:AbstractPlayer}
+function RLBase.plan!(policy::QBasedPolicy{L,Ex}, env::E, player::Player) where {Ex<:AbstractExplorer,L<:AbstractLearner,E<:AbstractEnv, Player<:AbstractPlayer}
     RLBase.plan!(policy.explorer, policy.learner, env, player)
 end
 
-RLBase.prob(policy::QBasedPolicy{L,Ex}, env::AbstractEnv) where {L<:TDLearner,Ex<:AbstractExplorer} =
+RLBase.prob(policy::QBasedPolicy{L,Ex}, env::AbstractEnv) where {L<:AbstractLearner,Ex<:AbstractExplorer} =
     prob(policy.explorer, forward(policy.learner, env), legal_action_space_mask(env))
 
 #the internal learner defines the optimization stage.
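
The change relaxes the learner type bound from `TDLearner` to `AbstractLearner`, so any learner subtype can now be wrapped in a `QBasedPolicy`. Below is a minimal, hypothetical sketch of what that enables. It assumes the learner contract is to implement `forward(learner, env)` returning a vector of Q-values (the method `RLBase.prob` calls above); the `RandomQLearner` type, the epsilon value, and the environment variable `env` are invented for illustration.

using ReinforcementLearningBase, ReinforcementLearningCore

# Hypothetical learner: returns a dummy Q-value per action. A real learner
# would look up a table or evaluate a neural network on the current state.
struct RandomQLearner <: ReinforcementLearningCore.AbstractLearner
    n_actions::Int
end

ReinforcementLearningCore.forward(learner::RandomQLearner, ::AbstractEnv) =
    rand(Float32, learner.n_actions)

# Before this commit the constructors required L<:TDLearner; now any
# AbstractLearner is accepted, paired here with an epsilon-greedy explorer.
policy = QBasedPolicy(
    learner = RandomQLearner(2),
    explorer = EpsilonGreedyExplorer(0.1),
)

# Querying the policy delegates to the explorer, which selects among the
# learner's Q-values (illustrative; `env` stands for any AbstractEnv that
# supports the queries the explorer dispatch needs):
# action = RLBase.plan!(policy, env)

The same policy can also be passed to `RLBase.prob(policy, env)` to obtain the explorer's action distribution over the learner's Q-values, as shown in the `RLBase.prob` method in the diff.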
