You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard
Expand all lines: src/ReinforcementLearningCore/src/policies/learners/td_learner.jl
+16 -16 Lines changed: 16 additions & 16 deletions
Original file line number
Diff line number
Diff line change
@@ -45,32 +45,32 @@ Update the Q-value of the given state-action pair.
45
45
"""
46
46
function bellman_update!(
47
47
approx::TabularApproximator,
48
-
s::I1,
49
-
s_plus_one::I2,
50
-
a::I3,
51
-
r::F1,# reward
48
+
state::I1,
49
+
next_state::I2,
50
+
action::I3,
51
+
reward::F1,
52
52
γ::Float64, # discount factor
53
53
) where {I1<:Integer,I2<:Integer,I3<:Integer,F1<:AbstractFloat}
54
54
# Q-learning formula following https://github.com/JuliaPOMDP/TabularTDLearning.jl/blob/25c4d3888e178c51ed1ff448f36b0fcaf7c1d8e8/src/q_learn.jl#LL63C26-L63C95
55
55
# Terminology following https://en.wikipedia.org/wiki/Q-learning
0 commit comments