Open
Description
See JuliaReinforcementLearning/ReinforcementLearning.jl#980
To reproduce, check out the `EpisodeSampleRatioController` branch in my fork and run the `JuliaRL_NFQ_CartPole` experiment.
Stacktrace
ERROR: type NamedTuple has no field terminal
Stacktrace:
[1] getproperty
@ ./Base.jl:37 [inlined]
[2] on_insert!
@ ~/.julia/dev/ReinforcementLearningTrajectories/src/controllers.jl:78 [inlined]
[3] on_insert!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, 
Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, n::Int64, x::NamedTuple{(:state,), Tuple{Vector{Float32}}})
@ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:114
[4] on_insert!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, 
Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, x::NamedTuple{(:state,), Tuple{Vector{Float32}}})
@ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:113
[5] push!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, 
Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, x::NamedTuple{(:state,), Tuple{Vector{Float32}}})
@ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:105
[6] push!(agent::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, #unused#::PreEpisodeStage, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64})
@ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/policies/agent/agent_base.jl:44
[7] macro expansion
@ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined]
[8] _run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64}, reset_condition::ResetAtTerminal)
@ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:92
[9] run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64}, reset_condition::ResetAtTerminal)
@ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:75
[10] run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64})
@ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:74
[11] run(ex::Experiment{:JuliaRL_NFQ_CartPole})
@ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:63
[12] top-level scope
@ REPL[5]:1
[13] top-level scope
@ ~/.julia/packages/CUDA/35NC6/src/initialization.jl:190
Metadata
Assignees
Labels
No labels