Skip to content

EpisodeSampleRatioController NamedTuple has no field terminal #63

Open
@CasBex

Description

@CasBex

See JuliaReinforcementLearning/ReinforcementLearning.jl#980

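To reproduce, check out the EpisodeSampleRatioController branch in my fork and run the JuliaRL_NFQ_CartPole experiment. As the stack trace below shows, the error is raised when the agent pushes to the trajectory during PreEpisodeStage: the inserted NamedTuple contains only a state field, but on_insert! for EpisodeSampleRatioController (controllers.jl:78) accesses its terminal field.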

Stacktrace

ERROR: type NamedTuple has no field terminal
Stacktrace:
  [1] getproperty
    @ ./Base.jl:37 [inlined]
  [2] on_insert!
    @ ~/.julia/dev/ReinforcementLearningTrajectories/src/controllers.jl:78 [inlined]
  [3] on_insert!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, 
Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, n::Int64, x::NamedTuple{(:state,), Tuple{Vector{Float32}}})
    @ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:114
  [4] on_insert!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, 
Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, x::NamedTuple{(:state,), Tuple{Vector{Float32}}})
    @ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:113
  [5] push!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, 
Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, x::NamedTuple{(:state,), Tuple{Vector{Float32}}})
    @ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:105
  [6] push!(agent::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, #unused#::PreEpisodeStage, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64})
    @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/policies/agent/agent_base.jl:44
  [7] macro expansion
    @ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined]
  [8] _run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64}, reset_condition::ResetAtTerminal)
    @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:92
  [9] run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64}, reset_condition::ResetAtTerminal)
    @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:75
 [10] run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, 
SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64})
    @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:74
 [11] run(ex::Experiment{:JuliaRL_NFQ_CartPole})
    @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:63
 [12] top-level scope
    @ REPL[5]:1
 [13] top-level scope
    @ ~/.julia/packages/CUDA/35NC6/src/initialization.jl:190

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions