diff --git a/all/experiments/experiment.py b/all/experiments/experiment.py
index e83d4c61..55ad40d5 100644
--- a/all/experiments/experiment.py
+++ b/all/experiments/experiment.py
@@ -53,7 +53,7 @@ def test(self, episodes=100):
 
     def _log_training_episode(self, returns, fps):
         if not self._quiet:
-            print('episode: {}, frame: {}, fps: {}, returns: {}'.format(self.episode, self.frame, fps, returns))
+            print('episode: {}, frame: {}, fps: {}, returns: {}'.format(self.episode, self.frame, int(fps), returns))
         if returns > self._best_returns:
             self._best_returns = returns
         self._returns100.append(returns)
diff --git a/all/experiments/parallel_env_experiment.py b/all/experiments/parallel_env_experiment.py
index 0190bd55..d93c2658 100644
--- a/all/experiments/parallel_env_experiment.py
+++ b/all/experiments/parallel_env_experiment.py
@@ -1,4 +1,5 @@
+from timeit import default_timer as timer
 import torch
 import numpy as np
 from all.environments import State
 
@@ -26,6 +27,8 @@ def __init__(
         self._returns = []
         self._frame = 1
         self._episode = 1
+        self._episode_start_times = [] * self._n_envs
+        self._episode_start_frames = [] * self._n_envs
 
         # test state
         self._test_episodes = 100
@@ -66,6 +69,8 @@ def _reset(self):
             device=self._envs[0].device
         )
         self._returns = rewards
+        self._episode_start_times = [timer()] * self._n_envs
+        self._episode_start_frames = [self._frame] * self._n_envs
 
     def _step(self):
         states = self._aggregate_states()
@@ -80,10 +85,12 @@ def _step_envs(self, actions):
 
             if env.done:
                 self._returns[i] += env.reward
-                self._log_training_episode(self._returns[i].item(), 0)
+                self._log_training_episode(self._returns[i].item(), self._fps(i))
                 env.reset()
                 self._returns[i] = 0
                 self._episode += 1
+                self._episode_start_times[i] = timer()
+                self._episode_start_frames[i] = self._frame
             else:
                 action = actions[i]
                 if action is not None:
@@ -134,5 +141,9 @@ def _aggregate_rewards(self):
             device=self._envs[0].device
         )
 
+    def _fps(self, i):
+        end_time = timer()
+        return (self._frame - self._episode_start_frames[i]) / (end_time - self._episode_start_times[i])
+
     def _make_writer(self, agent_name, env_name, write_loss):
         return ExperimentWriter(self, agent_name, env_name, loss=write_loss)
diff --git a/setup.py b/setup.py
index 8bd0c06d..df361941 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="autonomous-learning-library",
-    version="0.5.0",
+    version="0.5.1",
     description=("A library for building reinforcement learning agents in Pytorch"),
     packages=find_packages(),
     url="https://github.com/cpnota/autonomous-learning-library.git",