From a56ccc8ec51ba556d9d7ff80e18c7572ea90d506 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Mon, 3 Oct 2022 10:03:13 -0400 Subject: [PATCH 01/26] change opencv dependency to headless and upgrade to version 4 (#275) * change opencv dependency to headless and upgrade to version 4 * revert test changes --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 812c7427..af46231e 100644 --- a/setup.py +++ b/setup.py @@ -63,7 +63,7 @@ "gym~={}".format(GYM_VERSION), # common environment interface "numpy>=1.22.3", # math library "matplotlib>=3.5.1", # plotting library - "opencv-python~=3.4.0", # used by atari wrappers + "opencv-python-headless>=4.0.0", # used by atari wrappers "torch>=1.11.0", # core deep learning library "tensorboard>=2.8.0", # logging and visualization "cloudpickle>=2.0.0", # used to copy environments From 2052d1641dd1745bf642832a5e837f31daa9cec4 Mon Sep 17 00:00:00 2001 From: "Nota, Christopher" Date: Wed, 29 Nov 2023 08:47:14 -0500 Subject: [PATCH 02/26] switch to gymnasium and update imports --- all/agents/ddpg.py | 2 +- all/environments/atari.py | 4 ++-- all/environments/atari_wrappers.py | 24 +++++++++++------------ all/environments/duplicate_env.py | 2 +- all/environments/duplicate_env_test.py | 2 +- all/environments/gym.py | 14 ++++++------- all/environments/gym_test.py | 2 +- all/environments/multiagent_atari.py | 2 +- all/environments/multiagent_pettingzoo.py | 6 +++--- all/environments/vector_env.py | 2 +- all/environments/vector_env_test.py | 4 ++-- all/experiments/watch.py | 2 +- all/nn/nn_test.py | 4 ++-- all/policies/deterministic.py | 2 +- all/policies/deterministic_test.py | 2 +- all/policies/gaussian.py | 2 +- all/policies/gaussian_test.py | 2 +- all/policies/soft_deterministic.py | 2 +- all/policies/soft_deterministic_test.py | 2 +- setup.py | 10 +++++----- 20 files changed, 46 insertions(+), 46 deletions(-) diff --git a/all/agents/ddpg.py b/all/agents/ddpg.py index 3cbec351..6a01f0ae 100644 --- a/all/agents/ddpg.py +++ b/all/agents/ddpg.py @@ -20,7 +20,7 @@ class DDPG(Agent): q (QContinuous): An Approximation of the continuous action Q-function. policy (DeterministicPolicy): An Approximation of a deterministic policy. replay_buffer (ReplayBuffer): The experience replay buffer. - action_space (gym.spaces.Box): Description of the action space. + action_space (gymnasium.spaces.Box): Description of the action space. discount_factor (float): Discount factor for future rewards. minibatch_size (int): The number of experiences to sample in each training update. noise (float): the amount of noise to add to each action (before scaling). 
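For reference, a minimal sketch of the Gymnasium API that the commits below migrate to (an illustration, not part of the patch series; "CartPole-v1" is an arbitrary example environment): reset() now returns an (observation, info) pair, and step() returns a five-tuple that splits the legacy gym `done` flag into `terminated` and `truncated`.

import gymnasium

# Illustrative sketch only (not library code): the Gymnasium API targeted by this migration.
env = gymnasium.make("CartPole-v1")                  # arbitrary example environment
observation, info = env.reset(seed=0)                # reset() returns (observation, info)
observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
done = terminated or truncated                       # replaces the single legacy `done` flag
env.close()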
diff --git a/all/environments/atari.py b/all/environments/atari.py index ea123889..8db1606a 100644 --- a/all/environments/atari.py +++ b/all/environments/atari.py @@ -1,4 +1,4 @@ -import gym +import gymnasium import torch from all.core import State from .duplicate_env import DuplicateEnvironment @@ -16,7 +16,7 @@ class AtariEnvironment(Environment): def __init__(self, name, device='cpu'): # construct the environment - env = gym.make(name + "NoFrameskip-v4") + env = gymnasium.make(name + "NoFrameskip-v4") # apply a subset of wrappers env = NoopResetEnv(env, noop_max=30) diff --git a/all/environments/atari_wrappers.py b/all/environments/atari_wrappers.py index 3d2b89d0..71e05a2a 100644 --- a/all/environments/atari_wrappers.py +++ b/all/environments/atari_wrappers.py @@ -7,18 +7,18 @@ import os os.environ.setdefault('PATH', '') from collections import deque -import gym -from gym import spaces +import gymnasium +from gymnasium import spaces import cv2 cv2.ocl.setUseOpenCL(False) -class NoopResetEnv(gym.Wrapper): +class NoopResetEnv(gymnasium.Wrapper): def __init__(self, env, noop_max=30): '''Sample initial states by taking random number of no-ops on reset. No-op is assumed to be action 0. ''' - gym.Wrapper.__init__(self, env) + gymnasium.Wrapper.__init__(self, env) self.noop_max = noop_max self.override_num_noops = None self.noop_action = 0 @@ -43,14 +43,14 @@ def step(self, ac): return self.env.step(ac) -class FireResetEnv(gym.Wrapper): +class FireResetEnv(gymnasium.Wrapper): def __init__(self, env): ''' Take action on reset for environments that are fixed until firing. Important: This was modified to also fire on lives lost. ''' - gym.Wrapper.__init__(self, env) + gymnasium.Wrapper.__init__(self, env) assert env.unwrapped.get_action_meanings()[1] == 'FIRE' assert len(env.unwrapped.get_action_meanings()) >= 3 self.lives = 0 @@ -84,10 +84,10 @@ def lost_life(self): return lives < self.lives and lives > 0 -class MaxAndSkipEnv(gym.Wrapper): +class MaxAndSkipEnv(gymnasium.Wrapper): def __init__(self, env, skip=4): '''Return only every `skip`-th frame''' - gym.Wrapper.__init__(self, env) + gymnasium.Wrapper.__init__(self, env) # most recent raw observations (for max pooling across time steps) self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8) self._skip = skip @@ -115,7 +115,7 @@ def reset(self, **kwargs): return self.env.reset(**kwargs) -class WarpFrame(gym.ObservationWrapper): +class WarpFrame(gymnasium.ObservationWrapper): def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None): ''' Warp frames to 84x84 as done in the Nature paper and later work. @@ -132,7 +132,7 @@ def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None else: num_colors = 3 - new_space = gym.spaces.Box( + new_space = gymnasium.spaces.Box( low=0, high=255, shape=(self._height, self._width, num_colors), @@ -168,14 +168,14 @@ def observation(self, obs): return np.moveaxis(obs, -1, 0) -class LifeLostEnv(gym.Wrapper): +class LifeLostEnv(gymnasium.Wrapper): def __init__(self, env): ''' Modified wrapper to add a "life_lost" key to info. This allows the agent Body to make the episode as done if it desires. 
        '''
-        gym.Wrapper.__init__(self, env)
+        gymnasium.Wrapper.__init__(self, env)
         self.lives = 0
 
     def reset(self):
diff --git a/all/environments/duplicate_env.py b/all/environments/duplicate_env.py
index e83f601a..0f7cec42 100644
--- a/all/environments/duplicate_env.py
+++ b/all/environments/duplicate_env.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium
 import torch
 from all.core import State
 from ._vector_environment import VectorEnvironment
diff --git a/all/environments/duplicate_env_test.py b/all/environments/duplicate_env_test.py
index cac235bd..23c2c66e 100644
--- a/all/environments/duplicate_env_test.py
+++ b/all/environments/duplicate_env_test.py
@@ -1,5 +1,5 @@
 import unittest
-import gym
+import gymnasium
 import torch
 
 from all.environments import DuplicateEnvironment, GymEnvironment
diff --git a/all/environments/gym.py b/all/environments/gym.py
index 337b97d8..73b820d1 100644
--- a/all/environments/gym.py
+++ b/all/environments/gym.py
@@ -1,19 +1,19 @@
-import gym
+import gymnasium
 import torch
 from all.core import State
 from ._environment import Environment
 from .duplicate_env import DuplicateEnvironment
 
-gym.logger.set_level(40)
+gymnasium.logger.set_level(40)
 
 
 class GymEnvironment(Environment):
     '''
-    A wrapper for OpenAI Gym environments (see: https://gym.openai.com).
+    A wrapper for Gymnasium environments (see: https://gymnasium.farama.org).
 
     This wrapper converts the output of the gym environment to PyTorch
     tensors, and wraps them in a State object that can be passed to an Agent.
     This constructor supports either a string, which will be passed to the
-    gym.make(name) function, or a preconstructed gym environment. Note that
+    gymnasium.make(name) function, or a preconstructed gym environment. Note that
     in the latter case, the name property is set to whatever the name
     of the outermost wrapper on the environment is.
@@ -24,7 +24,7 @@ class GymEnvironment(Environment): ''' def __init__(self, id, device=torch.device('cpu'), name=None): - self._env = gym.make(id) + self._env = gymnasium.make(id) self._id = id self._name = name if name else id self._state = None @@ -85,9 +85,9 @@ def device(self): def _convert(self, action): if torch.is_tensor(action): - if isinstance(self.action_space, gym.spaces.Discrete): + if isinstance(self.action_space, gymnasium.spaces.Discrete): return action.item() - if isinstance(self.action_space, gym.spaces.Box): + if isinstance(self.action_space, gymnasium.spaces.Box): return action.cpu().detach().numpy().reshape(-1) raise TypeError("Unknown action space type") return action diff --git a/all/environments/gym_test.py b/all/environments/gym_test.py index 7ec4d80d..31dea2e2 100644 --- a/all/environments/gym_test.py +++ b/all/environments/gym_test.py @@ -1,5 +1,5 @@ import unittest -import gym +import gymnasium from all.environments import GymEnvironment diff --git a/all/environments/multiagent_atari.py b/all/environments/multiagent_atari.py index 1cc9feb4..626c92d9 100644 --- a/all/environments/multiagent_atari.py +++ b/all/environments/multiagent_atari.py @@ -1,7 +1,7 @@ import importlib import numpy as np import torch -import gym +import gymnasium from all.core import MultiagentState from ._multiagent_environment import MultiagentEnvironment from .multiagent_pettingzoo import MultiagentPettingZooEnv diff --git a/all/environments/multiagent_pettingzoo.py b/all/environments/multiagent_pettingzoo.py index 54519e76..7e1716a3 100644 --- a/all/environments/multiagent_pettingzoo.py +++ b/all/environments/multiagent_pettingzoo.py @@ -2,7 +2,7 @@ import numpy as np import torch import cloudpickle -import gym +import gymnasium from all.core import MultiagentState from ._multiagent_environment import MultiagentEnvironment @@ -104,9 +104,9 @@ def _convert(self, action): agent = self._env.agent_selection action_space = self.action_space(agent) if torch.is_tensor(action): - if isinstance(action_space, gym.spaces.Discrete): + if isinstance(action_space, gymnasium.spaces.Discrete): return action.item() - if isinstance(action_space, gym.spaces.Box): + if isinstance(action_space, gymnasium.spaces.Box): return action.cpu().detach().numpy().reshape(-1) raise TypeError("Unknown action space type") return action diff --git a/all/environments/vector_env.py b/all/environments/vector_env.py index b8f56b70..b57150e9 100644 --- a/all/environments/vector_env.py +++ b/all/environments/vector_env.py @@ -1,4 +1,4 @@ -import gym +import gymnasium import torch from all.core import StateArray from ._vector_environment import VectorEnvironment diff --git a/all/environments/vector_env_test.py b/all/environments/vector_env_test.py index b72df860..3eaaa864 100644 --- a/all/environments/vector_env_test.py +++ b/all/environments/vector_env_test.py @@ -1,11 +1,11 @@ import unittest -import gym +import gymnasium import torch from all.environments import GymVectorEnvironment, GymEnvironment, DuplicateEnvironment def make_vec_env(num_envs=3): - env = gym.vector.SyncVectorEnv([lambda: gym.make('CartPole-v0')] * num_envs) + env = gymnasium.vector.SyncVectorEnv([lambda: gymnasium.make('CartPole-v0')] * num_envs) return env diff --git a/all/experiments/watch.py b/all/experiments/watch.py index 5c0b1d03..9e346d7a 100644 --- a/all/experiments/watch.py +++ b/all/experiments/watch.py @@ -1,7 +1,7 @@ import os import time import torch -import gym +import gymnasium from all.agents import Agent diff --git a/all/nn/nn_test.py 
b/all/nn/nn_test.py index a1a7bb56..5e465532 100644 --- a/all/nn/nn_test.py +++ b/all/nn/nn_test.py @@ -2,7 +2,7 @@ import numpy as np import torch import torch_testing as tt -import gym +import gymnasium from all import nn from all.core import StateArray @@ -64,7 +64,7 @@ def test_list(self): ) def test_tanh_action_bound(self): - space = gym.spaces.Box(np.array([-1.0, 10.0]), np.array([1, 20])) + space = gymnasium.spaces.Box(np.array([-1.0, 10.0]), np.array([1, 20])) model = nn.TanhActionBound(space) x = torch.tensor([[100.0, 100], [-100, -100], [-100, 100], [0, 0]]) tt.assert_almost_equal( diff --git a/all/policies/deterministic.py b/all/policies/deterministic.py index a5b8991f..e69253ec 100644 --- a/all/policies/deterministic.py +++ b/all/policies/deterministic.py @@ -13,7 +13,7 @@ class DeterministicPolicy(Approximation): and the output shape should be the same as the shape of the action space. optimizer (torch.optim.Optimizer): A optimizer initialized with the model parameters, e.g. SGD, Adam, RMSprop, etc. - action_space (gym.spaces.Box): The Box representing the action space. + action_space (gymnasium.spaces.Box): The Box representing the action space. kwargs (optional): Any other arguments accepted by all.approximation.Approximation ''' diff --git a/all/policies/deterministic_test.py b/all/policies/deterministic_test.py index f018a0d1..683800a6 100644 --- a/all/policies/deterministic_test.py +++ b/all/policies/deterministic_test.py @@ -2,7 +2,7 @@ import torch import torch_testing as tt import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from all import nn from all.approximation import FixedTarget, DummyCheckpointer from all.core import State diff --git a/all/policies/gaussian.py b/all/policies/gaussian.py index 977a73b1..eaf76c67 100644 --- a/all/policies/gaussian.py +++ b/all/policies/gaussian.py @@ -22,7 +22,7 @@ class GaussianPolicy(Approximation): and the last n outputs will be the logarithm of the variance. optimizer (torch.optim.Optimizer): A optimizer initialized with the model parameters, e.g. SGD, Adam, RMSprop, etc. - action_space (gym.spaces.Box): The Box representing the action space. + action_space (gymnasium.spaces.Box): The Box representing the action space. kwargs (optional): Any other arguments accepted by all.approximation.Approximation ''' diff --git a/all/policies/gaussian_test.py b/all/policies/gaussian_test.py index d514c01c..3bcaf83f 100644 --- a/all/policies/gaussian_test.py +++ b/all/policies/gaussian_test.py @@ -3,7 +3,7 @@ import torch from torch import nn import torch_testing as tt -from gym.spaces import Box +from gymnasium.spaces import Box from all.approximation import DummyCheckpointer from all.core import State from all.policies import GaussianPolicy diff --git a/all/policies/soft_deterministic.py b/all/policies/soft_deterministic.py index 11f7ca25..ffb8a0d0 100644 --- a/all/policies/soft_deterministic.py +++ b/all/policies/soft_deterministic.py @@ -15,7 +15,7 @@ class SoftDeterministicPolicy(Approximation): and the second n outputs will be the logarithm of the variance. optimizer (torch.optim.Optimizer): A optimizer initialized with the model parameters, e.g. SGD, Adam, RMSprop, etc. - action_space (gym.spaces.Box): The Box representing the action space. + action_space (gymnasium.spaces.Box): The Box representing the action space. 
kwargs (optional): Any other arguments accepted by all.approximation.Approximation ''' diff --git a/all/policies/soft_deterministic_test.py b/all/policies/soft_deterministic_test.py index 9af809f8..f8c4a62a 100644 --- a/all/policies/soft_deterministic_test.py +++ b/all/policies/soft_deterministic_test.py @@ -2,7 +2,7 @@ import torch import numpy as np import torch_testing as tt -from gym.spaces import Box +from gymnasium.spaces import Box from all import nn from all.approximation import DummyCheckpointer from all.core import State diff --git a/setup.py b/setup.py index 933c89cc..ecfe3c20 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,16 @@ from setuptools import setup, find_packages -GYM_VERSION = "0.23.1" -PETTINGZOO_VERSION = "1.17.0" +GYM_VERSION = "0.29.1" +PETTINGZOO_VERSION = "1.24.2" extras = { "atari": [ - "gym[atari, accept-rom-license]~={}".format(GYM_VERSION), + "gymnasium[atari, accept-rom-license]~={}".format(GYM_VERSION), ], "box2d": [ - "gym[box2d]~={}".format(GYM_VERSION), + "gymnasium[box2d]~={}".format(GYM_VERSION), ], "pybullet": [ "pybullet>=3.2.2", @@ -60,7 +60,7 @@ ], }, install_requires=[ - "gym~={}".format(GYM_VERSION), # common environment interface + "gymnasium~={}".format(GYM_VERSION), # common environment interface "numpy>=1.22.3", # math library "matplotlib>=3.5.1", # plotting library "opencv-python-headless>=4.0.0", # used by atari wrappers From 8e88660596c8b26de844bbf54ebb3ff3b4dcdf20 Mon Sep 17 00:00:00 2001 From: "Nota, Christopher" Date: Wed, 29 Nov 2023 09:14:31 -0500 Subject: [PATCH 03/26] update state and state test --- all/core/state.py | 38 +++++++++++++++++++------------------- all/core/state_test.py | 19 +++++++++++++++---- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/all/core/state.py b/all/core/state.py index f39ca6d7..d556d312 100644 --- a/all/core/state.py +++ b/all/core/state.py @@ -158,39 +158,39 @@ def update(self, key, value): return self.__class__(x, device=self.device) @classmethod - def from_gym(cls, state, device='cpu', dtype=np.float32): + def from_gym(cls, gym_output, device='cpu', dtype=np.float32): """ Constructs a State object given the return value of an OpenAI gym reset()/step(action) call. Args: - state (tuple): The return value of an OpenAI gym reset()/step(action) call + gym_output (tuple): The output value of an OpenAI gym reset()/step(action) call device (string): The device on which to store resulting tensors. dtype: The type of the observation. Returns: A State object. """ - if not isinstance(state, tuple): - return State({ - 'observation': torch.from_numpy( + if not isinstance(gym_output, tuple) and (len(gym_output) == 2 or len(gym_output) == 5): + raise TypeError(f"gym_output should be a tuple, either (observation, info) or (observation, reward, terminated, truncated, info). Recieved {gym_output}.") + + # extract info from timestep + if len(gym_output) == 5: + observation, reward, terminated, truncated, info = gym_output + if len(gym_output) == 2: + observation, info = gym_output + reward = 0. + terminated = False + truncated = False + x = { + 'observation': torch.from_numpy( np.array( - state, + observation, dtype=dtype ), - ).to(device) - }, device=device) - - observation, reward, done, info = state - observation = torch.from_numpy( - np.array( - observation, - dtype=dtype - ), - ).to(device) - x = { - 'observation': observation, + ).to(device), 'reward': float(reward), - 'done': done, + 'done': terminated or truncated, + 'mask': 1. 
- terminated } info = info if info else {} for key in info: diff --git a/all/core/state_test.py b/all/core/state_test.py index 17b77abe..0de2814b 100644 --- a/all/core/state_test.py +++ b/all/core/state_test.py @@ -49,16 +49,17 @@ def test_auto_mask_false(self): def test_from_gym_reset(self): observation = np.array([1, 2, 3]) - state = State.from_gym(observation) + state = State.from_gym((observation, {'coolInfo': 3})) tt.assert_equal(state.observation, torch.from_numpy(observation)) self.assertEqual(state.mask, 1.) self.assertEqual(state.done, False) self.assertEqual(state.reward, 0.) self.assertEqual(state.shape, ()) + self.assertEqual(state['coolInfo'], 3.) def test_from_gym_step(self): observation = np.array([1, 2, 3]) - state = State.from_gym((observation, 2., True, {'coolInfo': 3.})) + state = State.from_gym((observation, 2., True, False, {'coolInfo': 3.})) tt.assert_equal(state.observation, torch.from_numpy(observation)) self.assertEqual(state.mask, 0.) self.assertEqual(state.done, True) @@ -66,6 +67,16 @@ def test_from_gym_step(self): self.assertEqual(state['coolInfo'], 3.) self.assertEqual(state.shape, ()) + def test_from_truncated_gym_step(self): + observation = np.array([1, 2, 3]) + state = State.from_gym((observation, 2., False, True, {'coolInfo': 3.})) + tt.assert_equal(state.observation, torch.from_numpy(observation)) + self.assertEqual(state.mask, 1.) + self.assertEqual(state.done, True) + self.assertEqual(state.reward, 2.) + self.assertEqual(state['coolInfo'], 3.) + self.assertEqual(state.shape, ()) + def test_as_input(self): observation = torch.randn(3, 4) state = State(observation) @@ -79,7 +90,7 @@ def test_as_output(self): def test_apply_mask(self): observation = torch.randn(3, 4) - state = State.from_gym((observation, 0., True, {})) + state = State.from_gym((observation, 0., True, False, {})) tt.assert_equal(state.apply_mask(observation), torch.zeros(3, 4)) def test_apply(self): @@ -92,7 +103,7 @@ def test_apply(self): def test_apply_done(self): observation = torch.randn(3, 4) - state = State.from_gym((observation, 0., True, {})) + state = State.from_gym((observation, 0., True, False, {})) model = torch.nn.Conv1d(3, 5, 2) output = state.apply(model, 'observation') self.assertEqual(output.shape, (5, 3)) From abeece26c4ab822ca6b3c49c3f9b331173f8af46 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Wed, 6 Dec 2023 10:03:27 -0500 Subject: [PATCH 04/26] Feature/gymnasium (#278) * Release/0.8.2 (#276) * change opencv dependency to headless and upgrade to version 4 (#275) * change opencv dependency to headless and upgrade to version 4 * revert test changes * version 0.8.2 * switch to gymnasium and update imports * update state and state test * update atari and duplicate env * delete direct atari model test * update vector env * support legacy pybullet env * update multiagent pettingzoo * fix multi-agent atari * fix ma atari preset test * fix single_env_experiment tests * update parallel env experiment test * update multiagent env experiment test * run formatter * remove legacy gym import * add legacy gym dependency * specify pybullet gym dependency correctly * update to pong_v3 in multiagent integration test --- all/agents/ddpg.py | 2 +- all/core/state.py | 244 ++++++++++-------- all/core/state_test.py | 27 +- all/environments/atari.py | 7 +- all/environments/atari_wrappers.py | 62 +++-- all/environments/duplicate_env.py | 13 +- all/environments/duplicate_env_test.py | 3 +- all/environments/gym.py | 26 +- all/environments/gym_test.py | 2 +- all/environments/multiagent_atari.py 
| 6 +- all/environments/multiagent_atari_test.py | 32 +-- all/environments/multiagent_pettingzoo.py | 16 +- .../multiagent_pettingzoo_test.py | 10 +- all/environments/pybullet.py | 2 +- all/environments/vector_env.py | 17 +- all/environments/vector_env_test.py | 13 +- .../multiagent_env_experiment_test.py | 13 +- .../parallel_env_experiment_test.py | 28 +- all/experiments/single_env_experiment_test.py | 10 +- all/experiments/watch.py | 2 +- all/nn/nn_test.py | 4 +- all/policies/deterministic.py | 2 +- all/policies/deterministic_test.py | 2 +- all/policies/gaussian.py | 2 +- all/policies/gaussian_test.py | 2 +- all/policies/soft_deterministic.py | 2 +- all/policies/soft_deterministic_test.py | 2 +- all/presets/atari/models/test_.py | 146 ----------- all/presets/multiagent_atari_test.py | 9 +- docs/source/conf.py | 2 +- integration/multiagent_atari_test.py | 4 +- setup.py | 75 +++--- 32 files changed, 343 insertions(+), 444 deletions(-) delete mode 100644 all/presets/atari/models/test_.py diff --git a/all/agents/ddpg.py b/all/agents/ddpg.py index 3cbec351..6a01f0ae 100644 --- a/all/agents/ddpg.py +++ b/all/agents/ddpg.py @@ -20,7 +20,7 @@ class DDPG(Agent): q (QContinuous): An Approximation of the continuous action Q-function. policy (DeterministicPolicy): An Approximation of a deterministic policy. replay_buffer (ReplayBuffer): The experience replay buffer. - action_space (gym.spaces.Box): Description of the action space. + action_space (gymnasium.spaces.Box): Description of the action space. discount_factor (float): Discount factor for future rewards. minibatch_size (int): The number of experiences to sample in each training update. noise (float): the amount of noise to add to each action (before scaling). diff --git a/all/core/state.py b/all/core/state.py index f39ca6d7..29e785ae 100644 --- a/all/core/state.py +++ b/all/core/state.py @@ -33,19 +33,19 @@ class State(dict): The torch device on which component tensors are stored. """ - def __init__(self, x, device='cpu', **kwargs): + def __init__(self, x, device="cpu", **kwargs): if not isinstance(x, dict): - x = {'observation': x} + x = {"observation": x} for k, v in kwargs.items(): x[k] = v - if 'observation' not in x: - raise Exception('State must contain an observation') - if 'reward' not in x: - x['reward'] = 0. - if 'done' not in x: - x['done'] = False - if 'mask' not in x: - x['mask'] = 1. 
- x['done'] + if "observation" not in x: + raise Exception("State must contain an observation") + if "reward" not in x: + x["reward"] = 0.0 + if "done" not in x: + x["done"] = False + if "mask" not in x: + x["mask"] = 1.0 - x["done"] super().__init__(x) self._shape = () self.device = device @@ -71,17 +71,33 @@ def array(cls, list_of_states): v = list_of_states[0][key] try: if isinstance(v, list) and len(v) > 0 and torch.is_tensor(v[0]): - x[key] = torch.stack([torch.stack(state[key]) for state in list_of_states]) + x[key] = torch.stack( + [torch.stack(state[key]) for state in list_of_states] + ) elif torch.is_tensor(v): x[key] = torch.stack([state[key] for state in list_of_states]) else: - x[key] = torch.tensor([state[key] for state in list_of_states], device=device) + x[key] = torch.tensor( + [state[key] for state in list_of_states], device=device + ) except KeyError: - warnings.warn('KeyError while creating StateArray for key "{}", omitting.'.format(key)) + warnings.warn( + 'KeyError while creating StateArray for key "{}", omitting.'.format( + key + ) + ) except ValueError: - warnings.warn('ValueError while creating StateArray for key "{}", omitting.'.format(key)) + warnings.warn( + 'ValueError while creating StateArray for key "{}", omitting.'.format( + key + ) + ) except TypeError: - warnings.warn('TypeError while creating StateArray for key "{}", omitting.'.format(key)) + warnings.warn( + 'TypeError while creating StateArray for key "{}", omitting.'.format( + key + ) + ) return StateArray(x, shape, device=device) @@ -100,7 +116,9 @@ def apply(self, model, *keys): Returns: The output of the model. """ - return self.apply_mask(self.as_output(model(*[self.as_input(key) for key in keys]))) + return self.apply_mask( + self.as_output(model(*[self.as_input(key) for key in keys])) + ) def as_input(self, key): """ @@ -158,39 +176,47 @@ def update(self, key, value): return self.__class__(x, device=self.device) @classmethod - def from_gym(cls, state, device='cpu', dtype=np.float32): + def from_gym(cls, gym_output, device="cpu", dtype=np.float32): """ Constructs a State object given the return value of an OpenAI gym reset()/step(action) call. Args: - state (tuple): The return value of an OpenAI gym reset()/step(action) call + gym_output (tuple): The output value of an OpenAI gym reset()/step(action) call device (string): The device on which to store resulting tensors. dtype: The type of the observation. Returns: A State object. 
""" - if not isinstance(state, tuple): - return State({ - 'observation': torch.from_numpy( - np.array( - state, - dtype=dtype - ), - ).to(device) - }, device=device) - - observation, reward, done, info = state - observation = torch.from_numpy( - np.array( - observation, - dtype=dtype - ), - ).to(device) + # extract info from timestep + if isinstance(gym_output, tuple) and len(gym_output) == 5: + # gymanisum step() + observation, reward, terminated, truncated, info = gym_output + elif isinstance(gym_output, tuple) and len(gym_output) == 4: + # legacy gym step() + observation, reward, done, info = gym_output + terminated = done + truncated = False + elif isinstance(gym_output, tuple) and len(gym_output) == 2: + # gymnasium reset() + observation, info = gym_output + reward = 0.0 + terminated = False + truncated = False + else: + # legacy gym reset() + observation = gym_output + reward = 0.0 + terminated = False + truncated = False + info = {} x = { - 'observation': observation, - 'reward': float(reward), - 'done': done, + "observation": torch.from_numpy( + np.array(observation, dtype=dtype), + ).to(device), + "reward": float(reward), + "done": terminated or truncated, + "mask": 1.0 - terminated, } info = info if info else {} for key in info: @@ -211,22 +237,22 @@ def to(self, device): @property def observation(self): """A tensor containing the current observation.""" - return self['observation'] + return self["observation"] @property def reward(self): """A float representing the reward for the previous state/action pair.""" - return self['reward'] + return self["reward"] @property def done(self): """A boolean that is true if the state is a terminal state, and false otherwise.""" - return self['done'] + return self["done"] @property def mask(self): """A float that is 1. if the state is non-terminal, or 0. otherwise.""" - return self['mask'] + return self["mask"] @property def shape(self): @@ -239,47 +265,49 @@ def __len__(self): class StateArray(State): """ - An n-dimensional array of environment State objects. + An n-dimensional array of environment State objects. - Internally, all components of the states are represented as n-dimensional tensors. - This allows for batch-style processing and easy manipulation of states. - Usually, a StateArray should be constructed using the State.array() function. + Internally, all components of the states are represented as n-dimensional tensors. + This allows for batch-style processing and easy manipulation of states. + Usually, a StateArray should be constructed using the State.array() function. - Args: - x (dict): - A dictionary containing all state information. - Each value should be a tensor in which the first n-dimensions - match the shape of the StateArray. - The following keys are standard: + Args: + x (dict): + A dictionary containing all state information. + Each value should be a tensor in which the first n-dimensions + match the shape of the StateArray. 
+ The following keys are standard: - observation (torch.tensor) (required): - A tensor representing the observations for each state + observation (torch.tensor) (required): + A tensor representing the observations for each state - reward (torch.FloatTensor) (optional): - A tensor representing rewards for the previous state/action pairs + reward (torch.FloatTensor) (optional): + A tensor representing rewards for the previous state/action pairs - done (torch.BoolTensors) (optional): - A tensor representing whether each state is terminal + done (torch.BoolTensors) (optional): + A tensor representing whether each state is terminal - mask (torch.FloatTensor) (optional): - A tensor representing the mask for each state. - device (string): - The torch device on which component tensors are stored. + mask (torch.FloatTensor) (optional): + A tensor representing the mask for each state. + device (string): + The torch device on which component tensors are stored. """ - def __init__(self, x, shape, device='cpu', **kwargs): + def __init__(self, x, shape, device="cpu", **kwargs): if not isinstance(x, dict): - x = {'observation': x} + x = {"observation": x} for k, v in kwargs.items(): x[k] = v - if 'observation' not in x: - raise Exception('StateArray must contain an observation') - if 'reward' not in x: - x['reward'] = torch.zeros(shape, device=device) - if 'done' not in x: - x['done'] = torch.tensor([False] * np.prod(shape), device=device).view(shape) - if 'mask' not in x: - x['mask'] = 1. - x['done'].float() + if "observation" not in x: + raise Exception("StateArray must contain an observation") + if "reward" not in x: + x["reward"] = torch.zeros(shape, device=device) + if "done" not in x: + x["done"] = torch.tensor([False] * np.prod(shape), device=device).view( + shape + ) + if "mask" not in x: + x["mask"] = 1.0 - x["done"].float() super().__init__(x, device=device) self._shape = shape @@ -305,7 +333,9 @@ def update(self, key, value): def as_input(self, key): value = self[key] - return value.view((np.prod(self.shape), *value.shape[len(self.shape):])).float() + return value.view( + (np.prod(self.shape), *value.shape[len(self.shape):]) + ).float() def as_output(self, tensor): return tensor.view((*self.shape, *tensor.shape[1:])) @@ -343,31 +373,33 @@ def view(self, shape): @property def observation(self): - return self['observation'] + return self["observation"] @property def reward(self): - return self['reward'] + return self["reward"] @property def done(self): - return self['done'] + return self["done"] @property def mask(self): - return self['mask'] + return self["mask"] def __getitem__(self, key): if isinstance(key, slice) or isinstance(key, int): - shape = self['mask'][key].shape + shape = self["mask"][key].shape if len(shape) == 0: return State({k: v[key] for (k, v) in self.items()}, device=self.device) - return StateArray({k: v[key] for (k, v) in self.items()}, shape, device=self.device) + return StateArray( + {k: v[key] for (k, v) in self.items()}, shape, device=self.device + ) if torch.is_tensor(key): # some things may get lost d = {} - shape = self['mask'][key].shape - for (k, v) in self.items(): + shape = self["mask"][key].shape + for k, v in self.items(): try: d[k] = v[key] except KeyError: @@ -389,7 +421,7 @@ def __len__(self): @classmethod def cat(cls, state_array_list, axis=0): - '''Concatenates along batch dimention''' + """Concatenates along batch dimention""" if len(state_array_list) == 0: raise ValueError("cat accepts a non-zero size list of StateArrays") @@ -400,13 +432,15 @@ def 
cat(cls, state_array_list, axis=0): new_shape = tuple(new_shape) keys = list(state_array_list[0].keys()) for key in keys: - d[key] = torch.cat([state_array[key] for state_array in state_array_list], axis=axis) + d[key] = torch.cat( + [state_array[key] for state_array in state_array_list], axis=axis + ) return StateArray(d, new_shape, device=state_array_list[0].device) def batch_execute(self, minibatch_size, fn): - ''' + """ execute in batches to reduce memory consumption - ''' + """ data = self batch_size = self.shape[0] results = [] @@ -424,17 +458,17 @@ def batch_execute(self, minibatch_size, fn): class MultiagentState(State): - def __init__(self, x, device='cpu', **kwargs): - if 'agent' not in x: - raise Exception('MultiagentState must contain an agent ID') + def __init__(self, x, device="cpu", **kwargs): + if "agent" not in x: + raise Exception("MultiagentState must contain an agent ID") super().__init__(x, device=device, **kwargs) @property def agent(self): - return self['agent'] + return self["agent"] @classmethod - def from_zoo(cls, agent, state, device='cpu', dtype=np.float32): + def from_zoo(cls, agent, state, device="cpu", dtype=np.float32): """ Constructs a State object given the return value of an OpenAI gym reset()/step(action) call. @@ -446,29 +480,15 @@ def from_zoo(cls, agent, state, device='cpu', dtype=np.float32): Returns: A State object. """ - if not isinstance(state, tuple): - return MultiagentState({ - 'agent': agent, - 'observation': torch.from_numpy( - np.array( - state, - dtype=dtype - ), - ).to(device) - }, device=device) - - observation, reward, done, info = state - observation = torch.from_numpy( - np.array( - observation, - dtype=dtype - ), - ).to(device) + observation, reward, terminated, truncated, info = state x = { - 'agent': agent, - 'observation': observation, - 'reward': float(reward), - 'done': done, + "agent": agent, + "observation": torch.from_numpy( + np.array(observation, dtype=dtype), + ).to(device), + "reward": float(reward), + "done": terminated or truncated, + "mask": 1.0 - terminated, } info = info if info else {} for key in info: diff --git a/all/core/state_test.py b/all/core/state_test.py index 17b77abe..6670b553 100644 --- a/all/core/state_test.py +++ b/all/core/state_test.py @@ -49,14 +49,35 @@ def test_auto_mask_false(self): def test_from_gym_reset(self): observation = np.array([1, 2, 3]) - state = State.from_gym(observation) + state = State.from_gym((observation, {'coolInfo': 3})) tt.assert_equal(state.observation, torch.from_numpy(observation)) self.assertEqual(state.mask, 1.) self.assertEqual(state.done, False) self.assertEqual(state.reward, 0.) self.assertEqual(state.shape, ()) + self.assertEqual(state['coolInfo'], 3.) def test_from_gym_step(self): + observation = np.array([1, 2, 3]) + state = State.from_gym((observation, 2., True, False, {'coolInfo': 3.})) + tt.assert_equal(state.observation, torch.from_numpy(observation)) + self.assertEqual(state.mask, 0.) + self.assertEqual(state.done, True) + self.assertEqual(state.reward, 2.) + self.assertEqual(state['coolInfo'], 3.) + self.assertEqual(state.shape, ()) + + def test_from_truncated_gym_step(self): + observation = np.array([1, 2, 3]) + state = State.from_gym((observation, 2., False, True, {'coolInfo': 3.})) + tt.assert_equal(state.observation, torch.from_numpy(observation)) + self.assertEqual(state.mask, 1.) + self.assertEqual(state.done, True) + self.assertEqual(state.reward, 2.) + self.assertEqual(state['coolInfo'], 3.) 
+ self.assertEqual(state.shape, ()) + + def test_legacy_gym_step(self): observation = np.array([1, 2, 3]) state = State.from_gym((observation, 2., True, {'coolInfo': 3.})) tt.assert_equal(state.observation, torch.from_numpy(observation)) @@ -79,7 +100,7 @@ def test_as_output(self): def test_apply_mask(self): observation = torch.randn(3, 4) - state = State.from_gym((observation, 0., True, {})) + state = State.from_gym((observation, 0., True, False, {})) tt.assert_equal(state.apply_mask(observation), torch.zeros(3, 4)) def test_apply(self): @@ -92,7 +113,7 @@ def test_apply(self): def test_apply_done(self): observation = torch.randn(3, 4) - state = State.from_gym((observation, 0., True, {})) + state = State.from_gym((observation, 0., True, False, {})) model = torch.nn.Conv1d(3, 5, 2) output = state.apply(model, 'observation') self.assertEqual(output.shape, (5, 3)) diff --git a/all/environments/atari.py b/all/environments/atari.py index ea123889..204fbbdf 100644 --- a/all/environments/atari.py +++ b/all/environments/atari.py @@ -1,4 +1,4 @@ -import gym +import gymnasium import torch from all.core import State from .duplicate_env import DuplicateEnvironment @@ -16,7 +16,7 @@ class AtariEnvironment(Environment): def __init__(self, name, device='cpu'): # construct the environment - env = gym.make(name + "NoFrameskip-v4") + env = gymnasium.make(name + "NoFrameskip-v4") # apply a subset of wrappers env = NoopResetEnv(env, noop_max=30) @@ -37,8 +37,7 @@ def __init__(self, name, device='cpu'): self._device = device def reset(self): - state = self._env.reset(), 0., False, None - self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device) + self._state = State.from_gym(self._env.reset(), dtype=self._env.observation_space.dtype, device=self._device) return self._state def step(self, action): diff --git a/all/environments/atari_wrappers.py b/all/environments/atari_wrappers.py index 3d2b89d0..df6013ef 100644 --- a/all/environments/atari_wrappers.py +++ b/all/environments/atari_wrappers.py @@ -7,18 +7,18 @@ import os os.environ.setdefault('PATH', '') from collections import deque -import gym -from gym import spaces +import gymnasium +from gymnasium import spaces import cv2 cv2.ocl.setUseOpenCL(False) -class NoopResetEnv(gym.Wrapper): +class NoopResetEnv(gymnasium.Wrapper): def __init__(self, env, noop_max=30): '''Sample initial states by taking random number of no-ops on reset. No-op is assumed to be action 0. ''' - gym.Wrapper.__init__(self, env) + gymnasium.Wrapper.__init__(self, env) self.noop_max = noop_max self.override_num_noops = None self.noop_action = 0 @@ -34,8 +34,8 @@ def reset(self, **kwargs): assert noops > 0 obs = None for _ in range(noops): - obs, _, done, _ = self.env.step(self.noop_action) - if done: + obs, _, terminated, truncated, _ = self.env.step(self.noop_action) + if terminated or truncated: obs = self.env.reset(**kwargs) return obs @@ -43,14 +43,14 @@ def step(self, ac): return self.env.step(ac) -class FireResetEnv(gym.Wrapper): +class FireResetEnv(gymnasium.Wrapper): def __init__(self, env): ''' Take action on reset for environments that are fixed until firing. Important: This was modified to also fire on lives lost. 
''' - gym.Wrapper.__init__(self, env) + gymnasium.Wrapper.__init__(self, env) assert env.unwrapped.get_action_meanings()[1] == 'FIRE' assert len(env.unwrapped.get_action_meanings()) >= 3 self.lives = 0 @@ -58,36 +58,35 @@ def __init__(self, env): def reset(self, **kwargs): self.env.reset(**kwargs) - obs, _ = self.fire() + obs, info = self.fire() self.lives = self.env.unwrapped.ale.lives() - return obs + return obs, info def step(self, action): - obs, reward, done, info = self.env.step(action) + obs, reward, terminated, truncated, info = self.env.step(action) if self.lost_life(): - obs, done = self.fire() + obs, info = self.fire() self.lives = self.env.unwrapped.ale.lives() - return obs, reward, done, info + return obs, reward, terminated, truncated, info def fire(self): - obs, _, done, _ = self.env.step(1) - if done: + obs, _, terminated, truncated, info = self.env.step(1) + if terminated or truncated: self.env.reset() - obs, _, done, _ = self.env.step(2) - if done: - obs = self.env.reset() - done = False - return obs, done + obs, _, terminated, truncated, info = self.env.step(2) + if terminated or truncated: + obs, info = self.env.reset() + return obs, info def lost_life(self): lives = self.env.unwrapped.ale.lives() return lives < self.lives and lives > 0 -class MaxAndSkipEnv(gym.Wrapper): +class MaxAndSkipEnv(gymnasium.Wrapper): def __init__(self, env, skip=4): '''Return only every `skip`-th frame''' - gym.Wrapper.__init__(self, env) + gymnasium.Wrapper.__init__(self, env) # most recent raw observations (for max pooling across time steps) self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8) self._skip = skip @@ -95,27 +94,26 @@ def __init__(self, env, skip=4): def step(self, action): '''Repeat action, sum reward, and max over last observations.''' total_reward = 0.0 - done = None for i in range(self._skip): - obs, reward, done, info = self.env.step(action) + obs, reward, terminated, truncated, info = self.env.step(action) if i == self._skip - 2: self._obs_buffer[0] = obs if i == self._skip - 1: self._obs_buffer[1] = obs total_reward += reward - if done: + if terminated or truncated: break # Note that the observation on the done=True frame # doesn't matter max_frame = self._obs_buffer.max(axis=0) - return max_frame, total_reward, done, info + return max_frame, total_reward, terminated, truncated, info def reset(self, **kwargs): return self.env.reset(**kwargs) -class WarpFrame(gym.ObservationWrapper): +class WarpFrame(gymnasium.ObservationWrapper): def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None): ''' Warp frames to 84x84 as done in the Nature paper and later work. @@ -132,7 +130,7 @@ def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None else: num_colors = 3 - new_space = gym.spaces.Box( + new_space = gymnasium.spaces.Box( low=0, high=255, shape=(self._height, self._width, num_colors), @@ -168,14 +166,14 @@ def observation(self, obs): return np.moveaxis(obs, -1, 0) -class LifeLostEnv(gym.Wrapper): +class LifeLostEnv(gymnasium.Wrapper): def __init__(self, env): ''' Modified wrapper to add a "life_lost" key to info. This allows the agent Body to make the episode as done if it desires. 
        '''
-        gym.Wrapper.__init__(self, env)
+        gymnasium.Wrapper.__init__(self, env)
         self.lives = 0
 
     def reset(self):
@@ -183,9 +181,9 @@ def reset(self):
         return self.env.reset()
 
     def step(self, action):
-        obs, reward, done, _ = self.env.step(action)
+        obs, reward, terminated, truncated, _ = self.env.step(action)
         lives = self.env.unwrapped.ale.lives()
         life_lost = (lives < self.lives and lives > 0)
         self.lives = lives
         info = {'life_lost': life_lost}
-        return obs, reward, done, info
+        return obs, reward, terminated, truncated, info
diff --git a/all/environments/duplicate_env.py b/all/environments/duplicate_env.py
index e83f601a..42fbd3b4 100644
--- a/all/environments/duplicate_env.py
+++ b/all/environments/duplicate_env.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium
 import torch
 from all.core import State
 from ._vector_environment import VectorEnvironment
@@ -32,8 +32,11 @@ def __init__(self, envs, device=torch.device('cpu')):
     def name(self):
         return self._name
 
-    def reset(self):
-        self._state = State.array([sub_env.reset() for sub_env in self._envs])
+    def reset(self, seed=None, **kwargs):
+        if seed is not None:
+            self._state = State.array([sub_env.reset(seed=(seed + i), **kwargs) for i, sub_env in enumerate(self._envs)])
+        else:
+            self._state = State.array([sub_env.reset(**kwargs) for sub_env in self._envs])
         return self._state
 
     def step(self, actions):
@@ -48,10 +51,6 @@ def step(self, actions):
     def close(self):
         return self._env.close()
 
-    def seed(self, seed):
-        for i, env in enumerate(self._envs):
-            env.seed(seed + i)
-
     @property
     def state_space(self):
         return self._envs[0].observation_space
diff --git a/all/environments/duplicate_env_test.py b/all/environments/duplicate_env_test.py
index cac235bd..1c0c750e 100644
--- a/all/environments/duplicate_env_test.py
+++ b/all/environments/duplicate_env_test.py
@@ -1,5 +1,5 @@
 import unittest
-import gym
+import gymnasium
 import torch
 
 from all.environments import DuplicateEnvironment, GymEnvironment
@@ -42,7 +42,6 @@ def test_step(self):
     def test_step_until_done(self):
         num_envs = 3
         env = DuplicateEnvironment(make_vec_env(num_envs))
-        env.seed(5)
         env.reset()
         for _ in range(100):
             state = env.step(torch.ones(num_envs, dtype=torch.int32))
diff --git a/all/environments/gym.py b/all/environments/gym.py
index 337b97d8..dcc82b11 100644
--- a/all/environments/gym.py
+++ b/all/environments/gym.py
@@ -1,19 +1,19 @@
-import gym
+import gymnasium
 import torch
 from all.core import State
 from ._environment import Environment
 from .duplicate_env import DuplicateEnvironment
 
-gym.logger.set_level(40)
+gymnasium.logger.set_level(40)
 
 
 class GymEnvironment(Environment):
     '''
-    A wrapper for OpenAI Gym environments (see: https://gym.openai.com).
+    A wrapper for Gymnasium environments (see: https://gymnasium.farama.org).
 
     This wrapper converts the output of the gym environment to PyTorch
     tensors, and wraps them in a State object that can be passed to an Agent.
     This constructor supports either a string, which will be passed to the
-    gym.make(name) function, or a preconstructed gym environment. Note that
+    gymnasium.make(name) function, or a preconstructed gym environment. Note that
     in the latter case, the name property is set to whatever the name
     of the outermost wrapper on the environment is.
@@ -21,10 +21,15 @@ class GymEnvironment(Environment): env: Either a string or an OpenAI gym environment name (str, optional): the name of the environment device (str, optional): the device on which tensors will be stored + legacy_gym (str, optional): If true, calls gym.make() instead of gymnasium.make() ''' - def __init__(self, id, device=torch.device('cpu'), name=None): - self._env = gym.make(id) + def __init__(self, id, device=torch.device('cpu'), name=None, legacy_gym=False): + if legacy_gym: + import gym + self._env = gym.make(id) + else: + self._env = gymnasium.make(id) self._id = id self._name = name if name else id self._state = None @@ -38,9 +43,8 @@ def __init__(self, id, device=torch.device('cpu'), name=None): def name(self): return self._name - def reset(self): - state = self._env.reset(), 0., False, None - self._state = State.from_gym(state, dtype=self._env.observation_space.dtype, device=self._device) + def reset(self, **kwargs): + self._state = State.from_gym(self._env.reset(**kwargs), dtype=self._env.observation_space.dtype, device=self._device) return self._state def step(self, action): @@ -85,9 +89,9 @@ def device(self): def _convert(self, action): if torch.is_tensor(action): - if isinstance(self.action_space, gym.spaces.Discrete): + if isinstance(self.action_space, gymnasium.spaces.Discrete): return action.item() - if isinstance(self.action_space, gym.spaces.Box): + if isinstance(self.action_space, gymnasium.spaces.Box): return action.cpu().detach().numpy().reshape(-1) raise TypeError("Unknown action space type") return action diff --git a/all/environments/gym_test.py b/all/environments/gym_test.py index 7ec4d80d..31dea2e2 100644 --- a/all/environments/gym_test.py +++ b/all/environments/gym_test.py @@ -1,5 +1,5 @@ import unittest -import gym +import gymnasium from all.environments import GymEnvironment diff --git a/all/environments/multiagent_atari.py b/all/environments/multiagent_atari.py index 1cc9feb4..e9a10de1 100644 --- a/all/environments/multiagent_atari.py +++ b/all/environments/multiagent_atari.py @@ -1,7 +1,7 @@ import importlib import numpy as np import torch -import gym +import gymnasium from all.core import MultiagentState from ._multiagent_environment import MultiagentEnvironment from .multiagent_pettingzoo import MultiagentPettingZooEnv @@ -25,10 +25,10 @@ def __init__(self, env_name, device='cuda', **pettingzoo_params): def _load_env(self, env_name, pettingzoo_params): from pettingzoo import atari - from supersuit import resize_v0, frame_skip_v0, reshape_v0, max_observation_v0 + from supersuit import resize_v1, frame_skip_v0, reshape_v0, max_observation_v0 env = importlib.import_module('pettingzoo.atari.{}'.format(env_name)).env(obs_type='grayscale_image', **pettingzoo_params) env = max_observation_v0(env, 2) env = frame_skip_v0(env, 4) - env = resize_v0(env, 84, 84) + env = resize_v1(env, 84, 84) env = reshape_v0(env, (1, 84, 84)) return env diff --git a/all/environments/multiagent_atari_test.py b/all/environments/multiagent_atari_test.py index 251e98f4..c5c5e7a8 100644 --- a/all/environments/multiagent_atari_test.py +++ b/all/environments/multiagent_atari_test.py @@ -5,12 +5,12 @@ class MultiagentAtariEnvTest(unittest.TestCase): def test_init(self): - MultiagentAtariEnv('pong_v2', device='cpu') - MultiagentAtariEnv('mario_bros_v2', device='cpu') - MultiagentAtariEnv('entombed_cooperative_v2', device='cpu') + MultiagentAtariEnv('pong_v3', device='cpu') + MultiagentAtariEnv('mario_bros_v3', device='cpu') + MultiagentAtariEnv('entombed_cooperative_v3', 
device='cpu') def test_reset(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') state = env.reset() self.assertEqual(state.observation.shape, (1, 84, 84)) self.assertEqual(state.reward, 0) @@ -19,7 +19,7 @@ def test_reset(self): self.assertEqual(state['agent'], 'first_0') def test_step(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') env.reset() state = env.step(0) self.assertEqual(state.observation.shape, (1, 84, 84)) @@ -29,7 +29,7 @@ def test_step(self): self.assertEqual(state['agent'], 'second_0') def test_step_tensor(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') env.reset() state = env.step(torch.tensor([0])) self.assertEqual(state.observation.shape, (1, 84, 84)) @@ -39,37 +39,37 @@ def test_step_tensor(self): self.assertEqual(state['agent'], 'second_0') def test_name(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') - self.assertEqual(env.name, 'pong_v2') + env = MultiagentAtariEnv('pong_v3', device='cpu') + self.assertEqual(env.name, 'pong_v3') def test_agent_iter(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') env.reset() it = iter(env.agent_iter()) self.assertEqual(next(it), 'first_0') def test_state_spaces(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') self.assertEqual(env.state_space('first_0').shape, (1, 84, 84)) self.assertEqual(env.state_space('second_0').shape, (1, 84, 84)) def test_action_spaces(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') - self.assertEqual(env.action_space('first_0').n, 18) - self.assertEqual(env.action_space('second_0').n, 18) + env = MultiagentAtariEnv('pong_v3', device='cpu') + self.assertEqual(env.action_space('first_0').n, 6) + self.assertEqual(env.action_space('second_0').n, 6) def test_list_agents(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') self.assertEqual(env.agents, ['first_0', 'second_0']) def test_is_done(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') env.reset() self.assertFalse(env.is_done('first_0')) self.assertFalse(env.is_done('second_0')) def test_last(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') + env = MultiagentAtariEnv('pong_v3', device='cpu') env.reset() state = env.last() self.assertEqual(state.observation.shape, (1, 84, 84)) diff --git a/all/environments/multiagent_pettingzoo.py b/all/environments/multiagent_pettingzoo.py index 54519e76..3376e519 100644 --- a/all/environments/multiagent_pettingzoo.py +++ b/all/environments/multiagent_pettingzoo.py @@ -2,7 +2,7 @@ import numpy as np import torch import cloudpickle -import gym +import gymnasium from all.core import MultiagentState from ._multiagent_environment import MultiagentEnvironment @@ -38,8 +38,8 @@ def __init__(self, zoo_env, name, device='cuda'): An initial MultiagentState object. 
''' - def reset(self): - self._env.reset() + def reset(self, **kwargs): + self._env.reset(**kwargs) return self.last() ''' @@ -72,15 +72,15 @@ def agent_iter(self): return self._env.agent_iter() def is_done(self, agent): - return self._env.dones[agent] + return self._env.terminations[agent] def duplicate(self, n): return [MultiagentPettingZooEnv(cloudpickle.loads(cloudpickle.dumps(self._env)), self._name, device=self.device) for _ in range(n)] def last(self): - observation, reward, done, info = self._env.last() + observation, reward, terminated, truncated, info = self._env.last() selected_obs_space = self._env.observation_space(self._env.agent_selection) - return MultiagentState.from_zoo(self._env.agent_selection, (observation, reward, done, info), device=self._device, dtype=selected_obs_space.dtype) + return MultiagentState.from_zoo(self._env.agent_selection, (observation, reward, terminated, truncated, info), device=self._device, dtype=selected_obs_space.dtype) @property def name(self): @@ -104,9 +104,9 @@ def _convert(self, action): agent = self._env.agent_selection action_space = self.action_space(agent) if torch.is_tensor(action): - if isinstance(action_space, gym.spaces.Discrete): + if isinstance(action_space, gymnasium.spaces.Discrete): return action.item() - if isinstance(action_space, gym.spaces.Box): + if isinstance(action_space, gymnasium.spaces.Box): return action.cpu().detach().numpy().reshape(-1) raise TypeError("Unknown action space type") return action diff --git a/all/environments/multiagent_pettingzoo_test.py b/all/environments/multiagent_pettingzoo_test.py index d2e4c7df..482e50e5 100644 --- a/all/environments/multiagent_pettingzoo_test.py +++ b/all/environments/multiagent_pettingzoo_test.py @@ -1,7 +1,7 @@ import unittest import torch from all.environments import MultiagentPettingZooEnv -from pettingzoo.mpe import simple_world_comm_v2 +from pettingzoo.mpe import simple_world_comm_v3 class MultiagentPettingZooEnvTest(unittest.TestCase): @@ -39,7 +39,7 @@ def test_step_tensor(self): def test_name(self): env = self._make_env() - self.assertEqual(env.name, 'simple_world_comm_v2') + self.assertEqual(env.name, 'simple_world_comm_v3') def test_agent_iter(self): env = self._make_env() @@ -61,7 +61,7 @@ def test_list_agents(self): env = self._make_env() self.assertEqual(env.agents, ['leadadversary_0', 'adversary_0', 'adversary_1', 'adversary_2', 'agent_0', 'agent_1']) - def test_is_done(self): + def test_terminated(self): env = self._make_env() env.reset() self.assertFalse(env.is_done('leadadversary_0')) @@ -78,7 +78,7 @@ def test_last(self): self.assertEqual(state['agent'], 'leadadversary_0') def test_variable_spaces(self): - env = MultiagentPettingZooEnv(simple_world_comm_v2.env(), name="simple_world_comm_v2", device='cpu') + env = MultiagentPettingZooEnv(simple_world_comm_v3.env(), name="simple_world_comm_v2", device='cpu') state = env.reset() # tests that action spaces work for agent in env.agents: @@ -87,7 +87,7 @@ def test_variable_spaces(self): env.step(env.action_space(env.agent_selection).sample()) def _make_env(self): - return MultiagentPettingZooEnv(simple_world_comm_v2.env(), name="simple_world_comm_v2", device='cpu') + return MultiagentPettingZooEnv(simple_world_comm_v3.env(), name="simple_world_comm_v3", device='cpu') if __name__ == "__main__": diff --git a/all/environments/pybullet.py b/all/environments/pybullet.py index a986e5b4..70f379fb 100644 --- a/all/environments/pybullet.py +++ b/all/environments/pybullet.py @@ -14,4 +14,4 @@ def __init__(self, name, 
**kwargs): import pybullet_envs if name in self.short_names: name = self.short_names[name] - super().__init__(name, **kwargs) + super().__init__(name, legacy_gym=True, **kwargs) diff --git a/all/environments/vector_env.py b/all/environments/vector_env.py index b8f56b70..d74a46af 100644 --- a/all/environments/vector_env.py +++ b/all/environments/vector_env.py @@ -1,4 +1,4 @@ -import gym +import gymnasium import torch from all.core import StateArray from ._vector_environment import VectorEnvironment @@ -35,16 +35,16 @@ def __init__(self, vec_env, name, device=torch.device('cpu')): def name(self): return self._name - def reset(self): - state_tuple = self._env.reset(), np.zeros(self._env.num_envs), np.zeros(self._env.num_envs), None - self._state = self._to_state(*state_tuple) + def reset(self, **kwargs): + obs, info = self._env.reset(**kwargs) + self._state = self._to_state(obs, np.zeros(self._env.num_envs), np.zeros(self._env.num_envs), np.zeros(self._env.num_envs), info) return self._state - def _to_state(self, obs, rew, done, info): + def _to_state(self, obs, rew, terminated, truncated, info): obs = obs.astype(self.observation_space.dtype) rew = rew.astype("float32") - done = done.astype("bool") - mask = (1 - done).astype("float32") + done = (terminated + truncated).astype("bool") + mask = (1 - terminated).astype("float32") return StateArray({ "observation": torch.tensor(obs, device=self._device), "reward": torch.tensor(rew, device=self._device), @@ -60,9 +60,6 @@ def step(self, action): def close(self): return self._env.close() - def seed(self, seed): - self._env.seed(seed) - @property def state_space(self): return getattr(self._env, "single_observation_space", getattr(self._env, "observation_space")) diff --git a/all/environments/vector_env_test.py b/all/environments/vector_env_test.py index b72df860..a4cfba77 100644 --- a/all/environments/vector_env_test.py +++ b/all/environments/vector_env_test.py @@ -1,11 +1,11 @@ import unittest -import gym +import gymnasium import torch from all.environments import GymVectorEnvironment, GymEnvironment, DuplicateEnvironment def make_vec_env(num_envs=3): - env = gym.vector.SyncVectorEnv([lambda: gym.make('CartPole-v0')] * num_envs) + env = gymnasium.vector.SyncVectorEnv([lambda: gymnasium.make('CartPole-v0')] * num_envs) return env @@ -42,8 +42,7 @@ def test_step(self): def test_step_until_done(self): num_envs = 3 env = GymVectorEnvironment(make_vec_env(num_envs), "CartPole") - env.seed(5) - env.reset() + env.reset(seed=5) for _ in range(100): state = env.step(torch.ones(num_envs, dtype=torch.int32)) if state.done[0]: @@ -60,10 +59,8 @@ def test_same_as_duplicate(self): torch.manual_seed(42) env1 = DuplicateEnvironment([GymEnvironment('CartPole-v0') for i in range(n_envs)]) env2 = GymVectorEnvironment(make_vec_env(n_envs), "CartPole-v0") - env1.seed(42) - env2.seed(42) - state1 = env1.reset() - state2 = env2.reset() + state1 = env1.reset(seed=42) + state2 = env2.reset(seed=42) self.assertEqual(env1.name, env2.name) self.assertEqual(env1.action_space.n, env2.action_space.n) self.assertEqual(env1.observation_space.shape, env2.observation_space.shape) diff --git a/all/experiments/multiagent_env_experiment_test.py b/all/experiments/multiagent_env_experiment_test.py index 5e7a5a4b..ceb8a47a 100644 --- a/all/experiments/multiagent_env_experiment_test.py +++ b/all/experiments/multiagent_env_experiment_test.py @@ -18,24 +18,25 @@ class TestMultiagentEnvExperiment(unittest.TestCase): def setUp(self): np.random.seed(0) torch.manual_seed(0) - self.env = 
MultiagentAtariEnv('space_invaders_v1', device='cpu') - self.env.seed(0) + self.env = MultiagentAtariEnv('space_invaders_v2', device='cpu') + self.env.reset(seed=0) self.experiment = None def test_adds_default_name(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf')) - self.assertEqual(experiment._logger.label, "independent_space_invaders_v1") + self.assertEqual(experiment._logger.label, "independent_space_invaders_v2") def test_adds_custom_name(self): experiment = MockExperiment(self.make_preset(), self.env, name='custom', quiet=True, save_freq=float('inf')) - self.assertEqual(experiment._logger.label, "custom_space_invaders_v1") + self.assertEqual(experiment._logger.label, "custom_space_invaders_v2") def test_writes_training_returns(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf')) experiment.train(episodes=3) + self.maxDiff = None self.assertEqual(experiment._logger.data, { - 'eval/first_0/returns/frame': {'values': [465.0, 235.0, 735.0, 415.0], 'steps': [766, 1524, 2440, 3038]}, - 'eval/second_0/returns/frame': {'values': [235.0, 465.0, 170.0, 295.0], 'steps': [766, 1524, 2440, 3038]} + 'eval/first_0/returns/frame': {'values': [705.0, 490.0, 230.0, 435.0], 'steps': [808, 1580, 2120, 3300]}, + 'eval/second_0/returns/frame': {'values': [115.0, 525.0, 415.0, 665.0], 'steps': [808, 1580, 2120, 3300]} }) def test_writes_test_returns(self): diff --git a/all/experiments/parallel_env_experiment_test.py b/all/experiments/parallel_env_experiment_test.py index cf7c343d..28a9558b 100644 --- a/all/experiments/parallel_env_experiment_test.py +++ b/all/experiments/parallel_env_experiment_test.py @@ -9,7 +9,7 @@ class MockExperiment(ParallelEnvExperiment): def _make_logger(self, logdir, agent_name, env_name, verbose, logger): - self._logger = MockLogger(self, agent_name + '_' + env_name, verbose) + self._logger = MockLogger(self, agent_name + "_" + env_name, verbose) return self._logger @@ -17,28 +17,28 @@ class TestParallelEnvExperiment(unittest.TestCase): def setUp(self): np.random.seed(0) torch.manual_seed(0) - self.env = GymEnvironment('CartPole-v0') - self.env.seed(0) + self.env = GymEnvironment("CartPole-v0") + self.env.reset(seed=0) self.experiment = MockExperiment(self.make_agent(), self.env, quiet=True) - self.experiment._env.seed(0) + self.experiment._env.reset(seed=0) def test_adds_default_label(self): self.assertEqual(self.experiment._logger.label, "a2c_CartPole-v0") def test_adds_custom_label(self): - env = GymEnvironment('CartPole-v0') - experiment = MockExperiment(self.make_agent(), env, name='a2c', quiet=True) + env = GymEnvironment("CartPole-v0") + experiment = MockExperiment(self.make_agent(), env, name="a2c", quiet=True) self.assertEqual(experiment._logger.label, "a2c_CartPole-v0") def test_writes_training_returns_eps(self): - self.experiment.train(episodes=3) + self.experiment.train(episodes=4) np.testing.assert_equal( self.experiment._logger.data["eval/returns/episode"]["steps"], - np.array([1, 2, 3]), + np.array([1, 2, 3, 3]), ) np.testing.assert_equal( self.experiment._logger.data["eval/returns/episode"]["values"], - np.array([10., 12., 19.]), + np.array([12.0, 13.0, 16.0, 16.0]), ) def test_writes_test_returns(self): @@ -55,13 +55,17 @@ def test_writes_test_returns(self): ) def test_writes_loss(self): - experiment = MockExperiment(self.make_agent(), self.env, quiet=True, verbose=True) + experiment = MockExperiment( + self.make_agent(), self.env, quiet=True, verbose=True + ) 
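The seeding and stepping changes above all track the same Gymnasium conventions the wrappers now target: the seed is passed to reset(seed=...) instead of a separate seed() call, reset() returns an (observation, info) pair, and step() returns a five-tuple with separate terminated and truncated flags. A minimal sketch of that raw API, assuming only a local gymnasium install with the classic-control environments:

import gymnasium

env = gymnasium.make("CartPole-v0")

# Gymnasium seeds the RNG through reset(); there is no env.seed() method.
observation, info = env.reset(seed=0)

# step() returns five values; "done" is the union of the two flags, while the
# mask built by GymVectorEnvironment._to_state above only zeroes out on true termination.
observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
done = terminated or truncated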
self.assertTrue(experiment._logger.verbose) - experiment = MockExperiment(self.make_agent(), self.env, quiet=True, verbose=False) + experiment = MockExperiment( + self.make_agent(), self.env, quiet=True, verbose=False + ) self.assertFalse(experiment._logger.verbose) def make_agent(self): - return a2c.device('cpu').env(self.env).build() + return a2c.device("cpu").env(self.env).build() if __name__ == "__main__": diff --git a/all/experiments/single_env_experiment_test.py b/all/experiments/single_env_experiment_test.py index f5f03789..6c11b04b 100644 --- a/all/experiments/single_env_experiment_test.py +++ b/all/experiments/single_env_experiment_test.py @@ -58,7 +58,7 @@ def setUp(self): np.random.seed(0) torch.manual_seed(0) self.env = GymEnvironment('CartPole-v0') - self.env.seed(0) + self.env.reset(seed=0) self.experiment = None def test_adds_default_name(self): @@ -74,7 +74,7 @@ def test_writes_training_returns_eps(self): experiment.train(episodes=3) np.testing.assert_equal( experiment._logger.data["eval/returns/episode"]["values"], - np.array([18., 23., 27.]), + np.array([22., 17., 28.]), ) np.testing.assert_equal( experiment._logger.data["eval/returns/episode"]["steps"], @@ -85,8 +85,8 @@ def test_writes_test_returns(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True) experiment.train(episodes=5) returns = experiment.test(episodes=4) - expected_mean = 8.75 - expected_std = 0.433013 + expected_mean = 8.5 + expected_std = 0.5 np.testing.assert_equal(np.mean(returns), expected_mean) np.testing.assert_equal( experiment._logger.data["summary/returns-test/mean"]["values"], @@ -99,7 +99,7 @@ def test_writes_test_returns(self): ) np.testing.assert_equal( experiment._logger.data["summary/returns-test/mean"]["steps"], - np.array([94]), + np.array([93]), ) def test_writes_loss(self): diff --git a/all/experiments/watch.py b/all/experiments/watch.py index 5c0b1d03..9e346d7a 100644 --- a/all/experiments/watch.py +++ b/all/experiments/watch.py @@ -1,7 +1,7 @@ import os import time import torch -import gym +import gymnasium from all.agents import Agent diff --git a/all/nn/nn_test.py b/all/nn/nn_test.py index a1a7bb56..5e465532 100644 --- a/all/nn/nn_test.py +++ b/all/nn/nn_test.py @@ -2,7 +2,7 @@ import numpy as np import torch import torch_testing as tt -import gym +import gymnasium from all import nn from all.core import StateArray @@ -64,7 +64,7 @@ def test_list(self): ) def test_tanh_action_bound(self): - space = gym.spaces.Box(np.array([-1.0, 10.0]), np.array([1, 20])) + space = gymnasium.spaces.Box(np.array([-1.0, 10.0]), np.array([1, 20])) model = nn.TanhActionBound(space) x = torch.tensor([[100.0, 100], [-100, -100], [-100, 100], [0, 0]]) tt.assert_almost_equal( diff --git a/all/policies/deterministic.py b/all/policies/deterministic.py index a5b8991f..e69253ec 100644 --- a/all/policies/deterministic.py +++ b/all/policies/deterministic.py @@ -13,7 +13,7 @@ class DeterministicPolicy(Approximation): and the output shape should be the same as the shape of the action space. optimizer (torch.optim.Optimizer): A optimizer initialized with the model parameters, e.g. SGD, Adam, RMSprop, etc. - action_space (gym.spaces.Box): The Box representing the action space. + action_space (gymnasium.spaces.Box): The Box representing the action space. 
kwargs (optional): Any other arguments accepted by all.approximation.Approximation ''' diff --git a/all/policies/deterministic_test.py b/all/policies/deterministic_test.py index f018a0d1..683800a6 100644 --- a/all/policies/deterministic_test.py +++ b/all/policies/deterministic_test.py @@ -2,7 +2,7 @@ import torch import torch_testing as tt import numpy as np -from gym.spaces import Box +from gymnasium.spaces import Box from all import nn from all.approximation import FixedTarget, DummyCheckpointer from all.core import State diff --git a/all/policies/gaussian.py b/all/policies/gaussian.py index 977a73b1..eaf76c67 100644 --- a/all/policies/gaussian.py +++ b/all/policies/gaussian.py @@ -22,7 +22,7 @@ class GaussianPolicy(Approximation): and the last n outputs will be the logarithm of the variance. optimizer (torch.optim.Optimizer): A optimizer initialized with the model parameters, e.g. SGD, Adam, RMSprop, etc. - action_space (gym.spaces.Box): The Box representing the action space. + action_space (gymnasium.spaces.Box): The Box representing the action space. kwargs (optional): Any other arguments accepted by all.approximation.Approximation ''' diff --git a/all/policies/gaussian_test.py b/all/policies/gaussian_test.py index d514c01c..3bcaf83f 100644 --- a/all/policies/gaussian_test.py +++ b/all/policies/gaussian_test.py @@ -3,7 +3,7 @@ import torch from torch import nn import torch_testing as tt -from gym.spaces import Box +from gymnasium.spaces import Box from all.approximation import DummyCheckpointer from all.core import State from all.policies import GaussianPolicy diff --git a/all/policies/soft_deterministic.py b/all/policies/soft_deterministic.py index 11f7ca25..ffb8a0d0 100644 --- a/all/policies/soft_deterministic.py +++ b/all/policies/soft_deterministic.py @@ -15,7 +15,7 @@ class SoftDeterministicPolicy(Approximation): and the second n outputs will be the logarithm of the variance. optimizer (torch.optim.Optimizer): A optimizer initialized with the model parameters, e.g. SGD, Adam, RMSprop, etc. - action_space (gym.spaces.Box): The Box representing the action space. + action_space (gymnasium.spaces.Box): The Box representing the action space. 
kwargs (optional): Any other arguments accepted by all.approximation.Approximation ''' diff --git a/all/policies/soft_deterministic_test.py b/all/policies/soft_deterministic_test.py index 9af809f8..f8c4a62a 100644 --- a/all/policies/soft_deterministic_test.py +++ b/all/policies/soft_deterministic_test.py @@ -2,7 +2,7 @@ import torch import numpy as np import torch_testing as tt -from gym.spaces import Box +from gymnasium.spaces import Box from all import nn from all.approximation import DummyCheckpointer from all.core import State diff --git a/all/presets/atari/models/test_.py b/all/presets/atari/models/test_.py deleted file mode 100644 index 68b1360d..00000000 --- a/all/presets/atari/models/test_.py +++ /dev/null @@ -1,146 +0,0 @@ -import unittest -import torch -import torch_testing as tt -from all.environments import AtariEnvironment -from all.presets.atari.models import nature_rainbow - - -class TestAtariModels(unittest.TestCase): - def setUp(self): - torch.manual_seed(0) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - def test_rainbow_model_cpu(self): - env = AtariEnvironment('Breakout') - model = nature_rainbow(env) - env.reset() - x = torch.cat([env.state.raw] * 4, dim=1).float() - out = model(x) - tt.assert_almost_equal( - out, - torch.tensor([[ - 0.0676, -0.0235, 0.0690, -0.0713, -0.0287, 0.0053, -0.0463, 0.0495, - -0.0222, -0.0504, 0.0064, -0.0204, 0.0168, 0.0127, -0.0113, -0.0586, - -0.0544, 0.0114, -0.0077, 0.0666, -0.0663, -0.0420, -0.0698, -0.0314, - 0.0272, 0.0361, -0.0537, 0.0301, 0.0036, -0.0472, -0.0499, 0.0114, - 0.0182, 0.0008, -0.0132, -0.0803, -0.0087, -0.0017, 0.0598, -0.0627, - 0.0859, 0.0117, 0.0105, 0.0309, -0.0370, -0.0111, -0.0262, 0.0338, - 0.0141, -0.0385, 0.0547, 0.0648, -0.0370, 0.0107, -0.0629, -0.0163, - 0.0282, -0.0670, 0.0161, -0.0244, -0.0030, 0.0038, -0.0208, 0.0005, - 0.0125, 0.0608, -0.0089, 0.0026, 0.0562, -0.0678, 0.0841, -0.0265, - -0.0461, -0.0124, 0.0276, 0.0364, 0.0195, -0.0309, -0.0337, -0.0603, - -0.0252, -0.0356, 0.0221, 0.0184, -0.0154, -0.0136, -0.0277, 0.0283, - 0.0495, 0.0185, -0.0357, 0.0305, -0.0052, -0.0432, -0.0135, -0.0554, - -0.0094, 0.0272, 0.1030, 0.0049, 0.0012, -0.0140, 0.0146, -0.0979, - 0.0487, 0.0122, -0.0204, 0.0496, -0.0055, -0.0015, -0.0170, 0.0053, - 0.0104, -0.0742, 0.0742, -0.0381, 0.0104, -0.0065, -0.0564, 0.0453, - -0.0057, -0.0029, -0.0722, 0.0094, -0.0561, 0.0284, 0.0402, 0.0233, - -0.0716, -0.0424, 0.0165, -0.0505, 0.0006, 0.0219, -0.0601, 0.0656, - -0.0175, -0.0524, 0.0355, 0.0007, -0.0042, -0.0443, 0.0871, -0.0403, - -0.0031, 0.0171, -0.0359, -0.0520, -0.0344, 0.0239, 0.0099, 0.0004, - 0.0235, 0.0238, -0.0153, 0.0501, -0.0052, 0.0162, 0.0313, -0.0121, - 0.0009, -0.0366, -0.0628, 0.0386, -0.0671, 0.0480, -0.0595, 0.0568, - -0.0604, -0.0540, 0.0403, -0.0187, 0.0649, 0.0029, -0.0003, 0.0020, - -0.0056, 0.0471, -0.0145, -0.0126, -0.0395, -0.0455, -0.0437, 0.0056, - 0.0331, 0.0004, 0.0127, -0.0022, -0.0502, 0.0362, 0.0624, -0.0012, - -0.0515, 0.0303, -0.0357, -0.0420, 0.0321, -0.0162, 0.0007, -0.0272, - 0.0227, 0.0187, -0.0459, 0.0496 - ]]), - decimal=3 - ) - - def test_rainbow_model_cuda(self): - env = AtariEnvironment('Breakout') - model = nature_rainbow(env).cuda() - env.reset() - x = torch.cat([env.state.raw] * 4, dim=1).float().cuda() - out = model(x) - tt.assert_almost_equal( - out.cpu(), - torch.tensor([[ - -1.4765e-02, -4.0353e-02, -2.1705e-02, -2.2314e-02, 3.6881e-02, - -1.4175e-02, 1.2442e-02, -6.8713e-03, 2.4970e-02, 2.5681e-02, - -4.5859e-02, -2.3327e-02, 
3.6205e-02, 7.1024e-03, -2.7564e-02, - 2.1592e-02, -3.2728e-02, 1.3602e-02, -1.1690e-02, -4.3082e-02, - -1.2996e-02, 1.7184e-02, 1.3446e-02, -3.3587e-03, -4.6350e-02, - -1.7646e-02, 2.1954e-02, 8.5546e-03, -2.1359e-02, -2.4206e-02, - -2.3151e-02, -3.6330e-02, 4.4699e-02, 3.9887e-03, 1.5609e-02, - -4.3950e-02, 1.0955e-02, -2.4277e-02, 1.4915e-02, 3.2508e-03, - 6.1454e-02, 3.5242e-02, -1.5274e-02, -2.6729e-02, -2.4072e-02, - 1.5696e-02, 2.6622e-02, -3.5404e-02, 5.1701e-02, -5.3047e-02, - -1.8412e-02, 8.6640e-03, -3.1722e-02, 4.0329e-02, 1.2896e-02, - -1.4139e-02, -4.9200e-02, -4.6193e-02, -2.9064e-03, -2.2078e-02, - -4.0084e-02, -8.3519e-03, -2.7589e-02, -4.9979e-03, -1.6055e-02, - -4.5311e-02, -2.6951e-02, 2.8032e-02, -4.0069e-03, 3.2405e-02, - -5.3164e-03, -3.0139e-03, 6.6179e-04, -4.9243e-02, 3.2515e-02, - 9.8307e-03, -3.4257e-03, -3.9522e-02, 1.2594e-02, -2.7210e-02, - 2.3451e-02, 4.2257e-02, 2.2239e-02, 1.4304e-04, 4.2905e-04, - 1.5193e-02, 3.1897e-03, -1.0828e-02, -4.8345e-02, 6.8747e-02, - -7.1725e-03, -9.7815e-03, -1.6331e-02, 1.0434e-02, -8.8083e-04, - 3.8219e-02, 6.8332e-03, -2.0189e-02, 2.8141e-02, 1.4913e-02, - -2.4925e-02, -2.8922e-02, -7.1546e-03, 1.9791e-02, 1.1160e-02, - 1.0306e-02, -1.3631e-02, 2.7318e-03, 1.4050e-03, -8.2064e-03, - 3.5836e-02, -1.5877e-02, -1.1198e-02, 1.9514e-02, 3.0832e-03, - -6.2730e-02, 6.1493e-03, -1.2340e-02, 3.9110e-02, -2.6895e-02, - -5.1718e-03, 7.5017e-03, 1.2673e-03, 4.7525e-02, 1.7373e-03, - -5.1745e-03, -2.8621e-02, 3.4984e-02, -3.2622e-02, 1.0748e-02, - 1.2499e-02, -1.8788e-02, -8.6717e-03, 4.3620e-02, 2.8460e-02, - -6.8146e-03, -3.5824e-02, 9.2931e-03, 3.7893e-03, 2.4187e-02, - 1.3393e-02, -5.9393e-03, -9.9837e-03, -8.1019e-03, -2.1840e-02, - -3.8945e-02, 1.6736e-02, -4.7475e-02, 4.9770e-02, 3.4695e-02, - 1.8961e-02, 2.7416e-02, -1.3578e-02, -9.8595e-03, 2.2834e-03, - 2.4829e-02, -4.3998e-02, 3.2398e-02, -1.4200e-02, 2.4907e-02, - -2.2542e-02, -9.2765e-03, 2.0658e-03, -4.1246e-03, -1.8095e-02, - -1.2732e-02, -3.2090e-03, 1.3127e-02, -2.0888e-02, 1.4931e-02, - -4.0576e-02, 4.2877e-02, 7.9411e-05, -4.4377e-02, 3.2357e-03, - 1.6201e-02, 4.0387e-02, -1.9023e-02, 5.8033e-02, -3.3424e-02, - 2.9598e-03, -1.8526e-02, -2.2967e-02, 4.3449e-02, -1.2564e-02, - -9.3756e-03, -2.1745e-02, -2.7089e-02, -3.6791e-02, -5.2018e-02, - 2.4588e-02, 1.0037e-03, 3.9753e-02, 4.3534e-02, 2.6446e-02, - -1.1808e-02, 2.1426e-02, 7.5522e-03, 2.2847e-03, -2.7211e-02, - 4.1364e-02, -1.1281e-02, 1.6523e-03, -1.9913e-03 - ]]), - decimal=3 - ) - optimizer = torch.optim.SGD(model.parameters(), lr=0.01) - loss = out.sum() - loss.backward() - optimizer.step() - out = model(x) - tt.assert_almost_equal( - out.cpu(), - torch.tensor([[ - -0.0247, -0.0172, -0.0633, -0.0154, -0.0156, -0.1156, -0.0793, -0.0184, - -0.0408, 0.0005, -0.0920, -0.0481, -0.0597, -0.0243, 0.0006, -0.1045, - -0.0476, -0.0030, -0.0230, -0.0869, -0.0149, -0.0412, -0.0753, -0.0640, - -0.1106, -0.0632, -0.0645, -0.0474, -0.0124, -0.0698, -0.0275, -0.0415, - -0.0916, -0.0957, -0.0851, -0.1296, -0.1049, -0.0196, -0.0823, -0.0380, - -0.1085, -0.0526, -0.0083, -0.1274, -0.0426, -0.0183, -0.0585, -0.0366, - -0.1111, -0.0074, -0.1238, -0.0324, -0.0166, -0.0719, -0.0285, -0.0427, - -0.1158, -0.0569, 0.0075, -0.0419, -0.0288, -0.1189, -0.0220, -0.0370, - 0.0040, 0.0228, -0.0958, -0.0258, -0.0276, -0.0405, -0.0958, -0.0201, - -0.0639, -0.0543, -0.0705, -0.0940, -0.0700, -0.0921, -0.0426, 0.0026, - -0.0556, -0.0439, -0.0386, -0.0957, -0.0915, -0.0679, -0.1272, -0.0754, - -0.0076, -0.1046, -0.0350, -0.0887, -0.0350, -0.0270, 
-0.1188, -0.0449, - 0.0020, -0.0406, 0.0011, -0.0842, -0.0422, -0.1280, -0.0205, 0.0002, - -0.0789, -0.0185, -0.0510, -0.1180, -0.0550, -0.0159, -0.0702, -0.0029, - -0.0891, -0.0253, -0.0485, -0.0128, 0.0010, -0.0870, -0.0230, -0.0233, - -0.0411, -0.0870, -0.0419, -0.0688, -0.0583, -0.0448, -0.0864, -0.0926, - -0.0758, -0.0540, 0.0058, -0.0843, -0.0365, -0.0608, -0.0787, -0.0938, - -0.0680, -0.0995, -0.0764, 0.0061, -0.0821, -0.0636, -0.0848, -0.0373, - -0.0285, -0.1086, -0.0464, -0.0228, -0.0464, -0.0279, -0.1053, -0.0224, - -0.1268, -0.0006, -0.0186, -0.0836, -0.0011, -0.0415, -0.1222, -0.0668, - -0.0015, -0.0535, -0.0071, -0.1202, -0.0257, -0.0503, 0.0004, 0.0099, - -0.1113, -0.0182, -0.0080, -0.0216, -0.0661, -0.0115, -0.0468, -0.0716, - -0.0404, -0.0950, -0.0681, -0.0933, -0.0699, -0.0154, -0.0853, -0.0414, - -0.0403, -0.0700, -0.0685, -0.0975, -0.0934, -0.1016, -0.0121, -0.1084, - -0.0391, -0.1006, -0.0441, -0.0024, -0.1232, -0.0159, 0.0012, -0.0480, - -0.0013, -0.0789, -0.0309, -0.1101 - ]]), - decimal=3 - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/all/presets/multiagent_atari_test.py b/all/presets/multiagent_atari_test.py index 8a213770..4f752da5 100644 --- a/all/presets/multiagent_atari_test.py +++ b/all/presets/multiagent_atari_test.py @@ -9,7 +9,7 @@ class TestMultiagentAtariPresets(unittest.TestCase): def setUp(self): - self.env = MultiagentAtariEnv('pong_v2', device='cpu') + self.env = MultiagentAtariEnv('pong_v3', device='cpu') self.env.reset() def tearDown(self): @@ -17,12 +17,11 @@ def tearDown(self): os.remove('test_preset.pt') def test_independent(self): - env = MultiagentAtariEnv('pong_v2', device='cpu') presets = { - agent_id: dqn.device('cpu').env(env.subenvs[agent_id]).build() - for agent_id in env.agents + agent_id: dqn.device('cpu').env(self.env.subenvs[agent_id]).build() + for agent_id in self.env.agents } - self.validate_preset(IndependentMultiagentPreset('independent', 'cpu', presets), env) + self.validate_preset(IndependentMultiagentPreset('independent', 'cpu', presets), self.env) def validate_preset(self, preset, env): # normal agent diff --git a/docs/source/conf.py b/docs/source/conf.py index a65fa2cd..8650a8a6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Chris Nota' # The full version, including alpha/beta/rc tags -release = '0.8.1' +release = '0.8.2' # -- General configuration --------------------------------------------------- diff --git a/integration/multiagent_atari_test.py b/integration/multiagent_atari_test.py index ab48adfd..9e0b54cc 100644 --- a/integration/multiagent_atari_test.py +++ b/integration/multiagent_atari_test.py @@ -20,7 +20,7 @@ class TestMultiagentAtariPresets(unittest.TestCase): def test_independent(self): - env = MultiagentAtariEnv('pong_v2', max_cycles=1000, device=CPU) + env = MultiagentAtariEnv('pong_v3', max_cycles=1000, device=CPU) presets = { agent_id: dqn.device(CPU).env(env.subenvs[agent_id]).build() for agent_id in env.agents @@ -28,7 +28,7 @@ def test_independent(self): validate_multiagent(IndependentMultiagentPreset('independent', CPU, presets), env) def test_independent_cuda(self): - env = MultiagentAtariEnv('pong_v2', max_cycles=1000, device=CUDA) + env = MultiagentAtariEnv('pong_v3', max_cycles=1000, device=CUDA) presets = { agent_id: dqn.device(CUDA).env(env.subenvs[agent_id]).build() for agent_id in env.agents diff --git a/setup.py b/setup.py index af46231e..0b81db8f 100644 --- a/setup.py +++ b/setup.py @@ -1,72 +1,79 @@ from setuptools import setup, 
find_packages -GYM_VERSION = "0.23.1" -PETTINGZOO_VERSION = "1.17.0" +GYM_VERSION = "0.29.1" +PETTINGZOO_VERSION = "1.24.2" extras = { "atari": [ - "gym[atari, accept-rom-license]~={}".format(GYM_VERSION), + "gymnasium[atari, accept-rom-license]~={}".format(GYM_VERSION), ], "box2d": [ - "gym[box2d]~={}".format(GYM_VERSION), + "gymnasium[box2d]~={}".format(GYM_VERSION), ], "pybullet": [ "pybullet>=3.2.2", + "gym>=0.10.0,<0.26.0", ], "ma-atari": [ - "PettingZoo[atari]~={}".format(PETTINGZOO_VERSION), - "supersuit~=3.3.5", + "PettingZoo[atari, accept-rom-license]~={}".format(PETTINGZOO_VERSION), + "supersuit~=3.9.1", ], "test": [ - "flake8>=3.8", # linter for pep8 compliance - "autopep8>=1.5", # automatically fixes some pep8 errors - "torch-testing>=0.0.2", # pytorch assertion library + "flake8>=3.8", # linter for pep8 compliance + "autopep8>=1.5", # automatically fixes some pep8 errors + "torch-testing>=0.0.2", # pytorch assertion library ], "docs": [ - "sphinx>=3.2.1", # documentation library + "sphinx>=3.2.1", # documentation library "sphinx-autobuild>=2020.9.1", # documentation live reload - "sphinx-rtd-theme>=0.5.0", # documentation theme - "sphinx-automodapi>=0.13", # autogenerate docs for modules + "sphinx-rtd-theme>=0.5.0", # documentation theme + "sphinx-automodapi>=0.13", # autogenerate docs for modules ], "comet": [ - "comet-ml>=3.28.3", # experiment tracking using Comet.ml - ] + "comet-ml>=3.28.3", # experiment tracking using Comet.ml + ], } -extras["all"] = extras["atari"] + extras["box2d"] + extras["pybullet"] + extras["ma-atari"] + extras["comet"] +extras["all"] = ( + extras["atari"] + + extras["box2d"] + + extras["pybullet"] + + extras["ma-atari"] + + extras["comet"] +) extras["dev"] = extras["all"] + extras["test"] + extras["docs"] + extras["comet"] setup( name="autonomous-learning-library", - version="0.8.1", + version="0.8.2", description=("A library for building reinforcement learning agents in Pytorch"), packages=find_packages(), url="https://github.com/cpnota/autonomous-learning-library.git", author="Chris Nota", author_email="cnota@cs.umass.edu", entry_points={ - 'console_scripts': [ - 'all-atari=scripts.atari:main', - 'all-classic=scripts.classic:main', - 'all-continuous=scripts.continuous:main', - 'all-plot=scripts.plot:main', - 'all-watch-atari=scripts.watch_atari:main', - 'all-watch-classic=scripts.watch_classic:main', - 'all-watch-continuous=scripts.watch_continuous:main', - 'all-benchmark-atari=benchmarks.atari40:main', - 'all-benchmark-pybullet=benchmarks.pybullet:main', + "console_scripts": [ + "all-atari=scripts.atari:main", + "all-classic=scripts.classic:main", + "all-continuous=scripts.continuous:main", + "all-plot=scripts.plot:main", + "all-watch-atari=scripts.watch_atari:main", + "all-watch-classic=scripts.watch_classic:main", + "all-watch-continuous=scripts.watch_continuous:main", + "all-benchmark-atari=benchmarks.atari40:main", + "all-benchmark-pybullet=benchmarks.pybullet:main", ], }, install_requires=[ - "gym~={}".format(GYM_VERSION), # common environment interface - "numpy>=1.22.3", # math library - "matplotlib>=3.5.1", # plotting library - "opencv-python-headless>=4.0.0", # used by atari wrappers - "torch>=1.11.0", # core deep learning library - "tensorboard>=2.8.0", # logging and visualization - "cloudpickle>=2.0.0", # used to copy environments + "gymnasium~={}".format(GYM_VERSION), # common environment interface + "numpy>=1.22.3", # math library + "matplotlib>=3.5.1", # plotting library + "opencv-python-headless>=4.0.0", # used by atari wrappers + 
"torch>=1.11.0", # core deep learning library + "tensorboard>=2.8.0", # logging and visualization + "cloudpickle>=2.0.0", # used to copy environments ], - extras_require=extras + extras_require=extras, ) From c390592022e35c7a56d9f42905a66acbd60f4a2a Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Fri, 8 Dec 2023 09:46:39 -0500 Subject: [PATCH 05/26] Feature/mujoco (#279) * add mujoco depnedency * add MujocoEnvironment * fix integration tests * linting * update docs version number --- all/core/state_test.py | 10 ------- all/environments/__init__.py | 17 ++++++----- all/environments/gym.py | 9 +++--- all/environments/mujoco.py | 5 ++++ all/environments/mujoco_test.py | 35 +++++++++++++++++++++++ all/nn/__init__.py | 5 ++++ all/presets/continuous/models/__init__.py | 5 ++++ docs/source/conf.py | 2 +- integration/continuous_test.py | 27 +++++++++++------ setup.py | 8 ++++-- 10 files changed, 90 insertions(+), 33 deletions(-) create mode 100644 all/environments/mujoco.py create mode 100644 all/environments/mujoco_test.py diff --git a/all/core/state_test.py b/all/core/state_test.py index 597563da..6670b553 100644 --- a/all/core/state_test.py +++ b/all/core/state_test.py @@ -87,16 +87,6 @@ def test_legacy_gym_step(self): self.assertEqual(state['coolInfo'], 3.) self.assertEqual(state.shape, ()) - def test_from_truncated_gym_step(self): - observation = np.array([1, 2, 3]) - state = State.from_gym((observation, 2., False, True, {'coolInfo': 3.})) - tt.assert_equal(state.observation, torch.from_numpy(observation)) - self.assertEqual(state.mask, 1.) - self.assertEqual(state.done, True) - self.assertEqual(state.reward, 2.) - self.assertEqual(state['coolInfo'], 3.) - self.assertEqual(state.shape, ()) - def test_as_input(self): observation = torch.randn(3, 4) state = State(observation) diff --git a/all/environments/__init__.py b/all/environments/__init__.py index ba1fd89a..20ba6fc1 100644 --- a/all/environments/__init__.py +++ b/all/environments/__init__.py @@ -1,22 +1,25 @@ from ._environment import Environment from ._multiagent_environment import MultiagentEnvironment from ._vector_environment import VectorEnvironment -from .gym import GymEnvironment from .atari import AtariEnvironment +from .duplicate_env import DuplicateEnvironment +from .gym import GymEnvironment +from .mujoco import MujocoEnvironment from .multiagent_atari import MultiagentAtariEnv from .multiagent_pettingzoo import MultiagentPettingZooEnv -from .duplicate_env import DuplicateEnvironment -from .vector_env import GymVectorEnvironment from .pybullet import PybulletEnvironment +from .vector_env import GymVectorEnvironment + __all__ = [ + "AtariEnvironment", + "DuplicateEnvironment", "Environment", - "MultiagentEnvironment", "GymEnvironment", - "AtariEnvironment", + "GymVectorEnvironment", "MultiagentAtariEnv", + "MultiagentEnvironment", "MultiagentPettingZooEnv", - "GymVectorEnvironment", - "DuplicateEnvironment", + "MujocoEnvironment", "PybulletEnvironment", ] diff --git a/all/environments/gym.py b/all/environments/gym.py index dcc82b11..997b7c56 100644 --- a/all/environments/gym.py +++ b/all/environments/gym.py @@ -27,9 +27,10 @@ class GymEnvironment(Environment): def __init__(self, id, device=torch.device('cpu'), name=None, legacy_gym=False): if legacy_gym: import gym - self._env = gym.make(id) + self._gym = gym else: - self._env = gymnasium.make(id) + self._gym = gymnasium + self._env = self._gym.make(id) self._id = id self._name = name if name else id self._state = None @@ -89,9 +90,9 @@ def device(self): def _convert(self, 
action): if torch.is_tensor(action): - if isinstance(self.action_space, gymnasium.spaces.Discrete): + if isinstance(self.action_space, self._gym.spaces.Discrete): return action.item() - if isinstance(self.action_space, gymnasium.spaces.Box): + if isinstance(self.action_space, self._gym.spaces.Box): return action.cpu().detach().numpy().reshape(-1) raise TypeError("Unknown action space type") return action diff --git a/all/environments/mujoco.py b/all/environments/mujoco.py new file mode 100644 index 00000000..8463cc51 --- /dev/null +++ b/all/environments/mujoco.py @@ -0,0 +1,5 @@ +from .gym import GymEnvironment + + +class MujocoEnvironment(GymEnvironment): + """Simply inherit the Gym Environment""" diff --git a/all/environments/mujoco_test.py b/all/environments/mujoco_test.py new file mode 100644 index 00000000..0672a7d3 --- /dev/null +++ b/all/environments/mujoco_test.py @@ -0,0 +1,35 @@ +import unittest +from all.environments import MujocoEnvironment, GymEnvironment + + +class MujocoEnvironmentTest(unittest.TestCase): + def test_load_env(self): + env = MujocoEnvironment("Ant-v4") + self.assertEqual(env.name, 'Ant-v4') + + def test_observation_space(self): + env = MujocoEnvironment("Ant-v4") + self.assertEqual(env.observation_space.shape, (27,)) + + def test_action_space(self): + env = MujocoEnvironment("Ant-v4") + self.assertEqual(env.action_space.shape, (8,)) + + def test_reset(self): + env = MujocoEnvironment("Ant-v4") + state = env.reset(seed=0) + self.assertEqual(state.observation.shape, (27,)) + self.assertEqual(state.reward, 0.) + self.assertFalse(state.done) + self.assertEqual(state.mask, 1) + + def test_step(self): + env = MujocoEnvironment("Ant-v4") + state = env.reset(seed=0) + state = env.step(env.action_space.sample()) + self.assertEqual(state.observation.shape, (27,)) + self.assertGreater(state.reward, -1.) + self.assertLess(state.reward, 1) + self.assertNotEqual(state.reward, 0.) 
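MujocoEnvironment above is deliberately nothing more than a GymEnvironment subclass, so it is driven through the same State-based interface the new test exercises. A minimal usage sketch, assuming the new gymnasium[mujoco] extra is installed:

from all.environments import MujocoEnvironment

env = MujocoEnvironment("Ant-v4", device="cpu")
state = env.reset(seed=0)
for _ in range(100):
    # sample a random action and advance the wrapped Gymnasium environment
    state = env.step(env.action_space.sample())
    if state.done:
        state = env.reset()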
+ self.assertFalse(state.done) + self.assertEqual(state.mask, 1) diff --git a/all/nn/__init__.py b/all/nn/__init__.py index d8d0ecd7..c60c9a97 100644 --- a/all/nn/__init__.py +++ b/all/nn/__init__.py @@ -206,6 +206,11 @@ def forward(self, x): return torch.tanh(x) * self.weight + self.bias +class Float(nn.Module): + def forward(self, x): + return x.float() + + def td_loss(loss): def _loss(estimates, errors): return loss(estimates, errors + estimates.detach()) diff --git a/all/presets/continuous/models/__init__.py b/all/presets/continuous/models/__init__.py index 02dce45e..012b56c9 100644 --- a/all/presets/continuous/models/__init__.py +++ b/all/presets/continuous/models/__init__.py @@ -11,6 +11,7 @@ def fc_q(env, hidden1=400, hidden2=300): return nn.Sequential( + nn.Float(), nn.Linear(env.state_space.shape[0] + env.action_space.shape[0] + 1, hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), @@ -21,6 +22,7 @@ def fc_q(env, hidden1=400, hidden2=300): def fc_v(env, hidden1=400, hidden2=300): return nn.Sequential( + nn.Float(), nn.Linear(env.state_space.shape[0] + 1, hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), @@ -31,6 +33,7 @@ def fc_v(env, hidden1=400, hidden2=300): def fc_deterministic_policy(env, hidden1=400, hidden2=300): return nn.Sequential( + nn.Float(), nn.Linear(env.state_space.shape[0] + 1, hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), @@ -41,6 +44,7 @@ def fc_deterministic_policy(env, hidden1=400, hidden2=300): def fc_soft_policy(env, hidden1=400, hidden2=300): return nn.Sequential( + nn.Float(), nn.Linear(env.state_space.shape[0] + 1, hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), @@ -53,6 +57,7 @@ class fc_policy(nn.Module): def __init__(self, env, hidden1=400, hidden2=300): super().__init__() self.model = nn.Sequential( + nn.Float(), nn.Linear(env.state_space.shape[0] + 1, hidden1), nn.Tanh(), nn.Linear(hidden1, hidden2), diff --git a/docs/source/conf.py b/docs/source/conf.py index 8650a8a6..808ec0e0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Chris Nota' # The full version, including alpha/beta/rc tags -release = '0.8.2' +release = '0.9.0' # -- General configuration --------------------------------------------------- diff --git a/integration/continuous_test.py b/integration/continuous_test.py index f1fe7cff..f3e528a9 100644 --- a/integration/continuous_test.py +++ b/integration/continuous_test.py @@ -1,5 +1,5 @@ import unittest -from all.environments import GymEnvironment +from all.environments import GymEnvironment, PybulletEnvironment, MujocoEnvironment from all.presets.continuous import ddpg, ppo, sac from validate_agent import validate_agent @@ -7,22 +7,31 @@ class TestContinuousPresets(unittest.TestCase): def test_ddpg(self): validate_agent( - ddpg.device('cpu').hyperparameters(replay_start_size=50), - GymEnvironment('LunarLanderContinuous-v2') + ddpg.device("cpu").hyperparameters(replay_start_size=50), + GymEnvironment("LunarLanderContinuous-v2"), ) def test_ppo(self): + validate_agent(ppo.device("cpu"), GymEnvironment("LunarLanderContinuous-v2")) + + def test_sac(self): validate_agent( - ppo.device('cpu'), - GymEnvironment('LunarLanderContinuous-v2') + sac.device("cpu").hyperparameters(replay_start_size=50), + GymEnvironment("LunarLanderContinuous-v2"), ) - def test_sac(self): + def test_mujoco(self): + validate_agent( + sac.device("cpu").hyperparameters(replay_start_size=50), + MujocoEnvironment("HalfCheetah-v4"), + ) + + def test_pybullet(self): validate_agent( - 
sac.device('cpu').hyperparameters(replay_start_size=50), - GymEnvironment('LunarLanderContinuous-v2') + sac.device("cpu").hyperparameters(replay_start_size=50), + PybulletEnvironment("cheetah"), ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/setup.py b/setup.py index 0b81db8f..71128aae 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,9 @@ "pybullet>=3.2.2", "gym>=0.10.0,<0.26.0", ], + "mujoco": [ + "gymnasium[mujoco]~={}".format(GYM_VERSION), + ], "ma-atari": [ "PettingZoo[atari, accept-rom-license]~={}".format(PETTINGZOO_VERSION), "supersuit~=3.9.1", @@ -39,15 +42,16 @@ extras["all"] = ( extras["atari"] + extras["box2d"] + + extras["mujoco"] + extras["pybullet"] + extras["ma-atari"] + extras["comet"] ) -extras["dev"] = extras["all"] + extras["test"] + extras["docs"] + extras["comet"] +extras["dev"] = extras["all"] + extras["test"] + extras["docs"] setup( name="autonomous-learning-library", - version="0.8.2", + version="0.9.0", description=("A library for building reinforcement learning agents in Pytorch"), packages=find_packages(), url="https://github.com/cpnota/autonomous-learning-library.git", From b978310b5c726931c0a70cbdd326fd97030301b4 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Fri, 26 Jan 2024 12:35:36 -0500 Subject: [PATCH 06/26] Refactor/scripts-folder (#286) * all scripts to all package * fix watch scripts * update watch script and add test * formatting * compute sem correctly * fix multiagent atari watch * add additional random seed * explicitly seed multiagent environment * make test less flakey --- all/environments/atari.py | 4 +-- all/environments/gym.py | 5 ++-- all/environments/multiagent_pettingzoo.py | 4 +-- all/experiments/experiment.py | 2 +- all/experiments/multiagent_env_experiment.py | 6 ++-- .../multiagent_env_experiment_test.py | 13 +++++---- all/experiments/run_experiment.py | 1 + all/experiments/single_env_experiment_test.py | 22 +++++++++------ all/experiments/watch.py | 22 ++++++--------- all/experiments/watch_test.py | 28 +++++++++++++++++++ {scripts => all/scripts}/__init__.py | 0 {scripts => all/scripts}/atari.py | 0 {scripts => all/scripts}/classic.py | 0 {scripts => all/scripts}/continuous.py | 0 {scripts => all/scripts}/multiagent_atari.py | 10 ++++--- {scripts => all/scripts}/plot.py | 0 {scripts => all/scripts}/release.py | 0 {scripts => all/scripts}/watch_atari.py | 2 +- {scripts => all/scripts}/watch_classic.py | 2 +- {scripts => all/scripts}/watch_continuous.py | 6 ++-- .../scripts}/watch_multiagent_atari.py | 2 +- setup.py | 18 ++++++------ 22 files changed, 92 insertions(+), 55 deletions(-) create mode 100644 all/experiments/watch_test.py rename {scripts => all/scripts}/__init__.py (100%) rename {scripts => all/scripts}/atari.py (100%) rename {scripts => all/scripts}/classic.py (100%) rename {scripts => all/scripts}/continuous.py (100%) rename {scripts => all/scripts}/multiagent_atari.py (87%) rename {scripts => all/scripts}/plot.py (100%) rename {scripts => all/scripts}/release.py (100%) rename {scripts => all/scripts}/watch_atari.py (90%) rename {scripts => all/scripts}/watch_classic.py (90%) rename {scripts => all/scripts}/watch_continuous.py (79%) rename {scripts => all/scripts}/watch_multiagent_atari.py (95%) diff --git a/all/environments/atari.py b/all/environments/atari.py index 204fbbdf..e8a3a050 100644 --- a/all/environments/atari.py +++ b/all/environments/atari.py @@ -13,10 +13,10 @@ class AtariEnvironment(Environment): - def __init__(self, name, device='cpu'): + def __init__(self, name, 
device='cpu', **gym_make_kwargs): # construct the environment - env = gymnasium.make(name + "NoFrameskip-v4") + env = gymnasium.make(name + "NoFrameskip-v4", **gym_make_kwargs) # apply a subset of wrappers env = NoopResetEnv(env, noop_max=30) diff --git a/all/environments/gym.py b/all/environments/gym.py index 997b7c56..7baa3c97 100644 --- a/all/environments/gym.py +++ b/all/environments/gym.py @@ -22,15 +22,16 @@ class GymEnvironment(Environment): name (str, optional): the name of the environment device (str, optional): the device on which tensors will be stored legacy_gym (str, optional): If true, calls gym.make() instead of gymnasium.make() + **gym_make_kwargs: kwargs passed to gymnasium.make(id, **gym_make_kwargs) ''' - def __init__(self, id, device=torch.device('cpu'), name=None, legacy_gym=False): + def __init__(self, id, device=torch.device('cpu'), name=None, legacy_gym=False, **gym_make_kwargs): if legacy_gym: import gym self._gym = gym else: self._gym = gymnasium - self._env = self._gym.make(id) + self._env = self._gym.make(id, **gym_make_kwargs) self._id = id self._name = name if name else id self._state = None diff --git a/all/environments/multiagent_pettingzoo.py b/all/environments/multiagent_pettingzoo.py index 3376e519..3bc96619 100644 --- a/all/environments/multiagent_pettingzoo.py +++ b/all/environments/multiagent_pettingzoo.py @@ -62,8 +62,8 @@ def step(self, action): def seed(self, seed): self._env.seed(seed) - def render(self, mode='human'): - return self._env.render(mode=mode) + def render(self, **kwargs): + return self._env.render(**kwargs) def close(self): self._env.close() diff --git a/all/experiments/experiment.py b/all/experiments/experiment.py index 757016b8..4de2cfe6 100644 --- a/all/experiments/experiment.py +++ b/all/experiments/experiment.py @@ -75,7 +75,7 @@ def _log_test_episode(self, episode, returns): def _log_test(self, returns): if not self._quiet: mean = np.mean(returns) - sem = np.var(returns) / np.sqrt(len(returns)) + sem = np.std(returns) / np.sqrt(len(returns)) print('test returns (mean ± sem): {} ± {}'.format(mean, sem)) self._logger.add_summary('returns-test', np.mean(returns), np.std(returns)) diff --git a/all/experiments/multiagent_env_experiment.py b/all/experiments/multiagent_env_experiment.py index 261bd5e1..3314f401 100644 --- a/all/experiments/multiagent_env_experiment.py +++ b/all/experiments/multiagent_env_experiment.py @@ -36,8 +36,8 @@ def __init__( self._logger = self._make_logger(logdir, self._name, env.name, verbose, logger) self._agent = preset.agent(logger=self._logger, train_steps=train_steps) self._env = env - self._episode = 0 - self._frame = 0 + self._episode = 1 + self._frame = 1 self._logdir = logdir self._preset = preset self._quiet = quiet @@ -171,7 +171,7 @@ def _log_test(self, returns): for agent, agent_returns in returns.items(): if not self._quiet: mean = np.mean(agent_returns) - sem = np.variance(agent_returns) / np.sqrt(len(agent_returns)) + sem = np.std(agent_returns) / np.sqrt(len(agent_returns)) print('{} test returns (mean ± sem): {} ± {}'.format(agent, mean, sem)) self._logger.add_summary('{}/returns-test'.format(agent), np.mean(agent_returns), np.std(agent_returns)) diff --git a/all/experiments/multiagent_env_experiment_test.py b/all/experiments/multiagent_env_experiment_test.py index ceb8a47a..fbac6bbd 100644 --- a/all/experiments/multiagent_env_experiment_test.py +++ b/all/experiments/multiagent_env_experiment_test.py @@ -1,3 +1,4 @@ +import random import unittest import numpy as np import torch @@ -16,9 
+17,10 @@ def _make_logger(self, logdir, agent_name, env_name, verbose, logger): class TestMultiagentEnvExperiment(unittest.TestCase): def setUp(self): + random.seed(0) np.random.seed(0) torch.manual_seed(0) - self.env = MultiagentAtariEnv('space_invaders_v2', device='cpu') + self.env = MultiagentAtariEnv('space_invaders_v2', device='cpu', seed=0) self.env.reset(seed=0) self.experiment = None @@ -34,10 +36,11 @@ def test_writes_training_returns(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf')) experiment.train(episodes=3) self.maxDiff = None - self.assertEqual(experiment._logger.data, { - 'eval/first_0/returns/frame': {'values': [705.0, 490.0, 230.0, 435.0], 'steps': [808, 1580, 2120, 3300]}, - 'eval/second_0/returns/frame': {'values': [115.0, 525.0, 415.0, 665.0], 'steps': [808, 1580, 2120, 3300]} - }) + # could not get the exact numbers to be reproducible across enviornments :( + self.assertEqual(len(experiment._logger.data['eval/first_0/returns/frame']['values']), 3) + self.assertEqual(len(experiment._logger.data['eval/first_0/returns/frame']['steps']), 3) + self.assertEqual(len(experiment._logger.data['eval/second_0/returns/frame']['values']), 3) + self.assertEqual(len(experiment._logger.data['eval/second_0/returns/frame']['steps']), 3) def test_writes_test_returns(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf')) diff --git a/all/experiments/run_experiment.py b/all/experiments/run_experiment.py index 158caf6a..5b153519 100644 --- a/all/experiments/run_experiment.py +++ b/all/experiments/run_experiment.py @@ -34,6 +34,7 @@ def run_experiment( verbose=verbose, logger=logger ) + experiment.save() experiment.train(frames=frames) experiment.save() experiment.test(episodes=test_episodes) diff --git a/all/experiments/single_env_experiment_test.py b/all/experiments/single_env_experiment_test.py index 6c11b04b..43979bdf 100644 --- a/all/experiments/single_env_experiment_test.py +++ b/all/experiments/single_env_experiment_test.py @@ -49,7 +49,7 @@ def close(self): class MockExperiment(SingleEnvExperiment): def _make_logger(self, logdir, agent_name, env_name, verbose, logger): - self._logger = MockLogger(self, agent_name + '_' + env_name, verbose) + self._logger = MockLogger(self, agent_name + "_" + env_name, verbose) return self._logger @@ -57,7 +57,7 @@ class TestSingleEnvExperiment(unittest.TestCase): def setUp(self): np.random.seed(0) torch.manual_seed(0) - self.env = GymEnvironment('CartPole-v0') + self.env = GymEnvironment("CartPole-v0") self.env.reset(seed=0) self.experiment = None @@ -66,7 +66,9 @@ def test_adds_default_name(self): self.assertEqual(experiment._logger.label, "dqn_CartPole-v0") def test_adds_custom_name(self): - experiment = MockExperiment(self.make_preset(), self.env, name='dqn', quiet=True) + experiment = MockExperiment( + self.make_preset(), self.env, name="dqn", quiet=True + ) self.assertEqual(experiment._logger.label, "dqn_CartPole-v0") def test_writes_training_returns_eps(self): @@ -74,7 +76,7 @@ def test_writes_training_returns_eps(self): experiment.train(episodes=3) np.testing.assert_equal( experiment._logger.data["eval/returns/episode"]["values"], - np.array([22., 17., 28.]), + np.array([22.0, 17.0, 28.0]), ) np.testing.assert_equal( experiment._logger.data["eval/returns/episode"]["steps"], @@ -95,7 +97,7 @@ def test_writes_test_returns(self): np.testing.assert_approx_equal( np.array(experiment._logger.data["summary/returns-test/std"]["values"]), 
np.array([expected_std]), - significant=4 + significant=4, ) np.testing.assert_equal( experiment._logger.data["summary/returns-test/mean"]["steps"], @@ -103,13 +105,17 @@ def test_writes_test_returns(self): ) def test_writes_loss(self): - experiment = MockExperiment(self.make_preset(), self.env, quiet=True, verbose=True) + experiment = MockExperiment( + self.make_preset(), self.env, quiet=True, verbose=True + ) self.assertTrue(experiment._logger.verbose) - experiment = MockExperiment(self.make_preset(), self.env, quiet=True, verbose=False) + experiment = MockExperiment( + self.make_preset(), self.env, quiet=True, verbose=False + ) self.assertFalse(experiment._logger.verbose) def make_preset(self): - return dqn.device('cpu').env(self.env).build() + return dqn.device("cpu").env(self.env).build() if __name__ == "__main__": diff --git a/all/experiments/watch.py b/all/experiments/watch.py index 9e346d7a..b5922feb 100644 --- a/all/experiments/watch.py +++ b/all/experiments/watch.py @@ -1,31 +1,27 @@ -import os import time import torch -import gymnasium -from all.agents import Agent +import sys -def watch(agent, env, fps=60): +def watch(agent, env, fps=60, n_episodes=sys.maxsize): action = None returns = 0 - # have to call this before initial reset for pybullet envs - env.render(mode="human") env.reset() - while True: + for _ in range(n_episodes): + env.render() action = agent.act(env.state) + env.step(action) returns += env.state.reward - time.sleep(1 / fps) if env.state.done: print('returns:', returns) env.reset() returns = 0 - else: - env.step(action) - env.render() + + time.sleep(1 / fps) -def load_and_watch(filename, env, fps=60): +def load_and_watch(filename, env, fps=60, n_episodes=sys.maxsize): agent = torch.load(filename).test_agent() - watch(agent, env, fps=fps) + watch(agent, env, fps=fps, n_episodes=n_episodes) diff --git a/all/experiments/watch_test.py b/all/experiments/watch_test.py new file mode 100644 index 00000000..607d56d4 --- /dev/null +++ b/all/experiments/watch_test.py @@ -0,0 +1,28 @@ +from .watch import load_and_watch +import unittest +from unittest import mock +import torch +from all.environments import GymEnvironment + + +class MockAgent(): + def act(self): + # sample from cartpole action space + return torch.randint(0, 2, []) + + +class MockPreset(): + def __init__(self, filename): + self.filename = filename + + def test_agent(self): + return MockAgent + + +class WatchTest(unittest.TestCase): + @mock.patch('torch.load', lambda filename: MockPreset(filename)) + @mock.patch('time.sleep', mock.MagicMock()) + def test_load_and_watch(self): + env = mock.MagicMock(GymEnvironment("CartPole-v0", render_mode="rgb_array")) + load_and_watch("file.name", env, n_episodes=3) + self.assertEqual(env.reset.call_count, 4) diff --git a/scripts/__init__.py b/all/scripts/__init__.py similarity index 100% rename from scripts/__init__.py rename to all/scripts/__init__.py diff --git a/scripts/atari.py b/all/scripts/atari.py similarity index 100% rename from scripts/atari.py rename to all/scripts/atari.py diff --git a/scripts/classic.py b/all/scripts/classic.py similarity index 100% rename from scripts/classic.py rename to all/scripts/classic.py diff --git a/scripts/continuous.py b/all/scripts/continuous.py similarity index 100% rename from scripts/continuous.py rename to all/scripts/continuous.py diff --git a/scripts/multiagent_atari.py b/all/scripts/multiagent_atari.py similarity index 87% rename from scripts/multiagent_atari.py rename to all/scripts/multiagent_atari.py index 
59efbb17..6a3f1867 100644 --- a/scripts/multiagent_atari.py +++ b/all/scripts/multiagent_atari.py @@ -40,12 +40,14 @@ def main(): env = MultiagentAtariEnv(args.env, device=args.device) + assert len(env.agents) == len(args.agents), f"Must specify {len(env.agents)} agents for this environment." + presets = { agent_id: getattr(atari, agent_type) - .hyperparameters(replay_buffer_size=args.replay_buffer_size) - .device(args.device) - .env(env.subenvs[agent_id]) - .build() + .hyperparameters(replay_buffer_size=args.replay_buffer_size) + .device(args.device) + .env(env.subenvs[agent_id]) + .build() for agent_id, agent_type in zip(env.agents, args.agents) } diff --git a/scripts/plot.py b/all/scripts/plot.py similarity index 100% rename from scripts/plot.py rename to all/scripts/plot.py diff --git a/scripts/release.py b/all/scripts/release.py similarity index 100% rename from scripts/release.py rename to all/scripts/release.py diff --git a/scripts/watch_atari.py b/all/scripts/watch_atari.py similarity index 90% rename from scripts/watch_atari.py rename to all/scripts/watch_atari.py index d5309699..b5cd6a28 100644 --- a/scripts/watch_atari.py +++ b/all/scripts/watch_atari.py @@ -19,7 +19,7 @@ def main(): help="Playback speed", ) args = parser.parse_args() - env = AtariEnvironment(args.env, device=args.device) + env = AtariEnvironment(args.env, device=args.device, render_mode="human") load_and_watch(args.filename, env, fps=args.fps) diff --git a/scripts/watch_classic.py b/all/scripts/watch_classic.py similarity index 90% rename from scripts/watch_classic.py rename to all/scripts/watch_classic.py index b9806415..0b2e7416 100644 --- a/scripts/watch_classic.py +++ b/all/scripts/watch_classic.py @@ -18,7 +18,7 @@ def main(): help="Playback speed", ) args = parser.parse_args() - env = GymEnvironment(args.env, device=args.device) + env = GymEnvironment(args.env, device=args.device, render_mode="human") load_and_watch(args.filename, env, fps=args.fps) diff --git a/scripts/watch_continuous.py b/all/scripts/watch_continuous.py similarity index 79% rename from scripts/watch_continuous.py rename to all/scripts/watch_continuous.py index 29f68c2d..2cd17250 100644 --- a/scripts/watch_continuous.py +++ b/all/scripts/watch_continuous.py @@ -23,11 +23,11 @@ def main(): args = parser.parse_args() if args.env in ENVS: - env = GymEnvironment(args.env, device=args.device) + env = GymEnvironment(args.env, device=args.device, render_mode="human") elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names: - env = PybulletEnvironment(args.env, device=args.device) + env = PybulletEnvironment(args.env, device=args.device, render_mode="human") else: - env = GymEnvironment(args.env, device=args.device) + env = GymEnvironment(args.env, device=args.device, render_mode="human") load_and_watch(args.filename, env, fps=args.fps) diff --git a/scripts/watch_multiagent_atari.py b/all/scripts/watch_multiagent_atari.py similarity index 95% rename from scripts/watch_multiagent_atari.py rename to all/scripts/watch_multiagent_atari.py index 7f16e536..fe6095eb 100644 --- a/scripts/watch_multiagent_atari.py +++ b/all/scripts/watch_multiagent_atari.py @@ -52,7 +52,7 @@ def main(): action="store_true", default=False, help="Reload the model from disk after every episode" ) args = parser.parse_args() - env = MultiagentAtariEnv(args.env, device=args.device) + env = MultiagentAtariEnv(args.env, device=args.device, render_mode="human") watch(env, args.filename, args.fps, args.reload) diff --git a/setup.py b/setup.py index 
71128aae..42f5b3a7 100644 --- a/setup.py +++ b/setup.py @@ -59,15 +59,15 @@ author_email="cnota@cs.umass.edu", entry_points={ "console_scripts": [ - "all-atari=scripts.atari:main", - "all-classic=scripts.classic:main", - "all-continuous=scripts.continuous:main", - "all-plot=scripts.plot:main", - "all-watch-atari=scripts.watch_atari:main", - "all-watch-classic=scripts.watch_classic:main", - "all-watch-continuous=scripts.watch_continuous:main", - "all-benchmark-atari=benchmarks.atari40:main", - "all-benchmark-pybullet=benchmarks.pybullet:main", + "all-atari=all.scripts.atari:main", + "all-classic=all.scripts.classic:main", + "all-continuous=all.scripts.continuous:main", + "all-multiagent-atari=all.scripts.multiagent_atari:main", + "all-plot=all.scripts.plot:main", + "all-watch-atari=all.scripts.watch_atari:main", + "all-watch-classic=all.scripts.watch_classic:main", + "all-watch-continuous=all.scripts.watch_continuous:main", + "all-watch-multiagent-atari=all.scripts.watch_multiagent_atari:main", ], }, install_requires=[ From 3d4c258be6d05a338991f8baed622816e19b2121 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Fri, 26 Jan 2024 13:32:01 -0500 Subject: [PATCH 07/26] add __call__ method to Builder API and unit tests (#287) * add __call__ method to Builder API and unit tests * formatting --- all/presets/builder.py | 3 ++ all/presets/builder_test.py | 67 +++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 all/presets/builder_test.py diff --git a/all/presets/builder.py b/all/presets/builder.py index 88eff793..768c2610 100644 --- a/all/presets/builder.py +++ b/all/presets/builder.py @@ -20,6 +20,9 @@ def __init__( self._hyperparameters = self._merge_hyperparameters(default_hyperparameters, hyperparameters) self._name = name or default_name + def __call__(self, **kwargs): + return self._preset_builder(**kwargs) + def device(self, device): return self._preset_builder(device=device) diff --git a/all/presets/builder_test.py b/all/presets/builder_test.py new file mode 100644 index 00000000..7250436f --- /dev/null +++ b/all/presets/builder_test.py @@ -0,0 +1,67 @@ +import unittest +from unittest.mock import Mock +from all.presets import PresetBuilder + + +class TestPresetBuilder(unittest.TestCase): + def setUp(self): + self.name = "my_preset" + self.default_hyperparameters = { + "lr": 1e-4, + "gamma": 0.99 + } + + class MockPreset(): + def __init__(self, env, name, device, **hyperparameters): + self.env = env + self.name = name + self.device = device + self.hyperparameters = hyperparameters + self.builder = PresetBuilder(self.name, self.default_hyperparameters, MockPreset) + + def test_default_name(self): + agent = self.builder.env(Mock).build() + self.assertEqual(agent.name, self.name) + + def test_override_name(self): + agent = self.builder.name("cool_name").env(Mock).build() + self.assertEqual(agent.name, "cool_name") + + def test_default_hyperparameters(self): + agent = self.builder.env(Mock).build() + self.assertEqual(agent.hyperparameters, self.default_hyperparameters) + + def test_override_hyperparameters(self): + agent = self.builder.hyperparameters(lr=0.01).env(Mock).build() + self.assertEqual(agent.hyperparameters, {**self.default_hyperparameters, "lr": 0.01}) + + def test_bad_hyperparameters(self): + with self.assertRaises(KeyError): + self.builder.hyperparameters(foo=0.01).env(Mock).build() + + def test_default_device(self): + agent = self.builder.env(Mock).build() + self.assertEqual(agent.device, 'cuda') + + def test_override_device(self): + agent = 
self.builder.device('cpu').env(Mock).build() + self.assertEqual(agent.device, 'cpu') + + def test_no_side_effects(self): + self.builder.device('cpu').hyperparameters(lr=0.01).device('cpu').env(Mock).build() + my_env = Mock + agent = self.builder.env(Mock).build() + self.assertEqual(agent.name, self.name) + self.assertEqual(agent.hyperparameters, self.default_hyperparameters) + self.assertEqual(agent.device, 'cuda') + self.assertEqual(agent.env, my_env) + + def test_call_api(self): + agent = self.builder(device='cpu', hyperparameters={"lr": 0.01}, name="cool_name").env(Mock).build() + self.assertEqual(agent.name, "cool_name") + self.assertEqual(agent.hyperparameters, {**self.default_hyperparameters, "lr": 0.01}) + self.assertEqual(agent.device, 'cpu') + + +if __name__ == "__main__": + unittest.main() From 07cd33c4b45d84e1d0cf0368fc5480fb6c26591d Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sun, 11 Feb 2024 11:06:46 -0500 Subject: [PATCH 08/26] Feature/episode length (#289) * loosen requirements of mujoco test * log training episode length * update parallel env training episode length tests * update single env episode length tests * log test episode length in single env experiment * remove unwanted print from tests * run formatter * print episode length to console during training --- all/environments/mujoco_test.py | 4 +- all/experiments/experiment.py | 29 ++++++---- all/experiments/parallel_env_experiment.py | 15 ++++-- .../parallel_env_experiment_test.py | 41 ++++++++++++-- all/experiments/single_env_experiment.py | 16 ++++-- all/experiments/single_env_experiment_test.py | 53 +++++++++++++++++-- all/experiments/watch_test.py | 7 ++- 7 files changed, 138 insertions(+), 27 deletions(-) diff --git a/all/environments/mujoco_test.py b/all/environments/mujoco_test.py index 0672a7d3..7a207e8c 100644 --- a/all/environments/mujoco_test.py +++ b/all/environments/mujoco_test.py @@ -28,8 +28,8 @@ def test_step(self): state = env.reset(seed=0) state = env.step(env.action_space.sample()) self.assertEqual(state.observation.shape, (27,)) - self.assertGreater(state.reward, -1.) - self.assertLess(state.reward, 1) + self.assertGreater(state.reward, -2.) + self.assertLess(state.reward, 2) self.assertNotEqual(state.reward, 0.) 
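For reference, the __call__ method added to PresetBuilder in the previous patch is a one-call shorthand for the chained configuration used throughout these tests. A rough sketch of the two equivalent spellings, assuming the sac preset builder from all.presets.continuous and the LunarLander environment from the integration tests:

from all.environments import GymEnvironment
from all.presets.continuous import sac

env = GymEnvironment("LunarLanderContinuous-v2")

# chained builder calls, as in the existing tests
preset = sac.device("cpu").hyperparameters(replay_start_size=50).env(env).build()

# the new __call__ shortcut accepts the same settings in a single call
preset = sac(device="cpu", hyperparameters={"replay_start_size": 50}).env(env).build()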
self.assertFalse(state.done) self.assertEqual(state.mask, 1) diff --git a/all/experiments/experiment.py b/all/experiments/experiment.py index 4de2cfe6..0601da34 100644 --- a/all/experiments/experiment.py +++ b/all/experiments/experiment.py @@ -52,9 +52,15 @@ def frame(self): def episode(self): '''The index of the current training episode''' - def _log_training_episode(self, returns, fps): + def _log_training_episode(self, returns, episode_length, fps): if not self._quiet: - print('episode: {}, frame: {}, fps: {}, returns: {}'.format(self.episode, self.frame, int(fps), returns)) + print('episode: {}, frame: {}, fps: {}, episode_length: {}, returns: {}'.format( + self.episode, + self.frame, + int(fps), + episode_length, + returns + )) if returns > self._best_returns: self._best_returns = returns self._returns100.append(returns) @@ -66,18 +72,23 @@ def _log_training_episode(self, returns, fps): self._logger.add_eval('returns/episode', returns, step="episode") self._logger.add_eval('returns/frame', returns, step="frame") self._logger.add_eval("returns/max", self._best_returns, step="frame") + self._logger.add_eval("episode_length", episode_length) self._logger.add_eval('fps', fps, step="frame") - def _log_test_episode(self, episode, returns): + def _log_test_episode(self, episode, returns, episode_length): if not self._quiet: - print('test episode: {}, returns: {}'.format(episode, returns)) + print('test episode: {}, episode_length: {}, returns: {}'.format(episode, episode_length, returns)) - def _log_test(self, returns): + def _log_test(self, returns, episode_lengths): if not self._quiet: - mean = np.mean(returns) - sem = np.std(returns) / np.sqrt(len(returns)) - print('test returns (mean ± sem): {} ± {}'.format(mean, sem)) - self._logger.add_summary('returns-test', np.mean(returns), np.std(returns)) + returns_mean = np.mean(returns) + returns_sem = np.std(returns) / np.sqrt(len(returns)) + print('test returns (mean ± sem): {} ± {}'.format(returns_mean, returns_sem)) + episode_length_mean = np.mean(episode_lengths) + episode_length_sem = np.std(episode_lengths) / np.sqrt(len(episode_lengths)) + print('test episode length (mean ± sem): {} ± {}'.format(episode_length_mean, episode_length_sem)) + self._logger.add_summary('test_returns', np.mean(returns), np.std(returns)) + self._logger.add_summary('test_episode_length', np.mean(episode_lengths), np.std(episode_lengths)) def save(self): return self._preset.save('{}/preset.pt'.format(self._logger.log_dir)) diff --git a/all/experiments/parallel_env_experiment.py b/all/experiments/parallel_env_experiment.py index 6d2456ef..6cb2f926 100644 --- a/all/experiments/parallel_env_experiment.py +++ b/all/experiments/parallel_env_experiment.py @@ -62,6 +62,7 @@ def episode(self): def train(self, frames=np.inf, episodes=np.inf): num_envs = int(self._env.num_envs) returns = np.zeros(num_envs) + episode_lengths = np.zeros(num_envs) state_array = self._env.reset() start_time = time.time() completed_frames = 0 @@ -72,6 +73,7 @@ def train(self, frames=np.inf, episodes=np.inf): episodes_completed = state_array.done.type(torch.IntTensor).sum().item() completed_frames += num_envs returns += state_array.reward.cpu().detach().numpy() + episode_lengths += 1 if episodes_completed > 0: dones = state_array.done.cpu().detach().numpy() cur_time = time.time() @@ -80,8 +82,9 @@ def train(self, frames=np.inf, episodes=np.inf): start_time = cur_time for i in range(num_envs): if dones[i]: - self._log_training_episode(returns[i], fps) + self._log_training_episode(returns[i], 
episode_lengths[i], fps) returns[i] = 0 + episode_lengths[i] = 0 self._episode += episodes_completed def test(self, episodes=100): @@ -90,32 +93,38 @@ def test(self, episodes=100): # Note that we need to record the first N episodes that are STARTED, # not the first N that are completed, or we introduce bias. test_returns = [] + test_episode_lengths = [] episodes_started = self._n_envs should_record = [True] * self._n_envs # initialize state states = self._env.reset() returns = states.reward.clone() + episode_lengths = np.zeros(self._n_envs) while len(test_returns) < episodes: # step the agent and environments actions = test_agent.act(states) states = self._env.step(actions) returns += states.reward + episode_lengths += 1 # record any episodes that have finished for i, done in enumerate(states.done): if done: if should_record[i] and len(test_returns) < episodes: episode_return = returns[i].item() + episode_length = episode_lengths[i] test_returns.append(episode_return) - self._log_test_episode(len(test_returns), episode_return) + test_episode_lengths.append(episode_length) + self._log_test_episode(len(test_returns), episode_return, episode_length) returns[i] = 0. + episode_lengths[i] = -1 episodes_started += 1 if episodes_started > episodes: should_record[i] = False - self._log_test(test_returns) + self._log_test(test_returns, test_episode_lengths) return test_returns def _done(self, frames, episodes): diff --git a/all/experiments/parallel_env_experiment_test.py b/all/experiments/parallel_env_experiment_test.py index 28a9558b..6fd88843 100644 --- a/all/experiments/parallel_env_experiment_test.py +++ b/all/experiments/parallel_env_experiment_test.py @@ -30,7 +30,7 @@ def test_adds_custom_label(self): experiment = MockExperiment(self.make_agent(), env, name="a2c", quiet=True) self.assertEqual(experiment._logger.label, "a2c_CartPole-v0") - def test_writes_training_returns_eps(self): + def test_writes_training_returns_episode(self): self.experiment.train(episodes=4) np.testing.assert_equal( self.experiment._logger.data["eval/returns/episode"]["steps"], @@ -41,16 +41,51 @@ def test_writes_training_returns_eps(self): np.array([12.0, 13.0, 16.0, 16.0]), ) + def test_writes_training_returns_frame(self): + self.experiment.train(episodes=4) + np.testing.assert_equal( + self.experiment._logger.data["eval/returns/frame"]["steps"], + np.array([49, 53, 65, 65]), + ) + np.testing.assert_equal( + self.experiment._logger.data["eval/returns/frame"]["values"], + np.array([12.0, 13.0, 16.0, 16.0]), + ) + + def test_writes_training_episode_length(self): + self.experiment.train(episodes=4) + np.testing.assert_equal( + self.experiment._logger.data["eval/episode_length"]["steps"], + np.array([49, 53, 65, 65]), + ) + np.testing.assert_equal( + self.experiment._logger.data["eval/episode_length"]["values"], + np.array([12.0, 13.0, 16.0, 16.0]), + ) + def test_writes_test_returns(self): self.experiment.train(episodes=5) returns = self.experiment.test(episodes=4) self.assertEqual(len(returns), 4) np.testing.assert_equal( - self.experiment._logger.data["summary/returns-test/mean"]["values"], + self.experiment._logger.data["summary/test_returns/mean"]["values"], + np.array([np.mean(returns)]), + ) + np.testing.assert_equal( + self.experiment._logger.data["summary/test_returns/std"]["values"], + np.array([np.std(returns)]), + ) + + def test_writes_test_episode_length(self): + self.experiment.train(episodes=5) + returns = self.experiment.test(episodes=4) + self.assertEqual(len(returns), 4) + np.testing.assert_equal( + 
self.experiment._logger.data["summary/test_episode_length/mean"]["values"], np.array([np.mean(returns)]), ) np.testing.assert_equal( - self.experiment._logger.data["summary/returns-test/std"]["values"], + self.experiment._logger.data["summary/test_episode_length/std"]["values"], np.array([np.std(returns)]), ) diff --git a/all/experiments/single_env_experiment.py b/all/experiments/single_env_experiment.py index 1b12df9b..49dd269a 100644 --- a/all/experiments/single_env_experiment.py +++ b/all/experiments/single_env_experiment.py @@ -48,11 +48,13 @@ def train(self, frames=np.inf, episodes=np.inf): def test(self, episodes=100): test_agent = self._preset.test_agent() returns = [] + episode_lengths = [] for episode in range(episodes): - episode_return = self._run_test_episode(test_agent) + episode_return, episode_length = self._run_test_episode(test_agent) returns.append(episode_return) - self._log_test_episode(episode, episode_return) - self._log_test(returns) + episode_lengths.append(episode_length) + self._log_test_episode(episode, episode_return, episode_length) + self._log_test(returns, episode_lengths) return returns def _run_training_episode(self): @@ -64,6 +66,7 @@ def _run_training_episode(self): state = self._env.reset() action = self._agent.act(state) returns = 0 + episode_length = 0 # loop until the episode is finished while not state.done: @@ -72,6 +75,7 @@ def _run_training_episode(self): state = self._env.step(action) action = self._agent.act(state) returns += state.reward + episode_length += 1 self._frame += 1 # stop the timer @@ -79,7 +83,7 @@ def _run_training_episode(self): fps = (self._frame - start_frame) / (end_time - start_time) # log the results - self._log_training_episode(returns, fps) + self._log_training_episode(returns, episode_length, fps) # update experiment state self._episode += 1 @@ -89,6 +93,7 @@ def _run_test_episode(self, test_agent): state = self._env.reset() action = test_agent.act(state) returns = 0 + episode_length = 0 # loop until the episode is finished while not state.done: @@ -97,8 +102,9 @@ def _run_test_episode(self, test_agent): state = self._env.step(action) action = test_agent.act(state) returns += state.reward + episode_length += 1 - return returns + return returns, episode_length def _done(self, frames, episodes): return self._frame > frames or self._episode > episodes diff --git a/all/experiments/single_env_experiment_test.py b/all/experiments/single_env_experiment_test.py index 43979bdf..e2e6adc6 100644 --- a/all/experiments/single_env_experiment_test.py +++ b/all/experiments/single_env_experiment_test.py @@ -71,7 +71,19 @@ def test_adds_custom_name(self): ) self.assertEqual(experiment._logger.label, "dqn_CartPole-v0") - def test_writes_training_returns_eps(self): + def test_writes_training_returns_frame(self): + experiment = MockExperiment(self.make_preset(), self.env, quiet=True) + experiment.train(episodes=3) + np.testing.assert_equal( + experiment._logger.data["eval/returns/frame"]["values"], + np.array([22., 17., 28.]), + ) + np.testing.assert_equal( + experiment._logger.data["eval/returns/frame"]["steps"], + np.array([23, 40, 68]), + ) + + def test_writes_training_returns_episode(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True) experiment.train(episodes=3) np.testing.assert_equal( @@ -83,6 +95,18 @@ def test_writes_training_returns_eps(self): np.array([1, 2, 3]), ) + def test_writes_training_episode_length(self): + experiment = MockExperiment(self.make_preset(), self.env, quiet=True) + 
experiment.train(episodes=3) + np.testing.assert_equal( + experiment._logger.data["eval/episode_length"]["values"], + np.array([22, 17, 28]), + ) + np.testing.assert_equal( + experiment._logger.data["eval/episode_length"]["steps"], + np.array([23, 40, 68]), + ) + def test_writes_test_returns(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True) experiment.train(episodes=5) @@ -91,16 +115,37 @@ def test_writes_test_returns(self): expected_std = 0.5 np.testing.assert_equal(np.mean(returns), expected_mean) np.testing.assert_equal( - experiment._logger.data["summary/returns-test/mean"]["values"], + experiment._logger.data["summary/test_returns/mean"]["values"], + np.array([expected_mean]), + ) + np.testing.assert_approx_equal( + np.array(experiment._logger.data["summary/test_returns/std"]["values"]), + np.array([expected_std]), + significant=4, + ) + np.testing.assert_equal( + experiment._logger.data["summary/test_returns/mean"]["steps"], + np.array([93]), + ) + + def test_writes_test_episode_length(self): + experiment = MockExperiment(self.make_preset(), self.env, quiet=True) + experiment.train(episodes=5) + returns = experiment.test(episodes=4) + expected_mean = 8.5 + expected_std = 0.5 + np.testing.assert_equal(np.mean(returns), expected_mean) + np.testing.assert_equal( + experiment._logger.data["summary/test_returns/mean"]["values"], np.array([expected_mean]), ) np.testing.assert_approx_equal( - np.array(experiment._logger.data["summary/returns-test/std"]["values"]), + np.array(experiment._logger.data["summary/test_returns/std"]["values"]), np.array([expected_std]), significant=4, ) np.testing.assert_equal( - experiment._logger.data["summary/returns-test/mean"]["steps"], + experiment._logger.data["summary/test_returns/mean"]["steps"], np.array([93]), ) diff --git a/all/experiments/watch_test.py b/all/experiments/watch_test.py index 607d56d4..6327cd69 100644 --- a/all/experiments/watch_test.py +++ b/all/experiments/watch_test.py @@ -1,8 +1,8 @@ -from .watch import load_and_watch import unittest from unittest import mock import torch from all.environments import GymEnvironment +from all.experiments.watch import load_and_watch class MockAgent(): @@ -22,7 +22,12 @@ def test_agent(self): class WatchTest(unittest.TestCase): @mock.patch('torch.load', lambda filename: MockPreset(filename)) @mock.patch('time.sleep', mock.MagicMock()) + @mock.patch('sys.stdout', mock.MagicMock()) def test_load_and_watch(self): env = mock.MagicMock(GymEnvironment("CartPole-v0", render_mode="rgb_array")) load_and_watch("file.name", env, n_episodes=3) self.assertEqual(env.reset.call_count, 4) + + +if __name__ == "__main__": + unittest.main() From 851a52016cf8db63429fffc93372724236c22312 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sun, 11 Feb 2024 15:51:38 -0500 Subject: [PATCH 09/26] add entropy_backups hyperparameter to SAC (#296) * add entropy_backups hyperparameter to SAC * run formatter --- all/agents/sac.py | 9 +++++++-- all/presets/continuous/sac.py | 8 +++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/all/agents/sac.py b/all/agents/sac.py index 9c6f262c..c81e47ca 100644 --- a/all/agents/sac.py +++ b/all/agents/sac.py @@ -35,6 +35,7 @@ def __init__(self, q2, replay_buffer, discount_factor=0.99, + entropy_backups=True, entropy_target=-2., lr_temperature=1e-4, minibatch_size=32, @@ -51,6 +52,7 @@ def __init__(self, self.logger = logger # hyperparameters self.discount_factor = discount_factor + self.entropy_backups = entropy_backups self.entropy_target = 
entropy_target self.lr_temperature = lr_temperature self.minibatch_size = minibatch_size @@ -73,13 +75,16 @@ def _train(self): if self._should_train(): # sample from replay buffer (states, actions, rewards, next_states, _) = self.replay_buffer.sample(self.minibatch_size) + discount_factor = self.discount_factor # compute targets for Q and V next_actions, next_log_probs = self.policy.no_grad(next_states) - q_targets = rewards + self.discount_factor * (torch.min( + q_targets = rewards + discount_factor * torch.min( self.q1.target(next_states, next_actions), self.q2.target(next_states, next_actions), - ) - self.temperature * next_log_probs) + ) + if self.entropy_backups: + q_targets -= discount_factor * self.temperature * next_log_probs # update Q and V-functions q1_loss = mse_loss(self.q1(states, actions), q_targets) diff --git a/all/presets/continuous/sac.py b/all/presets/continuous/sac.py index e1dc0ffc..f4d9a76f 100644 --- a/all/presets/continuous/sac.py +++ b/all/presets/continuous/sac.py @@ -28,6 +28,7 @@ # Exploration settings "temperature_initial": 0.1, "lr_temperature": 1e-5, + "entropy_backups": True, "entropy_target_scaling": 1., # Model construction "q1_model_constructor": fc_q, @@ -120,13 +121,14 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): q1, q2, replay_buffer, - temperature_initial=self.hyperparameters["temperature_initial"], + discount_factor=self.hyperparameters["discount_factor"], + entropy_backups=self.hyperparameters["entropy_backups"], entropy_target=(-self.action_space.shape[0] * self.hyperparameters["entropy_target_scaling"]), lr_temperature=self.hyperparameters["lr_temperature"], + minibatch_size=self.hyperparameters["minibatch_size"], replay_start_size=self.hyperparameters["replay_start_size"], - discount_factor=self.hyperparameters["discount_factor"], + temperature_initial=self.hyperparameters["temperature_initial"], update_frequency=self.hyperparameters["update_frequency"], - minibatch_size=self.hyperparameters["minibatch_size"], logger=logger )) From 5ecb76c75273e03eb81179d7127c4eff9ab5b193 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sun, 11 Feb 2024 16:36:57 -0500 Subject: [PATCH 10/26] Refactor/formatting (#299) * use black formatter * add isort * remove unused imports * add missing isort import --- Makefile | 7 +- all/__init__.py | 34 +- all/agents/__init__.py | 1 - all/agents/_agent.py | 1 + all/agents/_multiagent.py | 1 + all/agents/_parallel_agent.py | 1 + all/agents/a2c.py | 31 +- all/agents/c51.py | 39 +- all/agents/ddpg.py | 45 +- all/agents/ddqn.py | 40 +- all/agents/dqn.py | 41 +- all/agents/independent.py | 2 +- all/agents/ppo.py | 61 +- all/agents/sac.py | 64 +- all/agents/vac.py | 14 +- all/agents/vpg.py | 36 +- all/agents/vqn.py | 11 +- all/agents/vsarsa.py | 9 +- all/approximation/__init__.py | 9 +- all/approximation/approximation.py | 74 +- all/approximation/checkpointer/__init__.py | 3 +- all/approximation/feature_network.py | 11 +- all/approximation/feature_network_test.py | 52 +- all/approximation/identity.py | 8 +- all/approximation/identity_test.py | 10 +- all/approximation/q_continuous.py | 17 +- all/approximation/q_dist.py | 20 +- all/approximation/q_dist_test.py | 1126 +++++++++++++++-- all/approximation/q_network.py | 17 +- all/approximation/q_network_test.py | 53 +- all/approximation/target/__init__.py | 2 + all/approximation/target/abstract.py | 2 +- all/approximation/target/fixed.py | 2 + all/approximation/target/polyak.py | 12 +- all/approximation/target/trivial.py | 1 + all/approximation/v_network.py | 16 
+- all/approximation/v_network_test.py | 26 +- all/bodies/__init__.py | 8 +- all/bodies/atari.py | 22 +- all/bodies/rewards.py | 5 +- all/bodies/time.py | 23 +- all/bodies/time_test.py | 110 +- all/bodies/vision.py | 20 +- all/core/__init__.py | 4 +- all/core/state.py | 5 +- all/core/state_test.py | 205 +-- all/environments/__init__.py | 2 +- all/environments/_multiagent_environment.py | 42 +- all/environments/atari.py | 26 +- all/environments/atari_test.py | 17 +- all/environments/atari_wrappers.py | 47 +- all/environments/duplicate_env.py | 21 +- all/environments/duplicate_env_test.py | 63 +- all/environments/gym.py | 34 +- all/environments/gym_test.py | 16 +- all/environments/mujoco_test.py | 11 +- all/environments/multiagent_atari.py | 20 +- all/environments/multiagent_atari_test.py | 62 +- all/environments/multiagent_pettingzoo.py | 42 +- .../multiagent_pettingzoo_test.py | 63 +- all/environments/pybullet.py | 6 +- all/environments/pybullet_test.py | 17 +- all/environments/vector_env.py | 44 +- all/environments/vector_env_test.py | 70 +- all/experiments/__init__.py | 10 +- all/experiments/experiment.py | 61 +- all/experiments/multiagent_env_experiment.py | 76 +- .../multiagent_env_experiment_test.py | 99 +- all/experiments/parallel_env_experiment.py | 50 +- .../parallel_env_experiment_test.py | 4 +- all/experiments/plots.py | 7 +- all/experiments/run_experiment.py | 25 +- all/experiments/single_env_experiment.py | 38 +- all/experiments/single_env_experiment_test.py | 6 +- all/experiments/slurm.py | 76 +- all/experiments/watch.py | 5 +- all/experiments/watch_test.py | 12 +- all/logging/__init__.py | 3 +- all/logging/_logger.py | 24 +- all/logging/dummy.py | 2 +- all/logging/experiment.py | 55 +- all/memory/__init__.py | 8 +- all/memory/advantage.py | 23 +- all/memory/advantage_test.py | 112 +- all/memory/generalized_advantage.py | 44 +- all/memory/generalized_advantage_test.py | 89 +- all/memory/replay_buffer.py | 65 +- all/memory/replay_buffer_test.py | 42 +- all/memory/segment_tree.py | 17 +- all/nn/__init__.py | 26 +- all/nn/nn_test.py | 4 +- all/optim/__init__.py | 2 +- all/optim/scheduler.py | 16 +- all/optim/scheduler_test.py | 6 +- all/policies/__init__.py | 6 +- all/policies/deterministic.py | 23 +- all/policies/deterministic_test.py | 22 +- all/policies/gaussian.py | 21 +- all/policies/gaussian_test.py | 20 +- all/policies/greedy.py | 33 +- all/policies/soft_deterministic.py | 22 +- all/policies/soft_deterministic_test.py | 19 +- all/policies/softmax.py | 17 +- all/policies/softmax_test.py | 34 +- all/presets/__init__.py | 11 +- all/presets/atari/__init__.py | 21 +- all/presets/atari/a2c.py | 54 +- all/presets/atari/c51.py | 61 +- all/presets/atari/ddqn.py | 73 +- all/presets/atari/dqn.py | 66 +- all/presets/atari/models/__init__.py | 34 +- all/presets/atari/ppo.py | 60 +- all/presets/atari/rainbow.py | 82 +- all/presets/atari/vac.py | 54 +- all/presets/atari/vpg.py | 55 +- all/presets/atari/vqn.py | 46 +- all/presets/atari/vsarsa.py | 46 +- all/presets/atari_test.py | 37 +- all/presets/builder.py | 43 +- all/presets/builder_test.py | 41 +- all/presets/classic_control/__init__.py | 20 +- all/presets/classic_control/a2c.py | 45 +- all/presets/classic_control/c51.py | 49 +- all/presets/classic_control/ddqn.py | 50 +- all/presets/classic_control/dqn.py | 53 +- all/presets/classic_control/ppo.py | 62 +- all/presets/classic_control/rainbow.py | 76 +- all/presets/classic_control/vac.py | 55 +- all/presets/classic_control/vpg.py | 59 +- all/presets/classic_control/vqn.py | 49 +- 
all/presets/classic_control/vsarsa.py | 53 +- all/presets/classic_control_test.py | 26 +- all/presets/continuous/__init__.py | 10 +- all/presets/continuous/ddpg.py | 64 +- all/presets/continuous/models/__init__.py | 8 +- all/presets/continuous/ppo.py | 94 +- all/presets/continuous/sac.py | 88 +- all/presets/continuous_test.py | 39 +- all/presets/independent_multiagent.py | 26 +- all/presets/multiagent_atari_test.py | 20 +- all/presets/preset.py | 7 +- all/scripts/atari.py | 13 +- all/scripts/classic.py | 15 +- all/scripts/continuous.py | 18 +- all/scripts/multiagent_atari.py | 20 +- all/scripts/plot.py | 10 +- all/scripts/release.py | 23 +- all/scripts/watch_atari.py | 2 +- all/scripts/watch_classic.py | 5 +- all/scripts/watch_continuous.py | 5 +- all/scripts/watch_multiagent_atari.py | 10 +- benchmarks/atari40.py | 17 +- benchmarks/pybullet.py | 23 +- examples/experiment.py | 21 +- examples/slurm_experiment.py | 22 +- integration/atari_test.py | 54 +- integration/classic_control_test.py | 6 +- integration/continuous_test.py | 6 +- integration/multiagent_atari_test.py | 17 +- integration/validate_agent.py | 9 +- setup.py | 22 +- 161 files changed, 3887 insertions(+), 2296 deletions(-) diff --git a/Makefile b/Makefile index f2c5e5ef..cac991d1 100644 --- a/Makefile +++ b/Makefile @@ -11,10 +11,13 @@ integration-test: python -m unittest discover -s integration -p "*test.py" lint: - flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all + black --check all benchmarks examples integration setup.py + isort --profile black --check all benchmarks examples integration setup.py + flake8 --select "F401" all benchmarks examples integration setup.py format: - autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all + black all benchmarks examples integration setup.py + isort --profile black all benchmarks examples integration setup.py tensorboard: tensorboard --logdir runs diff --git a/all/__init__.py b/all/__init__.py index 3bfd56fc..0432db2e 100644 --- a/all/__init__.py +++ b/all/__init__.py @@ -1,26 +1,16 @@ -import all.agents -import all.approximation -import all.core -import all.environments -import all.logging -import all.memory -import all.nn -import all.optim -import all.policies -import all.presets from all.core import State, StateArray __all__ = [ - 'agents', - 'approximation', - 'core', - 'environments', - 'logging', - 'memory', - 'nn', - 'optim', - 'policies', - 'presets', - 'State', - 'StateArray' + "agents", + "approximation", + "core", + "environments", + "logging", + "memory", + "nn", + "optim", + "policies", + "presets", + "State", + "StateArray", ] diff --git a/all/agents/__init__.py b/all/agents/__init__.py index 2811a056..7a62e6b0 100644 --- a/all/agents/__init__.py +++ b/all/agents/__init__.py @@ -15,7 +15,6 @@ from .vqn import VQN, VQNTestAgent from .vsarsa import VSarsa, VSarsaTestAgent - __all__ = [ # Agent interfaces "Agent", diff --git a/all/agents/_agent.py b/all/agents/_agent.py index 45038f14..ed4cb357 100644 --- a/all/agents/_agent.py +++ b/all/agents/_agent.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod + from all.optim import Schedulable diff --git a/all/agents/_multiagent.py b/all/agents/_multiagent.py index 11f4a18d..e98e0e40 100644 --- a/all/agents/_multiagent.py +++ b/all/agents/_multiagent.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod + from all.optim import Schedulable diff --git a/all/agents/_parallel_agent.py b/all/agents/_parallel_agent.py index b98ae618..cba3b42d 100644 --- 
a/all/agents/_parallel_agent.py +++ b/all/agents/_parallel_agent.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod + from all.optim import Schedulable diff --git a/all/agents/a2c.py b/all/agents/a2c.py index 082b9309..ccc97d48 100644 --- a/all/agents/a2c.py +++ b/all/agents/a2c.py @@ -1,7 +1,8 @@ -import torch from torch.nn.functional import mse_loss + from all.logging import DummyLogger from all.memory import NStepAdvantageBuffer + from ._agent import Agent from ._parallel_agent import ParallelAgent @@ -28,15 +29,15 @@ class A2C(ParallelAgent): """ def __init__( - self, - features, - v, - policy, - discount_factor=0.99, - entropy_loss_scaling=0.01, - n_envs=None, - n_steps=4, - logger=DummyLogger() + self, + features, + v, + policy, + discount_factor=0.99, + entropy_loss_scaling=0.01, + n_envs=None, + n_steps=4, + logger=DummyLogger(), ): if n_envs is None: raise RuntimeError("Must specify n_envs.") @@ -80,7 +81,9 @@ def _train(self, next_states): value_loss = mse_loss(values, targets) policy_gradient_loss = -(distribution.log_prob(actions) * advantages).mean() entropy_loss = -distribution.entropy().mean() - policy_loss = policy_gradient_loss + self.entropy_loss_scaling * entropy_loss + policy_loss = ( + policy_gradient_loss + self.entropy_loss_scaling * entropy_loss + ) loss = value_loss + policy_loss # backward pass @@ -90,8 +93,8 @@ def _train(self, next_states): self.features.step() # record metrics - self.logger.add_info('entropy', -entropy_loss) - self.logger.add_info('normalized_value_error', value_loss / targets.var()) + self.logger.add_info("entropy", -entropy_loss) + self.logger.add_info("normalized_value_error", value_loss / targets.var()) def _make_buffer(self): return NStepAdvantageBuffer( @@ -99,7 +102,7 @@ def _make_buffer(self): self.features, self.n_steps, self.n_envs, - discount_factor=self.discount_factor + discount_factor=self.discount_factor, ) diff --git a/all/agents/c51.py b/all/agents/c51.py index 8bdbcc96..3a1650b6 100644 --- a/all/agents/c51.py +++ b/all/agents/c51.py @@ -1,6 +1,8 @@ -import torch import numpy as np +import torch + from all.logging import DummyLogger + from ._agent import Agent @@ -26,16 +28,16 @@ class C51(Agent): """ def __init__( - self, - q_dist, - replay_buffer, - discount_factor=0.99, - eps=1e-5, - exploration=0.02, - minibatch_size=32, - replay_start_size=5000, - update_frequency=1, - logger=DummyLogger(), + self, + q_dist, + replay_buffer, + discount_factor=0.99, + eps=1e-5, + exploration=0.02, + minibatch_size=32, + replay_start_size=5000, + update_frequency=1, + logger=DummyLogger(), ): # objects self.q_dist = q_dist @@ -81,7 +83,9 @@ def _best_actions(self, probs): def _train(self): if self._should_train(): # sample transitions from buffer - states, actions, rewards, next_states, weights = self.replay_buffer.sample(self.minibatch_size) + states, actions, rewards, next_states, weights = self.replay_buffer.sample( + self.minibatch_size + ) # forward pass dist = self.q_dist(states, actions) # compute target distribution @@ -100,14 +104,15 @@ def _train(self): def _should_train(self): self._frames_seen += 1 - return self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0 + return ( + self._frames_seen > self.replay_start_size + and self._frames_seen % self.update_frequency == 0 + ) def _compute_target_dist(self, states, rewards): actions = self._best_actions(self.q_dist.no_grad(states)) dist = self.q_dist.target(states, actions) - shifted_atoms = ( - rewards.view((-1, 1)) + self.discount_factor * 
self.q_dist.atoms - ) + shifted_atoms = rewards.view((-1, 1)) + self.discount_factor * self.q_dist.atoms return self.q_dist.project(dist, shifted_atoms) def _kl(self, dist, target_dist): @@ -117,7 +122,7 @@ def _kl(self, dist, target_dist): class C51TestAgent(Agent): - def __init__(self, q_dist, n_actions, exploration=0.): + def __init__(self, q_dist, n_actions, exploration=0.0): self.q_dist = q_dist self.n_actions = n_actions self.exploration = exploration diff --git a/all/agents/ddpg.py b/all/agents/ddpg.py index 6a01f0ae..76bb5712 100644 --- a/all/agents/ddpg.py +++ b/all/agents/ddpg.py @@ -1,6 +1,7 @@ import torch from torch.distributions.normal import Normal from torch.nn.functional import mse_loss + from ._agent import Agent @@ -28,17 +29,18 @@ class DDPG(Agent): update_frequency (int): Number of timesteps per training update. """ - def __init__(self, - q, - policy, - replay_buffer, - action_space, - discount_factor=0.99, - minibatch_size=32, - noise=0.1, - replay_start_size=5000, - update_frequency=1, - ): + def __init__( + self, + q, + policy, + replay_buffer, + action_space, + discount_factor=0.99, + minibatch_size=32, + noise=0.1, + replay_start_size=5000, + update_frequency=1, + ): # objects self.q = q self.policy = policy @@ -49,7 +51,13 @@ def __init__(self, self.minibatch_size = minibatch_size self.discount_factor = discount_factor # private - self._noise = Normal(0, noise * torch.tensor((action_space.high - action_space.low) / 2).to(policy.device)) + self._noise = Normal( + 0, + noise + * torch.tensor((action_space.high - action_space.low) / 2).to( + policy.device + ), + ) self._low = torch.tensor(action_space.low, device=policy.device) self._high = torch.tensor(action_space.high, device=policy.device) self._state = None @@ -76,11 +84,15 @@ def _choose_action(self, state): def _train(self): if self._should_train(): # sample transitions from buffer - (states, actions, rewards, next_states, _) = self.replay_buffer.sample(self.minibatch_size) + (states, actions, rewards, next_states, _) = self.replay_buffer.sample( + self.minibatch_size + ) # train q-network q_values = self.q(states, actions) - targets = rewards + self.discount_factor * self.q.target(next_states, self.policy.target(next_states)) + targets = rewards + self.discount_factor * self.q.target( + next_states, self.policy.target(next_states) + ) loss = mse_loss(q_values, targets) self.q.reinforce(loss) @@ -92,7 +104,10 @@ def _train(self): def _should_train(self): self._frames_seen += 1 - return self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0 + return ( + self._frames_seen > self.replay_start_size + and self._frames_seen % self.update_frequency == 0 + ) class DDPGTestAgent(Agent): diff --git a/all/agents/ddqn.py b/all/agents/ddqn.py index 0f9a1acc..f6bd9246 100644 --- a/all/agents/ddqn.py +++ b/all/agents/ddqn.py @@ -1,11 +1,13 @@ import torch + from all.nn import weighted_mse_loss + from ._agent import Agent from .dqn import DQNTestAgent class DDQN(Agent): - ''' + """ Double Deep Q-Network (DDQN). DDQN is an enchancment to DQN that uses a "double Q-style" update, wherein the online network is used to select target actions @@ -24,18 +26,19 @@ class DDQN(Agent): minibatch_size (int): The number of experiences to sample in each training update. replay_start_size (int): Number of experiences in replay buffer when training begins. update_frequency (int): Number of timesteps per training update. 
- ''' + """ - def __init__(self, - q, - policy, - replay_buffer, - discount_factor=0.99, - loss=weighted_mse_loss, - minibatch_size=32, - replay_start_size=5000, - update_frequency=1, - ): + def __init__( + self, + q, + policy, + replay_buffer, + discount_factor=0.99, + loss=weighted_mse_loss, + minibatch_size=32, + replay_start_size=5000, + update_frequency=1, + ): # objects self.q = q self.policy = policy @@ -64,12 +67,16 @@ def eval(self, state): def _train(self): if self._should_train(): # sample transitions from buffer - (states, actions, rewards, next_states, weights) = self.replay_buffer.sample(self.minibatch_size) + (states, actions, rewards, next_states, weights) = ( + self.replay_buffer.sample(self.minibatch_size) + ) # forward pass values = self.q(states, actions) # compute targets next_actions = torch.argmax(self.q.no_grad(next_states), dim=1) - targets = rewards + self.discount_factor * self.q.target(next_states, next_actions) + targets = rewards + self.discount_factor * self.q.target( + next_states, next_actions + ) # compute loss loss = self.loss(values, targets, weights) # backward pass @@ -80,7 +87,10 @@ def _train(self): def _should_train(self): self._frames_seen += 1 - return self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0 + return ( + self._frames_seen > self.replay_start_size + and self._frames_seen % self.update_frequency == 0 + ) DDQNTestAgent = DQNTestAgent diff --git a/all/agents/dqn.py b/all/agents/dqn.py index 0e39df3b..e0a40388 100644 --- a/all/agents/dqn.py +++ b/all/agents/dqn.py @@ -1,11 +1,11 @@ -import numpy as np import torch from torch.nn.functional import mse_loss + from ._agent import Agent class DQN(Agent): - ''' + """ Deep Q-Network (DQN). DQN was one of the original deep reinforcement learning algorithms. It extends the ideas behind Q-learning to work well with modern convolution networks. @@ -25,18 +25,19 @@ class DQN(Agent): n_actions (int): The number of available actions. replay_start_size (int): Number of experiences in replay buffer when training begins. update_frequency (int): Number of timesteps per training update. 
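The two reformatted _train hunks on either side of this point compute the bootstrap targets that distinguish DDQN from DQN: DDQN selects the next action with the online network and evaluates it with the target network, whereas DQN takes the maximum directly over the target network's values. A minimal sketch of the two targets, assuming the per-action value tensors for the next states have already been computed by the online and target networks (function names here are illustrative, not the library's API):

    import torch

    def dqn_target(rewards, next_values_target, discount_factor=0.99):
        # DQN: bootstrap from the max of the target network's action values
        return rewards + discount_factor * torch.max(next_values_target, dim=1)[0]

    def ddqn_target(rewards, next_values_online, next_values_target, discount_factor=0.99):
        # DDQN: the online network picks the action, the target network scores it
        next_actions = torch.argmax(next_values_online, dim=1)
        chosen = next_values_target.gather(1, next_actions.unsqueeze(1)).squeeze(1)
        return rewards + discount_factor * chosen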
- ''' + """ - def __init__(self, - q, - policy, - replay_buffer, - discount_factor=0.99, - loss=mse_loss, - minibatch_size=32, - replay_start_size=5000, - update_frequency=1, - ): + def __init__( + self, + q, + policy, + replay_buffer, + discount_factor=0.99, + loss=mse_loss, + minibatch_size=32, + replay_start_size=5000, + update_frequency=1, + ): # objects self.q = q self.policy = policy @@ -65,11 +66,16 @@ def eval(self, state): def _train(self): if self._should_train(): # sample transitions from buffer - (states, actions, rewards, next_states, _) = self.replay_buffer.sample(self.minibatch_size) + (states, actions, rewards, next_states, _) = self.replay_buffer.sample( + self.minibatch_size + ) # forward pass values = self.q(states, actions) # compute targets - targets = rewards + self.discount_factor * torch.max(self.q.target(next_states), dim=1)[0] + targets = ( + rewards + + self.discount_factor * torch.max(self.q.target(next_states), dim=1)[0] + ) # compute loss loss = self.loss(values, targets) # backward pass @@ -77,7 +83,10 @@ def _train(self): def _should_train(self): self._frames_seen += 1 - return (self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0) + return ( + self._frames_seen > self.replay_start_size + and self._frames_seen % self.update_frequency == 0 + ) class DQNTestAgent(Agent): diff --git a/all/agents/independent.py b/all/agents/independent.py index 32c3275f..886d398d 100644 --- a/all/agents/independent.py +++ b/all/agents/independent.py @@ -6,4 +6,4 @@ def __init__(self, agents): self.agents = agents def act(self, state): - return self.agents[state['agent']].act(state) + return self.agents[state["agent"]].act(state) diff --git a/all/agents/ppo.py b/all/agents/ppo.py index d13cde08..ce699279 100644 --- a/all/agents/ppo.py +++ b/all/agents/ppo.py @@ -1,8 +1,9 @@ import torch from torch.nn.functional import mse_loss + from all.logging import DummyLogger from all.memory import GeneralizedAdvantageBuffer -from ._agent import Agent + from ._parallel_agent import ParallelAgent from .a2c import A2CTestAgent @@ -31,20 +32,20 @@ class PPO(ParallelAgent): """ def __init__( - self, - features, - v, - policy, - discount_factor=0.99, - entropy_loss_scaling=0.01, - epochs=4, - epsilon=0.2, - lam=0.95, - minibatches=4, - compute_batch_size=256, - n_envs=None, - n_steps=4, - logger=DummyLogger() + self, + features, + v, + policy, + discount_factor=0.99, + entropy_loss_scaling=0.01, + epochs=4, + epsilon=0.2, + lam=0.95, + minibatches=4, + compute_batch_size=256, + n_envs=None, + n_steps=4, + logger=DummyLogger(), ): if n_envs is None: raise RuntimeError("Must specify n_envs.") @@ -85,10 +86,18 @@ def _train(self, next_states): states, actions, advantages = self._buffer.advantages(next_states) # compute target values - features = states.batch_execute(self.compute_batch_size, self.features.no_grad) - features['actions'] = actions - pi_0 = features.batch_execute(self.compute_batch_size, lambda s: self.policy.no_grad(s).log_prob(s['actions'])) - targets = features.batch_execute(self.compute_batch_size, self.v.no_grad) + advantages + features = states.batch_execute( + self.compute_batch_size, self.features.no_grad + ) + features["actions"] = actions + pi_0 = features.batch_execute( + self.compute_batch_size, + lambda s: self.policy.no_grad(s).log_prob(s["actions"]), + ) + targets = ( + features.batch_execute(self.compute_batch_size, self.v.no_grad) + + advantages + ) # train for several epochs for _ in range(self.epochs): @@ -105,7 +114,9 @@ def 
_train_epoch(self, states, actions, advantages, targets, pi_0): i = indexes[first:last] # perform a single training step - self._train_minibatch(states[i], actions[i], pi_0[i], advantages[i], targets[i]) + self._train_minibatch( + states[i], actions[i], pi_0[i], advantages[i], targets[i] + ) def _train_minibatch(self, states, actions, pi_0, advantages, targets): # forward pass @@ -116,7 +127,9 @@ def _train_minibatch(self, states, actions, pi_0, advantages, targets): # compute losses value_loss = mse_loss(values, targets) - policy_gradient_loss = self._clipped_policy_gradient_loss(pi_0, pi_i, advantages) + policy_gradient_loss = self._clipped_policy_gradient_loss( + pi_0, pi_i, advantages + ) entropy_loss = -distribution.entropy().mean() policy_loss = policy_gradient_loss + self.entropy_loss_scaling * entropy_loss loss = value_loss + policy_loss @@ -128,8 +141,8 @@ def _train_minibatch(self, states, actions, pi_0, advantages, targets): self.features.step() # debugging - self.logger.add_info('entropy', -entropy_loss) - self.logger.add_info('normalized_value_error', value_loss / targets.var()) + self.logger.add_info("entropy", -entropy_loss) + self.logger.add_info("normalized_value_error", value_loss / targets.var()) def _clipped_policy_gradient_loss(self, pi_0, pi_i, advantages): ratios = torch.exp(pi_i - pi_0) @@ -146,7 +159,7 @@ def _make_buffer(self): self.n_envs, discount_factor=self.discount_factor, lam=self.lam, - compute_batch_size=self.compute_batch_size + compute_batch_size=self.compute_batch_size, ) diff --git a/all/agents/sac.py b/all/agents/sac.py index c81e47ca..ef537c39 100644 --- a/all/agents/sac.py +++ b/all/agents/sac.py @@ -1,6 +1,8 @@ import torch from torch.nn.functional import mse_loss + from all.logging import DummyLogger + from ._agent import Agent @@ -29,21 +31,22 @@ class SAC(Agent): update_frequency (int): Number of timesteps per training update. 
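For reference, the entropy_backups flag added to SAC in the previous patch controls whether the entropy bonus is folded into the bootstrapped Q target; when it is disabled, only the clipped double-Q value is backed up. A minimal sketch of that target computation, assuming min_q already holds the element-wise minimum of the two target Q estimates for the sampled next actions and temperature is the current entropy coefficient:

    def sac_q_target(rewards, min_q, next_log_probs, temperature,
                     discount_factor=0.99, entropy_backups=True):
        # soft backup: r + gamma * (min_i Q_i(s', a') - alpha * log pi(a'|s'))
        q_targets = rewards + discount_factor * min_q
        if entropy_backups:
            # subtracting the scaled log-probability adds the entropy term to the target
            q_targets = q_targets - discount_factor * temperature * next_log_probs
        return q_targets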
""" - def __init__(self, - policy, - q1, - q2, - replay_buffer, - discount_factor=0.99, - entropy_backups=True, - entropy_target=-2., - lr_temperature=1e-4, - minibatch_size=32, - replay_start_size=5000, - temperature_initial=0.1, - update_frequency=1, - logger=DummyLogger() - ): + def __init__( + self, + policy, + q1, + q2, + replay_buffer, + discount_factor=0.99, + entropy_backups=True, + entropy_target=-2.0, + lr_temperature=1e-4, + minibatch_size=32, + replay_start_size=5000, + temperature_initial=0.1, + update_frequency=1, + logger=DummyLogger(), + ): # objects self.policy = policy self.q1 = q1 @@ -74,7 +77,9 @@ def act(self, state): def _train(self): if self._should_train(): # sample from replay buffer - (states, actions, rewards, next_states, _) = self.replay_buffer.sample(self.minibatch_size) + (states, actions, rewards, next_states, _) = self.replay_buffer.sample( + self.minibatch_size + ) discount_factor = self.discount_factor # compute targets for Q and V @@ -100,21 +105,28 @@ def _train(self): self.q1.zero_grad() # adjust temperature - temperature_grad = (new_log_probs + self.entropy_target).mean() * self.temperature - self.temperature = max(0, self.temperature + self.lr_temperature * temperature_grad.detach()) + temperature_grad = ( + new_log_probs + self.entropy_target + ).mean() * self.temperature + self.temperature = max( + 0, self.temperature + self.lr_temperature * temperature_grad.detach() + ) # additional debugging info - self.logger.add_info('entropy', -new_log_probs.mean()) - self.logger.add_info('q_values', q_values.mean()) - self.logger.add_info('rewards', rewards.mean()) - self.logger.add_info('normalized_q1_error', q1_loss / q_targets.var()) - self.logger.add_info('normalized_q2_error', q2_loss / q_targets.var()) - self.logger.add_info('temperature', self.temperature) - self.logger.add_info('temperature_grad', temperature_grad) + self.logger.add_info("entropy", -new_log_probs.mean()) + self.logger.add_info("q_values", q_values.mean()) + self.logger.add_info("rewards", rewards.mean()) + self.logger.add_info("normalized_q1_error", q1_loss / q_targets.var()) + self.logger.add_info("normalized_q2_error", q2_loss / q_targets.var()) + self.logger.add_info("temperature", self.temperature) + self.logger.add_info("temperature_grad", temperature_grad) def _should_train(self): self._frames_seen += 1 - return self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0 + return ( + self._frames_seen > self.replay_start_size + and self._frames_seen % self.update_frequency == 0 + ) class SACTestAgent(Agent): diff --git a/all/agents/vac.py b/all/agents/vac.py index b11e55ef..60655b1d 100644 --- a/all/agents/vac.py +++ b/all/agents/vac.py @@ -1,11 +1,11 @@ from torch.nn.functional import mse_loss -from ._agent import Agent + from ._parallel_agent import ParallelAgent from .a2c import A2CTestAgent class VAC(ParallelAgent): - ''' + """ Vanilla Actor-Critic (VAC). VAC is an implementation of the actor-critic alogorithm found in the Sutton and Barto (2018) textbook. This implementation tweaks the algorithm slightly by using a shared feature layer. @@ -20,7 +20,7 @@ class VAC(ParallelAgent): n_envs (int): Number of parallel actors/environments n_steps (int): Number of timesteps per rollout. Updates are performed once per rollout. logger (Logger): Used for logging. 
- ''' + """ def __init__(self, features, v, policy, discount_factor=1): self.features = features @@ -47,12 +47,16 @@ def _train(self, state, reward): values = self.v(self._features) # compute targets - targets = reward + self.discount_factor * self.v.target(self.features.target(state)) + targets = reward + self.discount_factor * self.v.target( + self.features.target(state) + ) advantages = targets - values.detach() # compute losses value_loss = mse_loss(values, targets) - policy_loss = -(advantages * self._distribution.log_prob(self._action)).mean() + policy_loss = -( + advantages * self._distribution.log_prob(self._action) + ).mean() loss = value_loss + policy_loss # backward pass diff --git a/all/agents/vpg.py b/all/agents/vpg.py index e46e09ac..c0b54512 100644 --- a/all/agents/vpg.py +++ b/all/agents/vpg.py @@ -1,12 +1,14 @@ import torch from torch.nn.functional import mse_loss + from all.core import State + from ._agent import Agent from .a2c import A2CTestAgent class VPG(Agent): - ''' + """ Vanilla Policy Gradient (VPG/REINFORCE). VPG (also known as REINFORCE) is the least biased implementation of the policy gradient theorem. It uses complete episode rollouts as unbiased estimates of the Q-function, rather than the n-step @@ -25,16 +27,9 @@ class VPG(Agent): min_batch_size (int): Updates will occurs when an episode ends after at least this many state-action pairs are seen. Set this to a large value in order to train on multiple episodes at once. - ''' - - def __init__( - self, - features, - v, - policy, - discount_factor=0.99, - min_batch_size=1 - ): + """ + + def __init__(self, features, v, policy, discount_factor=0.99, min_batch_size=1): self.features = features self.v = v self.policy = policy @@ -92,21 +87,20 @@ def _terminal(self, state, reward): def _train(self): # forward pass - values = torch.cat([ - self.v(features) - for (features, _, _) - in self._trajectories - ]) + values = torch.cat( + [self.v(features) for (features, _, _) in self._trajectories] + ) # forward passes for log_pis were stored during execution log_pis = torch.cat([log_pis for (_, _, log_pis) in self._trajectories]) # compute targets - targets = torch.cat([ - self._compute_discounted_returns(rewards) - for (_, rewards, _) - in self._trajectories - ]) + targets = torch.cat( + [ + self._compute_discounted_returns(rewards) + for (_, rewards, _) in self._trajectories + ] + ) advantages = targets - values.detach() # compute losses diff --git a/all/agents/vqn.py b/all/agents/vqn.py index b4847b13..49ae9673 100644 --- a/all/agents/vqn.py +++ b/all/agents/vqn.py @@ -1,12 +1,12 @@ import torch from torch.nn.functional import mse_loss + from ._agent import Agent from ._parallel_agent import ParallelAgent -from .dqn import DQNTestAgent class VQN(ParallelAgent): - ''' + """ Vanilla Q-Network (VQN). VQN is an implementation of the Q-learning algorithm found in the Sutton and Barto (2018) textbook. Q-learning algorithms attempt to learning the optimal policy while executing a (generally) @@ -19,7 +19,7 @@ class VQN(ParallelAgent): q (QNetwork): An Approximation of the Q function. policy (GreedyPolicy): A policy derived from the Q-function. discount_factor (float): Discount factor for future rewards. 
- ''' + """ def __init__(self, q, policy, discount_factor=0.99): self.q = q @@ -43,7 +43,10 @@ def _train(self, reward, next_state): # forward pass value = self.q(self._state, self._action) # compute target - target = reward + self.discount_factor * torch.max(self.q.target(next_state), dim=1)[0] + target = ( + reward + + self.discount_factor * torch.max(self.q.target(next_state), dim=1)[0] + ) # compute loss loss = mse_loss(value, target) # backward pass diff --git a/all/agents/vsarsa.py b/all/agents/vsarsa.py index d0b51646..a96e1b08 100644 --- a/all/agents/vsarsa.py +++ b/all/agents/vsarsa.py @@ -1,10 +1,11 @@ from torch.nn.functional import mse_loss + from ._parallel_agent import ParallelAgent from .vqn import VQNTestAgent class VSarsa(ParallelAgent): - ''' + """ Vanilla SARSA (VSarsa). SARSA (State-Action-Reward-State-Action) is an on-policy alternative to Q-learning. Unlike Q-learning, SARSA attempts to learn the Q-function for the current policy rather than the optimal policy. This @@ -15,7 +16,7 @@ class VSarsa(ParallelAgent): q (QNetwork): An Approximation of the Q function. policy (GreedyPolicy): A policy derived from the Q-function. discount_factor (float): Discount factor for future rewards. - ''' + """ def __init__(self, q, policy, discount_factor=0.99): self.q = q @@ -39,7 +40,9 @@ def _train(self, reward, next_state, next_action): # forward pass value = self.q(self._state, self._action) # compute target - target = reward + self.discount_factor * self.q.target(next_state, next_action) + target = reward + self.discount_factor * self.q.target( + next_state, next_action + ) # compute loss loss = mse_loss(value, target) # backward pass diff --git a/all/approximation/__init__.py b/all/approximation/__init__.py index 2890f66c..89865f0e 100644 --- a/all/approximation/__init__.py +++ b/all/approximation/__init__.py @@ -1,13 +1,12 @@ from .approximation import Approximation +from .checkpointer import Checkpointer, DummyCheckpointer, PeriodicCheckpointer +from .feature_network import FeatureNetwork +from .identity import Identity from .q_continuous import QContinuous from .q_dist import QDist from .q_network import QNetwork +from .target import FixedTarget, PolyakTarget, TargetNetwork, TrivialTarget from .v_network import VNetwork -from .feature_network import FeatureNetwork -from .identity import Identity -from .target import TargetNetwork, FixedTarget, PolyakTarget, TrivialTarget -from .checkpointer import Checkpointer, DummyCheckpointer, PeriodicCheckpointer - __all__ = [ "Approximation", diff --git a/all/approximation/approximation.py b/all/approximation/approximation.py index 3c346650..84a03966 100644 --- a/all/approximation/approximation.py +++ b/all/approximation/approximation.py @@ -1,15 +1,18 @@ import os + import torch from torch.nn import utils + from all.logging import DummyLogger -from .target import TrivialTarget + from .checkpointer import DummyCheckpointer +from .target import TrivialTarget DEFAULT_CHECKPOINT_FREQUENCY = 200 -class Approximation(): - ''' +class Approximation: + """ Base function approximation object. This defines a Pytorch-based function approximation object that @@ -48,20 +51,20 @@ class Approximation(): logger (all.logging.Logger:, optional): A Logger object used for logging. The standard object logs to tensorboard, however, other types of Logger objects may be implemented by the user. 
- ''' + """ def __init__( - self, - model, - optimizer=None, - checkpointer=None, - clip_grad=0, - device=None, - loss_scaling=1, - name='approximation', - scheduler=None, - target=None, - logger=DummyLogger(), + self, + model, + optimizer=None, + checkpointer=None, + clip_grad=0, + device=None, + loss_scaling=1, + name="approximation", + scheduler=None, + target=None, + logger=DummyLogger(), ): self.model = model self.device = device if device else next(model.parameters()).device @@ -79,27 +82,24 @@ def __init__( if checkpointer is None: checkpointer = DummyCheckpointer() self._checkpointer = checkpointer - self._checkpointer.init( - self.model, - os.path.join(logger.log_dir, name + '.pt') - ) + self._checkpointer.init(self.model, os.path.join(logger.log_dir, name + ".pt")) def __call__(self, *inputs): - ''' + """ Run a forward pass of the model. - ''' + """ return self.model(*inputs) def no_grad(self, *inputs): - '''Run a forward pass of the model in no_grad mode.''' + """Run a forward pass of the model in no_grad mode.""" with torch.no_grad(): return self.model(*inputs) def eval(self, *inputs): - ''' + """ Run a forward pass of the model in eval mode with no_grad. The model is returned to its previous mode afer the forward pass is made. - ''' + """ with torch.no_grad(): # check current mode mode = self.model.training @@ -112,11 +112,11 @@ def eval(self, *inputs): return result def target(self, *inputs): - '''Run a forward pass of the target network.''' + """Run a forward pass of the target network.""" return self._target(*inputs) def reinforce(self, loss): - ''' + """ Backpropagate the loss through the model and make an update step. Internally, this will perform most of the activities associated with a control loop in standard machine learning environments, depending on the configuration of the object: @@ -127,14 +127,14 @@ def reinforce(self, loss): Returns: self: The current Approximation object - ''' + """ loss = self._loss_scaling * loss loss.backward() self.step(loss=loss) return self def step(self, loss=None): - ''' + """ Given that a backward pass has been made, run an optimization step. Internally, this will perform most of the activities associated with a control loop in standard machine learning environments, depending on the configuration of the object: @@ -145,7 +145,7 @@ def step(self, loss=None): Returns: self: The current Approximation object - ''' + """ if loss is not None: self._logger.add_loss(self._name, loss.detach()) self._clip_grad_norm() @@ -157,22 +157,26 @@ def step(self, loss=None): return self def zero_grad(self): - ''' + """ Clears the gradients of all optimized tensors Returns: self: The current Approximation object - ''' + """ self._optimizer.zero_grad() return self def _clip_grad_norm(self): - '''Clip the gradient norm if set. Raises RuntimeError if norm is non-finite.''' + """Clip the gradient norm if set. Raises RuntimeError if norm is non-finite.""" if self._clip_grad != 0: - utils.clip_grad_norm_(self.model.parameters(), self._clip_grad, error_if_nonfinite=True) + utils.clip_grad_norm_( + self.model.parameters(), self._clip_grad, error_if_nonfinite=True + ) def _step_lr_scheduler(self): - '''Step the . Raises RuntimeError if norm is non-finite.''' + """Step the . 
Raises RuntimeError if norm is non-finite.""" if self._scheduler: - self._logger.add_schedule(self._name + '/lr', self._optimizer.param_groups[0]['lr']) + self._logger.add_schedule( + self._name + "/lr", self._optimizer.param_groups[0]["lr"] + ) self._scheduler.step() diff --git a/all/approximation/checkpointer/__init__.py b/all/approximation/checkpointer/__init__.py index 36047f9d..495eca2e 100644 --- a/all/approximation/checkpointer/__init__.py +++ b/all/approximation/checkpointer/__init__.py @@ -1,5 +1,6 @@ import warnings -from abc import abstractmethod, ABC +from abc import ABC, abstractmethod + import torch diff --git a/all/approximation/feature_network.py b/all/approximation/feature_network.py index 559cbbe4..af3aea02 100644 --- a/all/approximation/feature_network.py +++ b/all/approximation/feature_network.py @@ -1,14 +1,15 @@ import torch + from .approximation import Approximation class FeatureNetwork(Approximation): - ''' + """ An Approximation that accepts a state updates the observation key based on the given model. - ''' + """ - def __init__(self, model, optimizer=None, name='feature', **kwargs): + def __init__(self, model, optimizer=None, name="feature", **kwargs): model = FeatureModule(model) super().__init__(model, optimizer, name=name, **kwargs) @@ -19,5 +20,5 @@ def __init__(self, model): self.model = model def forward(self, states): - features = states.as_output(self.model(states.as_input('observation'))) - return states.update('observation', features) + features = states.as_output(self.model(states.as_input("observation"))) + return states.update("observation", features) diff --git a/all/approximation/feature_network_test.py b/all/approximation/feature_network_test.py index e8190e99..8a76b42d 100644 --- a/all/approximation/feature_network_test.py +++ b/all/approximation/feature_network_test.py @@ -1,10 +1,11 @@ import unittest + import torch -from torch import nn import torch_testing as tt -from all.core import State -from all.approximation.feature_network import FeatureNetwork +from torch import nn +from all.approximation.feature_network import FeatureNetwork +from all.core import State STATE_DIM = 2 @@ -16,20 +17,21 @@ def setUp(self): optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1) self.features = FeatureNetwork(self.model, optimizer) - self.states = State({ - 'observation': torch.randn(3, STATE_DIM), - 'mask': torch.tensor([1, 0, 1]) - }) - self.expected_features = State({ - 'observation': torch.tensor( - [ - [-0.2385, -0.7263, -0.0340], - [-0.3569, -0.6612, 0.3485], - [-0.0296, -0.7566, -0.4624], - ] - ), - 'mask': torch.tensor([1, 0, 1]) - }) + self.states = State( + {"observation": torch.randn(3, STATE_DIM), "mask": torch.tensor([1, 0, 1])} + ) + self.expected_features = State( + { + "observation": torch.tensor( + [ + [-0.2385, -0.7263, -0.0340], + [-0.3569, -0.6612, 0.3485], + [-0.0296, -0.7566, -0.4624], + ] + ), + "mask": torch.tensor([1, 0, 1]), + } + ) def test_forward(self): features = self.features(self.states) @@ -41,14 +43,14 @@ def test_backward(self): loss = torch.sum(states.observation) self.features.reinforce(loss) features = self.features(self.states) - expected = State({ - 'observation': torch.tensor([ - [-0.71, -1.2, -0.5], - [-0.72, -1.03, -0.02], - [-0.57, -1.3, -1.01] - ]), - 'mask': torch.tensor([1, 0, 1]), - }) + expected = State( + { + "observation": torch.tensor( + [[-0.71, -1.2, -0.5], [-0.72, -1.03, -0.02], [-0.57, -1.3, -1.01]] + ), + "mask": torch.tensor([1, 0, 1]), + } + ) self.assert_state_equal(features, expected) def 
test_eval(self): diff --git a/all/approximation/identity.py b/all/approximation/identity.py index 77353b9b..db4bb87a 100644 --- a/all/approximation/identity.py +++ b/all/approximation/identity.py @@ -1,16 +1,16 @@ -import torch from torch import nn + from .approximation import Approximation class Identity(Approximation): - ''' + """ An Approximation that represents the identity function. Because the model has no parameters, reinforce and step do nothing. - ''' + """ - def __init__(self, device, name='identity', **kwargs): + def __init__(self, device, name="identity", **kwargs): super().__init__(nn.Identity(), None, device=device, name=name, **kwargs) def reinforce(self): diff --git a/all/approximation/identity_test.py b/all/approximation/identity_test.py index 7af58afd..a5ef493b 100644 --- a/all/approximation/identity_test.py +++ b/all/approximation/identity_test.py @@ -1,13 +1,15 @@ import unittest + import torch import torch_testing as tt + +from all.approximation import FixedTarget, Identity from all.core import State -from all.approximation import Identity, FixedTarget class TestIdentityNetwork(unittest.TestCase): def setUp(self): - self.model = Identity('cpu', target=FixedTarget(10)) + self.model = Identity("cpu", target=FixedTarget(10)) def test_forward_tensor(self): inputs = torch.tensor([1, 2, 3]) @@ -15,9 +17,7 @@ def test_forward_tensor(self): tt.assert_equal(inputs, outputs) def test_forward_state(self): - inputs = State({ - 'observation': torch.tensor([1, 2, 3]) - }) + inputs = State({"observation": torch.tensor([1, 2, 3])}) outputs = self.model(inputs) self.assertEqual(inputs, outputs) diff --git a/all/approximation/q_continuous.py b/all/approximation/q_continuous.py index 29dc754d..c53cee3c 100644 --- a/all/approximation/q_continuous.py +++ b/all/approximation/q_continuous.py @@ -1,23 +1,14 @@ import torch + from all.nn import RLNetwork + from .approximation import Approximation class QContinuous(Approximation): - def __init__( - self, - model, - optimizer, - name='q', - **kwargs - ): + def __init__(self, model, optimizer, name="q", **kwargs): model = QContinuousModule(model) - super().__init__( - model, - optimizer, - name=name, - **kwargs - ) + super().__init__(model, optimizer, name=name, **kwargs) class QContinuousModule(RLNetwork): diff --git a/all/approximation/q_dist.py b/all/approximation/q_dist.py index eb195169..c2e9a854 100644 --- a/all/approximation/q_dist.py +++ b/all/approximation/q_dist.py @@ -1,20 +1,22 @@ import torch from torch.nn import functional as F + from all import nn + from .approximation import Approximation class QDist(Approximation): def __init__( - self, - model, - optimizer, - n_actions, - n_atoms, - v_min, - v_max, - name="q_dist", - **kwargs + self, + model, + optimizer, + n_actions, + n_atoms, + v_min, + v_max, + name="q_dist", + **kwargs ): device = next(model.parameters()).device self.n_actions = n_actions diff --git a/all/approximation/q_dist_test.py b/all/approximation/q_dist_test.py index 778db4ee..06df95a4 100644 --- a/all/approximation/q_dist_test.py +++ b/all/approximation/q_dist_test.py @@ -1,9 +1,11 @@ import unittest + import torch -from torch import nn import torch_testing as tt -from all.core import StateArray +from torch import nn + from all.approximation import QDist +from all.core import StateArray STATE_DIM = 1 ACTIONS = 2 @@ -73,7 +75,9 @@ def test_single_q_values(self): ) def test_done(self): - states = StateArray(torch.randn((3, STATE_DIM)), (3,), mask=torch.tensor([1, 0, 1])) + states = StateArray( + torch.randn((3, 
STATE_DIM)), (3,), mask=torch.tensor([1, 0, 1]) + ) probs = self.q(states) self.assertEqual(probs.shape, (3, ACTIONS, ATOMS)) tt.assert_almost_equal( @@ -134,141 +138,997 @@ def _loss(dist, target_dist): def test_project_dist(self): # This gave problems in the past between different cuda version, # so a test was added. - q = QDist(self.model, self.optimizer, ACTIONS, 51, -10., 10.) - dist = torch.tensor([ - [0.0190, 0.0197, 0.0200, 0.0190, 0.0195, 0.0198, 0.0194, 0.0192, 0.0201, - 0.0203, 0.0189, 0.0190, 0.0199, 0.0193, 0.0192, 0.0199, 0.0198, 0.0197, - 0.0193, 0.0198, 0.0192, 0.0191, 0.0200, 0.0202, 0.0191, 0.0202, 0.0198, - 0.0200, 0.0198, 0.0193, 0.0192, 0.0202, 0.0192, 0.0194, 0.0199, 0.0197, - 0.0197, 0.0201, 0.0199, 0.0190, 0.0192, 0.0195, 0.0202, 0.0194, 0.0203, - 0.0201, 0.0190, 0.0192, 0.0201, 0.0201, 0.0192], - [0.0191, 0.0197, 0.0200, 0.0190, 0.0195, 0.0198, 0.0194, 0.0192, 0.0201, - 0.0203, 0.0190, 0.0190, 0.0199, 0.0193, 0.0192, 0.0199, 0.0198, 0.0197, - 0.0193, 0.0198, 0.0192, 0.0191, 0.0200, 0.0202, 0.0191, 0.0202, 0.0198, - 0.0200, 0.0198, 0.0193, 0.0192, 0.0202, 0.0192, 0.0194, 0.0199, 0.0197, - 0.0197, 0.0200, 0.0199, 0.0190, 0.0192, 0.0195, 0.0202, 0.0194, 0.0203, - 0.0201, 0.0190, 0.0192, 0.0201, 0.0200, 0.0192], - [0.0191, 0.0197, 0.0200, 0.0190, 0.0195, 0.0198, 0.0194, 0.0192, 0.0200, - 0.0203, 0.0190, 0.0191, 0.0199, 0.0193, 0.0192, 0.0199, 0.0198, 0.0197, - 0.0193, 0.0198, 0.0192, 0.0191, 0.0199, 0.0202, 0.0192, 0.0202, 0.0198, - 0.0200, 0.0198, 0.0193, 0.0192, 0.0202, 0.0192, 0.0194, 0.0199, 0.0197, - 0.0197, 0.0200, 0.0199, 0.0190, 0.0192, 0.0195, 0.0202, 0.0194, 0.0203, - 0.0201, 0.0190, 0.0192, 0.0201, 0.0200, 0.0192] - ]) - support = torch.tensor([ - [-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862, - -6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812, - -3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762, - -0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287, - 2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337, - 5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386, - 8.9268, 9.3149, 9.7030], - [-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862, - -6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812, - -3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762, - -0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287, - 2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337, - 5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386, - 8.9268, 9.3149, 9.7030], - [-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862, - -6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812, - -3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762, - -0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287, - 2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337, - 5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386, - 8.9268, 9.3149, 9.7030] - ]) - expected = torch.tensor([ - [0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202, - 0.0208, 0.0201, 0.0195, 0.0201, 0.0201, 0.0198, 0.0203, 0.0204, 0.0203, - 0.0200, 0.0203, 0.0199, 0.0197, 0.0205, 0.0208, 0.0197, 0.0214, 0.0204, - 0.0206, 0.0203, 0.0199, 0.0199, 0.0206, 0.0198, 0.0201, 0.0204, 0.0203, - 0.0204, 0.0206, 0.0201, 0.0197, 0.0199, 0.0204, 0.0204, 0.0205, 0.0208, - 0.0200, 0.0197, 0.0204, 0.0207, 0.0200, 0.0049], - [0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 
0.0202, 0.0202, 0.0199, 0.0202, - 0.0208, 0.0202, 0.0196, 0.0201, 0.0201, 0.0198, 0.0203, 0.0204, 0.0203, - 0.0200, 0.0203, 0.0199, 0.0197, 0.0205, 0.0208, 0.0197, 0.0214, 0.0204, - 0.0206, 0.0203, 0.0199, 0.0199, 0.0206, 0.0198, 0.0201, 0.0204, 0.0203, - 0.0204, 0.0206, 0.0201, 0.0197, 0.0199, 0.0204, 0.0204, 0.0205, 0.0208, - 0.0200, 0.0197, 0.0204, 0.0206, 0.0200, 0.0049], - [0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202, - 0.0208, 0.0202, 0.0196, 0.0202, 0.0201, 0.0198, 0.0203, 0.0204, 0.0203, - 0.0200, 0.0203, 0.0199, 0.0197, 0.0204, 0.0208, 0.0198, 0.0214, 0.0204, - 0.0206, 0.0203, 0.0199, 0.0199, 0.0206, 0.0198, 0.0201, 0.0204, 0.0203, - 0.0204, 0.0206, 0.0201, 0.0197, 0.0199, 0.0204, 0.0204, 0.0205, 0.0208, - 0.0200, 0.0197, 0.0204, 0.0206, 0.0200, 0.0049] - ]) - tt.assert_almost_equal(q.project(dist, support).cpu(), expected.cpu(), decimal=3) + q = QDist(self.model, self.optimizer, ACTIONS, 51, -10.0, 10.0) + dist = torch.tensor( + [ + [ + 0.0190, + 0.0197, + 0.0200, + 0.0190, + 0.0195, + 0.0198, + 0.0194, + 0.0192, + 0.0201, + 0.0203, + 0.0189, + 0.0190, + 0.0199, + 0.0193, + 0.0192, + 0.0199, + 0.0198, + 0.0197, + 0.0193, + 0.0198, + 0.0192, + 0.0191, + 0.0200, + 0.0202, + 0.0191, + 0.0202, + 0.0198, + 0.0200, + 0.0198, + 0.0193, + 0.0192, + 0.0202, + 0.0192, + 0.0194, + 0.0199, + 0.0197, + 0.0197, + 0.0201, + 0.0199, + 0.0190, + 0.0192, + 0.0195, + 0.0202, + 0.0194, + 0.0203, + 0.0201, + 0.0190, + 0.0192, + 0.0201, + 0.0201, + 0.0192, + ], + [ + 0.0191, + 0.0197, + 0.0200, + 0.0190, + 0.0195, + 0.0198, + 0.0194, + 0.0192, + 0.0201, + 0.0203, + 0.0190, + 0.0190, + 0.0199, + 0.0193, + 0.0192, + 0.0199, + 0.0198, + 0.0197, + 0.0193, + 0.0198, + 0.0192, + 0.0191, + 0.0200, + 0.0202, + 0.0191, + 0.0202, + 0.0198, + 0.0200, + 0.0198, + 0.0193, + 0.0192, + 0.0202, + 0.0192, + 0.0194, + 0.0199, + 0.0197, + 0.0197, + 0.0200, + 0.0199, + 0.0190, + 0.0192, + 0.0195, + 0.0202, + 0.0194, + 0.0203, + 0.0201, + 0.0190, + 0.0192, + 0.0201, + 0.0200, + 0.0192, + ], + [ + 0.0191, + 0.0197, + 0.0200, + 0.0190, + 0.0195, + 0.0198, + 0.0194, + 0.0192, + 0.0200, + 0.0203, + 0.0190, + 0.0191, + 0.0199, + 0.0193, + 0.0192, + 0.0199, + 0.0198, + 0.0197, + 0.0193, + 0.0198, + 0.0192, + 0.0191, + 0.0199, + 0.0202, + 0.0192, + 0.0202, + 0.0198, + 0.0200, + 0.0198, + 0.0193, + 0.0192, + 0.0202, + 0.0192, + 0.0194, + 0.0199, + 0.0197, + 0.0197, + 0.0200, + 0.0199, + 0.0190, + 0.0192, + 0.0195, + 0.0202, + 0.0194, + 0.0203, + 0.0201, + 0.0190, + 0.0192, + 0.0201, + 0.0200, + 0.0192, + ], + ] + ) + support = torch.tensor( + [ + [ + -9.7030, + -9.3149, + -8.9268, + -8.5386, + -8.1505, + -7.7624, + -7.3743, + -6.9862, + -6.5980, + -6.2099, + -5.8218, + -5.4337, + -5.0456, + -4.6574, + -4.2693, + -3.8812, + -3.4931, + -3.1050, + -2.7168, + -2.3287, + -1.9406, + -1.5525, + -1.1644, + -0.7762, + -0.3881, + 0.0000, + 0.3881, + 0.7762, + 1.1644, + 1.5525, + 1.9406, + 2.3287, + 2.7168, + 3.1050, + 3.4931, + 3.8812, + 4.2693, + 4.6574, + 5.0456, + 5.4337, + 5.8218, + 6.2099, + 6.5980, + 6.9862, + 7.3743, + 7.7624, + 8.1505, + 8.5386, + 8.9268, + 9.3149, + 9.7030, + ], + [ + -9.7030, + -9.3149, + -8.9268, + -8.5386, + -8.1505, + -7.7624, + -7.3743, + -6.9862, + -6.5980, + -6.2099, + -5.8218, + -5.4337, + -5.0456, + -4.6574, + -4.2693, + -3.8812, + -3.4931, + -3.1050, + -2.7168, + -2.3287, + -1.9406, + -1.5525, + -1.1644, + -0.7762, + -0.3881, + 0.0000, + 0.3881, + 0.7762, + 1.1644, + 1.5525, + 1.9406, + 2.3287, + 2.7168, + 3.1050, + 3.4931, + 3.8812, + 4.2693, + 4.6574, + 5.0456, + 5.4337, + 
5.8218, + 6.2099, + 6.5980, + 6.9862, + 7.3743, + 7.7624, + 8.1505, + 8.5386, + 8.9268, + 9.3149, + 9.7030, + ], + [ + -9.7030, + -9.3149, + -8.9268, + -8.5386, + -8.1505, + -7.7624, + -7.3743, + -6.9862, + -6.5980, + -6.2099, + -5.8218, + -5.4337, + -5.0456, + -4.6574, + -4.2693, + -3.8812, + -3.4931, + -3.1050, + -2.7168, + -2.3287, + -1.9406, + -1.5525, + -1.1644, + -0.7762, + -0.3881, + 0.0000, + 0.3881, + 0.7762, + 1.1644, + 1.5525, + 1.9406, + 2.3287, + 2.7168, + 3.1050, + 3.4931, + 3.8812, + 4.2693, + 4.6574, + 5.0456, + 5.4337, + 5.8218, + 6.2099, + 6.5980, + 6.9862, + 7.3743, + 7.7624, + 8.1505, + 8.5386, + 8.9268, + 9.3149, + 9.7030, + ], + ] + ) + expected = torch.tensor( + [ + [ + 0.0049, + 0.0198, + 0.0204, + 0.0202, + 0.0198, + 0.0202, + 0.0202, + 0.0199, + 0.0202, + 0.0208, + 0.0201, + 0.0195, + 0.0201, + 0.0201, + 0.0198, + 0.0203, + 0.0204, + 0.0203, + 0.0200, + 0.0203, + 0.0199, + 0.0197, + 0.0205, + 0.0208, + 0.0197, + 0.0214, + 0.0204, + 0.0206, + 0.0203, + 0.0199, + 0.0199, + 0.0206, + 0.0198, + 0.0201, + 0.0204, + 0.0203, + 0.0204, + 0.0206, + 0.0201, + 0.0197, + 0.0199, + 0.0204, + 0.0204, + 0.0205, + 0.0208, + 0.0200, + 0.0197, + 0.0204, + 0.0207, + 0.0200, + 0.0049, + ], + [ + 0.0049, + 0.0198, + 0.0204, + 0.0202, + 0.0198, + 0.0202, + 0.0202, + 0.0199, + 0.0202, + 0.0208, + 0.0202, + 0.0196, + 0.0201, + 0.0201, + 0.0198, + 0.0203, + 0.0204, + 0.0203, + 0.0200, + 0.0203, + 0.0199, + 0.0197, + 0.0205, + 0.0208, + 0.0197, + 0.0214, + 0.0204, + 0.0206, + 0.0203, + 0.0199, + 0.0199, + 0.0206, + 0.0198, + 0.0201, + 0.0204, + 0.0203, + 0.0204, + 0.0206, + 0.0201, + 0.0197, + 0.0199, + 0.0204, + 0.0204, + 0.0205, + 0.0208, + 0.0200, + 0.0197, + 0.0204, + 0.0206, + 0.0200, + 0.0049, + ], + [ + 0.0049, + 0.0198, + 0.0204, + 0.0202, + 0.0198, + 0.0202, + 0.0202, + 0.0199, + 0.0202, + 0.0208, + 0.0202, + 0.0196, + 0.0202, + 0.0201, + 0.0198, + 0.0203, + 0.0204, + 0.0203, + 0.0200, + 0.0203, + 0.0199, + 0.0197, + 0.0204, + 0.0208, + 0.0198, + 0.0214, + 0.0204, + 0.0206, + 0.0203, + 0.0199, + 0.0199, + 0.0206, + 0.0198, + 0.0201, + 0.0204, + 0.0203, + 0.0204, + 0.0206, + 0.0201, + 0.0197, + 0.0199, + 0.0204, + 0.0204, + 0.0205, + 0.0208, + 0.0200, + 0.0197, + 0.0204, + 0.0206, + 0.0200, + 0.0049, + ], + ] + ) + tt.assert_almost_equal( + q.project(dist, support).cpu(), expected.cpu(), decimal=3 + ) def test_project_dist_cuda(self): if torch.cuda.is_available(): # This gave problems in the past between different cuda version, # so a test was added. - q = QDist(self.model.cuda(), self.optimizer, ACTIONS, 51, -10., 10.) 
- dist = torch.tensor([ - [0.0190, 0.0197, 0.0200, 0.0190, 0.0195, 0.0198, 0.0194, 0.0192, 0.0201, - 0.0203, 0.0189, 0.0190, 0.0199, 0.0193, 0.0192, 0.0199, 0.0198, 0.0197, - 0.0193, 0.0198, 0.0192, 0.0191, 0.0200, 0.0202, 0.0191, 0.0202, 0.0198, - 0.0200, 0.0198, 0.0193, 0.0192, 0.0202, 0.0192, 0.0194, 0.0199, 0.0197, - 0.0197, 0.0201, 0.0199, 0.0190, 0.0192, 0.0195, 0.0202, 0.0194, 0.0203, - 0.0201, 0.0190, 0.0192, 0.0201, 0.0201, 0.0192], - [0.0191, 0.0197, 0.0200, 0.0190, 0.0195, 0.0198, 0.0194, 0.0192, 0.0201, - 0.0203, 0.0190, 0.0190, 0.0199, 0.0193, 0.0192, 0.0199, 0.0198, 0.0197, - 0.0193, 0.0198, 0.0192, 0.0191, 0.0200, 0.0202, 0.0191, 0.0202, 0.0198, - 0.0200, 0.0198, 0.0193, 0.0192, 0.0202, 0.0192, 0.0194, 0.0199, 0.0197, - 0.0197, 0.0200, 0.0199, 0.0190, 0.0192, 0.0195, 0.0202, 0.0194, 0.0203, - 0.0201, 0.0190, 0.0192, 0.0201, 0.0200, 0.0192], - [0.0191, 0.0197, 0.0200, 0.0190, 0.0195, 0.0198, 0.0194, 0.0192, 0.0200, - 0.0203, 0.0190, 0.0191, 0.0199, 0.0193, 0.0192, 0.0199, 0.0198, 0.0197, - 0.0193, 0.0198, 0.0192, 0.0191, 0.0199, 0.0202, 0.0192, 0.0202, 0.0198, - 0.0200, 0.0198, 0.0193, 0.0192, 0.0202, 0.0192, 0.0194, 0.0199, 0.0197, - 0.0197, 0.0200, 0.0199, 0.0190, 0.0192, 0.0195, 0.0202, 0.0194, 0.0203, - 0.0201, 0.0190, 0.0192, 0.0201, 0.0200, 0.0192] - ]).cuda() - support = torch.tensor([ - [-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862, - -6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812, - -3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762, - -0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287, - 2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337, - 5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386, - 8.9268, 9.3149, 9.7030], - [-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862, - -6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812, - -3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762, - -0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287, - 2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337, - 5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386, - 8.9268, 9.3149, 9.7030], - [-9.7030, -9.3149, -8.9268, -8.5386, -8.1505, -7.7624, -7.3743, -6.9862, - -6.5980, -6.2099, -5.8218, -5.4337, -5.0456, -4.6574, -4.2693, -3.8812, - -3.4931, -3.1050, -2.7168, -2.3287, -1.9406, -1.5525, -1.1644, -0.7762, - -0.3881, 0.0000, 0.3881, 0.7762, 1.1644, 1.5525, 1.9406, 2.3287, - 2.7168, 3.1050, 3.4931, 3.8812, 4.2693, 4.6574, 5.0456, 5.4337, - 5.8218, 6.2099, 6.5980, 6.9862, 7.3743, 7.7624, 8.1505, 8.5386, - 8.9268, 9.3149, 9.7030] - ]).cuda() - expected = torch.tensor([ - [0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202, - 0.0208, 0.0201, 0.0195, 0.0201, 0.0201, 0.0198, 0.0203, 0.0204, 0.0203, - 0.0200, 0.0203, 0.0199, 0.0197, 0.0205, 0.0208, 0.0197, 0.0214, 0.0204, - 0.0206, 0.0203, 0.0199, 0.0199, 0.0206, 0.0198, 0.0201, 0.0204, 0.0203, - 0.0204, 0.0206, 0.0201, 0.0197, 0.0199, 0.0204, 0.0204, 0.0205, 0.0208, - 0.0200, 0.0197, 0.0204, 0.0207, 0.0200, 0.0049], - [0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202, - 0.0208, 0.0202, 0.0196, 0.0201, 0.0201, 0.0198, 0.0203, 0.0204, 0.0203, - 0.0200, 0.0203, 0.0199, 0.0197, 0.0205, 0.0208, 0.0197, 0.0214, 0.0204, - 0.0206, 0.0203, 0.0199, 0.0199, 0.0206, 0.0198, 0.0201, 0.0204, 0.0203, - 0.0204, 0.0206, 0.0201, 0.0197, 0.0199, 0.0204, 0.0204, 0.0205, 0.0208, - 0.0200, 0.0197, 0.0204, 0.0206, 0.0200, 
0.0049], - [0.0049, 0.0198, 0.0204, 0.0202, 0.0198, 0.0202, 0.0202, 0.0199, 0.0202, - 0.0208, 0.0202, 0.0196, 0.0202, 0.0201, 0.0198, 0.0203, 0.0204, 0.0203, - 0.0200, 0.0203, 0.0199, 0.0197, 0.0204, 0.0208, 0.0198, 0.0214, 0.0204, - 0.0206, 0.0203, 0.0199, 0.0199, 0.0206, 0.0198, 0.0201, 0.0204, 0.0203, - 0.0204, 0.0206, 0.0201, 0.0197, 0.0199, 0.0204, 0.0204, 0.0205, 0.0208, - 0.0200, 0.0197, 0.0204, 0.0206, 0.0200, 0.0049] - ]) - tt.assert_almost_equal(q.project(dist, support).cpu(), expected.cpu(), decimal=3) + q = QDist(self.model.cuda(), self.optimizer, ACTIONS, 51, -10.0, 10.0) + dist = torch.tensor( + [ + [ + 0.0190, + 0.0197, + 0.0200, + 0.0190, + 0.0195, + 0.0198, + 0.0194, + 0.0192, + 0.0201, + 0.0203, + 0.0189, + 0.0190, + 0.0199, + 0.0193, + 0.0192, + 0.0199, + 0.0198, + 0.0197, + 0.0193, + 0.0198, + 0.0192, + 0.0191, + 0.0200, + 0.0202, + 0.0191, + 0.0202, + 0.0198, + 0.0200, + 0.0198, + 0.0193, + 0.0192, + 0.0202, + 0.0192, + 0.0194, + 0.0199, + 0.0197, + 0.0197, + 0.0201, + 0.0199, + 0.0190, + 0.0192, + 0.0195, + 0.0202, + 0.0194, + 0.0203, + 0.0201, + 0.0190, + 0.0192, + 0.0201, + 0.0201, + 0.0192, + ], + [ + 0.0191, + 0.0197, + 0.0200, + 0.0190, + 0.0195, + 0.0198, + 0.0194, + 0.0192, + 0.0201, + 0.0203, + 0.0190, + 0.0190, + 0.0199, + 0.0193, + 0.0192, + 0.0199, + 0.0198, + 0.0197, + 0.0193, + 0.0198, + 0.0192, + 0.0191, + 0.0200, + 0.0202, + 0.0191, + 0.0202, + 0.0198, + 0.0200, + 0.0198, + 0.0193, + 0.0192, + 0.0202, + 0.0192, + 0.0194, + 0.0199, + 0.0197, + 0.0197, + 0.0200, + 0.0199, + 0.0190, + 0.0192, + 0.0195, + 0.0202, + 0.0194, + 0.0203, + 0.0201, + 0.0190, + 0.0192, + 0.0201, + 0.0200, + 0.0192, + ], + [ + 0.0191, + 0.0197, + 0.0200, + 0.0190, + 0.0195, + 0.0198, + 0.0194, + 0.0192, + 0.0200, + 0.0203, + 0.0190, + 0.0191, + 0.0199, + 0.0193, + 0.0192, + 0.0199, + 0.0198, + 0.0197, + 0.0193, + 0.0198, + 0.0192, + 0.0191, + 0.0199, + 0.0202, + 0.0192, + 0.0202, + 0.0198, + 0.0200, + 0.0198, + 0.0193, + 0.0192, + 0.0202, + 0.0192, + 0.0194, + 0.0199, + 0.0197, + 0.0197, + 0.0200, + 0.0199, + 0.0190, + 0.0192, + 0.0195, + 0.0202, + 0.0194, + 0.0203, + 0.0201, + 0.0190, + 0.0192, + 0.0201, + 0.0200, + 0.0192, + ], + ] + ).cuda() + support = torch.tensor( + [ + [ + -9.7030, + -9.3149, + -8.9268, + -8.5386, + -8.1505, + -7.7624, + -7.3743, + -6.9862, + -6.5980, + -6.2099, + -5.8218, + -5.4337, + -5.0456, + -4.6574, + -4.2693, + -3.8812, + -3.4931, + -3.1050, + -2.7168, + -2.3287, + -1.9406, + -1.5525, + -1.1644, + -0.7762, + -0.3881, + 0.0000, + 0.3881, + 0.7762, + 1.1644, + 1.5525, + 1.9406, + 2.3287, + 2.7168, + 3.1050, + 3.4931, + 3.8812, + 4.2693, + 4.6574, + 5.0456, + 5.4337, + 5.8218, + 6.2099, + 6.5980, + 6.9862, + 7.3743, + 7.7624, + 8.1505, + 8.5386, + 8.9268, + 9.3149, + 9.7030, + ], + [ + -9.7030, + -9.3149, + -8.9268, + -8.5386, + -8.1505, + -7.7624, + -7.3743, + -6.9862, + -6.5980, + -6.2099, + -5.8218, + -5.4337, + -5.0456, + -4.6574, + -4.2693, + -3.8812, + -3.4931, + -3.1050, + -2.7168, + -2.3287, + -1.9406, + -1.5525, + -1.1644, + -0.7762, + -0.3881, + 0.0000, + 0.3881, + 0.7762, + 1.1644, + 1.5525, + 1.9406, + 2.3287, + 2.7168, + 3.1050, + 3.4931, + 3.8812, + 4.2693, + 4.6574, + 5.0456, + 5.4337, + 5.8218, + 6.2099, + 6.5980, + 6.9862, + 7.3743, + 7.7624, + 8.1505, + 8.5386, + 8.9268, + 9.3149, + 9.7030, + ], + [ + -9.7030, + -9.3149, + -8.9268, + -8.5386, + -8.1505, + -7.7624, + -7.3743, + -6.9862, + -6.5980, + -6.2099, + -5.8218, + -5.4337, + -5.0456, + -4.6574, + -4.2693, + -3.8812, + -3.4931, + -3.1050, + -2.7168, + -2.3287, + -1.9406, + -1.5525, 
+ -1.1644, + -0.7762, + -0.3881, + 0.0000, + 0.3881, + 0.7762, + 1.1644, + 1.5525, + 1.9406, + 2.3287, + 2.7168, + 3.1050, + 3.4931, + 3.8812, + 4.2693, + 4.6574, + 5.0456, + 5.4337, + 5.8218, + 6.2099, + 6.5980, + 6.9862, + 7.3743, + 7.7624, + 8.1505, + 8.5386, + 8.9268, + 9.3149, + 9.7030, + ], + ] + ).cuda() + expected = torch.tensor( + [ + [ + 0.0049, + 0.0198, + 0.0204, + 0.0202, + 0.0198, + 0.0202, + 0.0202, + 0.0199, + 0.0202, + 0.0208, + 0.0201, + 0.0195, + 0.0201, + 0.0201, + 0.0198, + 0.0203, + 0.0204, + 0.0203, + 0.0200, + 0.0203, + 0.0199, + 0.0197, + 0.0205, + 0.0208, + 0.0197, + 0.0214, + 0.0204, + 0.0206, + 0.0203, + 0.0199, + 0.0199, + 0.0206, + 0.0198, + 0.0201, + 0.0204, + 0.0203, + 0.0204, + 0.0206, + 0.0201, + 0.0197, + 0.0199, + 0.0204, + 0.0204, + 0.0205, + 0.0208, + 0.0200, + 0.0197, + 0.0204, + 0.0207, + 0.0200, + 0.0049, + ], + [ + 0.0049, + 0.0198, + 0.0204, + 0.0202, + 0.0198, + 0.0202, + 0.0202, + 0.0199, + 0.0202, + 0.0208, + 0.0202, + 0.0196, + 0.0201, + 0.0201, + 0.0198, + 0.0203, + 0.0204, + 0.0203, + 0.0200, + 0.0203, + 0.0199, + 0.0197, + 0.0205, + 0.0208, + 0.0197, + 0.0214, + 0.0204, + 0.0206, + 0.0203, + 0.0199, + 0.0199, + 0.0206, + 0.0198, + 0.0201, + 0.0204, + 0.0203, + 0.0204, + 0.0206, + 0.0201, + 0.0197, + 0.0199, + 0.0204, + 0.0204, + 0.0205, + 0.0208, + 0.0200, + 0.0197, + 0.0204, + 0.0206, + 0.0200, + 0.0049, + ], + [ + 0.0049, + 0.0198, + 0.0204, + 0.0202, + 0.0198, + 0.0202, + 0.0202, + 0.0199, + 0.0202, + 0.0208, + 0.0202, + 0.0196, + 0.0202, + 0.0201, + 0.0198, + 0.0203, + 0.0204, + 0.0203, + 0.0200, + 0.0203, + 0.0199, + 0.0197, + 0.0204, + 0.0208, + 0.0198, + 0.0214, + 0.0204, + 0.0206, + 0.0203, + 0.0199, + 0.0199, + 0.0206, + 0.0198, + 0.0201, + 0.0204, + 0.0203, + 0.0204, + 0.0206, + 0.0201, + 0.0197, + 0.0199, + 0.0204, + 0.0204, + 0.0205, + 0.0208, + 0.0200, + 0.0197, + 0.0204, + 0.0206, + 0.0200, + 0.0049, + ], + ] + ) + tt.assert_almost_equal( + q.project(dist, support).cpu(), expected.cpu(), decimal=3 + ) if __name__ == "__main__": diff --git a/all/approximation/q_network.py b/all/approximation/q_network.py index 6cbe999c..1da6635d 100644 --- a/all/approximation/q_network.py +++ b/all/approximation/q_network.py @@ -1,23 +1,14 @@ import torch + from all.nn import RLNetwork + from .approximation import Approximation class QNetwork(Approximation): - def __init__( - self, - model, - optimizer=None, - name='q', - **kwargs - ): + def __init__(self, model, optimizer=None, name="q", **kwargs): model = QModule(model) - super().__init__( - model, - optimizer, - name=name, - **kwargs - ) + super().__init__(model, optimizer, name=name, **kwargs) class QModule(RLNetwork): diff --git a/all/approximation/q_network_test.py b/all/approximation/q_network_test.py index dee5a061..911715ea 100644 --- a/all/approximation/q_network_test.py +++ b/all/approximation/q_network_test.py @@ -1,11 +1,13 @@ import unittest + +import numpy as np import torch +import torch_testing as tt from torch import nn from torch.nn.functional import smooth_l1_loss -import torch_testing as tt -import numpy as np + +from all.approximation import FixedTarget, QNetwork from all.core import State, StateArray -from all.approximation import QNetwork, FixedTarget STATE_DIM = 2 ACTIONS = 3 @@ -14,31 +16,30 @@ class TestQNetwork(unittest.TestCase): def setUp(self): torch.manual_seed(2) - self.model = nn.Sequential( - nn.Linear(STATE_DIM, ACTIONS) - ) + self.model = nn.Sequential(nn.Linear(STATE_DIM, ACTIONS)) def optimizer(params): return torch.optim.SGD(params, lr=0.1) + self.q = 
QNetwork(self.model, optimizer) def test_eval_list(self): states = StateArray( - torch.randn(5, STATE_DIM), - (5,), - mask=torch.tensor([1, 1, 0, 1, 0]) + torch.randn(5, STATE_DIM), (5,), mask=torch.tensor([1, 1, 0, 1, 0]) ) result = self.q.eval(states) tt.assert_almost_equal( result, - torch.tensor([ - [-0.238509, -0.726287, -0.034026], - [-0.35688755, -0.6612102, 0.34849477], - [0., 0., 0.], - [0.1944, -0.5536, -0.2345], - [0., 0., 0.] - ]), - decimal=2 + torch.tensor( + [ + [-0.238509, -0.726287, -0.034026], + [-0.35688755, -0.6612102, 0.34849477], + [0.0, 0.0, 0.0], + [0.1944, -0.5536, -0.2345], + [0.0, 0.0, 0.0], + ] + ), + decimal=2, ) def test_eval_actions(self): @@ -46,20 +47,16 @@ def test_eval_actions(self): actions = [1, 2, 0] result = self.q.eval(states, actions) self.assertEqual(result.shape, torch.Size([3])) - tt.assert_almost_equal(result, torch.tensor([-0.7262873, 0.3484948, -0.0296164])) + tt.assert_almost_equal( + result, torch.tensor([-0.7262873, 0.3484948, -0.0296164]) + ) def test_target_net(self): torch.manual_seed(2) - model = nn.Sequential( - nn.Linear(1, 1) - ) + model = nn.Sequential(nn.Linear(1, 1)) optimizer = torch.optim.SGD(model.parameters(), lr=0.1) - q = QNetwork( - model, - optimizer, - target=FixedTarget(3) - ) - inputs = State(torch.tensor([1.])) + q = QNetwork(model, optimizer, target=FixedTarget(3)) + inputs = State(torch.tensor([1.0])) def loss(policy_value): target = policy_value - 1 @@ -95,5 +92,5 @@ def loss(policy_value): np.testing.assert_equal(target_value, -0.6085841655731201) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/approximation/target/__init__.py b/all/approximation/target/__init__.py index c81dadbd..451384e4 100644 --- a/all/approximation/target/__init__.py +++ b/all/approximation/target/__init__.py @@ -2,3 +2,5 @@ from .fixed import FixedTarget from .polyak import PolyakTarget from .trivial import TrivialTarget + +__all__ = ["TargetNetwork", "FixedTarget", "PolyakTarget", "TrivialTarget"] diff --git a/all/approximation/target/abstract.py b/all/approximation/target/abstract.py index 31453816..411e2685 100644 --- a/all/approximation/target/abstract.py +++ b/all/approximation/target/abstract.py @@ -1,4 +1,4 @@ -from abc import abstractmethod, ABC +from abc import ABC, abstractmethod class TargetNetwork(ABC): diff --git a/all/approximation/target/fixed.py b/all/approximation/target/fixed.py index 7f6c576d..756ee996 100644 --- a/all/approximation/target/fixed.py +++ b/all/approximation/target/fixed.py @@ -1,5 +1,7 @@ import copy + import torch + from .abstract import TargetNetwork diff --git a/all/approximation/target/polyak.py b/all/approximation/target/polyak.py index 2d89c1e4..ae9271ff 100644 --- a/all/approximation/target/polyak.py +++ b/all/approximation/target/polyak.py @@ -1,10 +1,12 @@ import copy + import torch + from .abstract import TargetNetwork class PolyakTarget(TargetNetwork): - '''TargetNetwork that updates using polyak averaging''' + """TargetNetwork that updates using polyak averaging""" def __init__(self, rate): self._source = None @@ -20,5 +22,9 @@ def init(self, model): self._target = copy.deepcopy(model) def update(self): - for target_param, source_param in zip(self._target.parameters(), self._source.parameters()): - target_param.data.copy_(target_param.data * (1.0 - self._rate) + source_param.data * self._rate) + for target_param, source_param in zip( + self._target.parameters(), self._source.parameters() + ): + target_param.data.copy_( + target_param.data * (1.0 - 
self._rate) + source_param.data * self._rate + ) diff --git a/all/approximation/target/trivial.py b/all/approximation/target/trivial.py index 2e95b8d7..7f8cbaec 100644 --- a/all/approximation/target/trivial.py +++ b/all/approximation/target/trivial.py @@ -1,4 +1,5 @@ import torch + from .abstract import TargetNetwork diff --git a/all/approximation/v_network.py b/all/approximation/v_network.py index 6df364c1..9947e6c6 100644 --- a/all/approximation/v_network.py +++ b/all/approximation/v_network.py @@ -1,22 +1,12 @@ from all.nn import RLNetwork + from .approximation import Approximation class VNetwork(Approximation): - def __init__( - self, - model, - optimizer, - name='v', - **kwargs - ): + def __init__(self, model, optimizer, name="v", **kwargs): model = VModule(model) - super().__init__( - model, - optimizer, - name=name, - **kwargs - ) + super().__init__(model, optimizer, name=name, **kwargs) class VModule(RLNetwork): diff --git a/all/approximation/v_network_test.py b/all/approximation/v_network_test.py index cc281ed7..3c05320c 100644 --- a/all/approximation/v_network_test.py +++ b/all/approximation/v_network_test.py @@ -1,7 +1,9 @@ import unittest + import torch -from torch import nn import torch_testing as tt +from torch import nn + from all.approximation.v_network import VNetwork from all.core import StateArray @@ -16,31 +18,29 @@ def loss(value, error): class TestVNetwork(unittest.TestCase): def setUp(self): torch.manual_seed(2) - self.model = nn.Sequential( - nn.Linear(STATE_DIM, 1) - ) + self.model = nn.Sequential(nn.Linear(STATE_DIM, 1)) optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1) self.v = VNetwork(self.model, optimizer) def test_reinforce_list(self): states = StateArray( - torch.randn(5, STATE_DIM), - (5,), - mask=torch.tensor([1, 1, 0, 1, 0]) + torch.randn(5, STATE_DIM), (5,), mask=torch.tensor([1, 1, 0, 1, 0]) ) result = self.v(states) - tt.assert_almost_equal(result, torch.tensor([0.7053187, 0.3975691, 0., 0.2701665, 0.])) + tt.assert_almost_equal( + result, torch.tensor([0.7053187, 0.3975691, 0.0, 0.2701665, 0.0]) + ) self.v.reinforce(loss(result, torch.tensor([1, -1, 1, 1, 1])).float()) result = self.v(states) - tt.assert_almost_equal(result, torch.tensor([0.9732854, 0.5453826, 0., 0.4344811, 0.])) + tt.assert_almost_equal( + result, torch.tensor([0.9732854, 0.5453826, 0.0, 0.4344811, 0.0]) + ) def test_multi_reinforce(self): states = StateArray( - torch.randn(6, STATE_DIM), - (6,), - mask=torch.tensor([1, 1, 0, 1, 0, 0, 0]) + torch.randn(6, STATE_DIM), (6,), mask=torch.tensor([1, 1, 0, 1, 0, 0, 0]) ) result1 = self.v(states[0:2]) self.v.reinforce(loss(result1, torch.tensor([1, 2])).float()) @@ -52,5 +52,5 @@ def test_multi_reinforce(self): self.v.reinforce(loss(result3, torch.tensor([1, 2])).float()) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/bodies/__init__.py b/all/bodies/__init__.py index 52fd4ab8..926f461d 100644 --- a/all/bodies/__init__.py +++ b/all/bodies/__init__.py @@ -4,10 +4,4 @@ from .time import TimeFeature from .vision import FrameStack -__all__ = [ - "Body", - "ClipRewards", - "DeepmindAtariBody", - "FrameStack", - "TimeFeature" -] +__all__ = ["Body", "ClipRewards", "DeepmindAtariBody", "FrameStack", "TimeFeature"] diff --git a/all/bodies/atari.py b/all/bodies/atari.py index ecd8c7ca..0a560743 100644 --- a/all/bodies/atari.py +++ b/all/bodies/atari.py @@ -1,11 +1,19 @@ import torch + from ._body import Body from .rewards import ClipRewards from .vision import FrameStack class 
DeepmindAtariBody(Body): - def __init__(self, agent, lazy_frames=False, episodic_lives=True, frame_stack=4, clip_rewards=True): + def __init__( + self, + agent, + lazy_frames=False, + episodic_lives=True, + frame_stack=4, + clip_rewards=True, + ): if frame_stack > 1: agent = FrameStack(agent, lazy=lazy_frames, size=frame_stack) if clip_rewards: @@ -17,19 +25,19 @@ def __init__(self, agent, lazy_frames=False, episodic_lives=True, frame_stack=4, class EpisodicLives(Body): def process_state(self, state): - if 'life_lost' not in state: + if "life_lost" not in state: return state if len(state.shape) == 0: - if state['life_lost']: - return state.update('mask', 0.) + if state["life_lost"]: + return state.update("mask", 0.0) return state masks = [None] * len(state) - life_lost = state['life_lost'] + life_lost = state["life_lost"] for i, old_mask in enumerate(state.mask): if life_lost[i]: - masks[i] = 0. + masks[i] = 0.0 else: masks[i] = old_mask - return state.update('mask', torch.tensor(masks, device=state.device)) + return state.update("mask", torch.tensor(masks, device=state.device)) diff --git a/all/bodies/rewards.py b/all/bodies/rewards.py index 653bb508..8feededd 100644 --- a/all/bodies/rewards.py +++ b/all/bodies/rewards.py @@ -1,11 +1,12 @@ -import torch import numpy as np +import torch + from ._body import Body class ClipRewards(Body): def process_state(self, state): - return state.update('reward', self._clip(state.reward)) + return state.update("reward", self._clip(state.reward)) def _clip(self, reward): if torch.is_tensor(reward): diff --git a/all/bodies/time.py b/all/bodies/time.py index a11acc21..1e818908 100644 --- a/all/bodies/time.py +++ b/all/bodies/time.py @@ -1,5 +1,7 @@ import torch + from all.core import StateArray + from ._body import Body @@ -13,18 +15,23 @@ def process_state(self, state): if isinstance(state, StateArray): if self.timestep is None: self.timestep = torch.zeros(state.shape, device=state.device) - observation = torch.cat((state.observation, self.scale * self.timestep.view(-1, 1)), dim=1) - state = state.update('observation', observation) + observation = torch.cat( + (state.observation, self.scale * self.timestep.view(-1, 1)), dim=1 + ) + state = state.update("observation", observation) self.timestep = state.mask.float() * (self.timestep + 1) return state if self.timestep is None: self.timestep = 0 - state.update('timestep', self.timestep) - observation = torch.cat(( - state.observation, - torch.tensor(self.scale * self.timestep, device=state.device).view(-1) - ), dim=0) - state = state.update('observation', observation) + state.update("timestep", self.timestep) + observation = torch.cat( + ( + state.observation, + torch.tensor(self.scale * self.timestep, device=state.device).view(-1), + ), + dim=0, + ) + state = state.update("observation", observation) self.timestep = state.mask * (self.timestep + 1) return state diff --git a/all/bodies/time_test.py b/all/bodies/time_test.py index 31dc3ad8..d0971278 100644 --- a/all/bodies/time_test.py +++ b/all/bodies/time_test.py @@ -1,11 +1,13 @@ import unittest + import torch import torch_testing as tt -from all.core import State, StateArray + from all.bodies import TimeFeature +from all.core import State, StateArray -class TestAgent(): +class TestAgent: def __init__(self): self.last_state = None @@ -23,57 +25,101 @@ def setUp(self): def test_init(self): state = State(torch.randn(4)) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 
0.0000]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 0.0000]), + atol=1e-04, + ) def test_single_env(self): state = State(torch.randn(4)) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 0.]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 0.0]), + atol=1e-04, + ) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 1e-3]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 1e-3]), + atol=1e-04, + ) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 2e-3]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 2e-3]), + atol=1e-04, + ) def test_reset(self): state = State(torch.randn(4)) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 0.0000]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 0.0000]), + atol=1e-04, + ) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 1e-3]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 1e-3]), + atol=1e-04, + ) self.agent.act(State(state.observation, done=True)) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 2e-3]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 2e-3]), + atol=1e-04, + ) self.agent.act(State(state.observation)) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 0.0000]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 0.0000]), + atol=1e-04, + ) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [0.3923, -0.2236, -0.3195, -1.2050, 1e-3]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([0.3923, -0.2236, -0.3195, -1.2050, 1e-3]), + atol=1e-04, + ) def test_multi_env(self): state = StateArray(torch.randn(2, 2), (2,)) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [[0.3923, -0.2236, 0.], [-0.3195, -1.2050, 0.]]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([[0.3923, -0.2236, 0.0], [-0.3195, -1.2050, 0.0]]), + atol=1e-04, + ) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [[0.3923, -0.2236, 1e-3], [-0.3195, -1.2050, 1e-3]]), atol=1e-04) - self.agent.act(StateArray(state.observation, (2,), done=torch.tensor([False, True]))) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [[0.3923, -0.2236, 2e-3], [-0.3195, -1.2050, 2e-3]]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + 
torch.tensor([[0.3923, -0.2236, 1e-3], [-0.3195, -1.2050, 1e-3]]), + atol=1e-04, + ) + self.agent.act( + StateArray(state.observation, (2,), done=torch.tensor([False, True])) + ) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([[0.3923, -0.2236, 2e-3], [-0.3195, -1.2050, 2e-3]]), + atol=1e-04, + ) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [[0.3923, -0.2236, 3e-3], [-0.3195, -1.2050, 0.]]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([[0.3923, -0.2236, 3e-3], [-0.3195, -1.2050, 0.0]]), + atol=1e-04, + ) self.agent.act(state) - tt.assert_allclose(self.test_agent.last_state.observation, torch.tensor( - [[0.3923, -0.2236, 4e-3], [-0.3195, -1.2050, 1e-3]]), atol=1e-04) + tt.assert_allclose( + self.test_agent.last_state.observation, + torch.tensor([[0.3923, -0.2236, 4e-3], [-0.3195, -1.2050, 1e-3]]), + atol=1e-04, + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/bodies/vision.py b/all/bodies/vision.py index 99ffd4db..82b7e696 100644 --- a/all/bodies/vision.py +++ b/all/bodies/vision.py @@ -1,5 +1,7 @@ import torch + from all.core import State, StateArray + from ._body import Body @@ -19,15 +21,15 @@ def process_state(self, state): if self._lazy: return LazyState.from_state(state, self._frames, self._to_cache) if isinstance(state, StateArray): - return state.update('observation', torch.cat(self._frames, dim=1)) - return state.update('observation', torch.cat(self._frames, dim=0)) + return state.update("observation", torch.cat(self._frames, dim=1)) + return state.update("observation", torch.cat(self._frames, dim=0)) class TensorDeviceCache: - ''' + """ To efficiently implement device trasfer of lazy states, this class caches the transfered tensor so that it is not copied multiple times. 
- ''' + """ def __init__(self, max_size=16): self.max_size = max_size @@ -54,11 +56,11 @@ class LazyState(State): def from_state(cls, state, frames, to_cache): state = LazyState(state, device=frames[0].device) state.to_cache = to_cache - state['observation'] = frames + state["observation"] = frames return state def __getitem__(self, key): - if key == 'observation': + if key == "observation": v = dict.__getitem__(self, key) if torch.is_tensor(v): return v @@ -71,7 +73,7 @@ def update(self, key, value): if not k == key: x[k] = dict.__getitem__(self, k) x[key] = value - state = LazyState.from_state(x, x['observation'], self.to_cache) + state = LazyState.from_state(x, x["observation"], self.to_cache) return state def to(self, device): @@ -79,12 +81,12 @@ def to(self, device): return self x = {} for key, value in self.items(): - if key == 'observation': + if key == "observation": x[key] = [self.to_cache.convert(v, device) for v in value] # x[key] = [v.to(device) for v in value]#torch.cat(value,axis=0).to(device) elif torch.is_tensor(value): x[key] = value.to(device) else: x[key] = value - state = LazyState.from_state(x, x['observation'], self.to_cache) + state = LazyState.from_state(x, x["observation"], self.to_cache) return state diff --git a/all/core/__init__.py b/all/core/__init__.py index 540c3800..9e173a13 100644 --- a/all/core/__init__.py +++ b/all/core/__init__.py @@ -1,3 +1,3 @@ -from .state import State, StateArray, MultiagentState +from .state import MultiagentState, State, StateArray -__all__ = ['State', 'StateArray', 'MultiagentState'] +__all__ = ["State", "StateArray", "MultiagentState"] diff --git a/all/core/state.py b/all/core/state.py index 29e785ae..12681441 100644 --- a/all/core/state.py +++ b/all/core/state.py @@ -1,6 +1,7 @@ +import warnings + import numpy as np import torch -import warnings class State(dict): @@ -334,7 +335,7 @@ def update(self, key, value): def as_input(self, key): value = self[key] return value.view( - (np.prod(self.shape), *value.shape[len(self.shape):]) + (np.prod(self.shape), *value.shape[len(self.shape) :]) ).float() def as_output(self, tensor): diff --git a/all/core/state_test.py b/all/core/state_test.py index 6670b553..00c2c4ee 100644 --- a/all/core/state_test.py +++ b/all/core/state_test.py @@ -1,8 +1,10 @@ import unittest import warnings + import numpy as np import torch import torch_testing as tt + from all.core import State, StateArray @@ -11,86 +13,75 @@ def test_constructor_defaults(self): observation = torch.randn(3, 4) state = State(observation) tt.assert_equal(state.observation, observation) - self.assertEqual(state.mask, 1.) + self.assertEqual(state.mask, 1.0) self.assertEqual(state.done, False) - self.assertEqual(state.reward, 0.) + self.assertEqual(state.reward, 0.0) self.assertEqual(state.shape, ()) def test_from_dict(self): observation = torch.randn(3, 4) - state = State({ - 'observation': observation, - 'done': True, - 'mask': 1, - 'reward': 5. - }) + state = State( + {"observation": observation, "done": True, "mask": 1, "reward": 5.0} + ) tt.assert_equal(state.observation, observation) self.assertEqual(state.done, True) - self.assertEqual(state.mask, 1.) - self.assertEqual(state.reward, 5.) + self.assertEqual(state.mask, 1.0) + self.assertEqual(state.reward, 5.0) def test_auto_mask_true(self): observation = torch.randn(3, 4) - state = State({ - 'observation': observation, - 'done': True, - 'reward': 5. - }) - self.assertEqual(state.mask, 0.) 
+ state = State({"observation": observation, "done": True, "reward": 5.0}) + self.assertEqual(state.mask, 0.0) def test_auto_mask_false(self): observation = torch.randn(3, 4) - state = State({ - 'observation': observation, - 'done': False, - 'reward': 5. - }) - self.assertEqual(state.mask, 1.) + state = State({"observation": observation, "done": False, "reward": 5.0}) + self.assertEqual(state.mask, 1.0) def test_from_gym_reset(self): observation = np.array([1, 2, 3]) - state = State.from_gym((observation, {'coolInfo': 3})) + state = State.from_gym((observation, {"coolInfo": 3})) tt.assert_equal(state.observation, torch.from_numpy(observation)) - self.assertEqual(state.mask, 1.) + self.assertEqual(state.mask, 1.0) self.assertEqual(state.done, False) - self.assertEqual(state.reward, 0.) + self.assertEqual(state.reward, 0.0) self.assertEqual(state.shape, ()) - self.assertEqual(state['coolInfo'], 3.) + self.assertEqual(state["coolInfo"], 3.0) def test_from_gym_step(self): observation = np.array([1, 2, 3]) - state = State.from_gym((observation, 2., True, False, {'coolInfo': 3.})) + state = State.from_gym((observation, 2.0, True, False, {"coolInfo": 3.0})) tt.assert_equal(state.observation, torch.from_numpy(observation)) - self.assertEqual(state.mask, 0.) + self.assertEqual(state.mask, 0.0) self.assertEqual(state.done, True) - self.assertEqual(state.reward, 2.) - self.assertEqual(state['coolInfo'], 3.) + self.assertEqual(state.reward, 2.0) + self.assertEqual(state["coolInfo"], 3.0) self.assertEqual(state.shape, ()) def test_from_truncated_gym_step(self): observation = np.array([1, 2, 3]) - state = State.from_gym((observation, 2., False, True, {'coolInfo': 3.})) + state = State.from_gym((observation, 2.0, False, True, {"coolInfo": 3.0})) tt.assert_equal(state.observation, torch.from_numpy(observation)) - self.assertEqual(state.mask, 1.) + self.assertEqual(state.mask, 1.0) self.assertEqual(state.done, True) - self.assertEqual(state.reward, 2.) - self.assertEqual(state['coolInfo'], 3.) + self.assertEqual(state.reward, 2.0) + self.assertEqual(state["coolInfo"], 3.0) self.assertEqual(state.shape, ()) def test_legacy_gym_step(self): observation = np.array([1, 2, 3]) - state = State.from_gym((observation, 2., True, {'coolInfo': 3.})) + state = State.from_gym((observation, 2.0, True, {"coolInfo": 3.0})) tt.assert_equal(state.observation, torch.from_numpy(observation)) - self.assertEqual(state.mask, 0.) + self.assertEqual(state.mask, 0.0) self.assertEqual(state.done, True) - self.assertEqual(state.reward, 2.) - self.assertEqual(state['coolInfo'], 3.) 
+ self.assertEqual(state.reward, 2.0) + self.assertEqual(state["coolInfo"], 3.0) self.assertEqual(state.shape, ()) def test_as_input(self): observation = torch.randn(3, 4) state = State(observation) - self.assertEqual(state.as_input('observation').shape, (1, 3, 4)) + self.assertEqual(state.as_input("observation").shape, (1, 3, 4)) def test_as_output(self): observation = torch.randn(3, 4) @@ -100,30 +91,30 @@ def test_as_output(self): def test_apply_mask(self): observation = torch.randn(3, 4) - state = State.from_gym((observation, 0., True, False, {})) + state = State.from_gym((observation, 0.0, True, False, {})) tt.assert_equal(state.apply_mask(observation), torch.zeros(3, 4)) def test_apply(self): observation = torch.randn(3, 4) state = State(observation) model = torch.nn.Conv1d(3, 5, 2) - output = state.apply(model, 'observation') + output = state.apply(model, "observation") self.assertEqual(output.shape, (5, 3)) self.assertNotEqual(output.sum().item(), 0) def test_apply_done(self): observation = torch.randn(3, 4) - state = State.from_gym((observation, 0., True, False, {})) + state = State.from_gym((observation, 0.0, True, False, {})) model = torch.nn.Conv1d(3, 5, 2) - output = state.apply(model, 'observation') + output = state.apply(model, "observation") self.assertEqual(output.shape, (5, 3)) self.assertEqual(output.sum().item(), 0) def test_to_device(self): observation = torch.randn(3, 4) - state = State(observation, device=torch.device('cpu')) + state = State(observation, device=torch.device("cpu")) state_cpu = state.to("cpu") - self.assertTrue(torch.equal(state['observation'], state_cpu['observation'])) + self.assertTrue(torch.equal(state["observation"], state_cpu["observation"])) self.assertFalse(state is state_cpu) @@ -132,23 +123,23 @@ def test_constructor_defaults(self): raw = torch.randn(3, 4) state = State(raw, (3,)) tt.assert_equal(state.observation, raw) - self.assertEqual(state.mask, 1.) + self.assertEqual(state.mask, 1.0) self.assertEqual(state.done, False) - self.assertEqual(state.reward, 0.) 
+ self.assertEqual(state.reward, 0.0) def test_apply(self): observation = torch.randn(3, 4) state = StateArray(observation, (3,)) model = torch.nn.Linear(4, 2) - output = state.apply(model, 'observation') + output = state.apply(model, "observation") self.assertEqual(output.shape, (3, 2)) self.assertNotEqual(output.sum().item(), 0) def test_apply_done(self): observation = torch.randn(3, 4) - state = StateArray(observation, (3,), mask=torch.tensor([0., 0., 0.])) + state = StateArray(observation, (3,), mask=torch.tensor([0.0, 0.0, 0.0])) model = torch.nn.Linear(4, 2) - output = state.apply(model, 'observation') + output = state.apply(model, "observation") self.assertEqual(output.shape, (3, 2)) self.assertEqual(output.sum().item(), 0) @@ -160,17 +151,19 @@ def test_as_output(self): def test_auto_mask(self): observation = torch.randn(3, 4) - state = StateArray({ - 'observation': observation, - 'done': torch.tensor([True, False, True]), - }, (3,)) - tt.assert_equal(state.mask, torch.tensor([0., 1., 0.])) + state = StateArray( + { + "observation": observation, + "done": torch.tensor([True, False, True]), + }, + (3,), + ) + tt.assert_equal(state.mask, torch.tensor([0.0, 1.0, 0.0])) def test_multi_dim(self): - state = StateArray.array([ - State(torch.randn((3, 4))), - State(torch.randn((3, 4))) - ]) + state = StateArray.array( + [State(torch.randn((3, 4))), State(torch.randn((3, 4)))] + ) self.assertEqual(state.shape, (2,)) state = StateArray.array([state] * 3) self.assertEqual(state.shape, (3, 2)) @@ -181,10 +174,9 @@ def test_multi_dim(self): tt.assert_equal(state.reward, torch.zeros((5, 3, 2))) def test_view(self): - state = StateArray.array([ - State(torch.randn((3, 4))), - State(torch.randn((3, 4))) - ]) + state = StateArray.array( + [State(torch.randn((3, 4))), State(torch.randn((3, 4)))] + ) self.assertEqual(state.shape, (2,)) state = StateArray.array([state] * 3) self.assertEqual(state.shape, (3, 2)) @@ -193,28 +185,45 @@ def test_view(self): self.assertEqual(state.observation.shape, (2, 3, 3, 4)) def test_batch_exec(self): - zeros = StateArray.array([ - State(torch.zeros((3, 4))), - State(torch.zeros((3, 4))), - State(torch.zeros((3, 4))) - ]) - ones_state = zeros.batch_execute(2, lambda x: StateArray({'observation': x.observation + 1}, x.shape, x.device)) + zeros = StateArray.array( + [ + State(torch.zeros((3, 4))), + State(torch.zeros((3, 4))), + State(torch.zeros((3, 4))), + ] + ) + ones_state = zeros.batch_execute( + 2, + lambda x: StateArray({"observation": x.observation + 1}, x.shape, x.device), + ) ones_tensor = zeros.batch_execute(2, lambda x: x.observation + 1) self.assertEqual(ones_state.shape, (3,)) self.assertTrue(torch.equal(ones_state.observation, torch.ones((3, 3, 4)))) self.assertTrue(torch.equal(ones_tensor, torch.ones((3, 3, 4)))) def test_cat(self): - i1 = StateArray({'observation': torch.zeros((2, 3, 4)), 'reward': torch.ones((2,))}, shape=(2,)) - i2 = StateArray({'observation': torch.zeros((1, 3, 4)), 'reward': torch.ones((1,))}, shape=(1,)) + i1 = StateArray( + {"observation": torch.zeros((2, 3, 4)), "reward": torch.ones((2,))}, + shape=(2,), + ) + i2 = StateArray( + {"observation": torch.zeros((1, 3, 4)), "reward": torch.ones((1,))}, + shape=(1,), + ) cat = StateArray.cat([i1, i2]) self.assertEqual(cat.shape, (3,)) self.assertEqual(cat.observation.shape, (3, 3, 4)) self.assertEqual(cat.reward.shape, (3,)) def test_cat_axis1(self): - i1 = StateArray({'observation': torch.zeros((2, 3, 4)), 'reward': torch.ones((2, 3))}, shape=(2, 3)) - i2 = StateArray({'observation': 
torch.zeros((2, 2, 4)), 'reward': torch.ones((2, 2))}, shape=(2, 2)) + i1 = StateArray( + {"observation": torch.zeros((2, 3, 4)), "reward": torch.ones((2, 3))}, + shape=(2, 3), + ) + i2 = StateArray( + {"observation": torch.zeros((2, 2, 4)), "reward": torch.ones((2, 2))}, + shape=(2, 2), + ) cat = StateArray.cat([i1, i2], axis=1) self.assertEqual(cat.shape, (2, 5)) self.assertEqual(cat.observation.shape, (2, 5, 4)) @@ -223,33 +232,41 @@ def test_cat_axis1(self): def test_key_error(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - StateArray.array([ - State({ - 'observation': torch.tensor([1, 2]), - 'other_key': True - }), - State({ - 'observation': torch.tensor([1, 2]), - }), - ]) + StateArray.array( + [ + State({"observation": torch.tensor([1, 2]), "other_key": True}), + State( + { + "observation": torch.tensor([1, 2]), + } + ), + ] + ) self.assertEqual(len(w), 1) - self.assertEqual(w[0].message.args[0], 'KeyError while creating StateArray for key "other_key", omitting.') + self.assertEqual( + w[0].message.args[0], + 'KeyError while creating StateArray for key "other_key", omitting.', + ) def test_type_error(self): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - StateArray.array([ - State({ - 'observation': torch.tensor([1, 2]), - 'other_key': torch.tensor([1]) - }), - State({ - 'observation': torch.tensor([1, 2]), - 'other_key': 5. - }), - ]) + StateArray.array( + [ + State( + { + "observation": torch.tensor([1, 2]), + "other_key": torch.tensor([1]), + } + ), + State({"observation": torch.tensor([1, 2]), "other_key": 5.0}), + ] + ) self.assertEqual(len(w), 1) - self.assertEqual(w[0].message.args[0], 'TypeError while creating StateArray for key "other_key", omitting.') + self.assertEqual( + w[0].message.args[0], + 'TypeError while creating StateArray for key "other_key", omitting.', + ) if __name__ == "__main__": diff --git a/all/environments/__init__.py b/all/environments/__init__.py index 20ba6fc1..9b1498fa 100644 --- a/all/environments/__init__.py +++ b/all/environments/__init__.py @@ -10,7 +10,6 @@ from .pybullet import PybulletEnvironment from .vector_env import GymVectorEnvironment - __all__ = [ "AtariEnvironment", "DuplicateEnvironment", @@ -22,4 +21,5 @@ "MultiagentPettingZooEnv", "MujocoEnvironment", "PybulletEnvironment", + "VectorEnvironment", ] diff --git a/all/environments/_multiagent_environment.py b/all/environments/_multiagent_environment.py index a409168d..3ecce51b 100644 --- a/all/environments/_multiagent_environment.py +++ b/all/environments/_multiagent_environment.py @@ -2,7 +2,7 @@ class MultiagentEnvironment(ABC): - ''' + """ A multiagent reinforcement learning Environment. The Multiagent variant of the Environment object. @@ -10,20 +10,20 @@ class MultiagentEnvironment(ABC): the states, the actions, the transitions between states, and the rewards given to the agent. Environments are often used to benchmark reinforcement learning agents, or to define real problems that the user hopes to solve using reinforcement learning. - ''' + """ @abstractmethod def reset(self): - ''' + """ Reset the environment and return a new initial state for the first agent. Returns all.core.MultiagentState: The initial state for the next episode. - ''' + """ @abstractmethod def step(self, action): - ''' + """ Apply an action for the current agent and get the multiagent state for the next agent. Parameters: @@ -31,37 +31,37 @@ def step(self, action): Returns: all.core.MultiagentState: The state for the next agent. 
- ''' + """ @abstractmethod def render(self, **kwargs): - '''Render the current environment state.''' + """Render the current environment state.""" @abstractmethod def close(self): - '''Clean up any extraneous environment objects.''' + """Clean up any extraneous environment objects.""" @abstractmethod def agent_iter(self): - ''' + """ Create an iterable which that the next element is always the name of the agent whose turn it is to act. Returns: An Iterable over Agent strings. - ''' + """ @abstractmethod def last(self): - ''' + """ Get the MultiagentState object for the current agent. Returns: The all.core.MultiagentState object for the current agent. - ''' + """ @abstractmethod def is_done(self, agent): - ''' + """ Determine whether a given agent is done. Args: @@ -69,33 +69,33 @@ def is_done(self, agent): Returns: A boolean representing whether the given agent is done. - ''' + """ @property def state(self): - '''The State for the current agent.''' + """The State for the current agent.""" return self.last() @property @abstractmethod def name(self): - '''str: The name of the environment.''' + """str: The name of the environment.""" @abstractmethod def state_space(self, agent_id): - '''The state space for the given agent.''' + """The state space for the given agent.""" def observation_space(self, agent_id): - '''Alias for MultiagentEnvironment.state_space(agent_id).''' + """Alias for MultiagentEnvironment.state_space(agent_id).""" return self.state_space(agent_id) @abstractmethod def action_space(self): - '''The action space for the given agent.''' + """The action space for the given agent.""" @property @abstractmethod def device(self): - ''' + """ The torch device the environment lives on. - ''' + """ diff --git a/all/environments/atari.py b/all/environments/atari.py index e8a3a050..c889c943 100644 --- a/all/environments/atari.py +++ b/all/environments/atari.py @@ -1,19 +1,21 @@ import gymnasium import torch + from all.core import State -from .duplicate_env import DuplicateEnvironment + +from ._environment import Environment from .atari_wrappers import ( - NoopResetEnv, - MaxAndSkipEnv, FireResetEnv, - WarpFrame, LifeLostEnv, + MaxAndSkipEnv, + NoopResetEnv, + WarpFrame, ) -from ._environment import Environment +from .duplicate_env import DuplicateEnvironment class AtariEnvironment(Environment): - def __init__(self, name, device='cpu', **gym_make_kwargs): + def __init__(self, name, device="cpu", **gym_make_kwargs): # construct the environment env = gymnasium.make(name + "NoFrameskip-v4", **gym_make_kwargs) @@ -37,14 +39,18 @@ def __init__(self, name, device='cpu', **gym_make_kwargs): self._device = device def reset(self): - self._state = State.from_gym(self._env.reset(), dtype=self._env.observation_space.dtype, device=self._device) + self._state = State.from_gym( + self._env.reset(), + dtype=self._env.observation_space.dtype, + device=self._device, + ) return self._state def step(self, action): self._state = State.from_gym( self._env.step(self._convert(action)), dtype=self._env.observation_space.dtype, - device=self._device + device=self._device, ) return self._state @@ -58,7 +64,9 @@ def seed(self, seed): self._env.seed(seed) def duplicate(self, n): - return DuplicateEnvironment([AtariEnvironment(self._name, device=self._device) for _ in range(n)]) + return DuplicateEnvironment( + [AtariEnvironment(self._name, device=self._device) for _ in range(n)] + ) @property def name(self): diff --git a/all/environments/atari_test.py b/all/environments/atari_test.py index 4b6bf4aa..fef2dfd6 100644 
--- a/all/environments/atari_test.py +++ b/all/environments/atari_test.py @@ -1,10 +1,11 @@ import unittest + from all.environments import AtariEnvironment class AtariEnvironmentTest(unittest.TestCase): def test_reset(self): - env = AtariEnvironment('Breakout') + env = AtariEnvironment("Breakout") state = env.reset() self.assertEqual(state.observation.shape, (1, 84, 84)) self.assertEqual(state.reward, 0) @@ -12,30 +13,30 @@ def test_reset(self): self.assertEqual(state.mask, 1) def test_step(self): - env = AtariEnvironment('Breakout') + env = AtariEnvironment("Breakout") env.reset() state = env.step(1) self.assertEqual(state.observation.shape, (1, 84, 84)) self.assertEqual(state.reward, 0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) - self.assertEqual(state['life_lost'], False) + self.assertEqual(state["life_lost"], False) def test_step_until_life_lost(self): - env = AtariEnvironment('Breakout') + env = AtariEnvironment("Breakout") env.reset() for _ in range(100): state = env.step(1) - if state['life_lost']: + if state["life_lost"]: break self.assertEqual(state.observation.shape, (1, 84, 84)) self.assertEqual(state.reward, 0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) - self.assertEqual(state['life_lost'], True) + self.assertEqual(state["life_lost"], True) def test_step_until_done(self): - env = AtariEnvironment('Breakout') + env = AtariEnvironment("Breakout") env.reset() for _ in range(1000): state = env.step(1) @@ -45,4 +46,4 @@ def test_step_until_done(self): self.assertEqual(state.reward, 0) self.assertTrue(state.done) self.assertEqual(state.mask, 0) - self.assertEqual(state['life_lost'], False) + self.assertEqual(state["life_lost"], False) diff --git a/all/environments/atari_wrappers.py b/all/environments/atari_wrappers.py index df6013ef..7f513851 100644 --- a/all/environments/atari_wrappers.py +++ b/all/environments/atari_wrappers.py @@ -1,31 +1,34 @@ -''' +""" A subset of Atari wrappers modified from: https://github.com/openai/baselines/blob/master/baselines/common/atari_wrappers.py Other behaviors were implemented as Bodies. -''' -import numpy as np +""" + import os -os.environ.setdefault('PATH', '') -from collections import deque -import gymnasium -from gymnasium import spaces + +import numpy as np + +os.environ.setdefault("PATH", "") + import cv2 +import gymnasium + cv2.ocl.setUseOpenCL(False) class NoopResetEnv(gymnasium.Wrapper): def __init__(self, env, noop_max=30): - '''Sample initial states by taking random number of no-ops on reset. + """Sample initial states by taking random number of no-ops on reset. No-op is assumed to be action 0. - ''' + """ gymnasium.Wrapper.__init__(self, env) self.noop_max = noop_max self.override_num_noops = None self.noop_action = 0 - assert env.unwrapped.get_action_meanings()[0] == 'NOOP' + assert env.unwrapped.get_action_meanings()[0] == "NOOP" def reset(self, **kwargs): - ''' Do no-op action for a number of steps in [1, noop_max].''' + """Do no-op action for a number of steps in [1, noop_max].""" self.env.reset(**kwargs) if self.override_num_noops is not None: noops = self.override_num_noops @@ -45,13 +48,13 @@ def step(self, ac): class FireResetEnv(gymnasium.Wrapper): def __init__(self, env): - ''' + """ Take action on reset for environments that are fixed until firing. Important: This was modified to also fire on lives lost. 
- ''' + """ gymnasium.Wrapper.__init__(self, env) - assert env.unwrapped.get_action_meanings()[1] == 'FIRE' + assert env.unwrapped.get_action_meanings()[1] == "FIRE" assert len(env.unwrapped.get_action_meanings()) >= 3 self.lives = 0 self.was_real_done = True @@ -85,14 +88,14 @@ def lost_life(self): class MaxAndSkipEnv(gymnasium.Wrapper): def __init__(self, env, skip=4): - '''Return only every `skip`-th frame''' + """Return only every `skip`-th frame""" gymnasium.Wrapper.__init__(self, env) # most recent raw observations (for max pooling across time steps) self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8) self._skip = skip def step(self, action): - '''Repeat action, sum reward, and max over last observations.''' + """Repeat action, sum reward, and max over last observations.""" total_reward = 0.0 for i in range(self._skip): obs, reward, terminated, truncated, info = self.env.step(action) @@ -115,11 +118,11 @@ def reset(self, **kwargs): class WarpFrame(gymnasium.ObservationWrapper): def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None): - ''' + """ Warp frames to 84x84 as done in the Nature paper and later work. If the environment uses dictionary observations, `dict_space_key` can be specified which indicates which observation should be warped. - ''' + """ super().__init__(env) self._width = width self._height = height @@ -168,11 +171,11 @@ def observation(self, obs): class LifeLostEnv(gymnasium.Wrapper): def __init__(self, env): - ''' + """ Modified wrapper to add a "life_lost" key to info. This allows the agent Body to make the episode as done if it desires. - ''' + """ gymnasium.Wrapper.__init__(self, env) self.lives = 0 @@ -183,7 +186,7 @@ def reset(self): def step(self, action): obs, reward, terminated, truncated, _ = self.env.step(action) lives = self.env.unwrapped.ale.lives() - life_lost = (lives < self.lives and lives > 0) + life_lost = lives < self.lives and lives > 0 self.lives = lives - info = {'life_lost': life_lost} + info = {"life_lost": life_lost} return obs, reward, terminated, truncated, info diff --git a/all/environments/duplicate_env.py b/all/environments/duplicate_env.py index 42fbd3b4..9b914255 100644 --- a/all/environments/duplicate_env.py +++ b/all/environments/duplicate_env.py @@ -1,12 +1,12 @@ -import gymnasium import torch + from all.core import State + from ._vector_environment import VectorEnvironment -import numpy as np class DuplicateEnvironment(VectorEnvironment): - ''' + """ Turns a list of ALL Environment objects into a VectorEnvironment object This wrapper just takes the list of States the environments generate and outputs @@ -16,9 +16,9 @@ class DuplicateEnvironment(VectorEnvironment): Args: envs: A list of ALL environments device (optional): the device on which tensors will be stored - ''' + """ - def __init__(self, envs, device=torch.device('cpu')): + def __init__(self, envs, device=torch.device("cpu")): self._name = envs[0].name self._envs = envs self._state = None @@ -34,9 +34,16 @@ def name(self): def reset(self, seed=None, **kwargs): if seed is not None: - self._state = State.array([sub_env.reset(seed=(seed + i), **kwargs) for i, sub_env in enumerate(self._envs)]) + self._state = State.array( + [ + sub_env.reset(seed=(seed + i), **kwargs) + for i, sub_env in enumerate(self._envs) + ] + ) else: - self._state = State.array([sub_env.reset(**kwargs) for sub_env in self._envs]) + self._state = State.array( + [sub_env.reset(**kwargs) for sub_env in self._envs] + ) return self._state def step(self, 
actions): diff --git a/all/environments/duplicate_env_test.py b/all/environments/duplicate_env_test.py index 1c0c750e..e506db17 100644 --- a/all/environments/duplicate_env_test.py +++ b/all/environments/duplicate_env_test.py @@ -1,18 +1,19 @@ import unittest -import gymnasium + import torch + from all.environments import DuplicateEnvironment, GymEnvironment def make_vec_env(num_envs=3): - env = [GymEnvironment('CartPole-v0') for i in range(num_envs)] + env = [GymEnvironment("CartPole-v0") for i in range(num_envs)] return env class DuplicateEnvironmentTest(unittest.TestCase): def test_env_name(self): env = DuplicateEnvironment(make_vec_env()) - self.assertEqual(env.name, 'CartPole-v0') + self.assertEqual(env.name, "CartPole-v0") def test_num_envs(self): num_envs = 5 @@ -25,9 +26,30 @@ def test_reset(self): env = DuplicateEnvironment(make_vec_env(num_envs)) state = env.reset() self.assertEqual(state.observation.shape, (num_envs, 4)) - self.assertTrue((state.reward == torch.zeros(num_envs, )).all()) - self.assertTrue((state.done == torch.zeros(num_envs, )).all()) - self.assertTrue((state.mask == torch.ones(num_envs, )).all()) + self.assertTrue( + ( + state.reward + == torch.zeros( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.done + == torch.zeros( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.mask + == torch.ones( + num_envs, + ) + ).all() + ) def test_step(self): num_envs = 5 @@ -35,9 +57,30 @@ def test_step(self): env.reset() state = env.step(torch.ones(num_envs, dtype=torch.int32)) self.assertEqual(state.observation.shape, (num_envs, 4)) - self.assertTrue((state.reward == torch.ones(num_envs, )).all()) - self.assertTrue((state.done == torch.zeros(num_envs, )).all()) - self.assertTrue((state.mask == torch.ones(num_envs, )).all()) + self.assertTrue( + ( + state.reward + == torch.ones( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.done + == torch.zeros( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.mask + == torch.ones( + num_envs, + ) + ).all() + ) def test_step_until_done(self): num_envs = 3 @@ -48,6 +91,6 @@ def test_step_until_done(self): if state.done[0]: break self.assertEqual(state[0].observation.shape, (4,)) - self.assertEqual(state[0].reward, 1.) + self.assertEqual(state[0].reward, 1.0) self.assertTrue(state[0].done) self.assertEqual(state[0].mask, 0) diff --git a/all/environments/gym.py b/all/environments/gym.py index 7baa3c97..994fcfd7 100644 --- a/all/environments/gym.py +++ b/all/environments/gym.py @@ -1,13 +1,16 @@ import gymnasium import torch + from all.core import State + from ._environment import Environment from .duplicate_env import DuplicateEnvironment + gymnasium.logger.set_level(40) class GymEnvironment(Environment): - ''' + """ A wrapper for OpenAI Gym environments (see: https://gymnasium.openai.com). 
This wrapper converts the output of the gym environment to PyTorch tensors, @@ -23,11 +26,19 @@ class GymEnvironment(Environment): device (str, optional): the device on which tensors will be stored legacy_gym (str, optional): If true, calls gym.make() instead of gymnasium.make() **gym_make_kwargs: kwargs passed to gymnasium.make(id, **gym_make_kwargs) - ''' - - def __init__(self, id, device=torch.device('cpu'), name=None, legacy_gym=False, **gym_make_kwargs): + """ + + def __init__( + self, + id, + device=torch.device("cpu"), + name=None, + legacy_gym=False, + **gym_make_kwargs + ): if legacy_gym: import gym + self._gym = gym else: self._gym = gymnasium @@ -46,14 +57,18 @@ def name(self): return self._name def reset(self, **kwargs): - self._state = State.from_gym(self._env.reset(**kwargs), dtype=self._env.observation_space.dtype, device=self._device) + self._state = State.from_gym( + self._env.reset(**kwargs), + dtype=self._env.observation_space.dtype, + device=self._device, + ) return self._state def step(self, action): self._state = State.from_gym( self._env.step(self._convert(action)), dtype=self._env.observation_space.dtype, - device=self._device + device=self._device, ) return self._state @@ -67,7 +82,12 @@ def seed(self, seed): self._env.seed(seed) def duplicate(self, n): - return DuplicateEnvironment([GymEnvironment(self._id, device=self.device, name=self._name) for _ in range(n)]) + return DuplicateEnvironment( + [ + GymEnvironment(self._id, device=self.device, name=self._name) + for _ in range(n) + ] + ) @property def state_space(self): diff --git a/all/environments/gym_test.py b/all/environments/gym_test.py index 31dea2e2..29abcd34 100644 --- a/all/environments/gym_test.py +++ b/all/environments/gym_test.py @@ -1,15 +1,15 @@ import unittest -import gymnasium + from all.environments import GymEnvironment class GymEnvironmentTest(unittest.TestCase): def test_env_name(self): - env = GymEnvironment('CartPole-v0') - self.assertEqual(env.name, 'CartPole-v0') + env = GymEnvironment("CartPole-v0") + self.assertEqual(env.name, "CartPole-v0") def test_reset(self): - env = GymEnvironment('CartPole-v0') + env = GymEnvironment("CartPole-v0") state = env.reset() self.assertEqual(state.observation.shape, (4,)) self.assertEqual(state.reward, 0) @@ -17,22 +17,22 @@ def test_reset(self): self.assertEqual(state.mask, 1) def test_step(self): - env = GymEnvironment('CartPole-v0') + env = GymEnvironment("CartPole-v0") env.reset() state = env.step(1) self.assertEqual(state.observation.shape, (4,)) - self.assertEqual(state.reward, 1.) + self.assertEqual(state.reward, 1.0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) def test_step_until_done(self): - env = GymEnvironment('CartPole-v0') + env = GymEnvironment("CartPole-v0") env.reset() for _ in range(100): state = env.step(1) if state.done: break self.assertEqual(state.observation.shape, (4,)) - self.assertEqual(state.reward, 1.) 
+ self.assertEqual(state.reward, 1.0) self.assertTrue(state.done) self.assertEqual(state.mask, 0) diff --git a/all/environments/mujoco_test.py b/all/environments/mujoco_test.py index 7a207e8c..7fb240b7 100644 --- a/all/environments/mujoco_test.py +++ b/all/environments/mujoco_test.py @@ -1,11 +1,12 @@ import unittest -from all.environments import MujocoEnvironment, GymEnvironment + +from all.environments import MujocoEnvironment class MujocoEnvironmentTest(unittest.TestCase): def test_load_env(self): env = MujocoEnvironment("Ant-v4") - self.assertEqual(env.name, 'Ant-v4') + self.assertEqual(env.name, "Ant-v4") def test_observation_space(self): env = MujocoEnvironment("Ant-v4") @@ -19,7 +20,7 @@ def test_reset(self): env = MujocoEnvironment("Ant-v4") state = env.reset(seed=0) self.assertEqual(state.observation.shape, (27,)) - self.assertEqual(state.reward, 0.) + self.assertEqual(state.reward, 0.0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) @@ -28,8 +29,8 @@ def test_step(self): state = env.reset(seed=0) state = env.step(env.action_space.sample()) self.assertEqual(state.observation.shape, (27,)) - self.assertGreater(state.reward, -2.) + self.assertGreater(state.reward, -2.0) self.assertLess(state.reward, 2) - self.assertNotEqual(state.reward, 0.) + self.assertNotEqual(state.reward, 0.0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) diff --git a/all/environments/multiagent_atari.py b/all/environments/multiagent_atari.py index e9a10de1..c59437bb 100644 --- a/all/environments/multiagent_atari.py +++ b/all/environments/multiagent_atari.py @@ -1,14 +1,10 @@ import importlib -import numpy as np -import torch -import gymnasium -from all.core import MultiagentState -from ._multiagent_environment import MultiagentEnvironment + from .multiagent_pettingzoo import MultiagentPettingZooEnv class MultiagentAtariEnv(MultiagentPettingZooEnv): - ''' + """ A wrapper for PettingZoo Atari environments (see: https://www.pettingzoo.ml/atari). This wrapper converts the output of the PettingZoo environment to PyTorch tensors, @@ -17,16 +13,18 @@ class MultiagentAtariEnv(MultiagentPettingZooEnv): Args: env_name (string): A string representing the name of the environment (e.g. 
pong-v1) device (optional): the device on which tensors will be stored - ''' + """ - def __init__(self, env_name, device='cuda', **pettingzoo_params): + def __init__(self, env_name, device="cuda", **pettingzoo_params): env = self._load_env(env_name, pettingzoo_params) super().__init__(env, name=env_name, device=device) def _load_env(self, env_name, pettingzoo_params): - from pettingzoo import atari - from supersuit import resize_v1, frame_skip_v0, reshape_v0, max_observation_v0 - env = importlib.import_module('pettingzoo.atari.{}'.format(env_name)).env(obs_type='grayscale_image', **pettingzoo_params) + from supersuit import frame_skip_v0, max_observation_v0, reshape_v0, resize_v1 + + env = importlib.import_module("pettingzoo.atari.{}".format(env_name)).env( + obs_type="grayscale_image", **pettingzoo_params + ) env = max_observation_v0(env, 2) env = frame_skip_v0(env, 4) env = resize_v1(env, 84, 84) diff --git a/all/environments/multiagent_atari_test.py b/all/environments/multiagent_atari_test.py index c5c5e7a8..78c779c9 100644 --- a/all/environments/multiagent_atari_test.py +++ b/all/environments/multiagent_atari_test.py @@ -1,82 +1,84 @@ import unittest + import torch + from all.environments import MultiagentAtariEnv class MultiagentAtariEnvTest(unittest.TestCase): def test_init(self): - MultiagentAtariEnv('pong_v3', device='cpu') - MultiagentAtariEnv('mario_bros_v3', device='cpu') - MultiagentAtariEnv('entombed_cooperative_v3', device='cpu') + MultiagentAtariEnv("pong_v3", device="cpu") + MultiagentAtariEnv("mario_bros_v3", device="cpu") + MultiagentAtariEnv("entombed_cooperative_v3", device="cpu") def test_reset(self): - env = MultiagentAtariEnv('pong_v3', device='cpu') + env = MultiagentAtariEnv("pong_v3", device="cpu") state = env.reset() self.assertEqual(state.observation.shape, (1, 84, 84)) self.assertEqual(state.reward, 0) self.assertEqual(state.done, False) - self.assertEqual(state.mask, 1.) - self.assertEqual(state['agent'], 'first_0') + self.assertEqual(state.mask, 1.0) + self.assertEqual(state["agent"], "first_0") def test_step(self): - env = MultiagentAtariEnv('pong_v3', device='cpu') + env = MultiagentAtariEnv("pong_v3", device="cpu") env.reset() state = env.step(0) self.assertEqual(state.observation.shape, (1, 84, 84)) self.assertEqual(state.reward, 0) self.assertEqual(state.done, False) - self.assertEqual(state.mask, 1.) - self.assertEqual(state['agent'], 'second_0') + self.assertEqual(state.mask, 1.0) + self.assertEqual(state["agent"], "second_0") def test_step_tensor(self): - env = MultiagentAtariEnv('pong_v3', device='cpu') + env = MultiagentAtariEnv("pong_v3", device="cpu") env.reset() state = env.step(torch.tensor([0])) self.assertEqual(state.observation.shape, (1, 84, 84)) self.assertEqual(state.reward, 0) self.assertEqual(state.done, False) - self.assertEqual(state.mask, 1.) 
-        self.assertEqual(state['agent'], 'second_0')
+        self.assertEqual(state.mask, 1.0)
+        self.assertEqual(state["agent"], "second_0")
 
     def test_name(self):
-        env = MultiagentAtariEnv('pong_v3', device='cpu')
-        self.assertEqual(env.name, 'pong_v3')
+        env = MultiagentAtariEnv("pong_v3", device="cpu")
+        self.assertEqual(env.name, "pong_v3")
 
     def test_agent_iter(self):
-        env = MultiagentAtariEnv('pong_v3', device='cpu')
+        env = MultiagentAtariEnv("pong_v3", device="cpu")
         env.reset()
         it = iter(env.agent_iter())
-        self.assertEqual(next(it), 'first_0')
+        self.assertEqual(next(it), "first_0")
 
     def test_state_spaces(self):
-        env = MultiagentAtariEnv('pong_v3', device='cpu')
-        self.assertEqual(env.state_space('first_0').shape, (1, 84, 84))
-        self.assertEqual(env.state_space('second_0').shape, (1, 84, 84))
+        env = MultiagentAtariEnv("pong_v3", device="cpu")
+        self.assertEqual(env.state_space("first_0").shape, (1, 84, 84))
+        self.assertEqual(env.state_space("second_0").shape, (1, 84, 84))
 
     def test_action_spaces(self):
-        env = MultiagentAtariEnv('pong_v3', device='cpu')
-        self.assertEqual(env.action_space('first_0').n, 6)
-        self.assertEqual(env.action_space('second_0').n, 6)
+        env = MultiagentAtariEnv("pong_v3", device="cpu")
+        self.assertEqual(env.action_space("first_0").n, 6)
+        self.assertEqual(env.action_space("second_0").n, 6)
 
     def test_list_agents(self):
-        env = MultiagentAtariEnv('pong_v3', device='cpu')
-        self.assertEqual(env.agents, ['first_0', 'second_0'])
+        env = MultiagentAtariEnv("pong_v3", device="cpu")
+        self.assertEqual(env.agents, ["first_0", "second_0"])
 
     def test_is_done(self):
-        env = MultiagentAtariEnv('pong_v3', device='cpu')
+        env = MultiagentAtariEnv("pong_v3", device="cpu")
         env.reset()
-        self.assertFalse(env.is_done('first_0'))
-        self.assertFalse(env.is_done('second_0'))
+        self.assertFalse(env.is_done("first_0"))
+        self.assertFalse(env.is_done("second_0"))
 
     def test_last(self):
-        env = MultiagentAtariEnv('pong_v3', device='cpu')
+        env = MultiagentAtariEnv("pong_v3", device="cpu")
         env.reset()
         state = env.last()
         self.assertEqual(state.observation.shape, (1, 84, 84))
         self.assertEqual(state.reward, 0)
         self.assertEqual(state.done, False)
-        self.assertEqual(state.mask, 1.)
-        self.assertEqual(state['agent'], 'first_0')
+        self.assertEqual(state.mask, 1.0)
+        self.assertEqual(state["agent"], "first_0")
 
 
 if __name__ == "__main__":
diff --git a/all/environments/multiagent_pettingzoo.py b/all/environments/multiagent_pettingzoo.py
index 3bc96619..63f09b77 100644
--- a/all/environments/multiagent_pettingzoo.py
+++ b/all/environments/multiagent_pettingzoo.py
@@ -1,14 +1,14 @@
-import importlib
-import numpy as np
-import torch
 import cloudpickle
 import gymnasium
+import torch
+
 from all.core import MultiagentState
+
 from ._multiagent_environment import MultiagentEnvironment
 
 
 class MultiagentPettingZooEnv(MultiagentEnvironment):
-    '''
+    """
     A wrapper for general PettingZoo environments (see: https://www.pettingzoo.ml/).
 
     This wrapper converts the output of the PettingZoo environment to PyTorch tensors,
@@ -17,9 +17,9 @@ class MultiagentPettingZooEnv(MultiagentEnvironment):
     Args:
         zoo_env (AECEnv): A PettingZoo AECEnv environment (e.g. 
pettingzoo.mpe.simple_push_v2) device (optional): the device on which tensors will be stored - ''' + """ - def __init__(self, zoo_env, name, device='cuda'): + def __init__(self, zoo_env, name, device="cuda"): env = zoo_env env.reset() self._env = env @@ -27,22 +27,24 @@ def __init__(self, zoo_env, name, device='cuda'): self._device = device self.agents = self._env.agents self.subenvs = { - agent: SubEnv(agent, device, self.state_space(agent), self.action_space(agent)) + agent: SubEnv( + agent, device, self.state_space(agent), self.action_space(agent) + ) for agent in self.agents } - ''' + """ Reset the environment and return a new initial state. Returns: An initial MultiagentState object. - ''' + """ def reset(self, **kwargs): self._env.reset(**kwargs) return self.last() - ''' + """ Reset the environment and return a new initial state. Args: @@ -50,7 +52,7 @@ def reset(self, **kwargs): Returns: The MultiagentState object for the next agent - ''' + """ def step(self, action): if action is None: @@ -75,12 +77,24 @@ def is_done(self, agent): return self._env.terminations[agent] def duplicate(self, n): - return [MultiagentPettingZooEnv(cloudpickle.loads(cloudpickle.dumps(self._env)), self._name, device=self.device) for _ in range(n)] + return [ + MultiagentPettingZooEnv( + cloudpickle.loads(cloudpickle.dumps(self._env)), + self._name, + device=self.device, + ) + for _ in range(n) + ] def last(self): observation, reward, terminated, truncated, info = self._env.last() selected_obs_space = self._env.observation_space(self._env.agent_selection) - return MultiagentState.from_zoo(self._env.agent_selection, (observation, reward, terminated, truncated, info), device=self._device, dtype=selected_obs_space.dtype) + return MultiagentState.from_zoo( + self._env.agent_selection, + (observation, reward, terminated, truncated, info), + device=self._device, + dtype=selected_obs_space.dtype, + ) @property def name(self): @@ -112,7 +126,7 @@ def _convert(self, action): return action -class SubEnv(): +class SubEnv: def __init__(self, name, device, state_space, action_space): self.name = name self.device = device diff --git a/all/environments/multiagent_pettingzoo_test.py b/all/environments/multiagent_pettingzoo_test.py index 482e50e5..184728af 100644 --- a/all/environments/multiagent_pettingzoo_test.py +++ b/all/environments/multiagent_pettingzoo_test.py @@ -1,8 +1,9 @@ import unittest -import torch -from all.environments import MultiagentPettingZooEnv + from pettingzoo.mpe import simple_world_comm_v3 +from all.environments import MultiagentPettingZooEnv + class MultiagentPettingZooEnvTest(unittest.TestCase): def test_init(self): @@ -14,8 +15,8 @@ def test_reset(self): self.assertEqual(state.observation.shape, (34,)) self.assertEqual(state.reward, 0) self.assertEqual(state.done, False) - self.assertEqual(state.mask, 1.) - self.assertEqual(state['agent'], 'leadadversary_0') + self.assertEqual(state.mask, 1.0) + self.assertEqual(state["agent"], "leadadversary_0") def test_step(self): env = self._make_env() @@ -24,8 +25,8 @@ def test_step(self): self.assertEqual(state.observation.shape, (34,)) self.assertEqual(state.reward, 0) self.assertEqual(state.done, False) - self.assertEqual(state.mask, 1.) 
- self.assertEqual(state['agent'], 'adversary_0') + self.assertEqual(state.mask, 1.0) + self.assertEqual(state["agent"], "adversary_0") def test_step_tensor(self): env = self._make_env() @@ -34,38 +35,48 @@ def test_step_tensor(self): self.assertEqual(state.observation.shape, (34,)) self.assertEqual(state.reward, 0) self.assertEqual(state.done, False) - self.assertEqual(state.mask, 1.) - self.assertEqual(state['agent'], 'adversary_0') + self.assertEqual(state.mask, 1.0) + self.assertEqual(state["agent"], "adversary_0") def test_name(self): env = self._make_env() - self.assertEqual(env.name, 'simple_world_comm_v3') + self.assertEqual(env.name, "simple_world_comm_v3") def test_agent_iter(self): env = self._make_env() env.reset() it = iter(env.agent_iter()) - self.assertEqual(next(it), 'leadadversary_0') + self.assertEqual(next(it), "leadadversary_0") def test_state_spaces(self): env = self._make_env() - self.assertEqual(env.state_space('leadadversary_0').shape, (34,)) - self.assertEqual(env.state_space('adversary_0').shape, (34,)) + self.assertEqual(env.state_space("leadadversary_0").shape, (34,)) + self.assertEqual(env.state_space("adversary_0").shape, (34,)) def test_action_spaces(self): env = self._make_env() - self.assertEqual(env.action_space('leadadversary_0').n, 20) - self.assertEqual(env.action_space('adversary_0').n, 5) + self.assertEqual(env.action_space("leadadversary_0").n, 20) + self.assertEqual(env.action_space("adversary_0").n, 5) def test_list_agents(self): env = self._make_env() - self.assertEqual(env.agents, ['leadadversary_0', 'adversary_0', 'adversary_1', 'adversary_2', 'agent_0', 'agent_1']) + self.assertEqual( + env.agents, + [ + "leadadversary_0", + "adversary_0", + "adversary_1", + "adversary_2", + "agent_0", + "agent_1", + ], + ) def test_terminated(self): env = self._make_env() env.reset() - self.assertFalse(env.is_done('leadadversary_0')) - self.assertFalse(env.is_done('adversary_0')) + self.assertFalse(env.is_done("leadadversary_0")) + self.assertFalse(env.is_done("adversary_0")) def test_last(self): env = self._make_env() @@ -74,20 +85,28 @@ def test_last(self): self.assertEqual(state.observation.shape, (34,)) self.assertEqual(state.reward, 0) self.assertEqual(state.done, False) - self.assertEqual(state.mask, 1.) 
- self.assertEqual(state['agent'], 'leadadversary_0') + self.assertEqual(state.mask, 1.0) + self.assertEqual(state["agent"], "leadadversary_0") def test_variable_spaces(self): - env = MultiagentPettingZooEnv(simple_world_comm_v3.env(), name="simple_world_comm_v2", device='cpu') + env = MultiagentPettingZooEnv( + simple_world_comm_v3.env(), name="simple_world_comm_v2", device="cpu" + ) state = env.reset() # tests that action spaces work for agent in env.agents: state = env.last() - self.assertTrue(env.observation_space(agent).contains(state['observation'].cpu().detach().numpy())) + self.assertTrue( + env.observation_space(agent).contains( + state["observation"].cpu().detach().numpy() + ) + ) env.step(env.action_space(env.agent_selection).sample()) def _make_env(self): - return MultiagentPettingZooEnv(simple_world_comm_v3.env(), name="simple_world_comm_v3", device='cpu') + return MultiagentPettingZooEnv( + simple_world_comm_v3.env(), name="simple_world_comm_v3", device="cpu" + ) if __name__ == "__main__": diff --git a/all/environments/pybullet.py b/all/environments/pybullet.py index 70f379fb..48afe200 100644 --- a/all/environments/pybullet.py +++ b/all/environments/pybullet.py @@ -7,11 +7,13 @@ class PybulletEnvironment(GymEnvironment): "cheetah": "HalfCheetahBulletEnv-v0", "humanoid": "HumanoidBulletEnv-v0", "hopper": "HopperBulletEnv-v0", - "walker": "Walker2DBulletEnv-v0" + "walker": "Walker2DBulletEnv-v0", } def __init__(self, name, **kwargs): - import pybullet_envs + # + import pybullet_envs # noqa: F401 + if name in self.short_names: name = self.short_names[name] super().__init__(name, legacy_gym=True, **kwargs) diff --git a/all/environments/pybullet_test.py b/all/environments/pybullet_test.py index d08e5bfb..7dcda1dd 100644 --- a/all/environments/pybullet_test.py +++ b/all/environments/pybullet_test.py @@ -1,5 +1,6 @@ import unittest -from all.environments import PybulletEnvironment, GymEnvironment + +from all.environments import PybulletEnvironment class PybulletEnvironmentTest(unittest.TestCase): @@ -9,25 +10,25 @@ def test_env_short_name(self): self.assertEqual(env.name, long_name) def test_env_full_name(self): - env = PybulletEnvironment('HalfCheetahBulletEnv-v0') - self.assertEqual(env.name, 'HalfCheetahBulletEnv-v0') + env = PybulletEnvironment("HalfCheetahBulletEnv-v0") + self.assertEqual(env.name, "HalfCheetahBulletEnv-v0") def test_reset(self): - env = PybulletEnvironment('cheetah') + env = PybulletEnvironment("cheetah") state = env.reset() self.assertEqual(state.observation.shape, (26,)) - self.assertEqual(state.reward, 0.) + self.assertEqual(state.reward, 0.0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) def test_step(self): - env = PybulletEnvironment('cheetah') + env = PybulletEnvironment("cheetah") env.seed(0) state = env.reset() state = env.step(env.action_space.sample()) self.assertEqual(state.observation.shape, (26,)) - self.assertGreater(state.reward, -1.) + self.assertGreater(state.reward, -1.0) self.assertLess(state.reward, 1) - self.assertNotEqual(state.reward, 0.) 
+ self.assertNotEqual(state.reward, 0.0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) diff --git a/all/environments/vector_env.py b/all/environments/vector_env.py index d74a46af..cd5cf12b 100644 --- a/all/environments/vector_env.py +++ b/all/environments/vector_env.py @@ -1,12 +1,13 @@ -import gymnasium +import numpy as np import torch + from all.core import StateArray + from ._vector_environment import VectorEnvironment -import numpy as np class GymVectorEnvironment(VectorEnvironment): - ''' + """ A wrapper for Gym's vector environments (see: https://github.com/openai/gym/blob/master/gym/vector/vector_env.py). @@ -19,9 +20,9 @@ class GymVectorEnvironment(VectorEnvironment): Args: vec_env: An OpenAI gym vector environment device (optional): the device on which tensors will be stored - ''' + """ - def __init__(self, vec_env, name, device=torch.device('cpu')): + def __init__(self, vec_env, name, device=torch.device("cpu")): self._name = name self._env = vec_env self._state = None @@ -37,7 +38,13 @@ def name(self): def reset(self, **kwargs): obs, info = self._env.reset(**kwargs) - self._state = self._to_state(obs, np.zeros(self._env.num_envs), np.zeros(self._env.num_envs), np.zeros(self._env.num_envs), info) + self._state = self._to_state( + obs, + np.zeros(self._env.num_envs), + np.zeros(self._env.num_envs), + np.zeros(self._env.num_envs), + info, + ) return self._state def _to_state(self, obs, rew, terminated, truncated, info): @@ -45,12 +52,15 @@ def _to_state(self, obs, rew, terminated, truncated, info): rew = rew.astype("float32") done = (terminated + truncated).astype("bool") mask = (1 - terminated).astype("float32") - return StateArray({ - "observation": torch.tensor(obs, device=self._device), - "reward": torch.tensor(rew, device=self._device), - "done": torch.tensor(done, device=self._device), - "mask": torch.tensor(mask, device=self._device) - }, shape=(self._env.num_envs,)) + return StateArray( + { + "observation": torch.tensor(obs, device=self._device), + "reward": torch.tensor(rew, device=self._device), + "done": torch.tensor(done, device=self._device), + "mask": torch.tensor(mask, device=self._device), + }, + shape=(self._env.num_envs,), + ) def step(self, action): state_tuple = self._env.step(action.cpu().detach().numpy()) @@ -62,11 +72,17 @@ def close(self): @property def state_space(self): - return getattr(self._env, "single_observation_space", getattr(self._env, "observation_space")) + return getattr( + self._env, + "single_observation_space", + getattr(self._env, "observation_space"), + ) @property def action_space(self): - return getattr(self._env, "single_action_space", getattr(self._env, "action_space")) + return getattr( + self._env, "single_action_space", getattr(self._env, "action_space") + ) @property def state_array(self): diff --git a/all/environments/vector_env_test.py b/all/environments/vector_env_test.py index a4cfba77..117c8a6f 100644 --- a/all/environments/vector_env_test.py +++ b/all/environments/vector_env_test.py @@ -1,18 +1,22 @@ import unittest + import gymnasium import torch -from all.environments import GymVectorEnvironment, GymEnvironment, DuplicateEnvironment + +from all.environments import DuplicateEnvironment, GymEnvironment, GymVectorEnvironment def make_vec_env(num_envs=3): - env = gymnasium.vector.SyncVectorEnv([lambda: gymnasium.make('CartPole-v0')] * num_envs) + env = gymnasium.vector.SyncVectorEnv( + [lambda: gymnasium.make("CartPole-v0")] * num_envs + ) return env class GymVectorEnvironmentTest(unittest.TestCase): def 
test_env_name(self): env = GymVectorEnvironment(make_vec_env(), "CartPole") - self.assertEqual(env.name, 'CartPole') + self.assertEqual(env.name, "CartPole") def test_num_envs(self): num_envs = 5 @@ -25,9 +29,30 @@ def test_reset(self): env = GymVectorEnvironment(make_vec_env(num_envs), "CartPole") state = env.reset() self.assertEqual(state.observation.shape, (num_envs, 4)) - self.assertTrue((state.reward == torch.zeros(num_envs, )).all()) - self.assertTrue((state.done == torch.zeros(num_envs, )).all()) - self.assertTrue((state.mask == torch.ones(num_envs, )).all()) + self.assertTrue( + ( + state.reward + == torch.zeros( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.done + == torch.zeros( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.mask + == torch.ones( + num_envs, + ) + ).all() + ) def test_step(self): num_envs = 5 @@ -35,9 +60,30 @@ def test_step(self): env.reset() state = env.step(torch.ones(num_envs, dtype=torch.int32)) self.assertEqual(state.observation.shape, (num_envs, 4)) - self.assertTrue((state.reward == torch.ones(num_envs, )).all()) - self.assertTrue((state.done == torch.zeros(num_envs, )).all()) - self.assertTrue((state.mask == torch.ones(num_envs, )).all()) + self.assertTrue( + ( + state.reward + == torch.ones( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.done + == torch.zeros( + num_envs, + ) + ).all() + ) + self.assertTrue( + ( + state.mask + == torch.ones( + num_envs, + ) + ).all() + ) def test_step_until_done(self): num_envs = 3 @@ -50,14 +96,16 @@ def test_step_until_done(self): else: self.assertTrue(False) self.assertEqual(state[0].observation.shape, (4,)) - self.assertEqual(state[0].reward, 1.) + self.assertEqual(state[0].reward, 1.0) self.assertTrue(state[0].done) self.assertEqual(state[0].mask, 0) def test_same_as_duplicate(self): n_envs = 3 torch.manual_seed(42) - env1 = DuplicateEnvironment([GymEnvironment('CartPole-v0') for i in range(n_envs)]) + env1 = DuplicateEnvironment( + [GymEnvironment("CartPole-v0") for i in range(n_envs)] + ) env2 = GymVectorEnvironment(make_vec_env(n_envs), "CartPole-v0") state1 = env1.reset(seed=42) state2 = env2.reset(seed=42) diff --git a/all/experiments/__init__.py b/all/experiments/__init__.py index f0f7b530..c6d230f7 100644 --- a/all/experiments/__init__.py +++ b/all/experiments/__init__.py @@ -1,11 +1,11 @@ -from .run_experiment import run_experiment from .experiment import Experiment -from .single_env_experiment import SingleEnvExperiment -from .parallel_env_experiment import ParallelEnvExperiment from .multiagent_env_experiment import MultiagentEnvExperiment +from .parallel_env_experiment import ParallelEnvExperiment from .plots import plot_returns_100 +from .run_experiment import run_experiment +from .single_env_experiment import SingleEnvExperiment from .slurm import SlurmExperiment -from .watch import watch, load_and_watch +from .watch import load_and_watch, watch __all__ = [ "run_experiment", @@ -16,5 +16,5 @@ "SlurmExperiment", "watch", "load_and_watch", - "plot_returns_100" + "plot_returns_100", ] diff --git a/all/experiments/experiment.py b/all/experiments/experiment.py index 0601da34..5611d7ae 100644 --- a/all/experiments/experiment.py +++ b/all/experiments/experiment.py @@ -1,16 +1,17 @@ from abc import ABC, abstractmethod + import numpy as np class Experiment(ABC): - ''' + """ An Experiment manages the basic train/test loop and logs results. Args: logger (:torch.logging.logger:): A Logger object used for logging. 
quiet (bool): If False, the Experiment will print information about episode returns to standard out. - ''' + """ def __init__(self, logger, quiet): self._logger = logger @@ -20,7 +21,7 @@ def __init__(self, logger, quiet): @abstractmethod def train(self, frames=np.inf, episodes=np.inf): - ''' + """ Train the agent for a certain number of frames or episodes. If both frames and episodes are specified, then the training loop will exit when either condition is satisfied. @@ -28,11 +29,11 @@ def train(self, frames=np.inf, episodes=np.inf): Args: frames (int): The maximum number of training frames. episodes (bool): The maximum number of training episodes. - ''' + """ @abstractmethod def test(self, episodes=100): - ''' + """ Test the agent in eval mode for a certain number of episodes. Args: @@ -40,58 +41,68 @@ def test(self, episodes=100): Returns: list(float): A list of all returns received during testing. - ''' + """ @property @abstractmethod def frame(self): - '''The index of the current training frame.''' + """The index of the current training frame.""" @property @abstractmethod def episode(self): - '''The index of the current training episode''' + """The index of the current training episode""" def _log_training_episode(self, returns, episode_length, fps): if not self._quiet: - print('episode: {}, frame: {}, fps: {}, episode_length: {}, returns: {}'.format( - self.episode, - self.frame, - int(fps), - episode_length, - returns - )) + print( + "episode: {}, frame: {}, fps: {}, episode_length: {}, returns: {}".format( + self.episode, self.frame, int(fps), episode_length, returns + ) + ) if returns > self._best_returns: self._best_returns = returns self._returns100.append(returns) if len(self._returns100) == 100: mean = np.mean(self._returns100) std = np.std(self._returns100) - self._logger.add_summary('returns100', mean, std, step="frame") + self._logger.add_summary("returns100", mean, std, step="frame") self._returns100 = [] - self._logger.add_eval('returns/episode', returns, step="episode") - self._logger.add_eval('returns/frame', returns, step="frame") + self._logger.add_eval("returns/episode", returns, step="episode") + self._logger.add_eval("returns/frame", returns, step="frame") self._logger.add_eval("returns/max", self._best_returns, step="frame") self._logger.add_eval("episode_length", episode_length) - self._logger.add_eval('fps', fps, step="frame") + self._logger.add_eval("fps", fps, step="frame") def _log_test_episode(self, episode, returns, episode_length): if not self._quiet: - print('test episode: {}, episode_length: {}, returns: {}'.format(episode, episode_length, returns)) + print( + "test episode: {}, episode_length: {}, returns: {}".format( + episode, episode_length, returns + ) + ) def _log_test(self, returns, episode_lengths): if not self._quiet: returns_mean = np.mean(returns) returns_sem = np.std(returns) / np.sqrt(len(returns)) - print('test returns (mean ± sem): {} ± {}'.format(returns_mean, returns_sem)) + print( + "test returns (mean ± sem): {} ± {}".format(returns_mean, returns_sem) + ) episode_length_mean = np.mean(episode_lengths) episode_length_sem = np.std(episode_lengths) / np.sqrt(len(episode_lengths)) - print('test episode length (mean ± sem): {} ± {}'.format(episode_length_mean, episode_length_sem)) - self._logger.add_summary('test_returns', np.mean(returns), np.std(returns)) - self._logger.add_summary('test_episode_length', np.mean(episode_lengths), np.std(episode_lengths)) + print( + "test episode length (mean ± sem): {} ± {}".format( + episode_length_mean, 
episode_length_sem + ) + ) + self._logger.add_summary("test_returns", np.mean(returns), np.std(returns)) + self._logger.add_summary( + "test_episode_length", np.mean(episode_lengths), np.std(episode_lengths) + ) def save(self): - return self._preset.save('{}/preset.pt'.format(self._logger.log_dir)) + return self._preset.save("{}/preset.pt".format(self._logger.log_dir)) def close(self): self._logger.close() diff --git a/all/experiments/multiagent_env_experiment.py b/all/experiments/multiagent_env_experiment.py index 3314f401..e141e037 100644 --- a/all/experiments/multiagent_env_experiment.py +++ b/all/experiments/multiagent_env_experiment.py @@ -1,10 +1,12 @@ from timeit import default_timer as timer + import numpy as np -from all.logging import ExperimentLogger, CometLogger + +from all.logging import CometLogger, ExperimentLogger -class MultiagentEnvExperiment(): - ''' +class MultiagentEnvExperiment: + """ An Experiment object for training and testing Multiagents. Args: @@ -17,20 +19,20 @@ class MultiagentEnvExperiment(): save_freq (int, optional): How often to save the model to disk. train_steps (int, optional): The number of steps for which to train. verbose (bool, optional): Whether or not to log detailed information or only summaries. - ''' + """ def __init__( - self, - preset, - env, - logdir='runs', - name=None, - quiet=False, - render=False, - save_freq=100, - train_steps=float('inf'), - verbose=True, - logger="tensorboard" + self, + preset, + env, + logdir="runs", + name=None, + quiet=False, + render=False, + save_freq=100, + train_steps=float("inf"), + verbose=True, + logger="tensorboard", ): self._name = name if name is not None else preset.name self._logger = self._make_logger(logdir, self._name, env.name, verbose, logger) @@ -47,7 +49,7 @@ def __init__( if render: self._env.render() - ''' + """ Train the Multiagent for a certain number of frames or episodes. If both frames and episodes are specified, then the training loop will exit when either condition is satisfied. @@ -58,14 +60,14 @@ def __init__( Returns: MultiagentEnvExperiment: The experiment object. - ''' + """ def train(self, frames=np.inf, episodes=np.inf): while not self._done(frames, episodes): self._run_training_episode() return self - ''' + """ Test the agent in eval mode for a certain number of episodes. Args: @@ -73,7 +75,7 @@ def train(self, frames=np.inf, episodes=np.inf): Returns: list(float): A list of all returns received during testing. 
- ''' + """ def test(self, episodes=100): test_agent = self._preset.test_agent() @@ -90,17 +92,19 @@ def test(self, episodes=100): return returns def save(self): - return self._preset.save('{}/preset.pt'.format(self._logger.log_dir)) + return self._preset.save("{}/preset.pt".format(self._logger.log_dir)) def close(self): self._logger.close() - '''int: The number of completed training frames''' + """int: The number of completed training frames""" + @property def frame(self): return self._frame - '''int: The number of completed training episodes''' + """int: The number of completed training episodes""" + @property def episode(self): return self._episode @@ -158,28 +162,38 @@ def _done(self, frames, episodes): def _log_training_episode(self, returns, fps): if not self._quiet: - print('returns: {}'.format(returns)) - print('frames: {}, fps: {}'.format(self._frame, fps)) + print("returns: {}".format(returns)) + print("frames: {}, fps: {}".format(self._frame, fps)) for agent in self._env.agents: - self._logger.add_eval('{}/returns/frame'.format(agent), returns[agent], step="frame") + self._logger.add_eval( + "{}/returns/frame".format(agent), returns[agent], step="frame" + ) def _log_test_episode(self, episode, returns): if not self._quiet: - print('test episode: {}, returns: {}'.format(episode, returns)) + print("test episode: {}, returns: {}".format(episode, returns)) def _log_test(self, returns): for agent, agent_returns in returns.items(): if not self._quiet: mean = np.mean(agent_returns) sem = np.std(agent_returns) / np.sqrt(len(agent_returns)) - print('{} test returns (mean ± sem): {} ± {}'.format(agent, mean, sem)) - self._logger.add_summary('{}/returns-test'.format(agent), np.mean(agent_returns), np.std(agent_returns)) + print("{} test returns (mean ± sem): {} ± {}".format(agent, mean, sem)) + self._logger.add_summary( + "{}/returns-test".format(agent), + np.mean(agent_returns), + np.std(agent_returns), + ) def _save_model(self): - if self._save_freq != float('inf') and self._episode % self._save_freq == 0: + if self._save_freq != float("inf") and self._episode % self._save_freq == 0: self.save() def _make_logger(self, logdir, agent_name, env_name, verbose, logger): if logger == "comet": - return CometLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir) - return ExperimentLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir) + return CometLogger( + self, agent_name, env_name, verbose=verbose, logdir=logdir + ) + return ExperimentLogger( + self, agent_name, env_name, verbose=verbose, logdir=logdir + ) diff --git a/all/experiments/multiagent_env_experiment_test.py b/all/experiments/multiagent_env_experiment_test.py index fbac6bbd..ad782c7b 100644 --- a/all/experiments/multiagent_env_experiment_test.py +++ b/all/experiments/multiagent_env_experiment_test.py @@ -1,17 +1,19 @@ import random import unittest + import numpy as np import torch -from all.presets.atari import dqn -from all.presets import IndependentMultiagentPreset + from all.environments import MultiagentAtariEnv from all.experiments import MultiagentEnvExperiment from all.experiments.single_env_experiment_test import MockLogger +from all.presets import IndependentMultiagentPreset +from all.presets.atari import dqn class MockExperiment(MultiagentEnvExperiment): def _make_logger(self, logdir, agent_name, env_name, verbose, logger): - self._logger = MockLogger(self, agent_name + '_' + env_name, verbose) + self._logger = MockLogger(self, agent_name + "_" + env_name, verbose) return self._logger @@ -20,57 
+22,96 @@ def setUp(self):
         random.seed(0)
         np.random.seed(0)
         torch.manual_seed(0)
-        self.env = MultiagentAtariEnv('space_invaders_v2', device='cpu', seed=0)
+        self.env = MultiagentAtariEnv("space_invaders_v2", device="cpu", seed=0)
         self.env.reset(seed=0)
         self.experiment = None
 
     def test_adds_default_name(self):
-        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf'))
+        experiment = MockExperiment(
+            self.make_preset(), self.env, quiet=True, save_freq=float("inf")
+        )
         self.assertEqual(experiment._logger.label, "independent_space_invaders_v2")
 
     def test_adds_custom_name(self):
-        experiment = MockExperiment(self.make_preset(), self.env, name='custom', quiet=True, save_freq=float('inf'))
+        experiment = MockExperiment(
+            self.make_preset(),
+            self.env,
+            name="custom",
+            quiet=True,
+            save_freq=float("inf"),
+        )
         self.assertEqual(experiment._logger.label, "custom_space_invaders_v2")
 
     def test_writes_training_returns(self):
-        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf'))
+        experiment = MockExperiment(
+            self.make_preset(), self.env, quiet=True, save_freq=float("inf")
+        )
         experiment.train(episodes=3)
         self.maxDiff = None
         # could not get the exact numbers to be reproducible across environments :(
-        self.assertEqual(len(experiment._logger.data['eval/first_0/returns/frame']['values']), 3)
-        self.assertEqual(len(experiment._logger.data['eval/first_0/returns/frame']['steps']), 3)
-        self.assertEqual(len(experiment._logger.data['eval/second_0/returns/frame']['values']), 3)
-        self.assertEqual(len(experiment._logger.data['eval/second_0/returns/frame']['steps']), 3)
+        self.assertEqual(
+            len(experiment._logger.data["eval/first_0/returns/frame"]["values"]), 3
+        )
+        self.assertEqual(
+            len(experiment._logger.data["eval/first_0/returns/frame"]["steps"]), 3
+        )
+        self.assertEqual(
+            len(experiment._logger.data["eval/second_0/returns/frame"]["values"]), 3
+        )
+        self.assertEqual(
+            len(experiment._logger.data["eval/second_0/returns/frame"]["steps"]), 3
+        )
 
     def test_writes_test_returns(self):
-        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, save_freq=float('inf'))
+        experiment = MockExperiment(
+            self.make_preset(), self.env, quiet=True, save_freq=float("inf")
+        )
         experiment.train(episodes=3)
         experiment._logger.data = {}
         experiment.test(episodes=3)
-        self.assertEqual(list(experiment._logger.data.keys()), [
-            'summary/first_0/returns-test/mean',
-            'summary/first_0/returns-test/std',
-            'summary/second_0/returns-test/mean',
-            'summary/second_0/returns-test/std'
-        ])
-        steps = experiment._logger.data['summary/first_0/returns-test/mean']['steps'][0]
+        self.assertEqual(
+            list(experiment._logger.data.keys()),
+            [
+                "summary/first_0/returns-test/mean",
+                "summary/first_0/returns-test/std",
+                "summary/second_0/returns-test/mean",
+                "summary/second_0/returns-test/std",
+            ],
+        )
+        steps = experiment._logger.data["summary/first_0/returns-test/mean"]["steps"][0]
         for datum in experiment._logger.data.values():
-            self.assertEqual(len(datum['values']), 1)
-            self.assertGreaterEqual(datum['values'][0], 0.0)
-            self.assertEqual(len(datum['steps']), 1)
-            self.assertEqual(datum['steps'][0], steps)
+            self.assertEqual(len(datum["values"]), 1)
+            self.assertGreaterEqual(datum["values"][0], 0.0)
+            self.assertEqual(len(datum["steps"]), 1)
+            self.assertEqual(datum["steps"][0], steps)
 
     def test_writes_loss(self):
-        experiment = MockExperiment(self.make_preset(), self.env, quiet=True, verbose=True, save_freq=float('inf'))
+        experiment = 
MockExperiment( + self.make_preset(), + self.env, + quiet=True, + verbose=True, + save_freq=float("inf"), + ) self.assertTrue(experiment._logger.verbose) - experiment = MockExperiment(self.make_preset(), self.env, quiet=True, verbose=False, save_freq=float('inf')) + experiment = MockExperiment( + self.make_preset(), + self.env, + quiet=True, + verbose=False, + save_freq=float("inf"), + ) self.assertFalse(experiment._logger.verbose) def make_preset(self): - return IndependentMultiagentPreset('independent', 'cpu', { - agent: dqn.device('cpu').env(env).build() - for agent, env in self.env.subenvs.items() - }) + return IndependentMultiagentPreset( + "independent", + "cpu", + { + agent: dqn.device("cpu").env(env).build() + for agent, env in self.env.subenvs.items() + }, + ) if __name__ == "__main__": diff --git a/all/experiments/parallel_env_experiment.py b/all/experiments/parallel_env_experiment.py index 6cb2f926..cc462974 100644 --- a/all/experiments/parallel_env_experiment.py +++ b/all/experiments/parallel_env_experiment.py @@ -1,29 +1,33 @@ - -import torch import time + import numpy as np -from all.logging import ExperimentLogger, CometLogger -from .experiment import Experiment +import torch + from all.environments import VectorEnvironment +from all.logging import CometLogger, ExperimentLogger + +from .experiment import Experiment class ParallelEnvExperiment(Experiment): - '''An Experiment object for training and testing agents that use parallel training environments.''' + """An Experiment object for training and testing agents that use parallel training environments.""" def __init__( - self, - preset, - env, - name=None, - train_steps=float('inf'), - logdir='runs', - quiet=False, - render=False, - verbose=True, - logger="tensorboard" + self, + preset, + env, + name=None, + train_steps=float("inf"), + logdir="runs", + quiet=False, + render=False, + verbose=True, + logger="tensorboard", ): self._name = name if name is not None else preset.name - super().__init__(self._make_logger(logdir, self._name, env.name, verbose, logger), quiet) + super().__init__( + self._make_logger(logdir, self._name, env.name, verbose, logger), quiet + ) self._n_envs = preset.n_envs if isinstance(env, VectorEnvironment): assert self._n_envs == env.num_envs @@ -117,8 +121,10 @@ def test(self, episodes=100): episode_length = episode_lengths[i] test_returns.append(episode_return) test_episode_lengths.append(episode_length) - self._log_test_episode(len(test_returns), episode_return, episode_length) - returns[i] = 0. 
+ self._log_test_episode( + len(test_returns), episode_return, episode_length + ) + returns[i] = 0.0 episode_lengths[i] = -1 episodes_started += 1 if episodes_started > episodes: @@ -132,5 +138,9 @@ def _done(self, frames, episodes): def _make_logger(self, logdir, agent_name, env_name, verbose, logger): if logger == "comet": - return CometLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir) - return ExperimentLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir) + return CometLogger( + self, agent_name, env_name, verbose=verbose, logdir=logdir + ) + return ExperimentLogger( + self, agent_name, env_name, verbose=verbose, logdir=logdir + ) diff --git a/all/experiments/parallel_env_experiment_test.py b/all/experiments/parallel_env_experiment_test.py index 6fd88843..f65d9e49 100644 --- a/all/experiments/parallel_env_experiment_test.py +++ b/all/experiments/parallel_env_experiment_test.py @@ -1,10 +1,12 @@ import unittest + import numpy as np import torch -from all.presets.classic_control import a2c + from all.environments import GymEnvironment from all.experiments import ParallelEnvExperiment from all.experiments.single_env_experiment_test import MockLogger +from all.presets.classic_control import a2c class MockExperiment(ParallelEnvExperiment): diff --git a/all/experiments/plots.py b/all/experiments/plots.py index a5f258fd..c3e69f37 100644 --- a/all/experiments/plots.py +++ b/all/experiments/plots.py @@ -1,6 +1,7 @@ import os -import numpy as np + import matplotlib.pyplot as plt +import numpy as np def plot_returns_100(runs_dir, timesteps=-1): @@ -51,11 +52,11 @@ def subplot_returns_100(ax, env, data, lines, timesteps=-1): if agent in lines: ax.plot(x, mean, label=agent, color=lines[agent].get_color()) else: - line, = ax.plot(x, mean, label=agent) + (line,) = ax.plot(x, mean, label=agent) lines[agent] = line ax.fill_between( x, mean + std, mean - std, alpha=0.2, color=lines[agent].get_color() ) ax.set_title(env) ax.set_xlabel("timesteps") - ax.ticklabel_format(style='sci', axis='x', scilimits=(0, 5)) + ax.ticklabel_format(style="sci", axis="x", scilimits=(0, 5)) diff --git a/all/experiments/run_experiment.py b/all/experiments/run_experiment.py index 5b153519..3ed2021d 100644 --- a/all/experiments/run_experiment.py +++ b/all/experiments/run_experiment.py @@ -1,18 +1,19 @@ -from .single_env_experiment import SingleEnvExperiment -from .parallel_env_experiment import ParallelEnvExperiment from all.presets import ParallelPreset +from .parallel_env_experiment import ParallelEnvExperiment +from .single_env_experiment import SingleEnvExperiment + def run_experiment( - agents, - envs, - frames, - logdir='runs', - quiet=False, - render=False, - test_episodes=100, - verbose=True, - logger="tensorboard" + agents, + envs, + frames, + logdir="runs", + quiet=False, + render=False, + test_episodes=100, + verbose=True, + logger="tensorboard", ): if not isinstance(agents, list): agents = [agents] @@ -32,7 +33,7 @@ def run_experiment( quiet=quiet, render=render, verbose=verbose, - logger=logger + logger=logger, ) experiment.save() experiment.train(frames=frames) diff --git a/all/experiments/single_env_experiment.py b/all/experiments/single_env_experiment.py index 49dd269a..d3ecb0ed 100644 --- a/all/experiments/single_env_experiment.py +++ b/all/experiments/single_env_experiment.py @@ -1,27 +1,31 @@ from timeit import default_timer as timer + import numpy as np -from all.logging import ExperimentLogger, CometLogger + +from all.logging import CometLogger, ExperimentLogger from 
.experiment import Experiment class SingleEnvExperiment(Experiment): - '''An Experiment object for training and testing agents that interact with one environment at a time.''' + """An Experiment object for training and testing agents that interact with one environment at a time.""" def __init__( - self, - preset, - env, - name=None, - train_steps=float('inf'), - logdir='runs', - quiet=False, - render=False, - verbose=True, - logger="tensorboard" + self, + preset, + env, + name=None, + train_steps=float("inf"), + logdir="runs", + quiet=False, + render=False, + verbose=True, + logger="tensorboard", ): self._name = name if name is not None else preset.name - super().__init__(self._make_logger(logdir, self._name, env.name, verbose, logger), quiet) + super().__init__( + self._make_logger(logdir, self._name, env.name, verbose, logger), quiet + ) self._logdir = logdir self._preset = preset self._agent = self._preset.agent(logger=self._logger, train_steps=train_steps) @@ -111,5 +115,9 @@ def _done(self, frames, episodes): def _make_logger(self, logdir, agent_name, env_name, verbose, logger): if logger == "comet": - return CometLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir) - return ExperimentLogger(self, agent_name, env_name, verbose=verbose, logdir=logdir) + return CometLogger( + self, agent_name, env_name, verbose=verbose, logdir=logdir + ) + return ExperimentLogger( + self, agent_name, env_name, verbose=verbose, logdir=logdir + ) diff --git a/all/experiments/single_env_experiment_test.py b/all/experiments/single_env_experiment_test.py index e2e6adc6..1a7e50f5 100644 --- a/all/experiments/single_env_experiment_test.py +++ b/all/experiments/single_env_experiment_test.py @@ -1,10 +1,12 @@ import unittest + import numpy as np import torch -from all.presets.classic_control import dqn + from all.environments import GymEnvironment from all.experiments import SingleEnvExperiment from all.logging import Logger +from all.presets.classic_control import dqn class MockLogger(Logger): @@ -76,7 +78,7 @@ def test_writes_training_returns_frame(self): experiment.train(episodes=3) np.testing.assert_equal( experiment._logger.data["eval/returns/frame"]["values"], - np.array([22., 17., 28.]), + np.array([22.0, 17.0, 28.0]), ) np.testing.assert_equal( experiment._logger.data["eval/returns/frame"]["steps"], diff --git a/all/experiments/slurm.py b/all/experiments/slurm.py index f70396e0..7e4e0903 100644 --- a/all/experiments/slurm.py +++ b/all/experiments/slurm.py @@ -2,29 +2,27 @@ import os import subprocess import sys -from .run_experiment import run_experiment +from .run_experiment import run_experiment # track the number of experiments created # in the current process -ID = { - "value": 1 -} +ID = {"value": 1} class SlurmExperiment: def __init__( - self, - agents, - envs, - frames, - test_episodes=100, - verbose=False, - job_name='autonomous-learning-library', - script_name='experiment.sh', - outdir='out', - logdir='runs', - sbatch_args=None, + self, + agents, + envs, + frames, + test_episodes=100, + verbose=False, + job_name="autonomous-learning-library", + script_name="experiment.sh", + outdir="out", + logdir="runs", + sbatch_args=None, ): if not isinstance(agents, list): agents = [agents] @@ -59,12 +57,12 @@ def __init__( self.queue_jobs() def parse_args(self): - parser = argparse.ArgumentParser(description='Run an Atari benchmark.') - parser.add_argument('--experiment_id', type=int) + parser = argparse.ArgumentParser(description="Run an Atari benchmark.") + 
parser.add_argument("--experiment_id", type=int) self.args = parser.parse_args() def run_experiment(self): - task_id = int(os.environ['SLURM_ARRAY_TASK_ID']) + task_id = int(os.environ["SLURM_ARRAY_TASK_ID"]) env = self.envs[int(task_id / len(self.agents))] agent = self.agents[task_id % len(self.agents)] run_experiment( @@ -73,7 +71,7 @@ def run_experiment(self): self.frames, test_episodes=self.test_episodes, logdir=self.logdir, - verbose=self.verbose + verbose=self.verbose, ) def queue_jobs(self): @@ -82,41 +80,41 @@ def queue_jobs(self): self.run_sbatch_script() def create_sbatch_script(self): - script = open(self.script_name, 'w') - script.write('#!/bin/sh\n\n') + script = open(self.script_name, "w") + script.write("#!/bin/sh\n\n") num_experiments = len(self.envs) * len(self.agents) sbatch_args = { - 'job-name': self.job_name, - 'output': os.path.join(self.outdir, 'all_%A_%a.out'), - 'error': os.path.join(self.outdir, 'all_%A_%a.err'), - 'array': '0-' + str(num_experiments - 1), - 'partition': '1080ti-short', - 'ntasks': 1, - 'mem-per-cpu': 4000, - 'gres': 'gpu:1' + "job-name": self.job_name, + "output": os.path.join(self.outdir, "all_%A_%a.out"), + "error": os.path.join(self.outdir, "all_%A_%a.err"), + "array": "0-" + str(num_experiments - 1), + "partition": "1080ti-short", + "ntasks": 1, + "mem-per-cpu": 4000, + "gres": "gpu:1", } sbatch_args.update(self.sbatch_args) for key, value in sbatch_args.items(): - script.write('#SBATCH --' + key + '=' + str(value) + '\n') - script.write('\n') + script.write("#SBATCH --" + key + "=" + str(value) + "\n") + script.write("\n") - script.write('python ' + sys.argv[0] + ' --experiment_id ' + str(self._id) + '\n') + script.write( + "python " + sys.argv[0] + " --experiment_id " + str(self._id) + "\n" + ) script.close() - print('created sbatch script:', self.script_name) + print("created sbatch script:", self.script_name) def make_output_directory(self): try: os.mkdir(self.outdir) - print('Created output directory:', self.outdir) + print("Created output directory:", self.outdir) except FileExistsError: - print('Output directory already exists:', self.outdir) + print("Output directory already exists:", self.outdir) def run_sbatch_script(self): result = subprocess.run( - ['sbatch', self.script_name], - stdout=subprocess.PIPE, - check=True + ["sbatch", self.script_name], stdout=subprocess.PIPE, check=True ) - print(result.stdout.decode('utf-8').rstrip()) + print(result.stdout.decode("utf-8").rstrip()) diff --git a/all/experiments/watch.py b/all/experiments/watch.py index b5922feb..0842448a 100644 --- a/all/experiments/watch.py +++ b/all/experiments/watch.py @@ -1,6 +1,7 @@ +import sys import time + import torch -import sys def watch(agent, env, fps=60, n_episodes=sys.maxsize): @@ -15,7 +16,7 @@ def watch(agent, env, fps=60, n_episodes=sys.maxsize): returns += env.state.reward if env.state.done: - print('returns:', returns) + print("returns:", returns) env.reset() returns = 0 diff --git a/all/experiments/watch_test.py b/all/experiments/watch_test.py index 6327cd69..2aaab832 100644 --- a/all/experiments/watch_test.py +++ b/all/experiments/watch_test.py @@ -1,17 +1,19 @@ import unittest from unittest import mock + import torch + from all.environments import GymEnvironment from all.experiments.watch import load_and_watch -class MockAgent(): +class MockAgent: def act(self): # sample from cartpole action space return torch.randint(0, 2, []) -class MockPreset(): +class MockPreset: def __init__(self, filename): self.filename = filename @@ -20,9 +22,9 @@ def 
test_agent(self): class WatchTest(unittest.TestCase): - @mock.patch('torch.load', lambda filename: MockPreset(filename)) - @mock.patch('time.sleep', mock.MagicMock()) - @mock.patch('sys.stdout', mock.MagicMock()) + @mock.patch("torch.load", lambda filename: MockPreset(filename)) + @mock.patch("time.sleep", mock.MagicMock()) + @mock.patch("sys.stdout", mock.MagicMock()) def test_load_and_watch(self): env = mock.MagicMock(GymEnvironment("CartPole-v0", render_mode="rgb_array")) load_and_watch("file.name", env, n_episodes=3) diff --git a/all/logging/__init__.py b/all/logging/__init__.py index 8bd7deed..df81429c 100644 --- a/all/logging/__init__.py +++ b/all/logging/__init__.py @@ -1,6 +1,5 @@ from ._logger import Logger from .dummy import DummyLogger -from .experiment import ExperimentLogger, CometLogger - +from .experiment import CometLogger, ExperimentLogger __all__ = ["Logger", "DummyLogger", "ExperimentLogger", "CometLogger"] diff --git a/all/logging/_logger.py b/all/logging/_logger.py index 23d5ecde..728060b0 100644 --- a/all/logging/_logger.py +++ b/all/logging/_logger.py @@ -6,7 +6,7 @@ class Logger(ABC): @abstractmethod def add_summary(self, name, mean, std, step="frame"): - ''' + """ Log a summary statistic. Args: @@ -14,54 +14,54 @@ def add_summary(self, name, mean, std, step="frame"): mean (float): The mean of the statistic at the current step std (float): The standard deviation of the statistic at the current step step (str, optional): Which step to use (e.g., "frame" or "episode") - ''' + """ @abstractmethod def add_loss(self, name, value, step="frame"): - ''' + """ Log the given loss metric at the current step. Args: name (str): The tag to associate with the loss value (number): The value of the loss at the current step step (str, optional): Which step to use (e.g., "frame" or "episode") - ''' + """ @abstractmethod def add_eval(self, name, value, step="frame"): - ''' + """ Log the given evaluation metric at the current step. Args: name (str): The tag to associate with the loss value (number): The evaluation metric at the current step step (str, optional): Which step to use (e.g., "frame" or "episode") - ''' + """ @abstractmethod def add_info(self, name, value, step="frame"): - ''' + """ Log the given informational metric at the current step. Args: name (str): The tag to associate with the loss value (number): The evaluation metric at the current step step (str, optional): Which step to use (e.g., "frame" or "episode") - ''' + """ @abstractmethod def add_schedule(self, name, value, step="frame"): - ''' + """ Log the current value of a hyperparameter according to some schedule. Args: name (str): The tag to associate with the hyperparameter schedule value (number): The value of the hyperparameter at the current step step (str, optional): Which step to use (e.g., "frame" or "episode") - ''' + """ @abstractmethod def close(self): - ''' + """ Close the logger and perform any necessary cleanup. 
- ''' + """ diff --git a/all/logging/dummy.py b/all/logging/dummy.py index 1e70d1d6..a7430105 100644 --- a/all/logging/dummy.py +++ b/all/logging/dummy.py @@ -2,7 +2,7 @@ class DummyLogger(Logger): - '''A default Logger object that performs no logging and has no side effects.''' + """A default Logger object that performs no logging and has no side effects.""" def add_summary(self, name, mean, std, step="frame"): pass diff --git a/all/logging/experiment.py b/all/logging/experiment.py index c2136f2d..d4fc5279 100644 --- a/all/logging/experiment.py +++ b/all/logging/experiment.py @@ -1,14 +1,15 @@ - -import os import csv +import os import subprocess from datetime import datetime + from torch.utils.tensorboard import SummaryWriter + from ._logger import Logger class ExperimentLogger(SummaryWriter, Logger): - ''' + """ The default Logger object used by all.experiments.Experiment. Writes logs using tensorboard into the current logdir directory ('runs' by default), tagging the run with a combination of the agent name, the commit hash of the @@ -19,11 +20,11 @@ class ExperimentLogger(SummaryWriter, Logger): agent_name (str): The name of the Agent the Experiment is being performed on env_name (str): The name of the environment the Experiment is being performed in verbose (bool, optional): Whether or not to log all data or only summary metrics. - ''' + """ - def __init__(self, experiment, agent_name, env_name, verbose=True, logdir='runs'): + def __init__(self, experiment, agent_name, env_name, verbose=True, logdir="runs"): self.env_name = env_name - current_time = datetime.now().strftime('%Y-%m-%d_%H:%M:%S_%f') + current_time = datetime.now().strftime("%Y-%m-%d_%H:%M:%S_%f") dir_name = "%s_%s_%s" % (agent_name, COMMIT_HASH, current_time) os.makedirs(os.path.join(logdir, dir_name, env_name)) self.log_dir = os.path.join(logdir, dir_name) @@ -32,10 +33,16 @@ def __init__(self, experiment, agent_name, env_name, verbose=True, logdir='runs' super().__init__(log_dir=self.log_dir) def add_summary(self, name, mean, std, step="frame"): - super().add_scalar('{}/summary/{}/mean'.format(self.env_name, name), mean, self._get_step(step)) - super().add_scalar('{}/summary/{}/std'.format(self.env_name, name), std, self._get_step(step)) + super().add_scalar( + "{}/summary/{}/mean".format(self.env_name, name), mean, self._get_step(step) + ) + super().add_scalar( + "{}/summary/{}/std".format(self.env_name, name), std, self._get_step(step) + ) - with open(os.path.join(self.log_dir, self.env_name, name + ".csv"), "a") as csvfile: + with open( + os.path.join(self.log_dir, self.env_name, name + ".csv"), "a" + ) as csvfile: csv.writer(csvfile).writerow([self._get_step(step), mean, std]) def add_loss(self, name, value, step="frame"): @@ -66,7 +73,7 @@ def close(self): class CometLogger(Logger): - ''' + """ A Logger object to be used by all.experiments.Experiment. Writes logs using comet.ml Requires an API key to be stored in .comet.config or as an environment variable. Look at https://www.comet.ml/docs/python-sdk/advanced/#python-configuration for more info. @@ -76,9 +83,9 @@ class CometLogger(Logger): env_name (str): The name of the environment the Experiment is being performed in loss (bool, optional): Whether or not to log loss/scheduling metrics, or only evaluation and summary metrics. logdir (str): The directory where run information is stored. 
- ''' + """ - def __init__(self, experiment, agent_name, env_name, verbose=True, logdir='runs'): + def __init__(self, experiment, agent_name, env_name, verbose=True, logdir="runs"): self.env_name = env_name self._experiment = experiment self._verbose = not verbose @@ -86,23 +93,33 @@ def __init__(self, experiment, agent_name, env_name, verbose=True, logdir='runs' try: from comet_ml import Experiment except ImportError as e: - print("Failed to import comet_ml. CometLogger requires that comet_ml be installed") + print( + "Failed to import comet_ml. CometLogger requires that comet_ml be installed" + ) raise e try: self._comet = Experiment(project_name=env_name) except ImportError as e: - print("See https://www.comet.ml/docs/python-sdk/warnings-errors/ for more info on this error.") + print( + "See https://www.comet.ml/docs/python-sdk/warnings-errors/ for more info on this error." + ) raise e except ValueError as e: - print("See https://www.comet.ml/docs/python-sdk/advanced/#python-configuration for more info on this error.") + print( + "See https://www.comet.ml/docs/python-sdk/advanced/#python-configuration for more info on this error." + ) raise e self._comet.set_name(agent_name) self.log_dir = logdir def add_summary(self, name, mean, std, step="frame"): - self._comet.log_metric('{}/summary/{}/mean'.format(self.env_name, name), mean, self._get_step(step)) - self._comet.log_metric('{}/summary/{}/std'.format(self.env_name, name), std, self._get_step(step)) + self._comet.log_metric( + "{}/summary/{}/mean".format(self.env_name, name), mean, self._get_step(step) + ) + self._comet.log_metric( + "{}/summary/{}/std".format(self.env_name, name), std, self._get_step(step) + ) def add_loss(self, name, value, step="frame"): self._add_scalar("loss/" + name, value, step) @@ -137,11 +154,11 @@ def get_commit_hash(): ["git", "rev-parse", "--short", "HEAD"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, - check=False + check=False, ) return result.stdout.decode("utf-8").rstrip() except Exception: - return '' + return "" COMMIT_HASH = get_commit_hash() diff --git a/all/memory/__init__.py b/all/memory/__init__.py index f9f70a9d..f6cfd6aa 100644 --- a/all/memory/__init__.py +++ b/all/memory/__init__.py @@ -1,11 +1,11 @@ +from .advantage import NStepAdvantageBuffer +from .generalized_advantage import GeneralizedAdvantageBuffer from .replay_buffer import ( - ReplayBuffer, ExperienceReplayBuffer, - PrioritizedReplayBuffer, NStepReplayBuffer, + PrioritizedReplayBuffer, + ReplayBuffer, ) -from .advantage import NStepAdvantageBuffer -from .generalized_advantage import GeneralizedAdvantageBuffer __all__ = [ "ReplayBuffer", diff --git a/all/memory/advantage.py b/all/memory/advantage.py index 7ab42b20..caacbada 100644 --- a/all/memory/advantage.py +++ b/all/memory/advantage.py @@ -1,4 +1,5 @@ import torch + from all.core import State @@ -40,32 +41,22 @@ def advantages(self, states): advantages = self._compute_advantages(states, rewards, next_states, lengths) self._clear_buffers() - return ( - states, - actions, - advantages - ) + return (states, actions, advantages) def _compute_returns(self): sample_returns = torch.zeros( - (self.n_steps, self.n_envs), - device=self._rewards[0].device + (self.n_steps, self.n_envs), device=self._rewards[0].device ) sample_lengths = torch.zeros( - (self.n_steps, self.n_envs), - device=self._rewards[0].device + (self.n_steps, self.n_envs), device=self._rewards[0].device ) current_returns = self._rewards[0] * 0 current_lengths = current_returns.clone() for i in range(self.n_steps): t 
= self.n_steps - 1 - i mask = self._states[t + 1].mask.float() - current_returns = ( - self._rewards[t] + self.gamma * current_returns * mask - ) - current_lengths = ( - 1 + current_lengths * mask - ) + current_returns = self._rewards[t] + self.gamma * current_returns * mask + current_lengths = 1 + current_lengths * mask sample_returns[t] = current_returns sample_lengths[t] = current_lengths @@ -95,7 +86,7 @@ def _summarize_transitions(self): return ( State.array(sample_states), torch.stack(sample_actions), - State.array(sample_next_states) + State.array(sample_next_states), ) def _compute_advantages(self, states, rewards, next_states, lengths): diff --git a/all/memory/advantage_test.py b/all/memory/advantage_test.py index 2223e4d4..368eecd4 100644 --- a/all/memory/advantage_test.py +++ b/all/memory/advantage_test.py @@ -1,9 +1,11 @@ import unittest + import torch import torch_testing as tt + from all import nn +from all.approximation import FeatureNetwork, VNetwork from all.core import StateArray -from all.approximation import VNetwork, FeatureNetwork from all.memory import NStepAdvantageBuffer @@ -30,21 +32,22 @@ def test_rollout(self): expected_states = StateArray(torch.arange(0, 6).unsqueeze(1).float(), (6,)) expected_next_states = StateArray( - torch.cat((torch.arange(6, 9), torch.arange(6, 9))).unsqueeze(1).float(), (6,) + torch.cat((torch.arange(6, 9), torch.arange(6, 9))).unsqueeze(1).float(), + (6,), ) - expected_returns = torch.tensor([ - 0.5, 0.5, 0.5, - 1, 1, 1 - ]).float() - expected_lengths = torch.tensor([ - 2., 2, 2, - 1, 1, 1 - ]) + expected_returns = torch.tensor([0.5, 0.5, 0.5, 1, 1, 1]).float() + expected_lengths = torch.tensor([2.0, 2, 2, 1, 1, 1]) self.assert_states_equal(states, expected_states) - tt.assert_allclose(advantages, self._compute_expected_advantages( - expected_states, expected_returns, expected_next_states, expected_lengths - )) + tt.assert_allclose( + advantages, + self._compute_expected_advantages( + expected_states, + expected_returns, + expected_next_states, + expected_lengths, + ), + ) def test_rollout_with_dones(self): buffer = NStepAdvantageBuffer(self.v, self.features, 3, 3, discount_factor=0.5) @@ -59,31 +62,31 @@ def test_rollout_with_dones(self): buffer.store(states[6:9], actions, 2 * torch.ones(3)) states, actions, advantages = buffer.advantages(states[9:12]) - expected_states = StateArray(torch.arange(0, 9).unsqueeze(1).float(), (9,), done=done[0:9]) + expected_states = StateArray( + torch.arange(0, 9).unsqueeze(1).float(), (9,), done=done[0:9] + ) expected_next_done = torch.tensor([True] * 9) expected_next_done[5] = False expected_next_done[7] = False expected_next_done[8] = False - expected_next_states = StateArray(torch.tensor([ - 9, 7, 5, - 9, 7, 11, - 9, 10, 11 - ]).unsqueeze(1).float(), (9,), done=expected_next_done) - expected_returns = torch.tensor([ - 1, 0.5, 0, - 2, 1, 2, - 2, 2, 2 - ]).float() - expected_lengths = torch.tensor([ - 3, 2, 1, - 2, 1, 2, - 1, 1, 1 - ]).float() + expected_next_states = StateArray( + torch.tensor([9, 7, 5, 9, 7, 11, 9, 10, 11]).unsqueeze(1).float(), + (9,), + done=expected_next_done, + ) + expected_returns = torch.tensor([1, 0.5, 0, 2, 1, 2, 2, 2, 2]).float() + expected_lengths = torch.tensor([3, 2, 1, 2, 1, 2, 1, 1, 1]).float() self.assert_states_equal(states, expected_states) - tt.assert_allclose(advantages, self._compute_expected_advantages( - expected_states, expected_returns, expected_next_states, expected_lengths - )) + tt.assert_allclose( + advantages, + self._compute_expected_advantages( + 
expected_states, + expected_returns, + expected_next_states, + expected_lengths, + ), + ) def test_multi_rollout(self): buffer = NStepAdvantageBuffer(self.v, self.features, 2, 2, discount_factor=0.5) @@ -95,15 +98,20 @@ def test_multi_rollout(self): states, actions, advantages = buffer.advantages(raw_states[4:6]) expected_states = StateArray(torch.arange(0, 4).unsqueeze(1).float(), (4,)) expected_returns = torch.tensor([1.5, 1.5, 1, 1]) - expected_next_states = StateArray(torch.tensor([4., 5, 4, 5]).unsqueeze(1), (4,)) - expected_lengths = torch.tensor([2., 2, 1, 1]) + expected_next_states = StateArray( + torch.tensor([4.0, 5, 4, 5]).unsqueeze(1), (4,) + ) + expected_lengths = torch.tensor([2.0, 2, 1, 1]) self.assert_states_equal(states, expected_states) - tt.assert_allclose(advantages, self._compute_expected_advantages( - expected_states, - expected_returns, - expected_next_states, - expected_lengths - )) + tt.assert_allclose( + advantages, + self._compute_expected_advantages( + expected_states, + expected_returns, + expected_next_states, + expected_lengths, + ), + ) buffer.store(raw_states[4:6], actions, torch.ones(2)) buffer.store(raw_states[6:8], actions, torch.ones(2)) @@ -111,22 +119,28 @@ def test_multi_rollout(self): states, actions, advantages = buffer.advantages(raw_states[8:10]) expected_states = StateArray(torch.arange(4, 8).unsqueeze(1).float(), (4,)) self.assert_states_equal(states, expected_states) - tt.assert_allclose(advantages, self._compute_expected_advantages( - expected_states, - torch.tensor([1.5, 1.5, 1, 1]), - StateArray(torch.tensor([8, 9, 8, 9]).unsqueeze(1).float(), (4,)), - torch.tensor([2., 2, 1, 1]) - )) + tt.assert_allclose( + advantages, + self._compute_expected_advantages( + expected_states, + torch.tensor([1.5, 1.5, 1, 1]), + StateArray(torch.tensor([8, 9, 8, 9]).unsqueeze(1).float(), (4,)), + torch.tensor([2.0, 2, 1, 1]), + ), + ) def assert_array_equal(self, actual, expected): for i, exp in enumerate(expected): - self.assertEqual(actual[i], exp, msg=( - ("\nactual: %s\nexpected: %s") % (actual, expected))) + self.assertEqual( + actual[i], + exp, + msg=(("\nactual: %s\nexpected: %s") % (actual, expected)), + ) def assert_states_equal(self, actual, expected): tt.assert_almost_equal(actual.observation, expected.observation) tt.assert_equal(actual.mask, expected.mask) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/memory/generalized_advantage.py b/all/memory/generalized_advantage.py index ae5428f4..2bbbc8da 100644 --- a/all/memory/generalized_advantage.py +++ b/all/memory/generalized_advantage.py @@ -1,18 +1,19 @@ import torch + from all.core import State from all.optim import Schedulable class GeneralizedAdvantageBuffer(Schedulable): def __init__( - self, - v, - features, - n_steps, - n_envs, - discount_factor=1, - lam=1, - compute_batch_size=256, + self, + v, + features, + n_steps, + n_envs, + discount_factor=1, + lam=1, + compute_batch_size=256, ): self.v = v self.features = features @@ -48,23 +49,26 @@ def advantages(self, next_states): raise Exception("Not enough states received!") self._states.append(next_states) - states = State.array(self._states[0:self.n_steps + 1]) - actions = torch.cat(self._actions[:self.n_steps], dim=0) - rewards = torch.stack(self._rewards[:self.n_steps]) + states = State.array(self._states[0 : self.n_steps + 1]) + actions = torch.cat(self._actions[: self.n_steps], dim=0) + rewards = torch.stack(self._rewards[: self.n_steps]) - _values = 
states.flatten().batch_execute(self.compute_batch_size, lambda s: self.v.target(self.features.target(s))).view(states.shape) - values = _values[0:self.n_steps] + _values = ( + states.flatten() + .batch_execute( + self.compute_batch_size, + lambda s: self.v.target(self.features.target(s)), + ) + .view(states.shape) + ) + values = _values[0 : self.n_steps] next_values = _values[1:] td_errors = rewards + self.gamma * next_values - values advantages = self._compute_advantages(td_errors) self._clear_buffers() - return ( - states[0:-1].flatten(), - actions, - advantages.view(-1) - ) + return (states[0:-1].flatten(), actions, advantages.view(-1)) def _compute_advantages(self, td_errors): advantages = td_errors.clone() @@ -75,7 +79,9 @@ def _compute_advantages(self, td_errors): for i in range(self.n_steps): t = self.n_steps - 1 - i mask = self._states[t + 1].mask.float() - current_advantages = td_errors[t] + self.gamma * self.lam * current_advantages * mask + current_advantages = ( + td_errors[t] + self.gamma * self.lam * current_advantages * mask + ) advantages[t] = current_advantages return advantages diff --git a/all/memory/generalized_advantage_test.py b/all/memory/generalized_advantage_test.py index 593938a4..37cd0b05 100644 --- a/all/memory/generalized_advantage_test.py +++ b/all/memory/generalized_advantage_test.py @@ -1,9 +1,11 @@ import unittest + import torch import torch_testing as tt + from all import nn +from all.approximation import FeatureNetwork, VNetwork from all.core import State -from all.approximation import VNetwork, FeatureNetwork from all.memory import GeneralizedAdvantageBuffer @@ -16,27 +18,26 @@ def setUp(self): def _compute_expected_advantages(self, states, returns, next_states, lengths): return ( returns - + (0.5 ** lengths) * self.v.eval(self.features.eval(next_states)) + + (0.5**lengths) * self.v.eval(self.features.eval(next_states)) - self.v.eval(self.features.eval(states)) ) def test_simple(self): buffer = GeneralizedAdvantageBuffer( - self.v, - self.features, - 2, - 1, - discount_factor=0.5, - lam=0.5 + self.v, self.features, 2, 1, discount_factor=0.5, lam=0.5 ) actions = torch.ones((1)) - states = State.array([State({'observation': torch.tensor([float(x)])}) for x in range(3)]) - rewards = torch.tensor([1., 2, 4]) + states = State.array( + [State({"observation": torch.tensor([float(x)])}) for x in range(3)] + ) + rewards = torch.tensor([1.0, 2, 4]) buffer.store(states[0], actions, rewards[0]) buffer.store(states[1], actions, rewards[1]) values = self.v.eval(self.features.eval(states)) - tt.assert_almost_equal(values, torch.tensor([0.1826, -0.3476, -0.8777]), decimal=3) + tt.assert_almost_equal( + values, torch.tensor([0.1826, -0.3476, -0.8777]), decimal=3 + ) td_errors = torch.zeros(2) td_errors[0] = rewards[0] + 0.5 * values[1] - values[0] @@ -54,66 +55,66 @@ def test_simple(self): def test_parallel(self): buffer = GeneralizedAdvantageBuffer( - self.v, - self.features, - 2, - 2, - discount_factor=0.5, - lam=0.5 + self.v, self.features, 2, 2, discount_factor=0.5, lam=0.5 ) actions = torch.ones((2)) def make_states(x, y): - return State.array([ - State({'observation': torch.tensor([float(x)])}), - State({'observation': torch.tensor([float(y)])}) - ]) - - states = State.array([ - make_states(0, 3), - make_states(1, 4), - make_states(2, 5), - ]) + return State.array( + [ + State({"observation": torch.tensor([float(x)])}), + State({"observation": torch.tensor([float(y)])}), + ] + ) + + states = State.array( + [ + make_states(0, 3), + make_states(1, 4), + 
make_states(2, 5), + ] + ) self.assertEqual(states.shape, (3, 2)) - rewards = torch.tensor([[1., 1], [2, 1], [4, 1]]) + rewards = torch.tensor([[1.0, 1], [2, 1], [4, 1]]) buffer.store(states[0], actions, rewards[0]) buffer.store(states[1], actions, rewards[1]) values = self.v.eval(self.features.eval(states)).view(3, -1) - tt.assert_almost_equal(values, torch.tensor([ - [0.183, -1.408], - [-0.348, -1.938], - [-0.878, -2.468] - ]), decimal=3) + tt.assert_almost_equal( + values, + torch.tensor([[0.183, -1.408], [-0.348, -1.938], [-0.878, -2.468]]), + decimal=3, + ) td_errors = torch.zeros(2, 2) td_errors[0] = rewards[0] + 0.5 * values[1] - values[0] td_errors[1] = rewards[1] + 0.5 * values[2] - values[1] - tt.assert_almost_equal(td_errors, torch.tensor([ - [0.6436, 1.439], - [1.909, 1.704] - ]), decimal=3) + tt.assert_almost_equal( + td_errors, torch.tensor([[0.6436, 1.439], [1.909, 1.704]]), decimal=3 + ) advantages = torch.zeros(2, 2) advantages[0] = td_errors[0] + 0.25 * td_errors[1] advantages[1] = td_errors[1] - tt.assert_almost_equal(advantages, torch.tensor([ - [1.121, 1.865], - [1.909, 1.704] - ]), decimal=3) + tt.assert_almost_equal( + advantages, torch.tensor([[1.121, 1.865], [1.909, 1.704]]), decimal=3 + ) _states, _actions, _advantages = buffer.advantages(states[2]) tt.assert_almost_equal(_advantages, advantages.view(-1)) def assert_array_equal(self, actual, expected): for i, exp in enumerate(expected): - self.assertEqual(actual[i], exp, msg=( - ("\nactual: %s\nexpected: %s") % (actual, expected))) + self.assertEqual( + actual[i], + exp, + msg=(("\nactual: %s\nexpected: %s") % (actual, expected)), + ) def assert_states_equal(self, actual, expected): tt.assert_almost_equal(actual.raw, expected.raw) tt.assert_equal(actual.mask, expected.mask) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/memory/replay_buffer.py b/all/memory/replay_buffer.py index d949cace..a89c9c21 100644 --- a/all/memory/replay_buffer.py +++ b/all/memory/replay_buffer.py @@ -1,29 +1,32 @@ from abc import ABC, abstractmethod + import numpy as np import torch + from all.core import State from all.optim import Schedulable -from .segment_tree import SumSegmentTree, MinSegmentTree + +from .segment_tree import MinSegmentTree, SumSegmentTree class ReplayBuffer(ABC): @abstractmethod def store(self, state, action, reward, next_state): - '''Store the transition in the buffer''' + """Store the transition in the buffer""" @abstractmethod def sample(self, batch_size): - '''Sample from the stored transitions''' + """Sample from the stored transitions""" @abstractmethod def update_priorities(self, indexes, td_errors): - '''Update priorities based on the TD error''' + """Update priorities based on the TD error""" # Adapted from: # https://github.com/Shmuma/ptan/blob/master/ptan/experience.py class ExperienceReplayBuffer(ReplayBuffer): - def __init__(self, size, device='cpu', store_device=None): + def __init__(self, size, device="cpu", store_device=None): self.buffer = [] self.capacity = int(size) self.pos = 0 @@ -58,7 +61,9 @@ def _reshape(self, minibatch, weights): if torch.is_tensor(minibatch[0][1]): actions = torch.stack([sample[1] for sample in minibatch]).to(self.device) else: - actions = torch.tensor([sample[1] for sample in minibatch], device=self.device) + actions = torch.tensor( + [sample[1] for sample in minibatch], device=self.device + ) next_states = State.array([sample[2] for sample in minibatch]).to(self.device) return (states, actions, next_states.reward, next_states, 
weights) @@ -71,13 +76,13 @@ def __iter__(self): class PrioritizedReplayBuffer(ExperienceReplayBuffer, Schedulable): def __init__( - self, - buffer_size, - alpha=0.6, - beta=0.4, - epsilon=1e-5, - device=torch.device('cpu'), - store_device=None + self, + buffer_size, + alpha=0.6, + beta=0.4, + epsilon=1e-5, + device=torch.device("cpu"), + store_device=None, ): super().__init__(buffer_size, device=device, store_device=store_device) @@ -100,8 +105,8 @@ def store(self, state, action, next_state): return idx = self.pos super().store(state, action, next_state) - self._it_sum[idx] = self._max_priority ** self._alpha - self._it_min[idx] = self._max_priority ** self._alpha + self._it_sum[idx] = self._max_priority**self._alpha + self._it_min[idx] = self._max_priority**self._alpha def sample(self, batch_size): beta = self._beta @@ -120,7 +125,7 @@ def sample(self, batch_size): try: samples = [self.buffer[idx] for idx in idxes] except IndexError as e: - print('index out of range: ', idxes) + print("index out of range: ", idxes) raise e self._cache = idxes return self._reshape(samples, torch.from_numpy(weights).to(self.device)) @@ -134,8 +139,8 @@ def update_priorities(self, priorities): assert priority > 0 assert priority < np.inf assert 0 <= idx < len(self) - self._it_sum[idx] = priority ** self._alpha - self._it_min[idx] = priority ** self._alpha + self._it_sum[idx] = priority**self._alpha + self._it_min[idx] = priority**self._alpha self._max_priority = max(self._max_priority, priority) def _sample_proportional(self, batch_size): @@ -150,13 +155,13 @@ def _sample_proportional(self, batch_size): class NStepReplayBuffer(ReplayBuffer): - '''Converts any ReplayBuffer into an NStepReplayBuffer''' + """Converts any ReplayBuffer into an NStepReplayBuffer""" def __init__( - self, - steps, - discount_factor, - buffer, + self, + steps, + discount_factor, + buffer, ): assert steps >= 1 assert discount_factor >= 0 @@ -166,7 +171,7 @@ def __init__( self._states = [] self._actions = [] self._rewards = [] - self._reward = 0. + self._reward = 0.0 def store(self, state, action, next_state): if state is None or state.done: @@ -175,7 +180,9 @@ def store(self, state, action, next_state): self._states.append(state) self._actions.append(action) self._rewards.append(next_state.reward) - self._reward += (self.discount_factor ** (len(self._states) - 1)) * next_state.reward + self._reward += ( + self.discount_factor ** (len(self._states) - 1) + ) * next_state.reward if len(self._states) == self.steps: self._store_next(next_state) @@ -183,12 +190,14 @@ def store(self, state, action, next_state): if next_state.done: while self._states: self._store_next(next_state) - self._reward = 0. 
+ self._reward = 0.0 def _store_next(self, next_state): - self.buffer.store(self._states[0], self._actions[0], next_state.update('reward', self._reward)) + self.buffer.store( + self._states[0], self._actions[0], next_state.update("reward", self._reward) + ) self._reward = self._reward - self._rewards[0] - self._reward *= self.discount_factor ** -1 + self._reward *= self.discount_factor**-1 del self._states[0] del self._actions[0] del self._rewards[0] diff --git a/all/memory/replay_buffer_test.py b/all/memory/replay_buffer_test.py index ccaa6202..52a4f40c 100644 --- a/all/memory/replay_buffer_test.py +++ b/all/memory/replay_buffer_test.py @@ -1,13 +1,15 @@ -import unittest import random -import torch +import unittest + import numpy as np +import torch import torch_testing as tt + from all.core import State, StateArray from all.memory import ( ExperienceReplayBuffer, - PrioritizedReplayBuffer, NStepReplayBuffer, + PrioritizedReplayBuffer, ) @@ -52,17 +54,21 @@ def test_run(self): def test_store_device(self): if torch.cuda.is_available(): - self.replay_buffer = ExperienceReplayBuffer(5, device='cuda', store_device='cpu') + self.replay_buffer = ExperienceReplayBuffer( + 5, device="cuda", store_device="cpu" + ) - states = torch.arange(0, 20).to('cuda') - actions = torch.arange(0, 20).view((-1, 1)).to('cuda') - rewards = torch.arange(0, 20).to('cuda') + states = torch.arange(0, 20).to("cuda") + actions = torch.arange(0, 20).view((-1, 1)).to("cuda") + rewards = torch.arange(0, 20).to("cuda") state = State(states[0]) next_state = State(states[1], reward=rewards[1]) self.replay_buffer.store(state, actions[0], next_state) sample = self.replay_buffer.sample(3) - self.assertEqual(sample[0].device, torch.device('cuda')) - self.assertEqual(self.replay_buffer.buffer[0][0].device, torch.device('cpu')) + self.assertEqual(sample[0].device, torch.device("cuda")) + self.assertEqual( + self.replay_buffer.buffer[0][0].device, torch.device("cpu") + ) class TestPrioritizedReplayBuffer(unittest.TestCase): @@ -73,7 +79,9 @@ def setUp(self): self.replay_buffer = PrioritizedReplayBuffer(5, 0.6) def test_run(self): - states = StateArray(torch.arange(0, 20), (20,), reward=torch.arange(-1, 19).float()) + states = StateArray( + torch.arange(0, 20), (20,), reward=torch.arange(-1, 19).float() + ) actions = torch.arange(0, 20).view((-1, 1)) expected_samples = State( torch.tensor( @@ -127,7 +135,9 @@ def setUp(self): self.replay_buffer = NStepReplayBuffer(4, 0.5, ExperienceReplayBuffer(100)) def test_run(self): - states = StateArray(torch.arange(0, 20), (20,), reward=torch.arange(-1, 19).float()) + states = StateArray( + torch.arange(0, 20), (20,), reward=torch.arange(-1, 19).float() + ) actions = torch.arange(0, 20) for i in range(3): @@ -141,22 +151,24 @@ def test_run(self): sample = self.replay_buffer.buffer.buffer[0] self.assert_states_equal(sample[0], states[0]) tt.assert_equal(sample[1], actions[0]) - tt.assert_equal(sample[2].reward, torch.tensor(0 + 1 * 0.5 + 2 * 0.25 + 3 * 0.125)) + tt.assert_equal( + sample[2].reward, torch.tensor(0 + 1 * 0.5 + 2 * 0.25 + 3 * 0.125) + ) tt.assert_equal( self.replay_buffer.buffer.buffer[1][2].reward, torch.tensor(1 + 2 * 0.5 + 3 * 0.25 + 4 * 0.125), ) def test_done(self): - state = State(torch.tensor(1), reward=1.) 
+ state = State(torch.tensor(1), reward=1.0) action = torch.tensor(0) - done_state = State(torch.tensor(1), reward=1., done=True) + done_state = State(torch.tensor(1), reward=1.0, done=True) self.replay_buffer.store(state, action, done_state) self.assertEqual(len(self.replay_buffer), 1) sample = self.replay_buffer.buffer.buffer[0] self.assert_states_equal(state, sample[0]) - self.assertEqual(sample[2].reward, 1.) + self.assertEqual(sample[2].reward, 1.0) self.replay_buffer.store(state, action, state) self.replay_buffer.store(state, action, state) diff --git a/all/memory/segment_tree.py b/all/memory/segment_tree.py index a2313a5d..988ff907 100644 --- a/all/memory/segment_tree.py +++ b/all/memory/segment_tree.py @@ -27,7 +27,9 @@ def __init__(self, capacity, operation, neutral_element): neutral element for the operation above. eg. float('-inf') for max and 0 for sum. """ - assert capacity > 0 and capacity & (capacity - 1) == 0, "capacity must be positive and a power of 2." + assert ( + capacity > 0 and capacity & (capacity - 1) == 0 + ), "capacity must be positive and a power of 2." self._capacity = capacity self._value = [neutral_element for _ in range(2 * capacity)] self._operation = operation @@ -44,7 +46,7 @@ def _reduce_helper(self, start, end, node, node_start, node_end): else: return self._operation( self._reduce_helper(start, mid, 2 * node, node_start, mid), - self._reduce_helper(mid + 1, end, 2 * node + 1, mid + 1, node_end) + self._reduce_helper(mid + 1, end, 2 * node + 1, mid + 1, node_end), ) def reduce(self, start=0, end=None): @@ -76,8 +78,7 @@ def __setitem__(self, idx, val): idx //= 2 while idx >= 1: self._value[idx] = self._operation( - self._value[2 * idx], - self._value[2 * idx + 1] + self._value[2 * idx], self._value[2 * idx + 1] ) idx //= 2 @@ -90,9 +91,7 @@ class SumSegmentTree(SegmentTree): # stolen from https://github.com/Shmuma/ptan/blob/master/ptan/common/utils.py def __init__(self, capacity): super(SumSegmentTree, self).__init__( - capacity=capacity, - operation=operator.add, - neutral_element=0.0 + capacity=capacity, operation=operator.add, neutral_element=0.0 ) def sum(self, start=0, end=None): @@ -129,9 +128,7 @@ class MinSegmentTree(SegmentTree): # stolen from https://github.com/Shmuma/ptan/blob/master/ptan/common/utils.py def __init__(self, capacity): super(MinSegmentTree, self).__init__( - capacity=capacity, - operation=min, - neutral_element=float('inf') + capacity=capacity, operation=min, neutral_element=float("inf") ) def min(self, start=0, end=None): diff --git a/all/nn/__init__.py b/all/nn/__init__.py index c60c9a97..67dfa9db 100644 --- a/all/nn/__init__.py +++ b/all/nn/__init__.py @@ -1,10 +1,8 @@ +import numpy as np import torch from torch import nn from torch.nn import * # noqa from torch.nn import functional as F -import numpy as np -from all.core import State - """"A Pytorch Module""" Module = nn.Module @@ -21,7 +19,7 @@ def __init__(self, model, _=None): self.device = next(model.parameters()).device def forward(self, state): - return state.apply(self.model, 'observation') + return state.apply(self.model, "observation") class Aggregation(nn.Module): @@ -139,7 +137,9 @@ class NoisyFactorizedLinear(nn.Linear): N.B. 
nn.Linear already initializes weight and bias to """ - def __init__(self, in_features, out_features, sigma_init=0.4, init_scale=3, bias=True): + def __init__( + self, in_features, out_features, sigma_init=0.4, init_scale=3, bias=True + ): self.init_scale = init_scale super().__init__(in_features, out_features, bias=bias) sigma_init = sigma_init / np.sqrt(in_features) @@ -149,9 +149,7 @@ def __init__(self, in_features, out_features, sigma_init=0.4, init_scale=3, bias self.register_buffer("epsilon_input", torch.zeros(1, in_features)) self.register_buffer("epsilon_output", torch.zeros(out_features, 1)) if bias: - self.sigma_bias = nn.Parameter( - torch.Tensor(out_features).fill_(sigma_init) - ) + self.sigma_bias = nn.Parameter(torch.Tensor(out_features).fill_(sigma_init)) def reset_parameters(self): std = np.sqrt(self.init_scale / self.in_features) @@ -218,13 +216,13 @@ def _loss(estimates, errors): return _loss -def weighted_mse_loss(input, target, weight, reduction='mean'): - loss = (weight * ((target - input) ** 2)) - return torch.mean(loss) if reduction == 'mean' else torch.sum(loss) +def weighted_mse_loss(input, target, weight, reduction="mean"): + loss = weight * ((target - input) ** 2) + return torch.mean(loss) if reduction == "mean" else torch.sum(loss) -def weighted_smooth_l1_loss(input, target, weight, reduction='mean'): +def weighted_smooth_l1_loss(input, target, weight, reduction="mean"): t = torch.abs(input - target) - loss = torch.where(t < 1, 0.5 * t ** 2, t - 0.5) + loss = torch.where(t < 1, 0.5 * t**2, t - 0.5) loss = weight * loss - return torch.mean(loss) if reduction == 'mean' else torch.sum(loss) + return torch.mean(loss) if reduction == "mean" else torch.sum(loss) diff --git a/all/nn/nn_test.py b/all/nn/nn_test.py index 5e465532..4380e14f 100644 --- a/all/nn/nn_test.py +++ b/all/nn/nn_test.py @@ -1,8 +1,10 @@ import unittest + +import gymnasium import numpy as np import torch import torch_testing as tt -import gymnasium + from all import nn from all.core import StateArray diff --git a/all/optim/__init__.py b/all/optim/__init__.py index c2a3058a..694126c7 100644 --- a/all/optim/__init__.py +++ b/all/optim/__init__.py @@ -1,3 +1,3 @@ from .scheduler import LinearScheduler, Schedulable -__all__ = ['Schedulable', 'LinearScheduler'] +__all__ = ["Schedulable", "LinearScheduler"] diff --git a/all/optim/scheduler.py b/all/optim/scheduler.py index d79c5d77..6fe5eae1 100644 --- a/all/optim/scheduler.py +++ b/all/optim/scheduler.py @@ -2,7 +2,7 @@ class Schedulable: - '''Allow "instance" descriptors to implement parameter scheduling.''' + """Allow "instance" descriptors to implement parameter scheduling.""" def __getattribute__(self, name): value = object.__getattribute__(self, name) @@ -17,13 +17,13 @@ class Scheduler: class LinearScheduler(Scheduler): def __init__( - self, - initial_value, - final_value, - decay_start, - decay_end, - name='variable', - logger=DummyLogger(), + self, + initial_value, + final_value, + decay_start, + decay_end, + name="variable", + logger=DummyLogger(), ): self._initial_value = initial_value self._final_value = final_value diff --git a/all/optim/scheduler_test.py b/all/optim/scheduler_test.py index 3a66d237..e96a2152 100644 --- a/all/optim/scheduler_test.py +++ b/all/optim/scheduler_test.py @@ -1,6 +1,8 @@ import unittest + import numpy as np -from all.optim import Schedulable, LinearScheduler + +from all.optim import LinearScheduler, Schedulable class Obj(Schedulable): @@ -17,5 +19,5 @@ def test_linear_scheduler(self): 
np.testing.assert_allclose(actual, expected) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/policies/__init__.py b/all/policies/__init__.py index 1d264856..a92e7fac 100644 --- a/all/policies/__init__.py +++ b/all/policies/__init__.py @@ -1,8 +1,8 @@ +from .deterministic import DeterministicPolicy from .gaussian import GaussianPolicy from .greedy import GreedyPolicy, ParallelGreedyPolicy -from .softmax import SoftmaxPolicy -from .deterministic import DeterministicPolicy from .soft_deterministic import SoftDeterministicPolicy +from .softmax import SoftmaxPolicy __all__ = [ "GaussianPolicy", @@ -10,5 +10,5 @@ "ParallelGreedyPolicy", "SoftmaxPolicy", "DeterministicPolicy", - "SoftDeterministicPolicy" + "SoftDeterministicPolicy", ] diff --git a/all/policies/deterministic.py b/all/policies/deterministic.py index e69253ec..8a740ebe 100644 --- a/all/policies/deterministic.py +++ b/all/policies/deterministic.py @@ -1,10 +1,11 @@ import torch + from all.approximation import Approximation from all.nn import RLNetwork class DeterministicPolicy(Approximation): - ''' + """ A DDPG-style deterministic policy. Args: @@ -15,23 +16,11 @@ class DeterministicPolicy(Approximation): model parameters, e.g. SGD, Adam, RMSprop, etc. action_space (gymnasium.spaces.Box): The Box representing the action space. kwargs (optional): Any other arguments accepted by all.approximation.Approximation - ''' - - def __init__( - self, - model, - optimizer=None, - space=None, - name='policy', - **kwargs - ): + """ + + def __init__(self, model, optimizer=None, space=None, name="policy", **kwargs): model = DeterministicPolicyNetwork(model, space) - super().__init__( - model, - optimizer, - name=name, - **kwargs - ) + super().__init__(model, optimizer, name=name, **kwargs) class DeterministicPolicyNetwork(RLNetwork): diff --git a/all/policies/deterministic_test.py b/all/policies/deterministic_test.py index 683800a6..9b42077e 100644 --- a/all/policies/deterministic_test.py +++ b/all/policies/deterministic_test.py @@ -1,10 +1,12 @@ import unittest + +import numpy as np import torch import torch_testing as tt -import numpy as np from gymnasium.spaces import Box + from all import nn -from all.approximation import FixedTarget, DummyCheckpointer +from all.approximation import DummyCheckpointer, FixedTarget from all.core import State from all.policies import DeterministicPolicy @@ -15,16 +17,11 @@ class TestDeterministic(unittest.TestCase): def setUp(self): torch.manual_seed(2) - self.model = nn.Sequential( - nn.Linear0(STATE_DIM, ACTION_DIM) - ) + self.model = nn.Sequential(nn.Linear0(STATE_DIM, ACTION_DIM)) self.optimizer = torch.optim.RMSprop(self.model.parameters(), lr=0.01) self.space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1])) self.policy = DeterministicPolicy( - self.model, - self.optimizer, - self.space, - checkpointer=DummyCheckpointer() + self.model, self.optimizer, self.space, checkpointer=DummyCheckpointer() ) def test_output_shape(self): @@ -54,10 +51,7 @@ def test_converge(self): def test_target(self): self.policy = DeterministicPolicy( - self.model, - self.optimizer, - self.space, - target=FixedTarget(3) + self.model, self.optimizer, self.space, target=FixedTarget(3) ) state = State(torch.ones(1, STATE_DIM)) @@ -81,5 +75,5 @@ def test_target(self): ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/policies/gaussian.py b/all/policies/gaussian.py index eaf76c67..85a68095 100644 --- a/all/policies/gaussian.py +++ b/all/policies/gaussian.py 
@@ -1,12 +1,13 @@ import torch from torch.distributions.independent import Independent from torch.distributions.normal import Normal + from all.approximation import Approximation from all.nn import RLNetwork class GaussianPolicy(Approximation): - ''' + """ A Gaussian stochastic policy. This policy will choose actions from a distribution represented by a spherical Gaussian. @@ -24,21 +25,11 @@ class GaussianPolicy(Approximation): model parameters, e.g. SGD, Adam, RMSprop, etc. action_space (gymnasium.spaces.Box): The Box representing the action space. kwargs (optional): Any other arguments accepted by all.approximation.Approximation - ''' - - def __init__( - self, - model, - optimizer=None, - space=None, - name='policy', - **kwargs - ): + """ + + def __init__(self, model, optimizer=None, space=None, name="policy", **kwargs): super().__init__( - GaussianPolicyNetwork(model, space), - optimizer, - name=name, - **kwargs + GaussianPolicyNetwork(model, space), optimizer, name=name, **kwargs ) diff --git a/all/policies/gaussian_test.py b/all/policies/gaussian_test.py index 3bcaf83f..e83be068 100644 --- a/all/policies/gaussian_test.py +++ b/all/policies/gaussian_test.py @@ -1,9 +1,11 @@ import unittest + import numpy as np import torch -from torch import nn import torch_testing as tt from gymnasium.spaces import Box +from torch import nn + from all.approximation import DummyCheckpointer from all.core import State from all.policies import GaussianPolicy @@ -16,11 +18,11 @@ class TestGaussian(unittest.TestCase): def setUp(self): torch.manual_seed(2) self.space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1])) - self.model = nn.Sequential( - nn.Linear(STATE_DIM, ACTION_DIM * 2) - ) + self.model = nn.Sequential(nn.Linear(STATE_DIM, ACTION_DIM * 2)) optimizer = torch.optim.RMSprop(self.model.parameters(), lr=0.01) - self.policy = GaussianPolicy(self.model, optimizer, self.space, checkpointer=DummyCheckpointer()) + self.policy = GaussianPolicy( + self.model, optimizer, self.space, checkpointer=DummyCheckpointer() + ) def test_output_shape(self): state = State(torch.randn(1, STATE_DIM)) @@ -45,7 +47,7 @@ def test_reinforce_one(self): def test_converge(self): state = State(torch.randn(1, STATE_DIM)) - target = torch.tensor([1., 2., -1.]) + target = torch.tensor([1.0, 2.0, -1.0]) for _ in range(0, 1000): dist = self.policy(state) @@ -60,11 +62,13 @@ def test_converge(self): def test_eval(self): state = State(torch.randn(1, STATE_DIM)) dist = self.policy.no_grad(state) - tt.assert_almost_equal(dist.mean, torch.tensor([[-0.237, 0.497, -0.058]]), decimal=3) + tt.assert_almost_equal( + dist.mean, torch.tensor([[-0.237, 0.497, -0.058]]), decimal=3 + ) tt.assert_almost_equal(dist.entropy(), torch.tensor([4.254]), decimal=3) best = self.policy.eval(state).sample() tt.assert_almost_equal(best, torch.tensor([[-0.888, -0.887, 0.404]]), decimal=3) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/policies/greedy.py b/all/policies/greedy.py index d24ed7c8..cc8e168a 100644 --- a/all/policies/greedy.py +++ b/all/policies/greedy.py @@ -1,10 +1,11 @@ import numpy as np import torch + from all.optim import Schedulable class GreedyPolicy(Schedulable): - ''' + """ An "epsilon-greedy" action selection policy for discrete action spaces. This policy will usually choose the optimal action according to an approximation @@ -16,13 +17,13 @@ class GreedyPolicy(Schedulable): q (all.approximation.QNetwork): The action-value or "q-function" num_actions (int): The number of available actions. 
epsilon (float, optional): The probability of selecting a random action. - ''' + """ def __init__( - self, - q, - num_actions, - epsilon=0., + self, + q, + num_actions, + epsilon=0.0, ): self.q = q self.num_actions = num_actions @@ -45,7 +46,7 @@ def eval(self, state): class ParallelGreedyPolicy(Schedulable): - ''' + """ A parallel version of the "epsilon-greedy" action selection policy for discrete action spaces. This policy will usually choose the optimal action according to an approximation @@ -57,13 +58,13 @@ class ParallelGreedyPolicy(Schedulable): q (all.approximation.QNetwork): The action-value or "q-function" num_actions (int): The number of available actions. epsilon (float, optional): The probability of selecting a random action. - ''' + """ def __init__( - self, - q, - num_actions, - epsilon=0., + self, + q, + num_actions, + epsilon=0.0, ): self.q = q self.num_actions = num_actions @@ -80,6 +81,10 @@ def eval(self, state): def _choose_action(self, action_values): best_actions = torch.argmax(action_values, dim=-1) - random_actions = torch.randint(0, self.num_actions, best_actions.shape, device=best_actions.device) - choices = (torch.rand(best_actions.shape, device=best_actions.device) < self.epsilon).int() + random_actions = torch.randint( + 0, self.num_actions, best_actions.shape, device=best_actions.device + ) + choices = ( + torch.rand(best_actions.shape, device=best_actions.device) < self.epsilon + ).int() return choices * random_actions + (1 - choices) * best_actions diff --git a/all/policies/soft_deterministic.py b/all/policies/soft_deterministic.py index ffb8a0d0..b3075e3e 100644 --- a/all/policies/soft_deterministic.py +++ b/all/policies/soft_deterministic.py @@ -1,10 +1,11 @@ import torch + from all.approximation import Approximation from all.nn import RLNetwork class SoftDeterministicPolicy(Approximation): - ''' + """ A "soft" deterministic policy compatible with soft actor-critic (SAC). Args: @@ -17,16 +18,9 @@ class SoftDeterministicPolicy(Approximation): model parameters, e.g. SGD, Adam, RMSprop, etc. action_space (gymnasium.spaces.Box): The Box representing the action space. kwargs (optional): Any other arguments accepted by all.approximation.Approximation - ''' + """ - def __init__( - self, - model, - optimizer=None, - space=None, - name="policy", - **kwargs - ): + def __init__(self, model, optimizer=None, space=None, name="policy", **kwargs): model = SoftDeterministicPolicyNetwork(model, space) self._inner_model = model super().__init__(model, optimizer, name=name, **kwargs) @@ -48,8 +42,8 @@ def forward(self, state): return self._squash(normal.loc) def _normal(self, outputs): - means = outputs[..., 0:self._action_dim] - logvars = outputs[..., self._action_dim:] + means = outputs[..., 0 : self._action_dim] + logvars = outputs[..., self._action_dim :] std = logvars.mul(0.5).exp_() return torch.distributions.normal.Normal(means, std) @@ -59,7 +53,7 @@ def _sample(self, normal): return self._squash(raw), log_prob def _log_prob(self, normal, raw): - ''' + """ Compute the log probability of a raw action after the action is squashed. Both inputs act on the raw underlying distribution. Because tanh_mean does not affect the density, we can ignore it. @@ -72,7 +66,7 @@ def _log_prob(self, normal, raw): Returns: torch.Tensor: The probability of the raw action, accounting for the affects of tanh. 
- ''' + """ log_prob = normal.log_prob(raw) log_prob -= torch.log(1 - torch.tanh(raw).pow(2) + 1e-6) log_prob -= torch.log(self._tanh_scale) diff --git a/all/policies/soft_deterministic_test.py b/all/policies/soft_deterministic_test.py index f8c4a62a..5479a35e 100644 --- a/all/policies/soft_deterministic_test.py +++ b/all/policies/soft_deterministic_test.py @@ -1,8 +1,10 @@ import unittest -import torch + import numpy as np +import torch import torch_testing as tt from gymnasium.spaces import Box + from all import nn from all.approximation import DummyCheckpointer from all.core import State @@ -15,16 +17,11 @@ class TestSoftDeterministic(unittest.TestCase): def setUp(self): torch.manual_seed(2) - self.model = nn.Sequential( - nn.Linear0(STATE_DIM, ACTION_DIM * 2) - ) + self.model = nn.Sequential(nn.Linear0(STATE_DIM, ACTION_DIM * 2)) self.optimizer = torch.optim.RMSprop(self.model.parameters(), lr=0.01) self.space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1])) self.policy = SoftDeterministicPolicy( - self.model, - self.optimizer, - self.space, - checkpointer=DummyCheckpointer() + self.model, self.optimizer, self.space, checkpointer=DummyCheckpointer() ) def test_output_shape(self): @@ -61,7 +58,7 @@ def test_scaling(self): policy1 = SoftDeterministicPolicy( self.model, self.optimizer, - Box(np.array([-1., -1., -1.]), np.array([1., 1., 1.])) + Box(np.array([-1.0, -1.0, -1.0]), np.array([1.0, 1.0, 1.0])), ) action1, log_prob1 = policy1(state) @@ -71,7 +68,7 @@ def test_scaling(self): policy2 = SoftDeterministicPolicy( self.model, self.optimizer, - Box(np.array([-2., -1., -1.]), np.array([2., 1., 1.])) + Box(np.array([-2.0, -1.0, -1.0]), np.array([2.0, 1.0, 1.0])), ) action2, log_prob2 = policy2(state) @@ -80,5 +77,5 @@ def test_scaling(self): tt.assert_allclose(log_prob1 - np.log(2), log_prob2) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/policies/softmax.py b/all/policies/softmax.py index 431333fe..f011d5b9 100644 --- a/all/policies/softmax.py +++ b/all/policies/softmax.py @@ -1,11 +1,12 @@ import torch from torch.nn import functional -from all.nn import RLNetwork + from all.approximation import Approximation +from all.nn import RLNetwork class SoftmaxPolicy(Approximation): - ''' + """ A softmax (or Boltzmann) stochastic policy for discrete actions. Args: @@ -15,15 +16,9 @@ class SoftmaxPolicy(Approximation): optimizer (torch.optim.Optimizer): A optimizer initialized with the model parameters, e.g. SGD, Adam, RMSprop, etc. 
kwargs (optional): Any other arguments accepted by all.approximation.Approximation - ''' - - def __init__( - self, - model, - optimizer=None, - name='policy', - **kwargs - ): + """ + + def __init__(self, model, optimizer=None, name="policy", **kwargs): model = SoftmaxPolicyNetwork(model) super().__init__(model, optimizer, name=name, **kwargs) diff --git a/all/policies/softmax_test.py b/all/policies/softmax_test.py index 6584b44c..a543ac15 100644 --- a/all/policies/softmax_test.py +++ b/all/policies/softmax_test.py @@ -1,7 +1,9 @@ import unittest + import torch -from torch import nn import torch_testing as tt +from torch import nn + from all.core import State from all.policies import SoftmaxPolicy @@ -12,9 +14,7 @@ class TestSoftmax(unittest.TestCase): def setUp(self): torch.manual_seed(2) - self.model = nn.Sequential( - nn.Linear(STATE_DIM, ACTIONS) - ) + self.model = nn.Sequential(nn.Linear(STATE_DIM, ACTIONS)) optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1) self.policy = SoftmaxPolicy(self.model, optimizer) @@ -63,25 +63,33 @@ def loss(log_probs): # notice the values increase with each successive reinforce log_probs = self.policy(states).log_prob(actions) - tt.assert_almost_equal(log_probs, torch.tensor([-0.84, -0.62, -0.757]), decimal=3) + tt.assert_almost_equal( + log_probs, torch.tensor([-0.84, -0.62, -0.757]), decimal=3 + ) self.policy.reinforce(loss(log_probs)) log_probs = self.policy(states).log_prob(actions) - tt.assert_almost_equal(log_probs, torch.tensor([-0.811, -0.561, -0.701]), decimal=3) + tt.assert_almost_equal( + log_probs, torch.tensor([-0.811, -0.561, -0.701]), decimal=3 + ) self.policy.reinforce(loss(log_probs)) log_probs = self.policy(states).log_prob(actions) - tt.assert_almost_equal(log_probs, torch.tensor([-0.785, -0.51, -0.651]), decimal=3) + tt.assert_almost_equal( + log_probs, torch.tensor([-0.785, -0.51, -0.651]), decimal=3 + ) def test_eval(self): states = State(torch.randn(3, STATE_DIM), torch.tensor([1, 1, 1])) dist = self.policy.no_grad(states) - tt.assert_almost_equal(dist.probs, torch.tensor([ - [0.352, 0.216, 0.432], - [0.266, 0.196, 0.538], - [0.469, 0.227, 0.304] - ]), decimal=3) + tt.assert_almost_equal( + dist.probs, + torch.tensor( + [[0.352, 0.216, 0.432], [0.266, 0.196, 0.538], [0.469, 0.227, 0.304]] + ), + decimal=3, + ) best = self.policy.eval(states).sample() tt.assert_equal(best, torch.tensor([2, 2, 0])) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/all/presets/__init__.py b/all/presets/__init__.py index 82d875eb..be333fb1 100644 --- a/all/presets/__init__.py +++ b/all/presets/__init__.py @@ -1,9 +1,8 @@ -from all.presets import atari -from all.presets import classic_control -from all.presets import continuous -from .preset import Preset, ParallelPreset -from .builder import PresetBuilder, ParallelPresetBuilder +from all.presets import atari, classic_control, continuous + +from .builder import ParallelPresetBuilder, PresetBuilder from .independent_multiagent import IndependentMultiagentPreset +from .preset import ParallelPreset, Preset __all__ = [ "Preset", @@ -13,5 +12,5 @@ "atari", "classic_control", "continuous", - "IndependentMultiagentPreset" + "IndependentMultiagentPreset", ] diff --git a/all/presets/atari/__init__.py b/all/presets/atari/__init__.py index 836b5657..f5b338de 100644 --- a/all/presets/atari/__init__.py +++ b/all/presets/atari/__init__.py @@ -1,14 +1,13 @@ -from .a2c import a2c, A2CAtariPreset -from .c51 import c51, C51AtariPreset -from .dqn import dqn, DQNAtariPreset -from 
.ddqn import ddqn, DDQNAtariPreset -from .ppo import ppo, PPOAtariPreset -from .rainbow import rainbow, RainbowAtariPreset -from .vac import vac, VACAtariPreset -from .vpg import vpg, VPGAtariPreset -from .vqn import vqn, VQNAtariPreset -from .vsarsa import vsarsa, VSarsaAtariPreset - +from .a2c import a2c +from .c51 import c51 +from .ddqn import ddqn +from .dqn import dqn +from .ppo import ppo +from .rainbow import rainbow +from .vac import vac +from .vpg import vpg +from .vqn import vqn +from .vsarsa import vsarsa __all__ = [ "a2c", diff --git a/all/presets/atari/a2c.py b/all/presets/atari/a2c.py index 030bff3f..a61c8e5d 100644 --- a/all/presets/atari/a2c.py +++ b/all/presets/atari/a2c.py @@ -1,16 +1,20 @@ import copy -import math + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import A2C, A2CTestAgent +from all.approximation import FeatureNetwork, VNetwork from all.bodies import DeepmindAtariBody -from all.approximation import VNetwork, FeatureNetwork from all.logging import DummyLogger from all.policies import SoftmaxPolicy +from all.presets.atari.models import ( + nature_features, + nature_policy_head, + nature_value_head, +) from all.presets.builder import ParallelPresetBuilder from all.presets.preset import ParallelPreset -from all.presets.atari.models import nature_features, nature_value_head, nature_policy_head - default_hyperparameters = { # Common settings @@ -28,7 +32,7 @@ # Model construction "feature_model_constructor": nature_features, "value_model_constructor": nature_value_head, - "policy_model_constructor": nature_policy_head + "policy_model_constructor": nature_policy_head, } @@ -58,23 +62,37 @@ class A2CAtariPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = hyperparameters['feature_model_constructor']().to(device) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = hyperparameters["feature_model_constructor"]().to(device) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = train_steps / (self.hyperparameters['n_steps'] * self.hyperparameters['n_envs']) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = train_steps / ( + self.hyperparameters["n_steps"] * self.hyperparameters["n_envs"] + ) - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) + feature_optimizer = Adam( + self.feature_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) features = FeatureNetwork( self.feature_model, feature_optimizer, scheduler=CosineAnnealingLR(feature_optimizer, n_updates), 
clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) v = VNetwork( @@ -83,7 +101,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): scheduler=CosineAnnealingLR(value_optimizer, n_updates), loss_scaling=self.hyperparameters["value_loss_scaling"], clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( @@ -91,7 +109,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): policy_optimizer, scheduler=CosineAnnealingLR(policy_optimizer, n_updates), clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) return DeepmindAtariBody( @@ -103,7 +121,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): n_steps=self.hyperparameters["n_steps"], discount_factor=self.hyperparameters["discount_factor"], entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"], - logger=logger + logger=logger, ), ) @@ -116,4 +134,4 @@ def parallel_test_agent(self): return self.test_agent() -a2c = ParallelPresetBuilder('a2c', default_hyperparameters, A2CAtariPreset) +a2c = ParallelPresetBuilder("a2c", default_hyperparameters, A2CAtariPreset) diff --git a/all/presets/atari/c51.py b/all/presets/atari/c51.py index ce1ceeb4..6ebf56b7 100644 --- a/all/presets/atari/c51.py +++ b/all/presets/atari/c51.py @@ -1,16 +1,17 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QDist, FixedTarget + from all.agents import C51, C51TestAgent +from all.approximation import FixedTarget, QDist from all.bodies import DeepmindAtariBody from all.logging import DummyLogger from all.memory import ExperienceReplayBuffer from all.optim import LinearScheduler +from all.presets.atari.models import nature_c51 from all.presets.builder import PresetBuilder from all.presets.preset import Preset -from all.presets.atari.models import nature_c51 - default_hyperparameters = { "discount_factor": 0.99, @@ -26,7 +27,7 @@ "replay_buffer_size": 1000000, # Explicit exploration "initial_exploration": 0.02, - "final_exploration": 0., + "final_exploration": 0.0, "final_exploration_step": 250000, "test_exploration": 0.001, # Distributional RL @@ -34,7 +35,7 @@ "v_min": -10, "v_max": 10, # Model construction - "model_constructor": nature_c51 + "model_constructor": nature_c51, } @@ -70,33 +71,36 @@ class C51AtariPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env, atoms=hyperparameters['atoms']).to(device) + self.model = hyperparameters["model_constructor"]( + env, atoms=hyperparameters["atoms"] + ).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = (train_steps - self.hyperparameters['replay_start_size']) / self.hyperparameters['update_frequency'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps - self.hyperparameters["replay_start_size"] + ) / self.hyperparameters["update_frequency"] optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q = QDist( self.model, optimizer, self.n_actions, - self.hyperparameters['atoms'], - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], - target=FixedTarget(self.hyperparameters['target_update_frequency']), + 
self.hyperparameters["atoms"], + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + target=FixedTarget(self.hyperparameters["target_update_frequency"]), scheduler=CosineAnnealingLR(optimizer, n_updates), logger=logger, ) replay_buffer = ExperienceReplayBuffer( - self.hyperparameters['replay_buffer_size'], - device=self.device + self.hyperparameters["replay_buffer_size"], device=self.device ) return DeepmindAtariBody( @@ -104,10 +108,11 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): q, replay_buffer, exploration=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - self.hyperparameters["final_exploration_step"] - self.hyperparameters["replay_start_size"], + self.hyperparameters["final_exploration_step"] + - self.hyperparameters["replay_start_size"], name="epsilon", logger=logger, ), @@ -115,10 +120,10 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): minibatch_size=self.hyperparameters["minibatch_size"], replay_start_size=self.hyperparameters["replay_start_size"], update_frequency=self.hyperparameters["update_frequency"], - logger=logger + logger=logger, ), lazy_frames=True, - episodic_lives=True + episodic_lives=True, ) def test_agent(self): @@ -126,11 +131,15 @@ def test_agent(self): copy.deepcopy(self.model), None, self.n_actions, - self.hyperparameters['atoms'], - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], + self.hyperparameters["atoms"], + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + ) + return DeepmindAtariBody( + C51TestAgent( + q_dist, self.n_actions, self.hyperparameters["test_exploration"] + ) ) - return DeepmindAtariBody(C51TestAgent(q_dist, self.n_actions, self.hyperparameters["test_exploration"])) -c51 = PresetBuilder('c51', default_hyperparameters, C51AtariPreset) +c51 = PresetBuilder("c51", default_hyperparameters, C51AtariPreset) diff --git a/all/presets/atari/ddqn.py b/all/presets/atari/ddqn.py index 1d04f887..d5754534 100644 --- a/all/presets/atari/ddqn.py +++ b/all/presets/atari/ddqn.py @@ -1,18 +1,19 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QNetwork, FixedTarget + from all.agents import DDQN, DDQNTestAgent +from all.approximation import FixedTarget, QNetwork from all.bodies import DeepmindAtariBody from all.logging import DummyLogger from all.memory import PrioritizedReplayBuffer from all.nn import weighted_smooth_l1_loss from all.optim import LinearScheduler from all.policies import GreedyPolicy +from all.presets.atari.models import nature_ddqn from all.presets.builder import PresetBuilder from all.presets.preset import Preset -from all.presets.atari.models import nature_ddqn - default_hyperparameters = { # Common settings @@ -30,12 +31,12 @@ "alpha": 0.5, "beta": 0.5, # Explicit exploration - "initial_exploration": 1., + "initial_exploration": 1.0, "final_exploration": 0.01, "final_exploration_step": 250000, "test_exploration": 0.001, # Model construction - "model_constructor": nature_ddqn + "model_constructor": nature_ddqn, } @@ -71,65 +72,69 @@ class DDQNAtariPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = 
hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = (train_steps - self.hyperparameters['replay_start_size']) / self.hyperparameters['update_frequency'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps - self.hyperparameters["replay_start_size"] + ) / self.hyperparameters["update_frequency"] optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q = QNetwork( self.model, optimizer, scheduler=CosineAnnealingLR(optimizer, n_updates), - target=FixedTarget(self.hyperparameters['target_update_frequency']), - logger=logger + target=FixedTarget(self.hyperparameters["target_update_frequency"]), + logger=logger, ) policy = GreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], - self.hyperparameters['replay_start_size'], - self.hyperparameters['final_exploration_step'] - self.hyperparameters['replay_start_size'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], + self.hyperparameters["replay_start_size"], + self.hyperparameters["final_exploration_step"] + - self.hyperparameters["replay_start_size"], name="exploration", - logger=logger - ) + logger=logger, + ), ) replay_buffer = PrioritizedReplayBuffer( - self.hyperparameters['replay_buffer_size'], - alpha=self.hyperparameters['alpha'], - beta=self.hyperparameters['beta'], - device=self.device + self.hyperparameters["replay_buffer_size"], + alpha=self.hyperparameters["alpha"], + beta=self.hyperparameters["beta"], + device=self.device, ) return DeepmindAtariBody( - DDQN(q, policy, replay_buffer, - loss=weighted_smooth_l1_loss, - discount_factor=self.hyperparameters["discount_factor"], - minibatch_size=self.hyperparameters["minibatch_size"], - replay_start_size=self.hyperparameters["replay_start_size"], - update_frequency=self.hyperparameters["update_frequency"], - ), - lazy_frames=True + DDQN( + q, + policy, + replay_buffer, + loss=weighted_smooth_l1_loss, + discount_factor=self.hyperparameters["discount_factor"], + minibatch_size=self.hyperparameters["minibatch_size"], + replay_start_size=self.hyperparameters["replay_start_size"], + update_frequency=self.hyperparameters["update_frequency"], + ), + lazy_frames=True, ) def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) policy = GreedyPolicy( - q, - self.n_actions, - epsilon=self.hyperparameters['test_exploration'] + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] ) return DeepmindAtariBody(DDQNTestAgent(policy)) -ddqn = PresetBuilder('ddqn', default_hyperparameters, DDQNAtariPreset) +ddqn = PresetBuilder("ddqn", default_hyperparameters, DDQNAtariPreset) diff --git a/all/presets/atari/dqn.py b/all/presets/atari/dqn.py index 2d5c4d3e..7d89a821 100644 --- a/all/presets/atari/dqn.py +++ b/all/presets/atari/dqn.py @@ -1,21 +1,19 @@ import copy -import torch -import numpy as np + +from torch.nn.functional import smooth_l1_loss from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from torch.nn.functional import smooth_l1_loss -from all import nn -from all.approximation import QNetwork, FixedTarget -from all.agents import Agent, DQN, DQNTestAgent + +from all.agents import DQN, DQNTestAgent +from all.approximation import 
FixedTarget, QNetwork from all.bodies import DeepmindAtariBody from all.logging import DummyLogger from all.memory import ExperienceReplayBuffer from all.optim import LinearScheduler from all.policies import GreedyPolicy +from all.presets.atari.models import nature_dqn from all.presets.builder import PresetBuilder from all.presets.preset import Preset -from all.presets.atari.models import nature_dqn - default_hyperparameters = { # Common settings @@ -31,12 +29,12 @@ "replay_start_size": 80000, "replay_buffer_size": 1000000, # Explicit exploration - "initial_exploration": 1., + "initial_exploration": 1.0, "final_exploration": 0.01, "final_exploration_step": 250000, "test_exploration": 0.001, # Model construction - "model_constructor": nature_dqn + "model_constructor": nature_dqn, } @@ -69,42 +67,44 @@ class DQNAtariPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) hyperparameters = {**default_hyperparameters, **hyperparameters} - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = (train_steps - self.hyperparameters['replay_start_size']) / self.hyperparameters['update_frequency'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps - self.hyperparameters["replay_start_size"] + ) / self.hyperparameters["update_frequency"] optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q = QNetwork( self.model, optimizer, scheduler=CosineAnnealingLR(optimizer, n_updates), - target=FixedTarget(self.hyperparameters['target_update_frequency']), - logger=logger + target=FixedTarget(self.hyperparameters["target_update_frequency"]), + logger=logger, ) policy = GreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], - self.hyperparameters['replay_start_size'], - self.hyperparameters['final_exploration_step'] - self.hyperparameters['replay_start_size'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], + self.hyperparameters["replay_start_size"], + self.hyperparameters["final_exploration_step"] + - self.hyperparameters["replay_start_size"], name="exploration", - logger=logger - ) + logger=logger, + ), ) replay_buffer = ExperienceReplayBuffer( - self.hyperparameters['replay_buffer_size'], - device=self.device + self.hyperparameters["replay_buffer_size"], device=self.device ) return DeepmindAtariBody( @@ -112,23 +112,21 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): q, policy, replay_buffer, - discount_factor=self.hyperparameters['discount_factor'], + discount_factor=self.hyperparameters["discount_factor"], loss=smooth_l1_loss, - minibatch_size=self.hyperparameters['minibatch_size'], - replay_start_size=self.hyperparameters['replay_start_size'], - update_frequency=self.hyperparameters['update_frequency'], + minibatch_size=self.hyperparameters["minibatch_size"], + replay_start_size=self.hyperparameters["replay_start_size"], + update_frequency=self.hyperparameters["update_frequency"], ), - lazy_frames=True + lazy_frames=True, ) def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) policy = GreedyPolicy( - q, - 
self.n_actions, - epsilon=self.hyperparameters['test_exploration'] + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] ) return DeepmindAtariBody(DQNTestAgent(policy)) -dqn = PresetBuilder('dqn', default_hyperparameters, DQNAtariPreset) +dqn = PresetBuilder("dqn", default_hyperparameters, DQNAtariPreset) diff --git a/all/presets/atari/models/__init__.py b/all/presets/atari/models/__init__.py index c2627408..3d410de1 100644 --- a/all/presets/atari/models/__init__.py +++ b/all/presets/atari/models/__init__.py @@ -13,7 +13,7 @@ def nature_dqn(env, frames=4): nn.Flatten(), nn.Linear(3136, 512), nn.ReLU(), - nn.Linear0(512, env.action_space.n) + nn.Linear0(512, env.action_space.n), ) @@ -28,17 +28,11 @@ def nature_ddqn(env, frames=4): nn.ReLU(), nn.Flatten(), nn.Dueling( + nn.Sequential(nn.Linear(3136, 512), nn.ReLU(), nn.Linear0(512, 1)), nn.Sequential( - nn.Linear(3136, 512), - nn.ReLU(), - nn.Linear0(512, 1) - ), - nn.Sequential( - nn.Linear(3136, 512), - nn.ReLU(), - nn.Linear0(512, env.action_space.n) + nn.Linear(3136, 512), nn.ReLU(), nn.Linear0(512, env.action_space.n) ), - ) + ), ) @@ -77,7 +71,7 @@ def nature_c51(env, frames=4, atoms=51): nn.Flatten(), nn.Linear(3136, 512), nn.ReLU(), - nn.Linear0(512, env.action_space.n * atoms) + nn.Linear0(512, env.action_space.n * atoms), ) @@ -95,22 +89,14 @@ def nature_rainbow(env, frames=4, hidden=512, atoms=51, sigma=0.5): nn.Sequential( nn.NoisyFactorizedLinear(3136, hidden, sigma_init=sigma), nn.ReLU(), - nn.NoisyFactorizedLinear( - hidden, - atoms, - init_scale=0, - sigma_init=sigma - ) + nn.NoisyFactorizedLinear(hidden, atoms, init_scale=0, sigma_init=sigma), ), nn.Sequential( nn.NoisyFactorizedLinear(3136, hidden, sigma_init=sigma), nn.ReLU(), nn.NoisyFactorizedLinear( - hidden, - env.action_space.n * atoms, - init_scale=0, - sigma_init=sigma - ) - ) - ) + hidden, env.action_space.n * atoms, init_scale=0, sigma_init=sigma + ), + ), + ), ) diff --git a/all/presets/atari/ppo.py b/all/presets/atari/ppo.py index 47bccb90..9b50af93 100644 --- a/all/presets/atari/ppo.py +++ b/all/presets/atari/ppo.py @@ -1,16 +1,21 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import PPO, PPOTestAgent +from all.approximation import FeatureNetwork, VNetwork from all.bodies import DeepmindAtariBody -from all.approximation import VNetwork, FeatureNetwork from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import SoftmaxPolicy +from all.presets.atari.models import ( + nature_features, + nature_policy_head, + nature_value_head, +) from all.presets.builder import ParallelPresetBuilder from all.presets.preset import ParallelPreset -from all.presets.atari.models import nature_features, nature_value_head, nature_policy_head - default_hyperparameters = { # Common settings @@ -34,7 +39,7 @@ # Model construction "feature_model_constructor": nature_features, "value_model_constructor": nature_value_head, - "policy_model_constructor": nature_policy_head + "policy_model_constructor": nature_policy_head, } @@ -69,23 +74,40 @@ class PPOAtariPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = hyperparameters['feature_model_constructor']().to(device) - - def agent(self, logger=DummyLogger(), 
train_steps=float('inf')): - n_updates = train_steps * self.hyperparameters['epochs'] * self.hyperparameters['minibatches'] / (self.hyperparameters['n_steps'] * self.hyperparameters['n_envs']) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = hyperparameters["feature_model_constructor"]().to(device) + + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps + * self.hyperparameters["epochs"] + * self.hyperparameters["minibatches"] + / (self.hyperparameters["n_steps"] * self.hyperparameters["n_envs"]) + ) - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) + feature_optimizer = Adam( + self.feature_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) features = FeatureNetwork( self.feature_model, feature_optimizer, scheduler=CosineAnnealingLR(feature_optimizer, n_updates), clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) v = VNetwork( @@ -94,7 +116,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): scheduler=CosineAnnealingLR(value_optimizer, n_updates), loss_scaling=self.hyperparameters["value_loss_scaling"], clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( @@ -102,7 +124,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): policy_optimizer, scheduler=CosineAnnealingLR(policy_optimizer, n_updates), clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) return DeepmindAtariBody( @@ -115,8 +137,8 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): self.hyperparameters["clip_final"], 0, n_updates, - name='clip', - logger=logger + name="clip", + logger=logger, ), epochs=self.hyperparameters["epochs"], minibatches=self.hyperparameters["minibatches"], @@ -138,4 +160,4 @@ def parallel_test_agent(self): return self.test_agent() -ppo = ParallelPresetBuilder('ppo', default_hyperparameters, PPOAtariPreset) +ppo = ParallelPresetBuilder("ppo", default_hyperparameters, PPOAtariPreset) diff --git a/all/presets/atari/rainbow.py b/all/presets/atari/rainbow.py index 1e99e2a5..e2465ad8 100644 --- a/all/presets/atari/rainbow.py +++ b/all/presets/atari/rainbow.py @@ -1,16 +1,17 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QDist, FixedTarget + from all.agents import Rainbow, RainbowTestAgent +from all.approximation import FixedTarget, QDist from all.bodies import DeepmindAtariBody from all.logging import DummyLogger -from all.memory import PrioritizedReplayBuffer, NStepReplayBuffer +from all.memory import NStepReplayBuffer, PrioritizedReplayBuffer from all.optim import LinearScheduler +from all.presets.atari.models import nature_rainbow from all.presets.builder import PresetBuilder from 
all.presets.preset import Preset -from all.presets.atari.models import nature_rainbow - default_hyperparameters = { "discount_factor": 0.99, @@ -25,7 +26,7 @@ "replay_buffer_size": 1000000, # Explicit exploration "initial_exploration": 0.02, - "final_exploration": 0., + "final_exploration": 0.0, "test_exploration": 0.001, # Prioritized replay settings "alpha": 0.5, @@ -39,7 +40,7 @@ # Noisy Nets "sigma": 0.5, # Model construction - "model_constructor": nature_rainbow + "model_constructor": nature_rainbow, } @@ -79,39 +80,43 @@ class RainbowAtariPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env, atoms=hyperparameters["atoms"], sigma=hyperparameters["sigma"]).to(device) + self.model = hyperparameters["model_constructor"]( + env, atoms=hyperparameters["atoms"], sigma=hyperparameters["sigma"] + ).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = (train_steps - self.hyperparameters['replay_start_size']) / self.hyperparameters['update_frequency'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps - self.hyperparameters["replay_start_size"] + ) / self.hyperparameters["update_frequency"] optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q_dist = QDist( self.model, optimizer, self.n_actions, - self.hyperparameters['atoms'], + self.hyperparameters["atoms"], scheduler=CosineAnnealingLR(optimizer, n_updates), - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], - target=FixedTarget(self.hyperparameters['target_update_frequency']), + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + target=FixedTarget(self.hyperparameters["target_update_frequency"]), logger=logger, ) replay_buffer = NStepReplayBuffer( - self.hyperparameters['n_steps'], - self.hyperparameters['discount_factor'], + self.hyperparameters["n_steps"], + self.hyperparameters["discount_factor"], PrioritizedReplayBuffer( - self.hyperparameters['replay_buffer_size'], - alpha=self.hyperparameters['alpha'], - beta=self.hyperparameters['beta'], - device=self.device - ) + self.hyperparameters["replay_buffer_size"], + alpha=self.hyperparameters["alpha"], + beta=self.hyperparameters["beta"], + device=self.device, + ), ) return DeepmindAtariBody( @@ -119,21 +124,22 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): q_dist, replay_buffer, exploration=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - train_steps - self.hyperparameters['replay_start_size'], + train_steps - self.hyperparameters["replay_start_size"], name="exploration", - logger=logger + logger=logger, ), - discount_factor=self.hyperparameters['discount_factor'] ** self.hyperparameters["n_steps"], - minibatch_size=self.hyperparameters['minibatch_size'], - replay_start_size=self.hyperparameters['replay_start_size'], - update_frequency=self.hyperparameters['update_frequency'], + discount_factor=self.hyperparameters["discount_factor"] + ** self.hyperparameters["n_steps"], + minibatch_size=self.hyperparameters["minibatch_size"], + replay_start_size=self.hyperparameters["replay_start_size"], + 
update_frequency=self.hyperparameters["update_frequency"], logger=logger, ), lazy_frames=True, - episodic_lives=True + episodic_lives=True, ) def test_agent(self): @@ -141,11 +147,15 @@ def test_agent(self): copy.deepcopy(self.model), None, self.n_actions, - self.hyperparameters['atoms'], - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], + self.hyperparameters["atoms"], + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + ) + return DeepmindAtariBody( + RainbowTestAgent( + q_dist, self.n_actions, self.hyperparameters["test_exploration"] + ) ) - return DeepmindAtariBody(RainbowTestAgent(q_dist, self.n_actions, self.hyperparameters["test_exploration"])) -rainbow = PresetBuilder('rainbow', default_hyperparameters, RainbowAtariPreset) +rainbow = PresetBuilder("rainbow", default_hyperparameters, RainbowAtariPreset) diff --git a/all/presets/atari/vac.py b/all/presets/atari/vac.py index 81d38ab5..3cb2638e 100644 --- a/all/presets/atari/vac.py +++ b/all/presets/atari/vac.py @@ -1,15 +1,20 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import VAC, VACTestAgent -from all.approximation import VNetwork, FeatureNetwork +from all.approximation import FeatureNetwork, VNetwork from all.bodies import DeepmindAtariBody from all.logging import DummyLogger from all.policies import SoftmaxPolicy +from all.presets.atari.models import ( + nature_features, + nature_policy_head, + nature_value_head, +) from all.presets.builder import ParallelPresetBuilder from all.presets.preset import ParallelPreset -from all.presets.atari.models import nature_features, nature_value_head, nature_policy_head - default_hyperparameters = { # Common settings @@ -26,7 +31,7 @@ # Model construction "feature_model_constructor": nature_features, "value_model_constructor": nature_value_head, - "policy_model_constructor": nature_policy_head + "policy_model_constructor": nature_policy_head, } @@ -55,23 +60,35 @@ class VACAtariPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = hyperparameters['feature_model_constructor']().to(device) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = hyperparameters["feature_model_constructor"]().to(device) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): + def agent(self, logger=DummyLogger(), train_steps=float("inf")): n_updates = train_steps / self.hyperparameters["n_envs"] - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr_v"], eps=self.hyperparameters["eps"]) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) + feature_optimizer = Adam( + self.feature_model.parameters(), + lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr_v"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + 
lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) features = FeatureNetwork( self.feature_model, feature_optimizer, scheduler=CosineAnnealingLR(feature_optimizer, n_updates), clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) v = VNetwork( @@ -80,7 +97,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): scheduler=CosineAnnealingLR(value_optimizer, n_updates), loss_scaling=self.hyperparameters["value_loss_scaling"], clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( @@ -88,11 +105,16 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): policy_optimizer, scheduler=CosineAnnealingLR(policy_optimizer, n_updates), clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) return DeepmindAtariBody( - VAC(features, v, policy, discount_factor=self.hyperparameters["discount_factor"]), + VAC( + features, + v, + policy, + discount_factor=self.hyperparameters["discount_factor"], + ), ) def test_agent(self): @@ -104,4 +126,4 @@ def parallel_test_agent(self): return self.test_agent() -vac = ParallelPresetBuilder('vac', default_hyperparameters, VACAtariPreset) +vac = ParallelPresetBuilder("vac", default_hyperparameters, VACAtariPreset) diff --git a/all/presets/atari/vpg.py b/all/presets/atari/vpg.py index 0b6fb68e..b51a486d 100644 --- a/all/presets/atari/vpg.py +++ b/all/presets/atari/vpg.py @@ -1,15 +1,20 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import VPG, VPGTestAgent -from all.approximation import VNetwork, FeatureNetwork +from all.approximation import FeatureNetwork, VNetwork from all.bodies import DeepmindAtariBody from all.logging import DummyLogger from all.policies import SoftmaxPolicy +from all.presets.atari.models import ( + nature_features, + nature_policy_head, + nature_value_head, +) from all.presets.builder import PresetBuilder from all.presets.preset import Preset -from all.presets.atari.models import nature_features, nature_value_head, nature_policy_head - default_hyperparameters = { # Common settings @@ -25,7 +30,7 @@ # Model construction "feature_model_constructor": nature_features, "value_model_constructor": nature_value_head, - "policy_model_constructor": nature_policy_head + "policy_model_constructor": nature_policy_head, } @@ -54,23 +59,35 @@ class VPGAtariPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = hyperparameters['feature_model_constructor']().to(device) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = hyperparameters["feature_model_constructor"]().to(device) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): + def agent(self, logger=DummyLogger(), train_steps=float("inf")): n_updates = train_steps / self.hyperparameters["min_batch_size"] - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr_v"], eps=self.hyperparameters["eps"]) - policy_optimizer = Adam(self.policy_model.parameters(), 
lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) + feature_optimizer = Adam( + self.feature_model.parameters(), + lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr_v"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) features = FeatureNetwork( self.feature_model, feature_optimizer, scheduler=CosineAnnealingLR(feature_optimizer, n_updates), clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) v = VNetwork( @@ -79,7 +96,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): scheduler=CosineAnnealingLR(value_optimizer, n_updates), loss_scaling=self.hyperparameters["value_loss_scaling"], clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( @@ -87,11 +104,17 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): policy_optimizer, scheduler=CosineAnnealingLR(policy_optimizer, n_updates), clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) return DeepmindAtariBody( - VPG(features, v, policy, discount_factor=self.hyperparameters["discount_factor"], min_batch_size=self.hyperparameters["min_batch_size"]), + VPG( + features, + v, + policy, + discount_factor=self.hyperparameters["discount_factor"], + min_batch_size=self.hyperparameters["min_batch_size"], + ), ) def test_agent(self): @@ -103,4 +126,4 @@ def parallel_test_agent(self): return self.test_agent() -vpg = PresetBuilder('vpg', default_hyperparameters, VPGAtariPreset) +vpg = PresetBuilder("vpg", default_hyperparameters, VPGAtariPreset) diff --git a/all/presets/atari/vqn.py b/all/presets/atari/vqn.py index 4818bdae..99a2adc5 100644 --- a/all/presets/atari/vqn.py +++ b/all/presets/atari/vqn.py @@ -1,16 +1,17 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QNetwork + from all.agents import VQN, VQNTestAgent +from all.approximation import QNetwork from all.bodies import DeepmindAtariBody from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import GreedyPolicy, ParallelGreedyPolicy +from all.presets.atari.models import nature_ddqn from all.presets.builder import ParallelPresetBuilder from all.presets.preset import ParallelPreset -from all.presets.atari.models import nature_ddqn - default_hyperparameters = { # Common settings @@ -19,14 +20,14 @@ "lr": 1e-3, "eps": 1.5e-4, # Explicit exploration - "initial_exploration": 1., + "initial_exploration": 1.0, "final_exploration": 0.01, "final_exploration_step": 250000, "test_exploration": 0.001, # Parallel actors "n_envs": 64, # Model construction - "model_constructor": nature_ddqn + "model_constructor": nature_ddqn, } @@ -54,51 +55,56 @@ class VQNAtariPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = train_steps / self.hyperparameters['n_envs'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = train_steps / self.hyperparameters["n_envs"] 
optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q = QNetwork( self.model, optimizer, scheduler=CosineAnnealingLR(optimizer, n_updates), - logger=logger + logger=logger, ) policy = ParallelGreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"], + self.hyperparameters["final_exploration_step"] + / self.hyperparameters["n_envs"], name="exploration", - logger=logger - ) + logger=logger, + ), ) return DeepmindAtariBody( - VQN(q, policy, discount_factor=self.hyperparameters['discount_factor']), + VQN(q, policy, discount_factor=self.hyperparameters["discount_factor"]), ) def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = GreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters['test_exploration']) + policy = GreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return DeepmindAtariBody(VQNTestAgent(policy)) def parallel_test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = ParallelGreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters['test_exploration']) + policy = ParallelGreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return DeepmindAtariBody(VQNTestAgent(policy)) -vqn = ParallelPresetBuilder('vqn', default_hyperparameters, VQNAtariPreset) +vqn = ParallelPresetBuilder("vqn", default_hyperparameters, VQNAtariPreset) diff --git a/all/presets/atari/vsarsa.py b/all/presets/atari/vsarsa.py index d41369e3..4aba9971 100644 --- a/all/presets/atari/vsarsa.py +++ b/all/presets/atari/vsarsa.py @@ -1,16 +1,17 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QNetwork + from all.agents import VSarsa, VSarsaTestAgent +from all.approximation import QNetwork from all.bodies import DeepmindAtariBody from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import GreedyPolicy, ParallelGreedyPolicy +from all.presets.atari.models import nature_ddqn from all.presets.builder import ParallelPresetBuilder from all.presets.preset import ParallelPreset -from all.presets.atari.models import nature_ddqn - default_hyperparameters = { # Common settings @@ -19,14 +20,14 @@ "lr": 1e-3, "eps": 1.5e-4, # Explicit exploration - "initial_exploration": 1., + "initial_exploration": 1.0, "final_exploration": 0.01, "final_exploration_step": 250000, "test_exploration": 0.001, # Parallel actors "n_envs": 64, # Model construction - "model_constructor": nature_ddqn + "model_constructor": nature_ddqn, } @@ -54,51 +55,56 @@ class VSarsaAtariPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = train_steps / self.hyperparameters['n_envs'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = train_steps / self.hyperparameters["n_envs"] optimizer = Adam( 
self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q = QNetwork( self.model, optimizer, scheduler=CosineAnnealingLR(optimizer, n_updates), - logger=logger + logger=logger, ) policy = ParallelGreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"], + self.hyperparameters["final_exploration_step"] + / self.hyperparameters["n_envs"], name="exploration", - logger=logger - ) + logger=logger, + ), ) return DeepmindAtariBody( - VSarsa(q, policy, discount_factor=self.hyperparameters['discount_factor']), + VSarsa(q, policy, discount_factor=self.hyperparameters["discount_factor"]), ) def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = GreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters['test_exploration']) + policy = GreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return DeepmindAtariBody(VSarsaTestAgent(policy)) def parallel_test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = ParallelGreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters['test_exploration']) + policy = ParallelGreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return DeepmindAtariBody(VSarsaTestAgent(policy)) -vsarsa = ParallelPresetBuilder('vsarsa', default_hyperparameters, VSarsaAtariPreset) +vsarsa = ParallelPresetBuilder("vsarsa", default_hyperparameters, VSarsaAtariPreset) diff --git a/all/presets/atari_test.py b/all/presets/atari_test.py index 5840d94a..65df9987 100644 --- a/all/presets/atari_test.py +++ b/all/presets/atari_test.py @@ -1,33 +1,26 @@ import os import unittest + import torch + from all.environments import AtariEnvironment, DuplicateEnvironment from all.logging import DummyLogger -from all.presets import Preset, ParallelPreset -from all.presets.atari import ( - a2c, - c51, - ddqn, - dqn, - ppo, - rainbow, - vac, - vpg, - vsarsa, - vqn -) +from all.presets import ParallelPreset +from all.presets.atari import a2c, c51, ddqn, dqn, ppo, rainbow, vac, vpg, vqn, vsarsa class TestAtariPresets(unittest.TestCase): def setUp(self): - self.env = AtariEnvironment('Breakout') + self.env = AtariEnvironment("Breakout") self.env.reset() - self.parallel_env = DuplicateEnvironment([AtariEnvironment('Breakout'), AtariEnvironment('Breakout')]) + self.parallel_env = DuplicateEnvironment( + [AtariEnvironment("Breakout"), AtariEnvironment("Breakout")] + ) self.parallel_env.reset() def tearDown(self): - if os.path.exists('test_preset.pt'): - os.remove('test_preset.pt') + if os.path.exists("test_preset.pt"): + os.remove("test_preset.pt") def test_a2c(self): self.validate_preset(a2c) @@ -60,7 +53,7 @@ def test_vqn(self): self.validate_preset(vqn) def validate_preset(self, builder): - preset = builder.device('cpu').env(self.env).build() + preset = builder.device("cpu").env(self.env).build() if isinstance(preset, ParallelPreset): return self.validate_parallel_preset(preset) return self.validate_standard_preset(preset) @@ -73,8 +66,8 @@ def validate_standard_preset(self, preset): test_agent = preset.test_agent() test_agent.act(self.env.state) # test save/load - preset.save('test_preset.pt') - preset = torch.load('test_preset.pt') + 
preset.save("test_preset.pt") + preset = torch.load("test_preset.pt") test_agent = preset.test_agent() test_agent.act(self.env.state) @@ -89,8 +82,8 @@ def validate_parallel_preset(self, preset): parallel_test_agent = preset.test_agent() parallel_test_agent.act(self.parallel_env.state_array) # test save/load - preset.save('test_preset.pt') - preset = torch.load('test_preset.pt') + preset.save("test_preset.pt") + preset = torch.load("test_preset.pt") test_agent = preset.test_agent() test_agent.act(self.env.state) diff --git a/all/presets/builder.py b/all/presets/builder.py index 768c2610..dd51e933 100644 --- a/all/presets/builder.py +++ b/all/presets/builder.py @@ -1,7 +1,4 @@ -from abc import ABC, abstractmethod - - -class PresetBuilder(): +class PresetBuilder: def __init__( self, default_name, @@ -17,7 +14,9 @@ def __init__( self.constructor = constructor self._device = device self._env = env - self._hyperparameters = self._merge_hyperparameters(default_hyperparameters, hyperparameters) + self._hyperparameters = self._merge_hyperparameters( + default_hyperparameters, hyperparameters + ) self._name = name or default_name def __call__(self, **kwargs): @@ -30,20 +29,21 @@ def env(self, env): return self._preset_builder(env=env) def hyperparameters(self, **hyperparameters): - return self._preset_builder(hyperparameters=self._merge_hyperparameters(self._hyperparameters, hyperparameters)) + return self._preset_builder( + hyperparameters=self._merge_hyperparameters( + self._hyperparameters, hyperparameters + ) + ) def name(self, name): return self._preset_builder(name=name) def build(self): if not self._env: - raise Exception('Env is required') + raise Exception("Env is required") return self.constructor( - self._env, - device=self._device, - name=self._name, - **self._hyperparameters + self._env, device=self._device, name=self._name, **self._hyperparameters ) def _merge_hyperparameters(self, h1, h2): @@ -56,12 +56,17 @@ def _merge_hyperparameters(self, h1, h2): def _preset_builder(self, **kwargs): old_kwargs = { - 'device': self._device, - 'env': self._env, - 'hyperparameters': self._hyperparameters, - 'name': self._name, + "device": self._device, + "env": self._env, + "hyperparameters": self._hyperparameters, + "name": self._name, } - return PresetBuilder(self.default_name, self.default_hyperparameters, self.constructor, **{**old_kwargs, **kwargs}) + return PresetBuilder( + self.default_name, + self.default_hyperparameters, + self.constructor, + **{**old_kwargs, **kwargs} + ) class ParallelPresetBuilder(PresetBuilder): @@ -75,8 +80,8 @@ def __init__( hyperparameters=None, name=None, ): - if 'n_envs' not in default_hyperparameters: - raise Exception('ParallelPreset hyperparameters must include n_envs') + if "n_envs" not in default_hyperparameters: + raise Exception("ParallelPreset hyperparameters must include n_envs") super().__init__( default_name, default_hyperparameters, @@ -84,7 +89,7 @@ def __init__( device=device, env=env, hyperparameters=hyperparameters, - name=name + name=name, ) def build(self): diff --git a/all/presets/builder_test.py b/all/presets/builder_test.py index 7250436f..ba14f092 100644 --- a/all/presets/builder_test.py +++ b/all/presets/builder_test.py @@ -1,23 +1,24 @@ import unittest from unittest.mock import Mock + from all.presets import PresetBuilder class TestPresetBuilder(unittest.TestCase): def setUp(self): self.name = "my_preset" - self.default_hyperparameters = { - "lr": 1e-4, - "gamma": 0.99 - } + self.default_hyperparameters = {"lr": 1e-4, "gamma": 0.99} - class 
MockPreset(): + class MockPreset: def __init__(self, env, name, device, **hyperparameters): self.env = env self.name = name self.device = device self.hyperparameters = hyperparameters - self.builder = PresetBuilder(self.name, self.default_hyperparameters, MockPreset) + + self.builder = PresetBuilder( + self.name, self.default_hyperparameters, MockPreset + ) def test_default_name(self): agent = self.builder.env(Mock).build() @@ -33,7 +34,9 @@ def test_default_hyperparameters(self): def test_override_hyperparameters(self): agent = self.builder.hyperparameters(lr=0.01).env(Mock).build() - self.assertEqual(agent.hyperparameters, {**self.default_hyperparameters, "lr": 0.01}) + self.assertEqual( + agent.hyperparameters, {**self.default_hyperparameters, "lr": 0.01} + ) def test_bad_hyperparameters(self): with self.assertRaises(KeyError): @@ -41,26 +44,34 @@ def test_bad_hyperparameters(self): def test_default_device(self): agent = self.builder.env(Mock).build() - self.assertEqual(agent.device, 'cuda') + self.assertEqual(agent.device, "cuda") def test_override_device(self): - agent = self.builder.device('cpu').env(Mock).build() - self.assertEqual(agent.device, 'cpu') + agent = self.builder.device("cpu").env(Mock).build() + self.assertEqual(agent.device, "cpu") def test_no_side_effects(self): - self.builder.device('cpu').hyperparameters(lr=0.01).device('cpu').env(Mock).build() + self.builder.device("cpu").hyperparameters(lr=0.01).device("cpu").env( + Mock + ).build() my_env = Mock agent = self.builder.env(Mock).build() self.assertEqual(agent.name, self.name) self.assertEqual(agent.hyperparameters, self.default_hyperparameters) - self.assertEqual(agent.device, 'cuda') + self.assertEqual(agent.device, "cuda") self.assertEqual(agent.env, my_env) def test_call_api(self): - agent = self.builder(device='cpu', hyperparameters={"lr": 0.01}, name="cool_name").env(Mock).build() + agent = ( + self.builder(device="cpu", hyperparameters={"lr": 0.01}, name="cool_name") + .env(Mock) + .build() + ) self.assertEqual(agent.name, "cool_name") - self.assertEqual(agent.hyperparameters, {**self.default_hyperparameters, "lr": 0.01}) - self.assertEqual(agent.device, 'cpu') + self.assertEqual( + agent.hyperparameters, {**self.default_hyperparameters, "lr": 0.01} + ) + self.assertEqual(agent.device, "cpu") if __name__ == "__main__": diff --git a/all/presets/classic_control/__init__.py b/all/presets/classic_control/__init__.py index 8eb72a2c..f5b338de 100644 --- a/all/presets/classic_control/__init__.py +++ b/all/presets/classic_control/__init__.py @@ -1,13 +1,13 @@ -from .a2c import a2c, A2CClassicControlPreset -from .c51 import c51, C51ClassicControlPreset -from .ddqn import ddqn, DDQNClassicControlPreset -from .dqn import dqn, DQNClassicControlPreset -from .ppo import ppo, PPOClassicControlPreset -from .rainbow import rainbow, RainbowClassicControlPreset -from .vac import vac, VACClassicControlPreset -from .vpg import vpg, VPGClassicControlPreset -from .vqn import vqn, VQNClassicControlPreset -from .vsarsa import vsarsa, VSarsaClassicControlPreset +from .a2c import a2c +from .c51 import c51 +from .ddqn import ddqn +from .dqn import dqn +from .ppo import ppo +from .rainbow import rainbow +from .vac import vac +from .vpg import vpg +from .vqn import vqn +from .vsarsa import vsarsa __all__ = [ "a2c", diff --git a/all/presets/classic_control/a2c.py b/all/presets/classic_control/a2c.py index 2726e684..2551fba9 100644 --- a/all/presets/classic_control/a2c.py +++ b/all/presets/classic_control/a2c.py @@ -1,13 +1,18 @@ import copy 
+ from torch.optim import Adam + from all.agents import A2C, A2CTestAgent -from all.approximation import VNetwork, FeatureNetwork +from all.approximation import FeatureNetwork, VNetwork from all.logging import DummyLogger from all.policies import SoftmaxPolicy from all.presets.builder import ParallelPresetBuilder +from all.presets.classic_control.models import ( + fc_policy_head, + fc_relu_features, + fc_value_head, +) from all.presets.preset import ParallelPreset -from all.presets.classic_control.models import fc_relu_features, fc_policy_head, fc_value_head - default_hyperparameters = { # Common settings @@ -24,7 +29,7 @@ # Model construction "feature_model_constructor": fc_relu_features, "value_model_constructor": fc_value_head, - "policy_model_constructor": fc_policy_head + "policy_model_constructor": fc_policy_head, } @@ -54,33 +59,41 @@ class A2CClassicControlPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = hyperparameters['feature_model_constructor'](env).to(device) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = hyperparameters["feature_model_constructor"](env).to( + device + ) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr"]) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr"]) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + feature_optimizer = Adam( + self.feature_model.parameters(), lr=self.hyperparameters["lr"] + ) + value_optimizer = Adam( + self.value_model.parameters(), lr=self.hyperparameters["lr"] + ) + policy_optimizer = Adam( + self.policy_model.parameters(), lr=self.hyperparameters["lr"] + ) features = FeatureNetwork( self.feature_model, feature_optimizer, - clip_grad=self.hyperparameters["clip_grad"] + clip_grad=self.hyperparameters["clip_grad"], ) v = VNetwork( self.value_model, value_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( self.policy_model, policy_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) return A2C( @@ -91,7 +104,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): n_steps=self.hyperparameters["n_steps"], discount_factor=self.hyperparameters["discount_factor"], entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"], - logger=logger + logger=logger, ) def test_agent(self): @@ -103,4 +116,4 @@ def parallel_test_agent(self): return self.test_agent() -a2c = ParallelPresetBuilder('a2c', default_hyperparameters, A2CClassicControlPreset) +a2c = ParallelPresetBuilder("a2c", default_hyperparameters, A2CClassicControlPreset) diff --git a/all/presets/classic_control/c51.py b/all/presets/classic_control/c51.py index da54d871..adfb3983 100644 --- a/all/presets/classic_control/c51.py +++ b/all/presets/classic_control/c51.py @@ -1,14 +1,15 @@ import copy + from torch.optim import Adam + from all.agents import C51, C51TestAgent -from all.approximation import QDist, FixedTarget +from 
all.approximation import FixedTarget, QDist from all.logging import DummyLogger from all.memory import ExperienceReplayBuffer from all.optim import LinearScheduler from all.presets.builder import PresetBuilder -from all.presets.preset import Preset from all.presets.classic_control.models import fc_relu_dist_q - +from all.presets.preset import Preset default_hyperparameters = { "discount_factor": 0.99, @@ -31,7 +32,7 @@ "v_min": -100, "v_max": 100, # Model construction - "model_constructor": fc_relu_dist_q + "model_constructor": fc_relu_dist_q, } @@ -66,36 +67,38 @@ class C51ClassicControlPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env, atoms=hyperparameters['atoms']).to(device) + self.model = hyperparameters["model_constructor"]( + env, atoms=hyperparameters["atoms"] + ).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - optimizer = Adam(self.model.parameters(), lr=self.hyperparameters['lr']) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + optimizer = Adam(self.model.parameters(), lr=self.hyperparameters["lr"]) q = QDist( self.model, optimizer, self.n_actions, - self.hyperparameters['atoms'], - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], - target=FixedTarget(self.hyperparameters['target_update_frequency']), + self.hyperparameters["atoms"], + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + target=FixedTarget(self.hyperparameters["target_update_frequency"]), logger=logger, ) replay_buffer = ExperienceReplayBuffer( - self.hyperparameters['replay_buffer_size'], - device=self.device + self.hyperparameters["replay_buffer_size"], device=self.device ) return C51( q, replay_buffer, exploration=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - self.hyperparameters["final_exploration_step"] - self.hyperparameters["replay_start_size"], + self.hyperparameters["final_exploration_step"] + - self.hyperparameters["replay_start_size"], name="epsilon", logger=logger, ), @@ -103,7 +106,7 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): minibatch_size=self.hyperparameters["minibatch_size"], replay_start_size=self.hyperparameters["replay_start_size"], update_frequency=self.hyperparameters["update_frequency"], - logger=logger + logger=logger, ) def test_agent(self): @@ -111,11 +114,13 @@ def test_agent(self): copy.deepcopy(self.model), None, self.n_actions, - self.hyperparameters['atoms'], - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], + self.hyperparameters["atoms"], + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + ) + return C51TestAgent( + q_dist, self.n_actions, self.hyperparameters["test_exploration"] ) - return C51TestAgent(q_dist, self.n_actions, self.hyperparameters["test_exploration"]) -c51 = PresetBuilder('c51', default_hyperparameters, C51ClassicControlPreset) +c51 = PresetBuilder("c51", default_hyperparameters, C51ClassicControlPreset) diff --git a/all/presets/classic_control/ddqn.py b/all/presets/classic_control/ddqn.py index e8e70c45..f7203c7e 100644 --- a/all/presets/classic_control/ddqn.py +++ b/all/presets/classic_control/ddqn.py @@ -1,15 +1,16 @@ import copy + from torch.optim import Adam 
+ from all.agents import DDQN, DDQNTestAgent -from all.approximation import QNetwork, FixedTarget +from all.approximation import FixedTarget, QNetwork from all.logging import DummyLogger from all.memory import PrioritizedReplayBuffer from all.optim import LinearScheduler from all.policies import GreedyPolicy from all.presets.builder import PresetBuilder -from all.presets.preset import Preset from all.presets.classic_control.models import dueling_fc_relu_q - +from all.presets.preset import Preset default_hyperparameters = { "discount_factor": 0.99, @@ -23,15 +24,15 @@ "replay_start_size": 1000, "replay_buffer_size": 10000, # Exploration settings - "initial_exploration": 1., - "final_exploration": 0., + "initial_exploration": 1.0, + "final_exploration": 0.0, "final_exploration_step": 10000, "test_exploration": 0.001, # Prioritized replay settings "alpha": 0.2, "beta": 0.6, # Model construction - "model_constructor": dueling_fc_relu_q + "model_constructor": dueling_fc_relu_q, } @@ -66,37 +67,38 @@ class DDQNClassicControlPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - optimizer = Adam(self.model.parameters(), lr=self.hyperparameters['lr']) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + optimizer = Adam(self.model.parameters(), lr=self.hyperparameters["lr"]) q = QNetwork( self.model, optimizer, - target=FixedTarget(self.hyperparameters['target_update_frequency']), - logger=logger + target=FixedTarget(self.hyperparameters["target_update_frequency"]), + logger=logger, ) policy = GreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], - self.hyperparameters['replay_start_size'], - self.hyperparameters['final_exploration_step'] - self.hyperparameters['replay_start_size'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], + self.hyperparameters["replay_start_size"], + self.hyperparameters["final_exploration_step"] + - self.hyperparameters["replay_start_size"], name="exploration", - logger=logger - ) + logger=logger, + ), ) replay_buffer = PrioritizedReplayBuffer( - self.hyperparameters['replay_buffer_size'], - alpha=self.hyperparameters['alpha'], - beta=self.hyperparameters['beta'], - device=self.device + self.hyperparameters["replay_buffer_size"], + alpha=self.hyperparameters["alpha"], + beta=self.hyperparameters["beta"], + device=self.device, ) return DDQN( @@ -111,8 +113,10 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = GreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters['test_exploration']) + policy = GreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return DDQNTestAgent(policy) -ddqn = PresetBuilder('ddqn', default_hyperparameters, DDQNClassicControlPreset) +ddqn = PresetBuilder("ddqn", default_hyperparameters, DDQNClassicControlPreset) diff --git a/all/presets/classic_control/dqn.py b/all/presets/classic_control/dqn.py index 6fb84bc1..fdc94e4e 100644 --- a/all/presets/classic_control/dqn.py +++ b/all/presets/classic_control/dqn.py @@ -1,15 +1,16 @@ import copy + from torch.optim import Adam 
+ from all.agents import DQN, DQNTestAgent -from all.approximation import QNetwork, FixedTarget +from all.approximation import FixedTarget, QNetwork from all.logging import DummyLogger from all.memory import ExperienceReplayBuffer from all.optim import LinearScheduler from all.policies import GreedyPolicy from all.presets.builder import PresetBuilder -from all.presets.preset import Preset from all.presets.classic_control.models import fc_relu_q - +from all.presets.preset import Preset default_hyperparameters = { # Common settings @@ -24,12 +25,12 @@ "replay_start_size": 1000, "replay_buffer_size": 10000, # Explicit exploration - "initial_exploration": 1., - "final_exploration": 0., + "initial_exploration": 1.0, + "final_exploration": 0.0, "final_exploration_step": 10000, "test_exploration": 0.001, # Model construction - "model_constructor": fc_relu_q + "model_constructor": fc_relu_q, } @@ -60,51 +61,53 @@ class DQNClassicControlPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - optimizer = Adam(self.model.parameters(), lr=self.hyperparameters['lr']) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + optimizer = Adam(self.model.parameters(), lr=self.hyperparameters["lr"]) q = QNetwork( self.model, optimizer, - target=FixedTarget(self.hyperparameters['target_update_frequency']), - logger=logger + target=FixedTarget(self.hyperparameters["target_update_frequency"]), + logger=logger, ) policy = GreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], - self.hyperparameters['replay_start_size'], - self.hyperparameters['final_exploration_step'] - self.hyperparameters['replay_start_size'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], + self.hyperparameters["replay_start_size"], + self.hyperparameters["final_exploration_step"] + - self.hyperparameters["replay_start_size"], name="exploration", - logger=logger - ) + logger=logger, + ), ) replay_buffer = ExperienceReplayBuffer( - self.hyperparameters['replay_buffer_size'], - device=self.device + self.hyperparameters["replay_buffer_size"], device=self.device ) return DQN( q, policy, replay_buffer, - discount_factor=self.hyperparameters['discount_factor'], - minibatch_size=self.hyperparameters['minibatch_size'], - replay_start_size=self.hyperparameters['replay_start_size'], - update_frequency=self.hyperparameters['update_frequency'], + discount_factor=self.hyperparameters["discount_factor"], + minibatch_size=self.hyperparameters["minibatch_size"], + replay_start_size=self.hyperparameters["replay_start_size"], + update_frequency=self.hyperparameters["update_frequency"], ) def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = GreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters['test_exploration']) + policy = GreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return DQNTestAgent(policy) -dqn = PresetBuilder('dqn', default_hyperparameters, DQNClassicControlPreset) +dqn = PresetBuilder("dqn", default_hyperparameters, DQNClassicControlPreset) diff --git a/all/presets/classic_control/ppo.py b/all/presets/classic_control/ppo.py index 
961405e7..cf27709f 100644 --- a/all/presets/classic_control/ppo.py +++ b/all/presets/classic_control/ppo.py @@ -1,16 +1,19 @@ import copy + from torch.optim import Adam -from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import PPO, PPOTestAgent -from all.bodies import DeepmindAtariBody -from all.approximation import VNetwork, FeatureNetwork +from all.approximation import FeatureNetwork, VNetwork from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import SoftmaxPolicy from all.presets.builder import ParallelPresetBuilder +from all.presets.classic_control.models import ( + fc_policy_head, + fc_relu_features, + fc_value_head, +) from all.presets.preset import ParallelPreset -from all.presets.classic_control.models import fc_relu_features, fc_policy_head, fc_value_head - default_hyperparameters = { # Common settings @@ -34,7 +37,7 @@ # Model construction "feature_model_constructor": fc_relu_features, "value_model_constructor": fc_value_head, - "policy_model_constructor": fc_policy_head + "policy_model_constructor": fc_policy_head, } @@ -69,22 +72,41 @@ class PPOClassicControlPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = hyperparameters['feature_model_constructor'](env).to(device) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = hyperparameters["feature_model_constructor"](env).to( + device + ) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = train_steps * self.hyperparameters['epochs'] * self.hyperparameters['minibatches'] / (self.hyperparameters['n_steps'] * self.hyperparameters['n_envs']) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps + * self.hyperparameters["epochs"] + * self.hyperparameters["minibatches"] + / (self.hyperparameters["n_steps"] * self.hyperparameters["n_envs"]) + ) - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr"], eps=self.hyperparameters["eps"]) + feature_optimizer = Adam( + self.feature_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) features = FeatureNetwork( self.feature_model, feature_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) v = VNetwork( @@ -92,14 +114,14 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): value_optimizer, loss_scaling=self.hyperparameters["value_loss_scaling"], clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( self.policy_model, policy_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + 
logger=logger, ) return PPO( @@ -111,8 +133,8 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): self.hyperparameters["clip_final"], 0, n_updates, - name='clip', - logger=logger + name="clip", + logger=logger, ), epochs=self.hyperparameters["epochs"], minibatches=self.hyperparameters["minibatches"], @@ -133,4 +155,4 @@ def parallel_test_agent(self): return self.test_agent() -ppo = ParallelPresetBuilder('ppo', default_hyperparameters, PPOClassicControlPreset) +ppo = ParallelPresetBuilder("ppo", default_hyperparameters, PPOClassicControlPreset) diff --git a/all/presets/classic_control/rainbow.py b/all/presets/classic_control/rainbow.py index c280e742..f60e1186 100644 --- a/all/presets/classic_control/rainbow.py +++ b/all/presets/classic_control/rainbow.py @@ -1,16 +1,15 @@ import copy + from torch.optim import Adam -from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QDist, FixedTarget + from all.agents import Rainbow, RainbowTestAgent -from all.bodies import DeepmindAtariBody +from all.approximation import FixedTarget, QDist from all.logging import DummyLogger -from all.memory import PrioritizedReplayBuffer, NStepReplayBuffer +from all.memory import NStepReplayBuffer, PrioritizedReplayBuffer from all.optim import LinearScheduler from all.presets.builder import PresetBuilder -from all.presets.preset import Preset from all.presets.classic_control.models import fc_relu_rainbow - +from all.presets.preset import Preset default_hyperparameters = { "discount_factor": 0.99, @@ -25,7 +24,7 @@ "replay_buffer_size": 20000, # Explicit exploration "initial_exploration": 0.02, - "final_exploration": 0., + "final_exploration": 0.0, "test_exploration": 0.001, # Prioritized replay settings "alpha": 0.5, @@ -39,7 +38,7 @@ # Noisy Nets "sigma": 0.5, # Model construction - "model_constructor": fc_relu_rainbow + "model_constructor": fc_relu_rainbow, } @@ -79,53 +78,56 @@ class RainbowClassicControlPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env, atoms=hyperparameters["atoms"], sigma=hyperparameters["sigma"]).to(device) + self.model = hyperparameters["model_constructor"]( + env, atoms=hyperparameters["atoms"], sigma=hyperparameters["sigma"] + ).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): + def agent(self, logger=DummyLogger(), train_steps=float("inf")): optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q_dist = QDist( self.model, optimizer, self.n_actions, - self.hyperparameters['atoms'], - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], - target=FixedTarget(self.hyperparameters['target_update_frequency']), + self.hyperparameters["atoms"], + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + target=FixedTarget(self.hyperparameters["target_update_frequency"]), logger=logger, ) replay_buffer = NStepReplayBuffer( - self.hyperparameters['n_steps'], - self.hyperparameters['discount_factor'], + self.hyperparameters["n_steps"], + self.hyperparameters["discount_factor"], PrioritizedReplayBuffer( - self.hyperparameters['replay_buffer_size'], - alpha=self.hyperparameters['alpha'], - beta=self.hyperparameters['beta'], - device=self.device - ) + self.hyperparameters["replay_buffer_size"], + 
alpha=self.hyperparameters["alpha"], + beta=self.hyperparameters["beta"], + device=self.device, + ), ) return Rainbow( q_dist, replay_buffer, exploration=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - train_steps - self.hyperparameters['replay_start_size'], + train_steps - self.hyperparameters["replay_start_size"], name="exploration", - logger=logger + logger=logger, ), - discount_factor=self.hyperparameters['discount_factor'] ** self.hyperparameters["n_steps"], - minibatch_size=self.hyperparameters['minibatch_size'], - replay_start_size=self.hyperparameters['replay_start_size'], - update_frequency=self.hyperparameters['update_frequency'], + discount_factor=self.hyperparameters["discount_factor"] + ** self.hyperparameters["n_steps"], + minibatch_size=self.hyperparameters["minibatch_size"], + replay_start_size=self.hyperparameters["replay_start_size"], + update_frequency=self.hyperparameters["update_frequency"], logger=logger, ) @@ -134,11 +136,13 @@ def test_agent(self): copy.deepcopy(self.model), None, self.n_actions, - self.hyperparameters['atoms'], - v_min=self.hyperparameters['v_min'], - v_max=self.hyperparameters['v_max'], + self.hyperparameters["atoms"], + v_min=self.hyperparameters["v_min"], + v_max=self.hyperparameters["v_max"], + ) + return RainbowTestAgent( + q_dist, self.n_actions, self.hyperparameters["test_exploration"] ) - return RainbowTestAgent(q_dist, self.n_actions, self.hyperparameters["test_exploration"]) -rainbow = PresetBuilder('rainbow', default_hyperparameters, RainbowClassicControlPreset) +rainbow = PresetBuilder("rainbow", default_hyperparameters, RainbowClassicControlPreset) diff --git a/all/presets/classic_control/vac.py b/all/presets/classic_control/vac.py index c607e250..52fcbaae 100644 --- a/all/presets/classic_control/vac.py +++ b/all/presets/classic_control/vac.py @@ -1,15 +1,18 @@ import copy + from torch.optim import Adam -from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import VAC, VACTestAgent -from all.approximation import VNetwork, FeatureNetwork -from all.bodies import DeepmindAtariBody +from all.approximation import FeatureNetwork, VNetwork from all.logging import DummyLogger from all.policies import SoftmaxPolicy from all.presets.builder import ParallelPresetBuilder +from all.presets.classic_control.models import ( + fc_policy_head, + fc_relu_features, + fc_value_head, +) from all.presets.preset import ParallelPreset -from all.presets.classic_control.models import fc_relu_features, fc_policy_head, fc_value_head - default_hyperparameters = { # Common settings @@ -26,7 +29,7 @@ # Model construction "feature_model_constructor": fc_relu_features, "value_model_constructor": fc_value_head, - "policy_model_constructor": fc_policy_head + "policy_model_constructor": fc_policy_head, } @@ -55,20 +58,34 @@ class VACClassicControlPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = hyperparameters['feature_model_constructor'](env).to(device) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = 
hyperparameters["feature_model_constructor"](env).to( + device + ) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr_v"], eps=self.hyperparameters["eps"]) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + feature_optimizer = Adam( + self.feature_model.parameters(), + lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr_v"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) features = FeatureNetwork( self.feature_model, feature_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) v = VNetwork( @@ -76,17 +93,19 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): value_optimizer, loss_scaling=self.hyperparameters["value_loss_scaling"], clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( self.policy_model, policy_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) - return VAC(features, v, policy, discount_factor=self.hyperparameters["discount_factor"]) + return VAC( + features, v, policy, discount_factor=self.hyperparameters["discount_factor"] + ) def test_agent(self): features = FeatureNetwork(copy.deepcopy(self.feature_model)) @@ -97,4 +116,4 @@ def parallel_test_agent(self): return self.test_agent() -vac = ParallelPresetBuilder('vac', default_hyperparameters, VACClassicControlPreset) +vac = ParallelPresetBuilder("vac", default_hyperparameters, VACClassicControlPreset) diff --git a/all/presets/classic_control/vpg.py b/all/presets/classic_control/vpg.py index 4392d80b..b1b39d2b 100644 --- a/all/presets/classic_control/vpg.py +++ b/all/presets/classic_control/vpg.py @@ -1,15 +1,18 @@ import copy + from torch.optim import Adam -from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import VPG, VPGTestAgent -from all.approximation import VNetwork, FeatureNetwork -from all.bodies import DeepmindAtariBody +from all.approximation import FeatureNetwork, VNetwork from all.logging import DummyLogger from all.policies import SoftmaxPolicy from all.presets.builder import PresetBuilder +from all.presets.classic_control.models import ( + fc_policy_head, + fc_relu_features, + fc_value_head, +) from all.presets.preset import Preset -from all.presets.classic_control.models import fc_relu_features, fc_policy_head, fc_value_head - default_hyperparameters = { # Common settings @@ -25,7 +28,7 @@ # Model construction "feature_model_constructor": fc_relu_features, "value_model_constructor": fc_value_head, - "policy_model_constructor": fc_policy_head + "policy_model_constructor": fc_policy_head, } @@ -54,20 +57,34 @@ class VPGClassicControlPreset(Preset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.value_model = hyperparameters['value_model_constructor']().to(device) - self.policy_model = hyperparameters['policy_model_constructor'](env).to(device) - self.feature_model = 
hyperparameters['feature_model_constructor'](env).to(device) + self.value_model = hyperparameters["value_model_constructor"]().to(device) + self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) + self.feature_model = hyperparameters["feature_model_constructor"](env).to( + device + ) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - feature_optimizer = Adam(self.feature_model.parameters(), lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters["lr_v"], eps=self.hyperparameters["eps"]) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr_pi"], eps=self.hyperparameters["eps"]) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + feature_optimizer = Adam( + self.feature_model.parameters(), + lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr_v"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + lr=self.hyperparameters["lr_pi"], + eps=self.hyperparameters["eps"], + ) features = FeatureNetwork( self.feature_model, feature_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) v = VNetwork( @@ -75,17 +92,23 @@ def agent(self, logger=DummyLogger(), train_steps=float('inf')): value_optimizer, loss_scaling=self.hyperparameters["value_loss_scaling"], clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) policy = SoftmaxPolicy( self.policy_model, policy_optimizer, clip_grad=self.hyperparameters["clip_grad"], - logger=logger + logger=logger, ) - return VPG(features, v, policy, discount_factor=self.hyperparameters["discount_factor"], min_batch_size=self.hyperparameters["min_batch_size"]) + return VPG( + features, + v, + policy, + discount_factor=self.hyperparameters["discount_factor"], + min_batch_size=self.hyperparameters["min_batch_size"], + ) def test_agent(self): features = FeatureNetwork(copy.deepcopy(self.feature_model)) @@ -96,4 +119,4 @@ def parallel_test_agent(self): return self.test_agent() -vpg = PresetBuilder('vpg', default_hyperparameters, VPGClassicControlPreset) +vpg = PresetBuilder("vpg", default_hyperparameters, VPGClassicControlPreset) diff --git a/all/presets/classic_control/vqn.py b/all/presets/classic_control/vqn.py index 76258676..557db597 100644 --- a/all/presets/classic_control/vqn.py +++ b/all/presets/classic_control/vqn.py @@ -1,16 +1,16 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QNetwork + from all.agents import VQN, VQNTestAgent -from all.bodies import DeepmindAtariBody +from all.approximation import QNetwork from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import GreedyPolicy, ParallelGreedyPolicy from all.presets.builder import ParallelPresetBuilder -from all.presets.preset import ParallelPreset from all.presets.classic_control.models import dueling_fc_relu_q - +from all.presets.preset import ParallelPreset default_hyperparameters = { # Common settings @@ -19,14 +19,14 @@ "lr": 1e-2, "eps": 1.5e-4, # Explicit exploration - "initial_exploration": 1., - "final_exploration": 0., + "initial_exploration": 1.0, + "final_exploration": 0.0, "final_exploration_step": 10000, "test_exploration": 0.001, # Parallel actors "n_envs": 8, # Model 
construction - "model_constructor": dueling_fc_relu_q + "model_constructor": dueling_fc_relu_q, } @@ -54,49 +54,54 @@ class VQNClassicControlPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = train_steps / self.hyperparameters['n_envs'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = train_steps / self.hyperparameters["n_envs"] optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q = QNetwork( self.model, optimizer, scheduler=CosineAnnealingLR(optimizer, n_updates), - logger=logger + logger=logger, ) policy = ParallelGreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"], + self.hyperparameters["final_exploration_step"] + / self.hyperparameters["n_envs"], name="exploration", - logger=logger - ) + logger=logger, + ), ) - return VQN(q, policy, discount_factor=self.hyperparameters['discount_factor']) + return VQN(q, policy, discount_factor=self.hyperparameters["discount_factor"]) def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = GreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters["test_exploration"]) + policy = GreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return VQNTestAgent(policy) def parallel_test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = ParallelGreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters["test_exploration"]) + policy = ParallelGreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return VQNTestAgent(policy) -vqn = ParallelPresetBuilder('vqn', default_hyperparameters, VQNClassicControlPreset) +vqn = ParallelPresetBuilder("vqn", default_hyperparameters, VQNClassicControlPreset) diff --git a/all/presets/classic_control/vsarsa.py b/all/presets/classic_control/vsarsa.py index 50db3372..e1c963c1 100644 --- a/all/presets/classic_control/vsarsa.py +++ b/all/presets/classic_control/vsarsa.py @@ -1,16 +1,16 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR -from all.approximation import QNetwork + from all.agents import VSarsa, VSarsaTestAgent -from all.bodies import DeepmindAtariBody +from all.approximation import QNetwork from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import GreedyPolicy, ParallelGreedyPolicy from all.presets.builder import ParallelPresetBuilder -from all.presets.preset import ParallelPreset from all.presets.classic_control.models import dueling_fc_relu_q - +from all.presets.preset import ParallelPreset default_hyperparameters = { # Common settings @@ -19,14 +19,14 @@ "lr": 1e-2, "eps": 1.5e-4, # Explicit exploration - "initial_exploration": 1., - "final_exploration": 0., + "initial_exploration": 1.0, + "final_exploration": 0.0, "final_exploration_step": 10000, "test_exploration": 0.001, # 
Parallel actors "n_envs": 8, # Model construction - "model_constructor": dueling_fc_relu_q + "model_constructor": dueling_fc_relu_q, } @@ -54,49 +54,58 @@ class VSarsaClassicControlPreset(ParallelPreset): def __init__(self, env, name, device, **hyperparameters): super().__init__(name, device, hyperparameters) - self.model = hyperparameters['model_constructor'](env).to(device) + self.model = hyperparameters["model_constructor"](env).to(device) self.n_actions = env.action_space.n - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = train_steps / self.hyperparameters['n_envs'] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = train_steps / self.hyperparameters["n_envs"] optimizer = Adam( self.model.parameters(), - lr=self.hyperparameters['lr'], - eps=self.hyperparameters['eps'] + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], ) q = QNetwork( self.model, optimizer, scheduler=CosineAnnealingLR(optimizer, n_updates), - logger=logger + logger=logger, ) policy = ParallelGreedyPolicy( q, self.n_actions, epsilon=LinearScheduler( - self.hyperparameters['initial_exploration'], - self.hyperparameters['final_exploration'], + self.hyperparameters["initial_exploration"], + self.hyperparameters["final_exploration"], 0, - self.hyperparameters["final_exploration_step"] / self.hyperparameters["n_envs"], + self.hyperparameters["final_exploration_step"] + / self.hyperparameters["n_envs"], name="exploration", - logger=logger - ) + logger=logger, + ), ) - return VSarsa(q, policy, discount_factor=self.hyperparameters['discount_factor']) + return VSarsa( + q, policy, discount_factor=self.hyperparameters["discount_factor"] + ) def test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = GreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters["test_exploration"]) + policy = GreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return VSarsaTestAgent(policy) def parallel_test_agent(self): q = QNetwork(copy.deepcopy(self.model)) - policy = ParallelGreedyPolicy(q, self.n_actions, epsilon=self.hyperparameters["test_exploration"]) + policy = ParallelGreedyPolicy( + q, self.n_actions, epsilon=self.hyperparameters["test_exploration"] + ) return VSarsaTestAgent(policy) -vsarsa = ParallelPresetBuilder('vsarsa', default_hyperparameters, VSarsaClassicControlPreset) +vsarsa = ParallelPresetBuilder( + "vsarsa", default_hyperparameters, VSarsaClassicControlPreset +) diff --git a/all/presets/classic_control_test.py b/all/presets/classic_control_test.py index 39b93faa..2f06eff5 100644 --- a/all/presets/classic_control_test.py +++ b/all/presets/classic_control_test.py @@ -1,9 +1,11 @@ import os import unittest + import torch -from all.environments import GymEnvironment, DuplicateEnvironment + +from all.environments import DuplicateEnvironment, GymEnvironment from all.logging import DummyLogger -from all.presets import Preset, ParallelPreset +from all.presets import ParallelPreset from all.presets.classic_control import ( a2c, c51, @@ -20,14 +22,16 @@ class TestClassicControlPresets(unittest.TestCase): def setUp(self): - self.env = GymEnvironment('CartPole-v0') + self.env = GymEnvironment("CartPole-v0") self.env.reset() - self.parallel_env = DuplicateEnvironment([GymEnvironment('CartPole-v0'), GymEnvironment('CartPole-v0')]) + self.parallel_env = DuplicateEnvironment( + [GymEnvironment("CartPole-v0"), GymEnvironment("CartPole-v0")] + ) self.parallel_env.reset() def tearDown(self): - if 
os.path.exists('test_preset.pt'): - os.remove('test_preset.pt') + if os.path.exists("test_preset.pt"): + os.remove("test_preset.pt") def test_a2c(self): self.validate(a2c) @@ -60,7 +64,7 @@ def test_vqn(self): self.validate(vqn) def validate(self, builder): - preset = builder.device('cpu').env(self.env).build() + preset = builder.device("cpu").env(self.env).build() if isinstance(preset, ParallelPreset): return self.validate_parallel_preset(preset) return self.validate_standard_preset(preset) @@ -73,8 +77,8 @@ def validate_standard_preset(self, preset): test_agent = preset.test_agent() test_agent.act(self.env.state) # test save/load - preset.save('test_preset.pt') - preset = torch.load('test_preset.pt') + preset.save("test_preset.pt") + preset = torch.load("test_preset.pt") test_agent = preset.test_agent() test_agent.act(self.env.state) @@ -89,8 +93,8 @@ def validate_parallel_preset(self, preset): parallel_test_agent = preset.test_agent() parallel_test_agent.act(self.parallel_env.state_array) # test save/load - preset.save('test_preset.pt') - preset = torch.load('test_preset.pt') + preset.save("test_preset.pt") + preset = torch.load("test_preset.pt") test_agent = preset.test_agent() test_agent.act(self.env.state) diff --git a/all/presets/continuous/__init__.py b/all/presets/continuous/__init__.py index 32a98557..7b0a9fc4 100644 --- a/all/presets/continuous/__init__.py +++ b/all/presets/continuous/__init__.py @@ -1,10 +1,10 @@ # from .actor_critic import actor_critic -from .ddpg import ddpg, DDPGContinuousPreset -from .ppo import ppo, PPOContinuousPreset +from .ddpg import ddpg +from .ppo import ppo from .sac import sac __all__ = [ - 'ddpg', - 'ppo', - 'sac', + "ddpg", + "ppo", + "sac", ] diff --git a/all/presets/continuous/ddpg.py b/all/presets/continuous/ddpg.py index 9fb2d244..62c130dd 100644 --- a/all/presets/continuous/ddpg.py +++ b/all/presets/continuous/ddpg.py @@ -1,16 +1,17 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import DDPG, DDPGTestAgent -from all.approximation import QContinuous, PolyakTarget +from all.approximation import PolyakTarget, QContinuous from all.bodies import TimeFeature from all.logging import DummyLogger -from all.policies import DeterministicPolicy from all.memory import ExperienceReplayBuffer +from all.policies import DeterministicPolicy from all.presets.builder import PresetBuilder +from all.presets.continuous.models import fc_deterministic_policy, fc_q from all.presets.preset import Preset -from all.presets.continuous.models import fc_q, fc_deterministic_policy - default_hyperparameters = { # Common settings @@ -29,7 +30,7 @@ "noise": 0.1, # Model construction "q_model_constructor": fc_q, - "policy_model_constructor": fc_deterministic_policy + "policy_model_constructor": fc_deterministic_policy, } @@ -62,8 +63,10 @@ def __init__(self, env, name, device, **hyperparameters): self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) self.action_space = env.action_space - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = (train_steps - self.hyperparameters["replay_start_size"]) / self.hyperparameters["update_frequency"] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps - self.hyperparameters["replay_start_size"] + ) / self.hyperparameters["update_frequency"] q_optimizer = Adam(self.q_model.parameters(), lr=self.hyperparameters["lr_q"]) @@ -71,42 +74,39 @@ def agent(self, logger=DummyLogger(), 
train_steps=float('inf')): self.q_model, q_optimizer, target=PolyakTarget(self.hyperparameters["polyak_rate"]), - scheduler=CosineAnnealingLR( - q_optimizer, - n_updates - ), - logger=logger + scheduler=CosineAnnealingLR(q_optimizer, n_updates), + logger=logger, ) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr_pi"]) + policy_optimizer = Adam( + self.policy_model.parameters(), lr=self.hyperparameters["lr_pi"] + ) policy = DeterministicPolicy( self.policy_model, policy_optimizer, self.action_space, target=PolyakTarget(self.hyperparameters["polyak_rate"]), - scheduler=CosineAnnealingLR( - policy_optimizer, - n_updates - ), - logger=logger + scheduler=CosineAnnealingLR(policy_optimizer, n_updates), + logger=logger, ) replay_buffer = ExperienceReplayBuffer( - self.hyperparameters["replay_buffer_size"], - device=self.device + self.hyperparameters["replay_buffer_size"], device=self.device ) - return TimeFeature(DDPG( - q, - policy, - replay_buffer, - self.action_space, - noise=self.hyperparameters["noise"], - replay_start_size=self.hyperparameters["replay_start_size"], - discount_factor=self.hyperparameters["discount_factor"], - update_frequency=self.hyperparameters["update_frequency"], - minibatch_size=self.hyperparameters["minibatch_size"], - )) + return TimeFeature( + DDPG( + q, + policy, + replay_buffer, + self.action_space, + noise=self.hyperparameters["noise"], + replay_start_size=self.hyperparameters["replay_start_size"], + discount_factor=self.hyperparameters["discount_factor"], + update_frequency=self.hyperparameters["update_frequency"], + minibatch_size=self.hyperparameters["minibatch_size"], + ) + ) def test_agent(self): policy = DeterministicPolicy( @@ -117,4 +117,4 @@ def test_agent(self): return TimeFeature(DDPGTestAgent(policy)) -ddpg = PresetBuilder('ddpg', default_hyperparameters, DDPGContinuousPreset) +ddpg = PresetBuilder("ddpg", default_hyperparameters, DDPGContinuousPreset) diff --git a/all/presets/continuous/models/__init__.py b/all/presets/continuous/models/__init__.py index 012b56c9..937ca44f 100644 --- a/all/presets/continuous/models/__init__.py +++ b/all/presets/continuous/models/__init__.py @@ -1,11 +1,13 @@ -''' +""" Pytorch models for continuous control. All models assume that a feature representing the current timestep is used in addition to the features received from the environment. 
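A conceptual sketch of that extra timestep feature, which the continuous presets supply by wrapping their agents in TimeFeature; this is not the all.bodies.TimeFeature implementation, and the scaling factor used here is an assumed example value:

import numpy as np

def with_time_feature(observation, t, scale=0.001):  # scale is an assumed value
    # Append a (scaled) timestep to the environment observation, matching the
    # models' expectation of "env features plus one time feature".
    return np.concatenate([observation, [t * scale]])

# with_time_feature(np.array([0.1, -0.2]), t=42) -> array([0.1, -0.2, 0.042])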
-''' +""" + import torch + from all import nn @@ -62,7 +64,7 @@ def __init__(self, env, hidden1=400, hidden2=300): nn.Tanh(), nn.Linear(hidden1, hidden2), nn.Tanh(), - nn.Linear(hidden2, env.action_space.shape[0]) + nn.Linear(hidden2, env.action_space.shape[0]), ) self.log_stds = nn.Parameter(torch.zeros(env.action_space.shape[0])) diff --git a/all/presets/continuous/ppo.py b/all/presets/continuous/ppo.py index 412c77e3..dbf73568 100644 --- a/all/presets/continuous/ppo.py +++ b/all/presets/continuous/ppo.py @@ -1,16 +1,17 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import PPO, PPOTestAgent -from all.approximation import VNetwork, FeatureNetwork, Identity +from all.approximation import Identity, VNetwork from all.bodies import TimeFeature from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import GaussianPolicy from all.presets.builder import ParallelPresetBuilder -from all.presets.preset import ParallelPreset from all.presets.continuous.models import fc_policy, fc_v - +from all.presets.preset import ParallelPreset default_hyperparameters = { # Common settings @@ -72,66 +73,77 @@ def __init__(self, env, name, device, **hyperparameters): self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) self.action_space = env.action_space - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = train_steps * self.hyperparameters['epochs'] * self.hyperparameters['minibatches'] / (self.hyperparameters['n_steps'] * self.hyperparameters['n_envs']) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps + * self.hyperparameters["epochs"] + * self.hyperparameters["minibatches"] + / (self.hyperparameters["n_steps"] * self.hyperparameters["n_envs"]) + ) - value_optimizer = Adam(self.value_model.parameters(), lr=self.hyperparameters['lr'], eps=self.hyperparameters['eps']) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters['lr'], eps=self.hyperparameters['eps']) + value_optimizer = Adam( + self.value_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) + policy_optimizer = Adam( + self.policy_model.parameters(), + lr=self.hyperparameters["lr"], + eps=self.hyperparameters["eps"], + ) features = Identity(self.device) v = VNetwork( self.value_model, value_optimizer, - loss_scaling=self.hyperparameters['value_loss_scaling'], - clip_grad=self.hyperparameters['clip_grad'], + loss_scaling=self.hyperparameters["value_loss_scaling"], + clip_grad=self.hyperparameters["clip_grad"], logger=logger, - scheduler=CosineAnnealingLR( - value_optimizer, - n_updates - ), + scheduler=CosineAnnealingLR(value_optimizer, n_updates), ) policy = GaussianPolicy( self.policy_model, policy_optimizer, self.action_space, - clip_grad=self.hyperparameters['clip_grad'], + clip_grad=self.hyperparameters["clip_grad"], logger=logger, - scheduler=CosineAnnealingLR( - policy_optimizer, - n_updates - ), + scheduler=CosineAnnealingLR(policy_optimizer, n_updates), ) - return TimeFeature(PPO( - features, - v, - policy, - epsilon=LinearScheduler( - self.hyperparameters['clip_initial'], - self.hyperparameters['clip_final'], - 0, - n_updates, - name='clip', - logger=logger - ), - epochs=self.hyperparameters['epochs'], - minibatches=self.hyperparameters['minibatches'], - n_envs=self.hyperparameters['n_envs'], - n_steps=self.hyperparameters['n_steps'], - 
discount_factor=self.hyperparameters['discount_factor'], - lam=self.hyperparameters['lam'], - entropy_loss_scaling=self.hyperparameters['entropy_loss_scaling'], - logger=logger, - )) + return TimeFeature( + PPO( + features, + v, + policy, + epsilon=LinearScheduler( + self.hyperparameters["clip_initial"], + self.hyperparameters["clip_final"], + 0, + n_updates, + name="clip", + logger=logger, + ), + epochs=self.hyperparameters["epochs"], + minibatches=self.hyperparameters["minibatches"], + n_envs=self.hyperparameters["n_envs"], + n_steps=self.hyperparameters["n_steps"], + discount_factor=self.hyperparameters["discount_factor"], + lam=self.hyperparameters["lam"], + entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"], + logger=logger, + ) + ) def test_agent(self): - policy = GaussianPolicy(copy.deepcopy(self.policy_model), space=self.action_space) + policy = GaussianPolicy( + copy.deepcopy(self.policy_model), space=self.action_space + ) return TimeFeature(PPOTestAgent(Identity(self.device), policy)) def parallel_test_agent(self): return self.test_agent() -ppo = ParallelPresetBuilder('ppo', default_hyperparameters, PPOContinuousPreset) +ppo = ParallelPresetBuilder("ppo", default_hyperparameters, PPOContinuousPreset) diff --git a/all/presets/continuous/sac.py b/all/presets/continuous/sac.py index f4d9a76f..c33aeddf 100644 --- a/all/presets/continuous/sac.py +++ b/all/presets/continuous/sac.py @@ -1,16 +1,17 @@ import copy + from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR + from all.agents import SAC, SACTestAgent -from all.approximation import QContinuous, PolyakTarget, VNetwork +from all.approximation import PolyakTarget, QContinuous from all.bodies import TimeFeature from all.logging import DummyLogger -from all.policies.soft_deterministic import SoftDeterministicPolicy from all.memory import ExperienceReplayBuffer +from all.policies.soft_deterministic import SoftDeterministicPolicy from all.presets.builder import PresetBuilder -from all.presets.preset import Preset from all.presets.continuous.models import fc_q, fc_soft_policy - +from all.presets.preset import Preset default_hyperparameters = { # Common settings @@ -29,11 +30,11 @@ "temperature_initial": 0.1, "lr_temperature": 1e-5, "entropy_backups": True, - "entropy_target_scaling": 1., + "entropy_target_scaling": 1.0, # Model construction "q1_model_constructor": fc_q, "q2_model_constructor": fc_q, - "policy_model_constructor": fc_soft_policy + "policy_model_constructor": fc_soft_policy, } @@ -70,71 +71,72 @@ def __init__(self, env, name, device, **hyperparameters): self.policy_model = hyperparameters["policy_model_constructor"](env).to(device) self.action_space = env.action_space - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - n_updates = (train_steps - self.hyperparameters["replay_start_size"]) / self.hyperparameters["update_frequency"] + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + n_updates = ( + train_steps - self.hyperparameters["replay_start_size"] + ) / self.hyperparameters["update_frequency"] q1_optimizer = Adam(self.q1_model.parameters(), lr=self.hyperparameters["lr_q"]) q1 = QContinuous( self.q1_model, q1_optimizer, - scheduler=CosineAnnealingLR( - q1_optimizer, - n_updates - ), + scheduler=CosineAnnealingLR(q1_optimizer, n_updates), target=PolyakTarget(self.hyperparameters["polyak_rate"]), logger=logger, - name='q1' + name="q1", ) q2_optimizer = Adam(self.q2_model.parameters(), lr=self.hyperparameters["lr_q"]) q2 = QContinuous( 
self.q2_model, q2_optimizer, - scheduler=CosineAnnealingLR( - q2_optimizer, - n_updates - ), + scheduler=CosineAnnealingLR(q2_optimizer, n_updates), target=PolyakTarget(self.hyperparameters["polyak_rate"]), logger=logger, - name='q2' + name="q2", ) - policy_optimizer = Adam(self.policy_model.parameters(), lr=self.hyperparameters["lr_pi"]) + policy_optimizer = Adam( + self.policy_model.parameters(), lr=self.hyperparameters["lr_pi"] + ) policy = SoftDeterministicPolicy( self.policy_model, policy_optimizer, self.action_space, - scheduler=CosineAnnealingLR( - policy_optimizer, - n_updates - ), - logger=logger + scheduler=CosineAnnealingLR(policy_optimizer, n_updates), + logger=logger, ) replay_buffer = ExperienceReplayBuffer( - self.hyperparameters["replay_buffer_size"], - device=self.device + self.hyperparameters["replay_buffer_size"], device=self.device ) - return TimeFeature(SAC( - policy, - q1, - q2, - replay_buffer, - discount_factor=self.hyperparameters["discount_factor"], - entropy_backups=self.hyperparameters["entropy_backups"], - entropy_target=(-self.action_space.shape[0] * self.hyperparameters["entropy_target_scaling"]), - lr_temperature=self.hyperparameters["lr_temperature"], - minibatch_size=self.hyperparameters["minibatch_size"], - replay_start_size=self.hyperparameters["replay_start_size"], - temperature_initial=self.hyperparameters["temperature_initial"], - update_frequency=self.hyperparameters["update_frequency"], - logger=logger - )) + return TimeFeature( + SAC( + policy, + q1, + q2, + replay_buffer, + temperature_initial=self.hyperparameters["temperature_initial"], + entropy_backups=self.hyperparameters["entropy_backups"], + entropy_target=( + -self.action_space.shape[0] + * self.hyperparameters["entropy_target_scaling"] + ), + lr_temperature=self.hyperparameters["lr_temperature"], + replay_start_size=self.hyperparameters["replay_start_size"], + discount_factor=self.hyperparameters["discount_factor"], + update_frequency=self.hyperparameters["update_frequency"], + minibatch_size=self.hyperparameters["minibatch_size"], + logger=logger, + ) + ) def test_agent(self): - policy = SoftDeterministicPolicy(copy.deepcopy(self.policy_model), space=self.action_space) + policy = SoftDeterministicPolicy( + copy.deepcopy(self.policy_model), space=self.action_space + ) return TimeFeature(SACTestAgent(policy)) -sac = PresetBuilder('sac', default_hyperparameters, SACContinuousPreset) +sac = PresetBuilder("sac", default_hyperparameters, SACContinuousPreset) diff --git a/all/presets/continuous_test.py b/all/presets/continuous_test.py index 4d003ae8..5f3278a1 100644 --- a/all/presets/continuous_test.py +++ b/all/presets/continuous_test.py @@ -1,30 +1,29 @@ import os import unittest + import torch -from all.core import State -from all.environments import GymEnvironment, DuplicateEnvironment + +from all.environments import DuplicateEnvironment, GymEnvironment from all.logging import DummyLogger -from all.presets import Preset, ParallelPreset -from all.presets.continuous import ( - ddpg, - ppo, - sac, -) +from all.presets import ParallelPreset +from all.presets.continuous import ddpg, ppo, sac class TestContinuousPresets(unittest.TestCase): def setUp(self): - self.env = GymEnvironment('LunarLanderContinuous-v2') + self.env = GymEnvironment("LunarLanderContinuous-v2") self.env.reset() - self.parallel_env = DuplicateEnvironment([ - GymEnvironment('LunarLanderContinuous-v2'), - GymEnvironment('LunarLanderContinuous-v2'), - ]) + self.parallel_env = DuplicateEnvironment( + [ + 
GymEnvironment("LunarLanderContinuous-v2"), + GymEnvironment("LunarLanderContinuous-v2"), + ] + ) self.parallel_env.reset() def tearDown(self): - if os.path.exists('test_preset.pt'): - os.remove('test_preset.pt') + if os.path.exists("test_preset.pt"): + os.remove("test_preset.pt") def test_ddpg(self): self.validate(ddpg) @@ -36,7 +35,7 @@ def test_sac(self): self.validate(sac) def validate(self, builder): - preset = builder.device('cpu').env(self.env).build() + preset = builder.device("cpu").env(self.env).build() if isinstance(preset, ParallelPreset): return self.validate_parallel_preset(preset) return self.validate_standard_preset(preset) @@ -49,8 +48,8 @@ def validate_standard_preset(self, preset): test_agent = preset.test_agent() test_agent.act(self.env.state) # test save/load - preset.save('test_preset.pt') - preset = torch.load('test_preset.pt') + preset.save("test_preset.pt") + preset = torch.load("test_preset.pt") test_agent = preset.test_agent() test_agent.act(self.env.state) @@ -65,8 +64,8 @@ def validate_parallel_preset(self, preset): parallel_test_agent = preset.test_agent() parallel_test_agent.act(self.parallel_env.state_array) # test save/load - preset.save('test_preset.pt') - preset = torch.load('test_preset.pt') + preset.save("test_preset.pt") + preset = torch.load("test_preset.pt") test_agent = preset.test_agent() test_agent.act(self.env.state) diff --git a/all/presets/independent_multiagent.py b/all/presets/independent_multiagent.py index 4eef0bd5..841045bd 100644 --- a/all/presets/independent_multiagent.py +++ b/all/presets/independent_multiagent.py @@ -1,21 +1,25 @@ -from .builder import PresetBuilder -from .preset import Preset from all.agents import IndependentMultiagent from all.logging import DummyLogger +from .preset import Preset + class IndependentMultiagentPreset(Preset): def __init__(self, name, device, presets): super().__init__(name, device, presets) - def agent(self, logger=DummyLogger(), train_steps=float('inf')): - return IndependentMultiagent({ - agent_id: preset.agent(logger=logger, train_steps=train_steps) - for agent_id, preset in self.hyperparameters.items() - }) + def agent(self, logger=DummyLogger(), train_steps=float("inf")): + return IndependentMultiagent( + { + agent_id: preset.agent(logger=logger, train_steps=train_steps) + for agent_id, preset in self.hyperparameters.items() + } + ) def test_agent(self): - return IndependentMultiagent({ - agent_id: preset.test_agent() - for agent_id, preset in self.hyperparameters.items() - }) + return IndependentMultiagent( + { + agent_id: preset.test_agent() + for agent_id, preset in self.hyperparameters.items() + } + ) diff --git a/all/presets/multiagent_atari_test.py b/all/presets/multiagent_atari_test.py index 4f752da5..4173cbbc 100644 --- a/all/presets/multiagent_atari_test.py +++ b/all/presets/multiagent_atari_test.py @@ -1,27 +1,31 @@ import os import unittest + import torch + from all.environments import MultiagentAtariEnv from all.logging import DummyLogger -from all.presets.atari import dqn from all.presets import IndependentMultiagentPreset +from all.presets.atari import dqn class TestMultiagentAtariPresets(unittest.TestCase): def setUp(self): - self.env = MultiagentAtariEnv('pong_v3', device='cpu') + self.env = MultiagentAtariEnv("pong_v3", device="cpu") self.env.reset() def tearDown(self): - if os.path.exists('test_preset.pt'): - os.remove('test_preset.pt') + if os.path.exists("test_preset.pt"): + os.remove("test_preset.pt") def test_independent(self): presets = { - agent_id: 
dqn.device('cpu').env(self.env.subenvs[agent_id]).build() + agent_id: dqn.device("cpu").env(self.env.subenvs[agent_id]).build() for agent_id in self.env.agents } - self.validate_preset(IndependentMultiagentPreset('independent', 'cpu', presets), self.env) + self.validate_preset( + IndependentMultiagentPreset("independent", "cpu", presets), self.env + ) def validate_preset(self, preset, env): # normal agent @@ -31,8 +35,8 @@ def validate_preset(self, preset, env): test_agent = preset.test_agent() test_agent.act(self.env.last()) # test save/load - preset.save('test_preset.pt') - preset = torch.load('test_preset.pt') + preset.save("test_preset.pt") + preset = torch.load("test_preset.pt") test_agent = preset.test_agent() test_agent.act(self.env.last()) diff --git a/all/presets/preset.py b/all/presets/preset.py index 924933b5..58ef6e6f 100644 --- a/all/presets/preset.py +++ b/all/presets/preset.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod + import torch @@ -18,7 +19,7 @@ def __init__(self, name, device, hyperparameters): self.hyperparameters = hyperparameters @abstractmethod - def agent(self, logger=None, train_steps=float('inf')): + def agent(self, logger=None, train_steps=float("inf")): """ Instantiate a training-mode Agent with the existing model. @@ -71,7 +72,7 @@ def __init__(self, name, device, hyperparameters): self.hyperparameters = hyperparameters @abstractmethod - def agent(self, logger=None, train_steps=float('inf')): + def agent(self, logger=None, train_steps=float("inf")): """ Instantiate a training-mode ParallelAgent with the existing model. @@ -108,7 +109,7 @@ def parallel_test_agent(self): @property def n_envs(self): - return self.hyperparameters['n_envs'] + return self.hyperparameters["n_envs"] def save(self, filename): """ diff --git a/all/scripts/atari.py b/all/scripts/atari.py index da98f7ae..804f46d6 100644 --- a/all/scripts/atari.py +++ b/all/scripts/atari.py @@ -1,4 +1,5 @@ import argparse + from all.environments import AtariEnvironment from all.experiments import run_experiment from all.presets import atari @@ -21,13 +22,13 @@ def main(): parser.add_argument( "--render", action="store_true", default=False, help="Render the environment." ) + parser.add_argument("--logdir", default="runs", help="The base logging directory.") parser.add_argument( - "--logdir", default='runs', help="The base logging directory." - ) - parser.add_argument( - "--logger", default='tensorboard', help="The backend used for tracking experiment metrics." + "--logger", + default="tensorboard", + help="The backend used for tracking experiment metrics.", ) - parser.add_argument('--hyperparameters', default=[], nargs='*') + parser.add_argument("--hyperparameters", default=[], nargs="*") args = parser.parse_args() env = AtariEnvironment(args.env, device=args.device) @@ -39,7 +40,7 @@ def main(): # parse hyperparameters hyperparameters = {} for hp in args.hyperparameters: - key, value = hp.split('=') + key, value = hp.split("=") hyperparameters[key] = type(agent.default_hyperparameters[key])(value) agent = agent.hyperparameters(**hyperparameters) diff --git a/all/scripts/classic.py b/all/scripts/classic.py index 0e80e15c..59808c84 100644 --- a/all/scripts/classic.py +++ b/all/scripts/classic.py @@ -1,4 +1,5 @@ import argparse + from all.environments import GymEnvironment from all.experiments import run_experiment from all.presets import classic_control @@ -21,15 +22,17 @@ def main(): parser.add_argument( "--render", action="store_true", default=False, help="Render the environment." 
) + parser.add_argument("--logdir", default="runs", help="The base logging directory.") parser.add_argument( - "--logdir", default='runs', help="The base logging directory." + "--logger", + default="tensorboard", + help="The backend used for tracking experiment metrics.", ) - parser.add_argument("--logger", default='tensorboard', help="The backend used for tracking experiment metrics.") parser.add_argument( - '--hyperparameters', + "--hyperparameters", default=[], - nargs='*', - help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc." + nargs="*", + help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc.", ) args = parser.parse_args() @@ -42,7 +45,7 @@ def main(): # parse hyperparameters hyperparameters = {} for hp in args.hyperparameters: - key, value = hp.split('=') + key, value = hp.split("=") hyperparameters[key] = type(agent.default_hyperparameters[key])(value) agent = agent.hyperparameters(**hyperparameters) diff --git a/all/scripts/continuous.py b/all/scripts/continuous.py index c4aa7608..a46b6be3 100644 --- a/all/scripts/continuous.py +++ b/all/scripts/continuous.py @@ -1,10 +1,10 @@ # pylint: disable=unused-import import argparse + from all.environments import GymEnvironment, PybulletEnvironment from all.experiments import run_experiment from all.presets import continuous - # see also: PybulletEnvironment.short_names ENVS = { "mountaincar": "MountainCarContinuous-v0", @@ -29,21 +29,23 @@ def main(): parser.add_argument( "--render", action="store_true", default=False, help="Render the environment." ) + parser.add_argument("--logdir", default="runs", help="The base logging directory.") parser.add_argument( - "--logdir", default='runs', help="The base logging directory." + "--logger", + default="tensorboard", + help="The backend used for tracking experiment metrics.", ) - parser.add_argument("--logger", default='tensorboard', help="The backend used for tracking experiment metrics.") parser.add_argument( - '--hyperparameters', + "--hyperparameters", default=[], - nargs='*', - help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc." 
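That hyperparameter1=value1 format is handled by the small coercion loop these scripts share: each override string is split on "=" and the value is cast to the type of the preset's default. A toy illustration; the defaults dict below is made up for the example:

default_hyperparameters = {"lr": 1e-3, "replay_buffer_size": 10000}
overrides = ["lr=0.01", "replay_buffer_size=500"]

hyperparameters = {}
for hp in overrides:
    key, value = hp.split("=")
    # Cast the string to the type of the default, so "500" becomes the int 500.
    hyperparameters[key] = type(default_hyperparameters[key])(value)

assert hyperparameters == {"lr": 0.01, "replay_buffer_size": 500}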
+ nargs="*", + help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc.", ) args = parser.parse_args() if args.env in ENVS: env = GymEnvironment(ENVS[args.env], device=args.device) - elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names: + elif "BulletEnv" in args.env or args.env in PybulletEnvironment.short_names: env = PybulletEnvironment(args.env, device=args.device) else: env = GymEnvironment(args.env, device=args.device) @@ -55,7 +57,7 @@ def main(): # parse hyperparameters hyperparameters = {} for hp in args.hyperparameters: - key, value = hp.split('=') + key, value = hp.split("=") hyperparameters[key] = type(agent.default_hyperparameters[key])(value) agent = agent.hyperparameters(**hyperparameters) diff --git a/all/scripts/multiagent_atari.py b/all/scripts/multiagent_atari.py index 6a3f1867..a76acfe7 100644 --- a/all/scripts/multiagent_atari.py +++ b/all/scripts/multiagent_atari.py @@ -1,11 +1,11 @@ import argparse + from all.environments import MultiagentAtariEnv from all.experiments.multiagent_env_experiment import MultiagentEnvExperiment -from all.presets import atari -from all.presets import IndependentMultiagentPreset +from all.presets import IndependentMultiagentPreset, atari -class DummyEnv(): +class DummyEnv: def __init__(self, state_space, action_space): self.state_space = state_space self.action_space = action_space @@ -14,9 +14,7 @@ def __init__(self, state_space, action_space): def main(): parser = argparse.ArgumentParser(description="Run an multiagent Atari benchmark.") parser.add_argument("env", help="Name of the Atari game (e.g. pong_v2).") - parser.add_argument( - "agents", nargs='*', help="List of agents." - ) + parser.add_argument("agents", nargs="*", help="List of agents.") parser.add_argument( "--device", default="cuda", @@ -34,13 +32,17 @@ def main(): "--render", action="store_true", default=False, help="Render the environment." ) parser.add_argument( - "--logger", default='tensorboard', help="The backend used for tracking experiment metrics." + "--logger", + default="tensorboard", + help="The backend used for tracking experiment metrics.", ) args = parser.parse_args() env = MultiagentAtariEnv(args.env, device=args.device) - assert len(env.agents) == len(args.agents), f"Must specify {len(env.agents)} agents for this environment." + assert len(env.agents) == len( + args.agents + ), f"Must specify {len(env.agents)} agents for this environment." 
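In isolation, the per-agent preset wiring that this script and the multiagent Atari test above rely on looks roughly as follows; the environment name and device mirror the test and are examples only:

from all.environments import MultiagentAtariEnv
from all.presets import IndependentMultiagentPreset
from all.presets.atari import dqn

env = MultiagentAtariEnv("pong_v3", device="cpu")
presets = {
    agent_id: dqn.device("cpu").env(env.subenvs[agent_id]).build()
    for agent_id in env.agents
}
preset = IndependentMultiagentPreset("independent", "cpu", presets)
agent = preset.agent()            # one training sub-agent per player
test_agent = preset.test_agent()  # evaluation counterpart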
presets = { agent_id: getattr(atari, agent_type) @@ -52,7 +54,7 @@ def main(): } experiment = MultiagentEnvExperiment( - IndependentMultiagentPreset('Independent', args.device, presets), + IndependentMultiagentPreset("Independent", args.device, presets), env, verbose=False, render=args.render, diff --git a/all/scripts/plot.py b/all/scripts/plot.py index b041657a..0ba4472a 100644 --- a/all/scripts/plot.py +++ b/all/scripts/plot.py @@ -1,11 +1,17 @@ import argparse + from all.experiments import plot_returns_100 def main(): parser = argparse.ArgumentParser(description="Plots the results of experiments.") - parser.add_argument("--logdir", help="Output directory", default='runs') - parser.add_argument("--timesteps", type=int, default=-1, help="The final point will be fixed to this x-value") + parser.add_argument("--logdir", help="Output directory", default="runs") + parser.add_argument( + "--timesteps", + type=int, + default=-1, + help="The final point will be fixed to this x-value", + ) args = parser.parse_args() plot_returns_100(args.logdir, timesteps=args.timesteps) diff --git a/all/scripts/release.py b/all/scripts/release.py index 475e5b80..01a342f7 100644 --- a/all/scripts/release.py +++ b/all/scripts/release.py @@ -1,4 +1,5 @@ -'''Create slurm tasks to run release test suite''' +"""Create slurm tasks to run release test suite""" + from all.environments import AtariEnvironment, GymEnvironment from all.experiments import SlurmExperiment from all.presets import atari, classic_control, continuous @@ -6,7 +7,7 @@ def main(): # run on gpu - device = 'cuda' + device = "cuda" def get_agents(preset): agents = [getattr(preset, agent_name) for agent_name in preset.__all__] @@ -14,29 +15,23 @@ def get_agents(preset): SlurmExperiment( get_agents(atari), - AtariEnvironment('Breakout', device=device), + AtariEnvironment("Breakout", device=device), 10e7, - sbatch_args={ - 'partition': '1080ti-long' - } + sbatch_args={"partition": "1080ti-long"}, ) SlurmExperiment( get_agents(classic_control), - GymEnvironment('CartPole-v0', device=device), + GymEnvironment("CartPole-v0", device=device), 100000, - sbatch_args={ - 'partition': '1080ti-short' - } + sbatch_args={"partition": "1080ti-short"}, ) SlurmExperiment( get_agents(continuous), - GymEnvironment('LunarLanderContinuous-v2', device=device), + GymEnvironment("LunarLanderContinuous-v2", device=device), 500000, - sbatch_args={ - 'partition': '1080ti-short' - } + sbatch_args={"partition": "1080ti-short"}, ) diff --git a/all/scripts/watch_atari.py b/all/scripts/watch_atari.py index b5cd6a28..cd278afc 100644 --- a/all/scripts/watch_atari.py +++ b/all/scripts/watch_atari.py @@ -1,5 +1,5 @@ import argparse -from all.bodies import DeepmindAtariBody + from all.environments import AtariEnvironment from all.experiments import load_and_watch diff --git a/all/scripts/watch_classic.py b/all/scripts/watch_classic.py index 0b2e7416..239f0964 100644 --- a/all/scripts/watch_classic.py +++ b/all/scripts/watch_classic.py @@ -1,11 +1,14 @@ import argparse + from all.environments import GymEnvironment from all.experiments import load_and_watch def main(): parser = argparse.ArgumentParser(description="Run an Atari benchmark.") - parser.add_argument("env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1") + parser.add_argument( + "env", help="Name of the environment (e.g. 
RoboschoolHalfCheetah-v1" + ) parser.add_argument("filename", help="File where the model was saved.") parser.add_argument( "--device", diff --git a/all/scripts/watch_continuous.py b/all/scripts/watch_continuous.py index 2cd17250..903f83d7 100644 --- a/all/scripts/watch_continuous.py +++ b/all/scripts/watch_continuous.py @@ -1,8 +1,9 @@ # pylint: disable=unused-import import argparse -from all.bodies import TimeFeature + from all.environments import GymEnvironment, PybulletEnvironment from all.experiments import load_and_watch + from .continuous import ENVS @@ -24,7 +25,7 @@ def main(): if args.env in ENVS: env = GymEnvironment(args.env, device=args.device, render_mode="human") - elif 'BulletEnv' in args.env or args.env in PybulletEnvironment.short_names: + elif "BulletEnv" in args.env or args.env in PybulletEnvironment.short_names: env = PybulletEnvironment(args.env, device=args.device, render_mode="human") else: env = GymEnvironment(args.env, device=args.device, render_mode="human") diff --git a/all/scripts/watch_multiagent_atari.py b/all/scripts/watch_multiagent_atari.py index fe6095eb..9d16f1c9 100644 --- a/all/scripts/watch_multiagent_atari.py +++ b/all/scripts/watch_multiagent_atari.py @@ -1,9 +1,9 @@ import argparse import time + import torch -from all.bodies import DeepmindAtariBody + from all.environments import MultiagentAtariEnv -from all.experiments import load_and_watch def watch(env, filename, fps, reload): @@ -15,7 +15,7 @@ def watch(env, filename, fps, reload): try: agent = torch.load(filename).test_agent() except Exception as e: - print('Warning: error reloading model: {}'.format(filename)) + print("Warning: error reloading model: {}".format(filename)) print(e) @@ -49,7 +49,9 @@ def main(): ) parser.add_argument( "--reload", - action="store_true", default=False, help="Reload the model from disk after every episode" + action="store_true", + default=False, + help="Reload the model from disk after every episode", ) args = parser.parse_args() env = MultiagentAtariEnv(args.env, device=args.device, render_mode="human") diff --git a/benchmarks/atari40.py b/benchmarks/atari40.py index 4a1ffdae..3dc88c7c 100644 --- a/benchmarks/atari40.py +++ b/benchmarks/atari40.py @@ -1,6 +1,6 @@ +from all.environments import AtariEnvironment from all.experiments import SlurmExperiment from all.presets import atari -from all.environments import AtariEnvironment def main(): @@ -12,10 +12,17 @@ def main(): atari.ppo, atari.rainbow, ] - envs = [AtariEnvironment(env, device='cuda') for env in ['BeamRider', 'Breakout', 'Pong', 'Qbert', 'SpaceInvaders']] - SlurmExperiment(agents, envs, 10e6, logdir='benchmarks/atari40', sbatch_args={ - 'partition': 'gpu-long' - }) + envs = [ + AtariEnvironment(env, device="cuda") + for env in ["BeamRider", "Breakout", "Pong", "Qbert", "SpaceInvaders"] + ] + SlurmExperiment( + agents, + envs, + 10e6, + logdir="benchmarks/atari40", + sbatch_args={"partition": "gpu-long"}, + ) if __name__ == "__main__": diff --git a/benchmarks/pybullet.py b/benchmarks/pybullet.py index 0762add8..dc045fd6 100644 --- a/benchmarks/pybullet.py +++ b/benchmarks/pybullet.py @@ -1,22 +1,25 @@ +from all.environments import PybulletEnvironment from all.experiments import SlurmExperiment from all.presets.continuous import ddpg, ppo, sac -from all.environments import PybulletEnvironment def main(): frames = int(1e7) - agents = [ - ddpg, - ppo, - sac - ] + agents = [ddpg, ppo, sac] - envs = [PybulletEnvironment(env, device='cuda') for env in PybulletEnvironment.short_names] + envs = [ + 
PybulletEnvironment(env, device="cuda") + for env in PybulletEnvironment.short_names + ] - SlurmExperiment(agents, envs, frames, logdir='benchmarks/pybullet', sbatch_args={ - 'partition': 'gpu-long' - }) + SlurmExperiment( + agents, + envs, + frames, + logdir="benchmarks/pybullet", + sbatch_args={"partition": "gpu-long"}, + ) if __name__ == "__main__": diff --git a/examples/experiment.py b/examples/experiment.py index 9492aea4..e03d1398 100644 --- a/examples/experiment.py +++ b/examples/experiment.py @@ -1,13 +1,14 @@ -''' +""" Quick example of usage of the run_experiment API. -''' -from all.experiments import run_experiment, plot_returns_100 -from all.presets.classic_control import dqn, a2c +""" + from all.environments import GymEnvironment +from all.experiments import plot_returns_100, run_experiment +from all.presets.classic_control import a2c, dqn def main(): - DEVICE = 'cpu' + DEVICE = "cpu" # DEVICE = 'cuda' # uncomment for gpu support timesteps = 40000 run_experiment( @@ -15,14 +16,16 @@ def main(): # DQN with default hyperparameters dqn.device(DEVICE), # DQN with a custom hyperparameters and a custom name. - dqn.device(DEVICE).hyperparameters(replay_buffer_size=100).name('dqn-small-buffer'), + dqn.device(DEVICE) + .hyperparameters(replay_buffer_size=100) + .name("dqn-small-buffer"), # A2C with a custom name - a2c.device(DEVICE).name('not-dqn') + a2c.device(DEVICE).name("not-dqn"), ], - [GymEnvironment('CartPole-v0', DEVICE), GymEnvironment('Acrobot-v1', DEVICE)], + [GymEnvironment("CartPole-v0", DEVICE), GymEnvironment("Acrobot-v1", DEVICE)], timesteps, ) - plot_returns_100('runs', timesteps=timesteps) + plot_returns_100("runs", timesteps=timesteps) if __name__ == "__main__": diff --git a/examples/slurm_experiment.py b/examples/slurm_experiment.py index 155fdb5b..f938cc0b 100644 --- a/examples/slurm_experiment.py +++ b/examples/slurm_experiment.py @@ -1,19 +1,25 @@ -''' +""" Quick example of a2c running on slurm, a distributed cluster. Note that it only runs for 1 million frames. For real experiments, you will surely need a modified version of this script. 
-''' +""" + +from all.environments import AtariEnvironment from all.experiments import SlurmExperiment from all.presets.atari import a2c, dqn -from all.environments import AtariEnvironment def main(): - device = 'cuda' - envs = [AtariEnvironment(env, device) for env in ['Pong', 'Breakout', 'SpaceInvaders']] - SlurmExperiment([a2c.device(device), dqn.device(device)], envs, 1e6, sbatch_args={ - 'partition': '1080ti-short' - }) + device = "cuda" + envs = [ + AtariEnvironment(env, device) for env in ["Pong", "Breakout", "SpaceInvaders"] + ] + SlurmExperiment( + [a2c.device(device), dqn.device(device)], + envs, + 1e6, + sbatch_args={"partition": "1080ti-short"}, + ) if __name__ == "__main__": diff --git a/integration/atari_test.py b/integration/atari_test.py index 7ebad9f4..8ffc9743 100644 --- a/integration/atari_test.py +++ b/integration/atari_test.py @@ -1,20 +1,10 @@ import unittest + import torch -from all.environments import AtariEnvironment -from all.presets.atari import ( - a2c, - c51, - ddqn, - dqn, - ppo, - rainbow, - vac, - vpg, - vsarsa, - vqn -) from validate_agent import validate_agent +from all.environments import AtariEnvironment +from all.presets.atari import a2c, c51, ddqn, dqn, ppo, rainbow, vac, vpg, vqn, vsarsa CPU = torch.device("cpu") if torch.cuda.is_available(): @@ -66,10 +56,16 @@ def test_dqn_cuda(self): ) def test_ppo(self): - validate_agent(ppo.device(CPU).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CPU)) + validate_agent( + ppo.device(CPU).hyperparameters(n_envs=4), + AtariEnvironment("Breakout", device=CPU), + ) def test_ppo_cuda(self): - validate_agent(ppo.device(CUDA).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CUDA)) + validate_agent( + ppo.device(CUDA).hyperparameters(n_envs=4), + AtariEnvironment("Breakout", device=CUDA), + ) def test_rainbow(self): validate_agent( @@ -84,35 +80,45 @@ def test_rainbow_cuda(self): ) def test_vac(self): - validate_agent(vac.device(CPU).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CPU)) + validate_agent( + vac.device(CPU).hyperparameters(n_envs=4), + AtariEnvironment("Breakout", device=CPU), + ) def test_vac_cuda(self): validate_agent( - vac.device(CUDA).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CUDA) + vac.device(CUDA).hyperparameters(n_envs=4), + AtariEnvironment("Breakout", device=CUDA), ) def test_vpg(self): validate_agent(vpg.device(CPU), AtariEnvironment("Breakout", device=CPU)) def test_vpg_cuda(self): - validate_agent( - vpg.device(CUDA), AtariEnvironment("Breakout", device=CUDA) - ) + validate_agent(vpg.device(CUDA), AtariEnvironment("Breakout", device=CUDA)) def test_vsarsa(self): - validate_agent(vsarsa.device(CPU).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CPU)) + validate_agent( + vsarsa.device(CPU).hyperparameters(n_envs=4), + AtariEnvironment("Breakout", device=CPU), + ) def test_vsarsa_cuda(self): validate_agent( - vsarsa.device(CUDA).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CUDA) + vsarsa.device(CUDA).hyperparameters(n_envs=4), + AtariEnvironment("Breakout", device=CUDA), ) def test_vqn(self): - validate_agent(vqn.device(CPU).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CPU)) + validate_agent( + vqn.device(CPU).hyperparameters(n_envs=4), + AtariEnvironment("Breakout", device=CPU), + ) def test_vqn_cuda(self): validate_agent( - vqn.device(CUDA).hyperparameters(n_envs=4), AtariEnvironment("Breakout", device=CUDA) + vqn.device(CUDA).hyperparameters(n_envs=4), + 
AtariEnvironment("Breakout", device=CUDA), ) diff --git a/integration/classic_control_test.py b/integration/classic_control_test.py index a1b49748..67bd72e8 100644 --- a/integration/classic_control_test.py +++ b/integration/classic_control_test.py @@ -1,4 +1,7 @@ import unittest + +from validate_agent import validate_agent + from all.environments import GymEnvironment from all.presets.classic_control import ( a2c, @@ -12,7 +15,6 @@ vqn, vsarsa, ) -from validate_agent import validate_agent class TestClassicControlPresets(unittest.TestCase): @@ -47,7 +49,7 @@ def test_vqn(self): self.validate(vqn) def validate(self, builder): - validate_agent(builder.device('cpu'), GymEnvironment("CartPole-v0")) + validate_agent(builder.device("cpu"), GymEnvironment("CartPole-v0")) if __name__ == "__main__": diff --git a/integration/continuous_test.py b/integration/continuous_test.py index f3e528a9..10414703 100644 --- a/integration/continuous_test.py +++ b/integration/continuous_test.py @@ -1,8 +1,10 @@ import unittest -from all.environments import GymEnvironment, PybulletEnvironment, MujocoEnvironment -from all.presets.continuous import ddpg, ppo, sac + from validate_agent import validate_agent +from all.environments import GymEnvironment, MujocoEnvironment, PybulletEnvironment +from all.presets.continuous import ddpg, ppo, sac + class TestContinuousPresets(unittest.TestCase): def test_ddpg(self): diff --git a/integration/multiagent_atari_test.py b/integration/multiagent_atari_test.py index 9e0b54cc..f57e465d 100644 --- a/integration/multiagent_atari_test.py +++ b/integration/multiagent_atari_test.py @@ -1,10 +1,11 @@ import unittest + import torch +from validate_agent import validate_multiagent + from all.environments import MultiagentAtariEnv from all.presets import IndependentMultiagentPreset from all.presets.atari import dqn -from validate_agent import validate_multiagent - CPU = torch.device("cpu") if torch.cuda.is_available(): @@ -20,20 +21,24 @@ class TestMultiagentAtariPresets(unittest.TestCase): def test_independent(self): - env = MultiagentAtariEnv('pong_v3', max_cycles=1000, device=CPU) + env = MultiagentAtariEnv("pong_v3", max_cycles=1000, device=CPU) presets = { agent_id: dqn.device(CPU).env(env.subenvs[agent_id]).build() for agent_id in env.agents } - validate_multiagent(IndependentMultiagentPreset('independent', CPU, presets), env) + validate_multiagent( + IndependentMultiagentPreset("independent", CPU, presets), env + ) def test_independent_cuda(self): - env = MultiagentAtariEnv('pong_v3', max_cycles=1000, device=CUDA) + env = MultiagentAtariEnv("pong_v3", max_cycles=1000, device=CUDA) presets = { agent_id: dqn.device(CUDA).env(env.subenvs[agent_id]).build() for agent_id in env.agents } - validate_multiagent(IndependentMultiagentPreset('independent', CUDA, presets), env) + validate_multiagent( + IndependentMultiagentPreset("independent", CUDA, presets), env + ) if __name__ == "__main__": diff --git a/integration/validate_agent.py b/integration/validate_agent.py index d0b753bd..1b786efe 100644 --- a/integration/validate_agent.py +++ b/integration/validate_agent.py @@ -1,7 +1,12 @@ import os + +from all.experiments import ( + MultiagentEnvExperiment, + ParallelEnvExperiment, + SingleEnvExperiment, +) from all.logging import DummyLogger -from all.experiments import SingleEnvExperiment, ParallelEnvExperiment, MultiagentEnvExperiment -from all.presets import ParallelPreset, Preset +from all.presets import ParallelPreset class TestSingleEnvExperiment(SingleEnvExperiment): diff --git a/setup.py 
b/setup.py
index 42f5b3a7..3c136805 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,4 @@
-from setuptools import setup, find_packages
-
+from setuptools import find_packages, setup
 
 GYM_VERSION = "0.29.1"
 PETTINGZOO_VERSION = "1.24.2"
@@ -24,8 +23,9 @@
         "supersuit~=3.9.1",
     ],
     "test": [
-        "flake8>=3.8", # linter for pep8 compliance
-        "autopep8>=1.5", # automatically fixes some pep8 errors
+        "black>=24.1.1",  # linting/formatting
+        "isort>=5.13.2",  # sort imports
+        "flake8>=7.0.0",  # more linting
         "torch-testing>=0.0.2",  # pytorch assertion library
     ],
     "docs": [
@@ -71,13 +71,13 @@
         ],
     },
     install_requires=[
-        "gymnasium~={}".format(GYM_VERSION), # common environment interface
-        "numpy>=1.22.3", # math library
-        "matplotlib>=3.5.1", # plotting library
-        "opencv-python-headless>=4.0.0", # used by atari wrappers
-        "torch>=1.11.0", # core deep learning library
-        "tensorboard>=2.8.0", # logging and visualization
-        "cloudpickle>=2.0.0", # used to copy environments
+        "gymnasium~={}".format(GYM_VERSION),  # common environment interface
+        "numpy>=1.22.3",  # math library
+        "matplotlib>=3.5.1",  # plotting library
+        "opencv-python-headless>=4.0.0",  # used by atari wrappers
+        "torch>=1.11.0",  # core deep learning library
+        "tensorboard>=2.8.0",  # logging and visualization
+        "cloudpickle>=2.0.0",  # used to copy environments
     ],
     extras_require=extras,
 )
From 73ac02a3d46e1eeebefff26e206015e4e82ba1db Mon Sep 17 00:00:00 2001
From: Chris Nota
Date: Sun, 25 Feb 2024 11:41:18 -0500
Subject: [PATCH 11/26] Fix key error warnings (#300)

* fix key error warnings

* run formatter

* remove unused import
---
 all/environments/atari_test.py        |  1 +
 all/environments/atari_wrappers.py    |  3 ++-
 all/environments/gym_wrappers.py      | 16 ++++++++++++++++
 all/environments/gym_wrappers_test.py |  0
 all/environments/mujoco.py            | 12 +++++++++++-
 all/environments/mujoco_test.py       | 13 +++++++++++++
 6 files changed, 43 insertions(+), 2 deletions(-)
 create mode 100644 all/environments/gym_wrappers.py
 create mode 100644 all/environments/gym_wrappers_test.py

diff --git a/all/environments/atari_test.py b/all/environments/atari_test.py
index fef2dfd6..4d589ab8 100644
--- a/all/environments/atari_test.py
+++ b/all/environments/atari_test.py
@@ -11,6 +11,7 @@ def test_reset(self):
         self.assertEqual(state.reward, 0)
         self.assertFalse(state.done)
         self.assertEqual(state.mask, 1)
+        self.assertEqual(state["life_lost"], False)
 
     def test_step(self):
         env = AtariEnvironment("Breakout")
diff --git a/all/environments/atari_wrappers.py b/all/environments/atari_wrappers.py
index 7f513851..b2159391 100644
--- a/all/environments/atari_wrappers.py
+++ b/all/environments/atari_wrappers.py
@@ -180,8 +180,9 @@ def __init__(self, env):
         self.lives = 0
 
     def reset(self):
+        obs, _ = self.env.reset()
         self.lives = 0
-        return self.env.reset()
+        return obs, {"life_lost": False}
 
     def step(self, action):
         obs, reward, terminated, truncated, _ = self.env.step(action)
diff --git a/all/environments/gym_wrappers.py b/all/environments/gym_wrappers.py
new file mode 100644
index 00000000..fa4c99d3
--- /dev/null
+++ b/all/environments/gym_wrappers.py
@@ -0,0 +1,16 @@
+import gymnasium
+
+
+class NoInfoWrapper(gymnasium.Wrapper):
+    """
+    Wrapper to suppress info and simply return an empty dict.
+    This prevents State.from_gym() from creating keys.
+ """ + + def reset(self, seed=None, options=None): + obs, _ = self.env.reset(seed=seed, options=options) + return obs, {} + + def step(self, action): + *obs, info = self.env.step(action) + return *obs, {} diff --git a/all/environments/gym_wrappers_test.py b/all/environments/gym_wrappers_test.py new file mode 100644 index 00000000..e69de29b diff --git a/all/environments/mujoco.py b/all/environments/mujoco.py index 8463cc51..88f609c4 100644 --- a/all/environments/mujoco.py +++ b/all/environments/mujoco.py @@ -1,5 +1,15 @@ +import torch + from .gym import GymEnvironment +from .gym_wrappers import NoInfoWrapper class MujocoEnvironment(GymEnvironment): - """Simply inherit the Gym Environment""" + """A Mujoco Environment""" + + def __init__( + self, id, device=torch.device("cpu"), name=None, no_info=True, **gym_make_kwargs + ): + super().__init__(id, device=device, name=name, **gym_make_kwargs) + if no_info: + self._env = NoInfoWrapper(self._env) diff --git a/all/environments/mujoco_test.py b/all/environments/mujoco_test.py index 7fb240b7..26bbe544 100644 --- a/all/environments/mujoco_test.py +++ b/all/environments/mujoco_test.py @@ -34,3 +34,16 @@ def test_step(self): self.assertNotEqual(state.reward, 0.0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) + + def test_no_info_wrapper(self): + env = MujocoEnvironment("Ant-v4") + state = env.reset(seed=0) + self.assertFalse("reward_forward" in state) + state = env.step(env.action_space.sample()) + self.assertFalse("reward_forward" in state) + + def test_with_info(self): + env = MujocoEnvironment("Ant-v4", no_info=False) + state = env.reset(seed=0) + state = env.step(env.action_space.sample()) + self.assertTrue("reward_forward" in state) From 1a2265c8f5bd1022341f314d727f71b9adb48210 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sun, 25 Feb 2024 11:41:35 -0500 Subject: [PATCH 12/26] finish docstring for nn aggregation (#301) --- all/nn/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/all/nn/__init__.py b/all/nn/__init__.py index 67dfa9db..5316a980 100644 --- a/all/nn/__init__.py +++ b/all/nn/__init__.py @@ -30,7 +30,8 @@ class Aggregation(nn.Module): This layer computes a Q function by combining an estimate of V with an estimate of the advantage. The advantage is normalized by subtracting the average - advantage so that we can properly + advantage to force action-independent value to be + represented by value. 
""" def forward(self, value, advantages): From 2ccda12d3bdde0a7b8bc75f38a40d9149645883f Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sun, 25 Feb 2024 12:46:33 -0500 Subject: [PATCH 13/26] Bugfix/publish workflow (#303) * update publish workflow and add alpha tag * make sure setuptools and wheel are installed before publish * add the required id-token permission * update alpha version number --- .github/workflows/python-publish.yml | 37 +++++++++++++++------------- docs/source/conf.py | 2 +- setup.py | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index e89dad68..c072dc2e 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,33 +1,36 @@ # This workflow will upload a Python Package using Twine when a release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries name: Upload Python Package on: release: - types: [created] + types: [published] + +permissions: + contents: read jobs: deploy: - runs-on: ubuntu-latest - - environment: deployment - + environment: publish + permissions: + id-token: write steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: - python-version: '3.x' + python-version: 3.9 - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* + sudo apt-get install swig + sudo apt-get install unrar + pip install torch~=1.11 --extra-index-url https://download.pytorch.org/whl/cpu + pip install setuptools wheel + make install + - name: Build package + run: make build + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/docs/source/conf.py b/docs/source/conf.py index 808ec0e0..203248f9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Chris Nota' # The full version, including alpha/beta/rc tags -release = '0.9.0' +release = '0.9.1-alpha.4' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 3c136805..3f0ee0b9 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ setup( name="autonomous-learning-library", - version="0.9.0", + version="0.9.1-alpha.4", description=("A library for building reinforcement learning agents in Pytorch"), packages=find_packages(), url="https://github.com/cpnota/autonomous-learning-library.git", From dbcf5ed25a3cfc7c4e324d889442f028e0d283ce Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sun, 25 Feb 2024 18:21:31 -0500 Subject: [PATCH 14/26] Add save_freq argument and refactor scripts (#305) * add save_freq argument and refactor scripts * run formatter * fix test --- all/experiments/parallel_env_experiment.py | 9 ++- .../parallel_env_experiment_test.py | 2 +- all/experiments/run_experiment.py | 2 + all/experiments/single_env_experiment.py | 7 ++ all/scripts/classic.py | 63 ---------------- all/scripts/continuous.py | 75 ------------------- all/scripts/{atari.py => train.py} | 
38 ++++++---- all/scripts/train_atari.py | 18 +++++ all/scripts/train_classic.py | 18 +++++ all/scripts/train_continuous.py | 18 +++++ all/scripts/train_mujoco.py | 18 +++++ ...ent_atari.py => train_multiagent_atari.py} | 7 +- all/scripts/train_pybullet.py | 18 +++++ all/scripts/watch_classic.py | 6 +- all/scripts/watch_continuous.py | 18 ++--- all/scripts/watch_mujoco.py | 27 +++++++ all/scripts/watch_multiagent_atari.py | 2 +- all/scripts/watch_pybullet.py | 29 +++++++ setup.py | 12 ++- 19 files changed, 211 insertions(+), 176 deletions(-) delete mode 100644 all/scripts/classic.py delete mode 100644 all/scripts/continuous.py rename all/scripts/{atari.py => train.py} (58%) create mode 100644 all/scripts/train_atari.py create mode 100644 all/scripts/train_classic.py create mode 100644 all/scripts/train_continuous.py create mode 100644 all/scripts/train_mujoco.py rename all/scripts/{multiagent_atari.py => train_multiagent_atari.py} (92%) create mode 100644 all/scripts/train_pybullet.py create mode 100644 all/scripts/watch_mujoco.py create mode 100644 all/scripts/watch_pybullet.py diff --git a/all/experiments/parallel_env_experiment.py b/all/experiments/parallel_env_experiment.py index cc462974..ff0327cf 100644 --- a/all/experiments/parallel_env_experiment.py +++ b/all/experiments/parallel_env_experiment.py @@ -21,6 +21,7 @@ def __init__( logdir="runs", quiet=False, render=False, + save_freq=100, verbose=True, logger="tensorboard", ): @@ -37,6 +38,7 @@ def __init__( self._preset = preset self._agent = preset.agent(logger=self._logger, train_steps=train_steps) self._render = render + self._save_freq = save_freq # training state self._returns = [] @@ -87,9 +89,10 @@ def train(self, frames=np.inf, episodes=np.inf): for i in range(num_envs): if dones[i]: self._log_training_episode(returns[i], episode_lengths[i], fps) + self._save_model() returns[i] = 0 episode_lengths[i] = 0 - self._episode += episodes_completed + self._episode += 1 def test(self, episodes=100): test_agent = self._preset.parallel_test_agent() @@ -144,3 +147,7 @@ def _make_logger(self, logdir, agent_name, env_name, verbose, logger): return ExperimentLogger( self, agent_name, env_name, verbose=verbose, logdir=logdir ) + + def _save_model(self): + if self._save_freq != float("inf") and self._episode % self._save_freq == 0: + self.save() diff --git a/all/experiments/parallel_env_experiment_test.py b/all/experiments/parallel_env_experiment_test.py index f65d9e49..f922fe51 100644 --- a/all/experiments/parallel_env_experiment_test.py +++ b/all/experiments/parallel_env_experiment_test.py @@ -36,7 +36,7 @@ def test_writes_training_returns_episode(self): self.experiment.train(episodes=4) np.testing.assert_equal( self.experiment._logger.data["eval/returns/episode"]["steps"], - np.array([1, 2, 3, 3]), + np.array([1, 2, 3, 4]), ) np.testing.assert_equal( self.experiment._logger.data["eval/returns/episode"]["values"], diff --git a/all/experiments/run_experiment.py b/all/experiments/run_experiment.py index 3ed2021d..a89d3e9c 100644 --- a/all/experiments/run_experiment.py +++ b/all/experiments/run_experiment.py @@ -11,6 +11,7 @@ def run_experiment( logdir="runs", quiet=False, render=False, + save_freq=100, test_episodes=100, verbose=True, logger="tensorboard", @@ -32,6 +33,7 @@ def run_experiment( logdir=logdir, quiet=quiet, render=render, + save_freq=save_freq, verbose=verbose, logger=logger, ) diff --git a/all/experiments/single_env_experiment.py b/all/experiments/single_env_experiment.py index d3ecb0ed..23e666db 100644 --- 
a/all/experiments/single_env_experiment.py +++ b/all/experiments/single_env_experiment.py @@ -19,6 +19,7 @@ def __init__( logdir="runs", quiet=False, render=False, + save_freq=100, verbose=True, logger="tensorboard", ): @@ -33,6 +34,7 @@ def __init__( self._render = render self._frame = 1 self._episode = 1 + self._save_freq = 100 if render: self._env.render(mode="human") @@ -88,6 +90,7 @@ def _run_training_episode(self): # log the results self._log_training_episode(returns, episode_length, fps) + self._save_model() # update experiment state self._episode += 1 @@ -121,3 +124,7 @@ def _make_logger(self, logdir, agent_name, env_name, verbose, logger): return ExperimentLogger( self, agent_name, env_name, verbose=verbose, logdir=logdir ) + + def _save_model(self): + if self._save_freq != float("inf") and self._episode % self._save_freq == 0: + self.save() diff --git a/all/scripts/classic.py b/all/scripts/classic.py deleted file mode 100644 index 59808c84..00000000 --- a/all/scripts/classic.py +++ /dev/null @@ -1,63 +0,0 @@ -import argparse - -from all.environments import GymEnvironment -from all.experiments import run_experiment -from all.presets import classic_control - - -def main(): - parser = argparse.ArgumentParser(description="Run a classic control benchmark.") - parser.add_argument("env", help="Name of the env (e.g. CartPole-v1).") - parser.add_argument( - "agent", help="Name of the agent (e.g. dqn). See presets for available agents." - ) - parser.add_argument( - "--device", - default="cuda", - help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).", - ) - parser.add_argument( - "--frames", type=int, default=50000, help="The number of training frames." - ) - parser.add_argument( - "--render", action="store_true", default=False, help="Render the environment." - ) - parser.add_argument("--logdir", default="runs", help="The base logging directory.") - parser.add_argument( - "--logger", - default="tensorboard", - help="The backend used for tracking experiment metrics.", - ) - parser.add_argument( - "--hyperparameters", - default=[], - nargs="*", - help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc.", - ) - args = parser.parse_args() - - env = GymEnvironment(args.env, device=args.device) - - agent_name = args.agent - agent = getattr(classic_control, agent_name) - agent = agent.device(args.device) - - # parse hyperparameters - hyperparameters = {} - for hp in args.hyperparameters: - key, value = hp.split("=") - hyperparameters[key] = type(agent.default_hyperparameters[key])(value) - agent = agent.hyperparameters(**hyperparameters) - - run_experiment( - agent, - env, - frames=args.frames, - render=args.render, - logdir=args.logdir, - logger=args.logger, - ) - - -if __name__ == "__main__": - main() diff --git a/all/scripts/continuous.py b/all/scripts/continuous.py deleted file mode 100644 index a46b6be3..00000000 --- a/all/scripts/continuous.py +++ /dev/null @@ -1,75 +0,0 @@ -# pylint: disable=unused-import -import argparse - -from all.environments import GymEnvironment, PybulletEnvironment -from all.experiments import run_experiment -from all.presets import continuous - -# see also: PybulletEnvironment.short_names -ENVS = { - "mountaincar": "MountainCarContinuous-v0", - "lander": "LunarLanderContinuous-v2", -} - - -def main(): - parser = argparse.ArgumentParser(description="Run a continuous actions benchmark.") - parser.add_argument("env", help="Name of the env (e.g. 
'lander', 'cheetah')") - parser.add_argument( - "agent", help="Name of the agent (e.g. ddpg). See presets for available agents." - ) - parser.add_argument( - "--device", - default="cuda", - help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).", - ) - parser.add_argument( - "--frames", type=int, default=2e6, help="The number of training frames." - ) - parser.add_argument( - "--render", action="store_true", default=False, help="Render the environment." - ) - parser.add_argument("--logdir", default="runs", help="The base logging directory.") - parser.add_argument( - "--logger", - default="tensorboard", - help="The backend used for tracking experiment metrics.", - ) - parser.add_argument( - "--hyperparameters", - default=[], - nargs="*", - help="Custom hyperparameters, in the format hyperparameter1=value1 hyperparameter2=value2 etc.", - ) - args = parser.parse_args() - - if args.env in ENVS: - env = GymEnvironment(ENVS[args.env], device=args.device) - elif "BulletEnv" in args.env or args.env in PybulletEnvironment.short_names: - env = PybulletEnvironment(args.env, device=args.device) - else: - env = GymEnvironment(args.env, device=args.device) - - agent_name = args.agent - agent = getattr(continuous, agent_name) - agent = agent.device(args.device) - - # parse hyperparameters - hyperparameters = {} - for hp in args.hyperparameters: - key, value = hp.split("=") - hyperparameters[key] = type(agent.default_hyperparameters[key])(value) - agent = agent.hyperparameters(**hyperparameters) - - run_experiment( - agent, - env, - frames=args.frames, - render=args.render, - logdir=args.logdir, - logger=args.logger, - ) - - -if __name__ == "__main__": - main() diff --git a/all/scripts/atari.py b/all/scripts/train.py similarity index 58% rename from all/scripts/atari.py rename to all/scripts/train.py index 804f46d6..d16e44c7 100644 --- a/all/scripts/atari.py +++ b/all/scripts/train.py @@ -1,15 +1,21 @@ import argparse -from all.environments import AtariEnvironment from all.experiments import run_experiment -from all.presets import atari -def main(): - parser = argparse.ArgumentParser(description="Run an Atari benchmark.") - parser.add_argument("env", help="Name of the Atari game (e.g. Pong).") +def train( + presets, + env_constructor, + description="Train an RL agent", + env_help="Name of the environment (e.g., 'CartPole-v0')", + default_frames=40e6, +): + # parse command line args + parser = argparse.ArgumentParser(description=description) + parser.add_argument("env", help=env_help) parser.add_argument( - "agent", help="Name of the agent (e.g. dqn). See presets for available agents." + "agent", + help="Name of the agent (e.g. 'dqn'). See presets for available agents.", ) parser.add_argument( "--device", @@ -17,7 +23,10 @@ def main(): help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0).", ) parser.add_argument( - "--frames", type=int, default=40e6, help="The number of training frames." + "--frames", + type=int, + default=default_frames, + help="The number of training frames.", ) parser.add_argument( "--render", action="store_true", default=False, help="Render the environment." @@ -28,13 +37,18 @@ def main(): default="tensorboard", help="The backend used for tracking experiment metrics.", ) + parser.add_argument( + "--save_freq", default=100, help="How often to save the model, in episodes." 
+    )
     parser.add_argument("--hyperparameters", default=[], nargs="*")
     args = parser.parse_args()
 
-    env = AtariEnvironment(args.env, device=args.device)
+    # construct the environment
+    env = env_constructor(args.env, device=args.device)
 
+    # construct the agents
     agent_name = args.agent
-    agent = getattr(atari, agent_name)
+    agent = getattr(presets, agent_name)
     agent = agent.device(args.device)
 
     # parse hyperparameters
@@ -44,6 +58,7 @@
         hyperparameters[key] = type(agent.default_hyperparameters[key])(value)
     agent = agent.hyperparameters(**hyperparameters)
 
+    # run the experiment
     run_experiment(
         agent,
         env,
@@ -51,8 +66,5 @@
         render=args.render,
         logdir=args.logdir,
         logger=args.logger,
+        save_freq=args.save_freq,
     )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/all/scripts/train_atari.py b/all/scripts/train_atari.py
new file mode 100644
index 00000000..20c968bb
--- /dev/null
+++ b/all/scripts/train_atari.py
@@ -0,0 +1,18 @@
+from all.environments import AtariEnvironment
+from all.presets import atari
+
+from .train import train
+
+
+def main():
+    train(
+        atari,
+        AtariEnvironment,
+        description="Train an agent on an Atari environment.",
+        env_help="The name of the environment (e.g., 'Pong').",
+        default_frames=40e6,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/all/scripts/train_classic.py b/all/scripts/train_classic.py
new file mode 100644
index 00000000..2fd4e1cd
--- /dev/null
+++ b/all/scripts/train_classic.py
@@ -0,0 +1,18 @@
+from all.environments import GymEnvironment
+from all.presets import classic_control
+
+from .train import train
+
+
+def main():
+    train(
+        classic_control,
+        GymEnvironment,
+        description="Train an agent on a classic control environment.",
+        env_help="The name of the environment (e.g., CartPole-v0).",
+        default_frames=50000,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/all/scripts/train_continuous.py b/all/scripts/train_continuous.py
new file mode 100644
index 00000000..981d730d
--- /dev/null
+++ b/all/scripts/train_continuous.py
@@ -0,0 +1,18 @@
+from all.environments import GymEnvironment
+from all.presets import continuous
+
+from .train import train
+
+
+def main():
+    train(
+        continuous,
+        GymEnvironment,
+        description="Train an agent on a continuous control environment.",
+        env_help="The name of the environment (e.g., LunarLanderContinuous-v2).",
+        default_frames=10e6,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/all/scripts/train_mujoco.py b/all/scripts/train_mujoco.py
new file mode 100644
index 00000000..8ebff69d
--- /dev/null
+++ b/all/scripts/train_mujoco.py
@@ -0,0 +1,18 @@
+from all.environments import MujocoEnvironment
+from all.presets import continuous
+
+from .train import train
+
+
+def main():
+    train(
+        continuous,
+        MujocoEnvironment,
+        description="Train an agent on a Mujoco environment.",
+        env_help="The name of the environment (e.g., Ant-v4).",
+        default_frames=10e6,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/all/scripts/multiagent_atari.py b/all/scripts/train_multiagent_atari.py
similarity index 92%
rename from all/scripts/multiagent_atari.py
rename to all/scripts/train_multiagent_atari.py
index a76acfe7..255f8c90 100644
--- a/all/scripts/multiagent_atari.py
+++ b/all/scripts/train_multiagent_atari.py
@@ -13,7 +13,7 @@ def __init__(self, state_space, action_space):
 
 def main():
     parser = argparse.ArgumentParser(description="Run an multiagent Atari benchmark.")
-    parser.add_argument("env", help="Name of the Atari game (e.g. pong_v2).")
+    parser.add_argument("env", help="Name of the Atari game (e.g. pong_v3).")
     parser.add_argument("agents", nargs="*", help="List of agents.")
     parser.add_argument(
         "--device",
@@ -28,6 +28,9 @@
     parser.add_argument(
         "--frames", type=int, default=40e6, help="The number of training frames."
     )
+    parser.add_argument(
+        "--save_freq", default=100, help="How often to save the model, in episodes."
+    )
     parser.add_argument(
         "--render", action="store_true", default=False, help="Render the environment."
    )
@@ -57,9 +60,11 @@
         IndependentMultiagentPreset("Independent", args.device, presets),
         env,
         verbose=False,
+        save_freq=args.save_freq,
         render=args.render,
         logger=args.logger,
     )
+    experiment.save()
     experiment.train(frames=args.frames)
     experiment.save()
     experiment.test(episodes=100)
diff --git a/all/scripts/train_pybullet.py b/all/scripts/train_pybullet.py
new file mode 100644
index 00000000..45d19512
--- /dev/null
+++ b/all/scripts/train_pybullet.py
@@ -0,0 +1,18 @@
+from all.environments import PybulletEnvironment
+from all.presets import continuous
+
+from .train import train
+
+
+def main():
+    train(
+        continuous,
+        PybulletEnvironment,
+        description="Train an agent on a PyBullet environment.",
+        env_help="The name of the environment (e.g., AntBulletEnv-v0).",
+        default_frames=10e6,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/all/scripts/watch_classic.py b/all/scripts/watch_classic.py
index 239f0964..3cf78014 100644
--- a/all/scripts/watch_classic.py
+++ b/all/scripts/watch_classic.py
@@ -5,10 +5,8 @@
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Run an Atari benchmark.")
-    parser.add_argument(
-        "env", help="Name of the environment (e.g. RoboschoolHalfCheetah-v1"
-    )
+    parser = argparse.ArgumentParser(description="Watch a classic control agent.")
+    parser.add_argument("env", help="Name of the environment (e.g. 
CartPole-v0)") parser.add_argument("filename", help="File where the model was saved.") parser.add_argument( "--device", diff --git a/all/scripts/watch_continuous.py b/all/scripts/watch_continuous.py index 903f83d7..3abaad9a 100644 --- a/all/scripts/watch_continuous.py +++ b/all/scripts/watch_continuous.py @@ -1,15 +1,14 @@ -# pylint: disable=unused-import import argparse -from all.environments import GymEnvironment, PybulletEnvironment +from all.environments import GymEnvironment from all.experiments import load_and_watch -from .continuous import ENVS - def main(): parser = argparse.ArgumentParser(description="Watch a continuous agent.") - parser.add_argument("env", help="ID of the Environment") + parser.add_argument( + "env", help="Name of the environment (e.g., LunarLanderContinuous-v2)" + ) parser.add_argument("filename", help="File where the model was saved.") parser.add_argument( "--device", @@ -22,14 +21,7 @@ def main(): help="Playback speed", ) args = parser.parse_args() - - if args.env in ENVS: - env = GymEnvironment(args.env, device=args.device, render_mode="human") - elif "BulletEnv" in args.env or args.env in PybulletEnvironment.short_names: - env = PybulletEnvironment(args.env, device=args.device, render_mode="human") - else: - env = GymEnvironment(args.env, device=args.device, render_mode="human") - + env = GymEnvironment(args.env, device=args.device, render_mode="human") load_and_watch(args.filename, env, fps=args.fps) diff --git a/all/scripts/watch_mujoco.py b/all/scripts/watch_mujoco.py new file mode 100644 index 00000000..8add986f --- /dev/null +++ b/all/scripts/watch_mujoco.py @@ -0,0 +1,27 @@ +import argparse + +from all.environments import MujocoEnvironment +from all.experiments import load_and_watch + + +def main(): + parser = argparse.ArgumentParser(description="Watch a mujoco agent.") + parser.add_argument("env", help="ID of the Environment") + parser.add_argument("filename", help="File where the model was saved.") + parser.add_argument( + "--device", + default="cuda", + help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)", + ) + parser.add_argument( + "--fps", + default=120, + help="Playback speed", + ) + args = parser.parse_args() + env = MujocoEnvironment(args.env, device=args.device, render_mode="human") + load_and_watch(args.filename, env, fps=args.fps) + + +if __name__ == "__main__": + main() diff --git a/all/scripts/watch_multiagent_atari.py b/all/scripts/watch_multiagent_atari.py index 9d16f1c9..b6dacf8d 100644 --- a/all/scripts/watch_multiagent_atari.py +++ b/all/scripts/watch_multiagent_atari.py @@ -34,7 +34,7 @@ def watch_episode(env, agent, fps): def main(): parser = argparse.ArgumentParser(description="Watch pretrained multiagent atari") - parser.add_argument("env", help="Name of the Atari game (e.g. pong-v1)") + parser.add_argument("env", help="Name of the Atari game (e.g. 
pong_v3)") parser.add_argument("filename", help="File where the model was saved.") parser.add_argument( "--device", diff --git a/all/scripts/watch_pybullet.py b/all/scripts/watch_pybullet.py new file mode 100644 index 00000000..1ca3a1c5 --- /dev/null +++ b/all/scripts/watch_pybullet.py @@ -0,0 +1,29 @@ +# pylint: disable=unused-import +import argparse + +from all.environments import PybulletEnvironment +from all.experiments import load_and_watch + + +def main(): + parser = argparse.ArgumentParser(description="Watch a PyBullet agent.") + parser.add_argument("env", help="Name of the environment (e.g., AntBulletEnv-v0)") + parser.add_argument("filename", help="File where the model was saved.") + parser.add_argument( + "--device", + default="cuda", + help="The name of the device to run the agent on (e.g. cpu, cuda, cuda:0)", + ) + parser.add_argument( + "--fps", + default=120, + help="Playback speed", + ) + args = parser.parse_args() + env = PybulletEnvironment(args.env, device=args.device) + env.render(mode="human") # needed for pybullet envs + load_and_watch(args.filename, env, fps=args.fps) + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index 3f0ee0b9..da27aca9 100644 --- a/setup.py +++ b/setup.py @@ -59,15 +59,19 @@ author_email="cnota@cs.umass.edu", entry_points={ "console_scripts": [ - "all-atari=all.scripts.atari:main", - "all-classic=all.scripts.classic:main", - "all-continuous=all.scripts.continuous:main", - "all-multiagent-atari=all.scripts.multiagent_atari:main", "all-plot=all.scripts.plot:main", + "all-atari=all.scripts.train_atari:main", + "all-classic=all.scripts.train_classic:main", + "all-continuous=all.scripts.train_continuous:main", + "all-mujoco=all.scripts.train_mujoco:main", + "all-multiagent-atari=all.scripts.train_multiagent_atari:main", + "all-pybullet=all.scripts.train_pybullet:main", "all-watch-atari=all.scripts.watch_atari:main", "all-watch-classic=all.scripts.watch_classic:main", "all-watch-continuous=all.scripts.watch_continuous:main", + "all-watch-mujoco=all.scripts.watch_mujoco:main", "all-watch-multiagent-atari=all.scripts.watch_multiagent_atari:main", + "all-watch-pybullet=all.scripts.watch_pybullet:main", ], }, install_requires=[ From 1119aad6a0f613db793782b5f0ec816f1a40526c Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Mon, 26 Feb 2024 20:47:55 -0500 Subject: [PATCH 15/26] Hyperparameter Logging (#308) * add initial add_hparams implementation * add add_hparams to logger interface * remove commit hash from run name * re-organize tensorboard tags * remove comet logger * remove additional comet logger references * remove logger kwarg * fix multiagent env logging * run formatter * fix SingleEnvExperimentTest * update parallel env test * run formatter * update multiagent env experimnet * fix softmax test problem * update torch version in workflows * fix integration tests * remove unwanted print * run formatter --- .github/workflows/python-package.yml | 2 +- .github/workflows/python-publish.yml | 2 +- all/experiments/experiment.py | 25 ++-- all/experiments/multiagent_env_experiment.py | 16 +-- .../multiagent_env_experiment_test.py | 10 +- all/experiments/parallel_env_experiment.py | 13 +- .../parallel_env_experiment_test.py | 81 ++++++----- all/experiments/plots.py | 11 +- all/experiments/run_experiment.py | 2 - all/experiments/single_env_experiment.py | 13 +- all/experiments/single_env_experiment_test.py | 74 +++++----- all/logging/__init__.py | 4 +- all/logging/_logger.py | 17 +++ all/logging/dummy.py | 11 +- all/logging/experiment.py 
| 136 ++++-------------- all/policies/softmax_test.py | 16 +-- all/scripts/train.py | 6 - all/scripts/train_multiagent_atari.py | 7 - integration/validate_agent.py | 6 +- setup.py | 6 +- 20 files changed, 193 insertions(+), 265 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cd40ad82..d91aa04e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,7 +27,7 @@ jobs: run: | sudo apt-get install swig sudo apt-get install unrar - pip install torch~=1.11 --extra-index-url https://download.pytorch.org/whl/cpu + pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu make install - name: Lint code run: | diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index c072dc2e..8f0f5ca9 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -27,7 +27,7 @@ jobs: python -m pip install --upgrade pip sudo apt-get install swig sudo apt-get install unrar - pip install torch~=1.11 --extra-index-url https://download.pytorch.org/whl/cpu + pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu pip install setuptools wheel make install - name: Build package diff --git a/all/experiments/experiment.py b/all/experiments/experiment.py index 5611d7ae..078b2be8 100644 --- a/all/experiments/experiment.py +++ b/all/experiments/experiment.py @@ -64,15 +64,11 @@ def _log_training_episode(self, returns, episode_length, fps): self._best_returns = returns self._returns100.append(returns) if len(self._returns100) == 100: - mean = np.mean(self._returns100) - std = np.std(self._returns100) - self._logger.add_summary("returns100", mean, std, step="frame") + self._logger.add_summary("returns100", self._returns100) self._returns100 = [] - self._logger.add_eval("returns/episode", returns, step="episode") - self._logger.add_eval("returns/frame", returns, step="frame") - self._logger.add_eval("returns/max", self._best_returns, step="frame") + self._logger.add_eval("returns", returns) self._logger.add_eval("episode_length", episode_length) - self._logger.add_eval("fps", fps, step="frame") + self._logger.add_eval("fps", fps) def _log_test_episode(self, episode, returns, episode_length): if not self._quiet: @@ -96,10 +92,17 @@ def _log_test(self, returns, episode_lengths): episode_length_mean, episode_length_sem ) ) - self._logger.add_summary("test_returns", np.mean(returns), np.std(returns)) - self._logger.add_summary( - "test_episode_length", np.mean(episode_lengths), np.std(episode_lengths) - ) + metrics = { + "test/returns": returns, + "test/episode_length": episode_lengths, + } + aggregators = ["mean", "std", "max", "min"] + metrics_dict = { + f"{metric}/{aggregator}": getattr(np, aggregator)(values) + for metric, values in metrics.items() + for aggregator in aggregators + } + self._logger.add_hparams(self._preset.hyperparameters, metrics_dict) def save(self): return self._preset.save("{}/preset.pt".format(self._logger.log_dir)) diff --git a/all/experiments/multiagent_env_experiment.py b/all/experiments/multiagent_env_experiment.py index e141e037..0132b467 100644 --- a/all/experiments/multiagent_env_experiment.py +++ b/all/experiments/multiagent_env_experiment.py @@ -2,7 +2,7 @@ import numpy as np -from all.logging import CometLogger, ExperimentLogger +from all.logging import ExperimentLogger class MultiagentEnvExperiment: @@ -32,10 +32,9 @@ def __init__( save_freq=100, train_steps=float("inf"), verbose=True, - 
logger="tensorboard", ): self._name = name if name is not None else preset.name - self._logger = self._make_logger(logdir, self._name, env.name, verbose, logger) + self._logger = self._make_logger(logdir, self._name, env.name, verbose) self._agent = preset.agent(logger=self._logger, train_steps=train_steps) self._env = env self._episode = 1 @@ -166,7 +165,7 @@ def _log_training_episode(self, returns, fps): print("frames: {}, fps: {}".format(self._frame, fps)) for agent in self._env.agents: self._logger.add_eval( - "{}/returns/frame".format(agent), returns[agent], step="frame" + "{}/returns".format(agent), returns[agent], step="frame" ) def _log_test_episode(self, episode, returns): @@ -181,19 +180,14 @@ def _log_test(self, returns): print("{} test returns (mean ± sem): {} ± {}".format(agent, mean, sem)) self._logger.add_summary( "{}/returns-test".format(agent), - np.mean(agent_returns), - np.std(agent_returns), + agent_returns, ) def _save_model(self): if self._save_freq != float("inf") and self._episode % self._save_freq == 0: self.save() - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): - if logger == "comet": - return CometLogger( - self, agent_name, env_name, verbose=verbose, logdir=logdir - ) + def _make_logger(self, logdir, agent_name, env_name, verbose): return ExperimentLogger( self, agent_name, env_name, verbose=verbose, logdir=logdir ) diff --git a/all/experiments/multiagent_env_experiment_test.py b/all/experiments/multiagent_env_experiment_test.py index ad782c7b..a8b78f45 100644 --- a/all/experiments/multiagent_env_experiment_test.py +++ b/all/experiments/multiagent_env_experiment_test.py @@ -12,7 +12,7 @@ class MockExperiment(MultiagentEnvExperiment): - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): + def _make_logger(self, logdir, agent_name, env_name, verbose): self._logger = MockLogger(self, agent_name + "_" + env_name, verbose) return self._logger @@ -50,16 +50,16 @@ def test_writes_training_returns(self): self.maxDiff = None # could not get the exact numbers to be reproducible across enviornments :( self.assertEqual( - len(experiment._logger.data["eval/first_0/returns/frame"]["values"]), 3 + len(experiment._logger.data["eval/first_0/returns"]["values"]), 3 ) self.assertEqual( - len(experiment._logger.data["eval/first_0/returns/frame"]["steps"]), 3 + len(experiment._logger.data["eval/first_0/returns"]["steps"]), 3 ) self.assertEqual( - len(experiment._logger.data["eval/second_0/returns/frame"]["values"]), 3 + len(experiment._logger.data["eval/second_0/returns"]["values"]), 3 ) self.assertEqual( - len(experiment._logger.data["eval/second_0/returns/frame"]["steps"]), 3 + len(experiment._logger.data["eval/second_0/returns"]["steps"]), 3 ) def test_writes_test_returns(self): diff --git a/all/experiments/parallel_env_experiment.py b/all/experiments/parallel_env_experiment.py index ff0327cf..7e38139b 100644 --- a/all/experiments/parallel_env_experiment.py +++ b/all/experiments/parallel_env_experiment.py @@ -4,7 +4,7 @@ import torch from all.environments import VectorEnvironment -from all.logging import CometLogger, ExperimentLogger +from all.logging import ExperimentLogger from .experiment import Experiment @@ -23,11 +23,10 @@ def __init__( render=False, save_freq=100, verbose=True, - logger="tensorboard", ): self._name = name if name is not None else preset.name super().__init__( - self._make_logger(logdir, self._name, env.name, verbose, logger), quiet + self._make_logger(logdir, self._name, env.name, verbose), quiet ) self._n_envs 
= preset.n_envs if isinstance(env, VectorEnvironment): @@ -91,7 +90,7 @@ def train(self, frames=np.inf, episodes=np.inf): self._log_training_episode(returns[i], episode_lengths[i], fps) self._save_model() returns[i] = 0 - episode_lengths[i] = 0 + episode_lengths[i] = -1 self._episode += 1 def test(self, episodes=100): @@ -139,11 +138,7 @@ def test(self, episodes=100): def _done(self, frames, episodes): return self._frame > frames or self._episode > episodes - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): - if logger == "comet": - return CometLogger( - self, agent_name, env_name, verbose=verbose, logdir=logdir - ) + def _make_logger(self, logdir, agent_name, env_name, verbose): return ExperimentLogger( self, agent_name, env_name, verbose=verbose, logdir=logdir ) diff --git a/all/experiments/parallel_env_experiment_test.py b/all/experiments/parallel_env_experiment_test.py index f922fe51..a47a8d8d 100644 --- a/all/experiments/parallel_env_experiment_test.py +++ b/all/experiments/parallel_env_experiment_test.py @@ -10,7 +10,7 @@ class MockExperiment(ParallelEnvExperiment): - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): + def _make_logger(self, logdir, agent_name, env_name, verbose): self._logger = MockLogger(self, agent_name + "_" + env_name, verbose) return self._logger @@ -32,63 +32,80 @@ def test_adds_custom_label(self): experiment = MockExperiment(self.make_agent(), env, name="a2c", quiet=True) self.assertEqual(experiment._logger.label, "a2c_CartPole-v0") - def test_writes_training_returns_episode(self): - self.experiment.train(episodes=4) - np.testing.assert_equal( - self.experiment._logger.data["eval/returns/episode"]["steps"], - np.array([1, 2, 3, 4]), - ) - np.testing.assert_equal( - self.experiment._logger.data["eval/returns/episode"]["values"], - np.array([12.0, 13.0, 16.0, 16.0]), - ) - def test_writes_training_returns_frame(self): self.experiment.train(episodes=4) np.testing.assert_equal( - self.experiment._logger.data["eval/returns/frame"]["steps"], - np.array([49, 53, 65, 65]), + self.experiment._logger.data["eval/returns"]["steps"], + np.array([65, 65, 101, 125]), ) np.testing.assert_equal( - self.experiment._logger.data["eval/returns/frame"]["values"], - np.array([12.0, 13.0, 16.0, 16.0]), + self.experiment._logger.data["eval/returns"]["values"], + np.array([16.0, 16.0, 25.0, 14.0]), ) def test_writes_training_episode_length(self): self.experiment.train(episodes=4) np.testing.assert_equal( self.experiment._logger.data["eval/episode_length"]["steps"], - np.array([49, 53, 65, 65]), + np.array([65, 65, 101, 125]), ) np.testing.assert_equal( self.experiment._logger.data["eval/episode_length"]["values"], - np.array([12.0, 13.0, 16.0, 16.0]), + np.array([16.0, 16.0, 25.0, 14.0]), ) + def test_writes_hparams(self): + experiment = self.experiment + experiment.train(episodes=5) + returns = experiment.test(episodes=4) + hparam_dict, metric_dict, step = experiment._logger.hparams[0] + self.assertDictEqual(hparam_dict, experiment._preset.hyperparameters) + self.assertEqual(step, "frame") + def test_writes_test_returns(self): - self.experiment.train(episodes=5) - returns = self.experiment.test(episodes=4) - self.assertEqual(len(returns), 4) + experiment = self.experiment + experiment.train(episodes=5) + returns = experiment.test(episodes=4) + expected_mean = 26.25 + np.testing.assert_equal(np.mean(returns), expected_mean) + hparam_dict, metric_dict, step = experiment._logger.hparams[0] np.testing.assert_equal( - 
self.experiment._logger.data["summary/test_returns/mean"]["values"], - np.array([np.mean(returns)]), + metric_dict["test/returns/mean"], + np.array([expected_mean]), + ) + np.testing.assert_almost_equal( + metric_dict["test/returns/std"], np.array([6.869]), decimal=3 + ) + np.testing.assert_equal( + metric_dict["test/returns/max"], + np.array([34.0]), ) np.testing.assert_equal( - self.experiment._logger.data["summary/test_returns/std"]["values"], - np.array([np.std(returns)]), + metric_dict["test/returns/min"], + np.array([18.0]), ) def test_writes_test_episode_length(self): - self.experiment.train(episodes=5) - returns = self.experiment.test(episodes=4) - self.assertEqual(len(returns), 4) + experiment = self.experiment + experiment.train(episodes=5) + returns = experiment.test(episodes=4) + expected_mean = 26.25 + np.testing.assert_equal(np.mean(returns), expected_mean) + hparam_dict, metric_dict, step = experiment._logger.hparams[0] + np.testing.assert_equal( + metric_dict["test/episode_length/mean"], + np.array([expected_mean]), + ) + np.testing.assert_almost_equal( + metric_dict["test/episode_length/std"], np.array([6.869]), decimal=3 + ) np.testing.assert_equal( - self.experiment._logger.data["summary/test_episode_length/mean"]["values"], - np.array([np.mean(returns)]), + metric_dict["test/episode_length/max"], + np.array([34.0]), ) np.testing.assert_equal( - self.experiment._logger.data["summary/test_episode_length/std"]["values"], - np.array([np.std(returns)]), + metric_dict["test/episode_length/min"], + np.array([18.0]), ) def test_writes_loss(self): diff --git a/all/experiments/plots.py b/all/experiments/plots.py index c3e69f37..400c6266 100644 --- a/all/experiments/plots.py +++ b/all/experiments/plots.py @@ -26,15 +26,12 @@ def add_data(agent, env, file): data[env][agent] = np.genfromtxt(file, delimiter=",").reshape((-1, 3)) for agent_dir in os.listdir(runs_dir): - agent = agent_dir.split("_")[0] + agent, env, *_ = agent_dir.split("_") agent_path = os.path.join(runs_dir, agent_dir) if os.path.isdir(agent_path): - for env in os.listdir(agent_path): - env_path = os.path.join(agent_path, env) - if os.path.isdir(env_path): - returns100path = os.path.join(env_path, "returns100.csv") - if os.path.exists(returns100path): - add_data(agent, env, returns100path) + returns100path = os.path.join(agent_path, "returns100.csv") + if os.path.exists(returns100path): + add_data(agent, env, returns100path) return data diff --git a/all/experiments/run_experiment.py b/all/experiments/run_experiment.py index a89d3e9c..d3f61d4f 100644 --- a/all/experiments/run_experiment.py +++ b/all/experiments/run_experiment.py @@ -14,7 +14,6 @@ def run_experiment( save_freq=100, test_episodes=100, verbose=True, - logger="tensorboard", ): if not isinstance(agents, list): agents = [agents] @@ -35,7 +34,6 @@ def run_experiment( render=render, save_freq=save_freq, verbose=verbose, - logger=logger, ) experiment.save() experiment.train(frames=frames) diff --git a/all/experiments/single_env_experiment.py b/all/experiments/single_env_experiment.py index 23e666db..f4ad1cb8 100644 --- a/all/experiments/single_env_experiment.py +++ b/all/experiments/single_env_experiment.py @@ -2,7 +2,7 @@ import numpy as np -from all.logging import CometLogger, ExperimentLogger +from all.logging import ExperimentLogger from .experiment import Experiment @@ -21,11 +21,10 @@ def __init__( render=False, save_freq=100, verbose=True, - logger="tensorboard", ): self._name = name if name is not None else preset.name super().__init__( - 
self._make_logger(logdir, self._name, env.name, verbose, logger), quiet + self._make_logger(logdir, self._name, env.name, verbose), quiet ) self._logdir = logdir self._preset = preset @@ -34,7 +33,7 @@ def __init__( self._render = render self._frame = 1 self._episode = 1 - self._save_freq = 100 + self._save_freq = save_freq if render: self._env.render(mode="human") @@ -116,11 +115,7 @@ def _run_test_episode(self, test_agent): def _done(self, frames, episodes): return self._frame > frames or self._episode > episodes - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): - if logger == "comet": - return CometLogger( - self, agent_name, env_name, verbose=verbose, logdir=logdir - ) + def _make_logger(self, logdir, agent_name, env_name, verbose): return ExperimentLogger( self, agent_name, env_name, verbose=verbose, logdir=logdir ) diff --git a/all/experiments/single_env_experiment_test.py b/all/experiments/single_env_experiment_test.py index 1a7e50f5..7a3de97a 100644 --- a/all/experiments/single_env_experiment_test.py +++ b/all/experiments/single_env_experiment_test.py @@ -12,6 +12,7 @@ class MockLogger(Logger): def __init__(self, experiment, label, verbose): self.data = {} + self.hparams = [] self.label = label self.verbose = verbose self.experiment = experiment @@ -34,9 +35,12 @@ def add_info(self, name, value, step="frame"): def add_schedule(self, name, value, step="frame"): pass - def add_summary(self, name, mean, std, step="frame"): - self._add_scalar("summary/" + name + "/mean", mean, step) - self._add_scalar("summary/" + name + "/std", std, step) + def add_summary(self, name, values, step="frame"): + self._add_scalar("summary/" + name + "/mean", np.mean(values), step) + self._add_scalar("summary/" + name + "/std", np.std(values), step) + + def add_hparams(self, hparam_dict, metric_dict, step="frame"): + self.hparams.append((hparam_dict, metric_dict, step)) def _get_step(self, _type): if _type == "frame": @@ -50,7 +54,7 @@ def close(self): class MockExperiment(SingleEnvExperiment): - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): + def _make_logger(self, logdir, agent_name, env_name, verbose): self._logger = MockLogger(self, agent_name + "_" + env_name, verbose) return self._logger @@ -77,26 +81,14 @@ def test_writes_training_returns_frame(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True) experiment.train(episodes=3) np.testing.assert_equal( - experiment._logger.data["eval/returns/frame"]["values"], + experiment._logger.data["eval/returns"]["values"], np.array([22.0, 17.0, 28.0]), ) np.testing.assert_equal( - experiment._logger.data["eval/returns/frame"]["steps"], + experiment._logger.data["eval/returns"]["steps"], np.array([23, 40, 68]), ) - def test_writes_training_returns_episode(self): - experiment = MockExperiment(self.make_preset(), self.env, quiet=True) - experiment.train(episodes=3) - np.testing.assert_equal( - experiment._logger.data["eval/returns/episode"]["values"], - np.array([22.0, 17.0, 28.0]), - ) - np.testing.assert_equal( - experiment._logger.data["eval/returns/episode"]["steps"], - np.array([1, 2, 3]), - ) - def test_writes_training_episode_length(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True) experiment.train(episodes=3) @@ -109,25 +101,36 @@ def test_writes_training_episode_length(self): np.array([23, 40, 68]), ) + def test_writes_hparams(self): + experiment = MockExperiment(self.make_preset(), self.env, quiet=True) + experiment.train(episodes=5) + returns = 
experiment.test(episodes=4) + hparam_dict, metric_dict, step = experiment._logger.hparams[0] + self.assertDictEqual(hparam_dict, experiment._preset.hyperparameters) + self.assertEqual(step, "frame") + def test_writes_test_returns(self): experiment = MockExperiment(self.make_preset(), self.env, quiet=True) experiment.train(episodes=5) returns = experiment.test(episodes=4) expected_mean = 8.5 - expected_std = 0.5 np.testing.assert_equal(np.mean(returns), expected_mean) + hparam_dict, metric_dict, step = experiment._logger.hparams[0] np.testing.assert_equal( - experiment._logger.data["summary/test_returns/mean"]["values"], + metric_dict["test/returns/mean"], np.array([expected_mean]), ) - np.testing.assert_approx_equal( - np.array(experiment._logger.data["summary/test_returns/std"]["values"]), - np.array([expected_std]), - significant=4, + np.testing.assert_equal( + metric_dict["test/returns/std"], + np.array([0.5]), ) np.testing.assert_equal( - experiment._logger.data["summary/test_returns/mean"]["steps"], - np.array([93]), + metric_dict["test/returns/max"], + np.array([9.0]), + ) + np.testing.assert_equal( + metric_dict["test/returns/min"], + np.array([8.0]), ) def test_writes_test_episode_length(self): @@ -135,20 +138,23 @@ def test_writes_test_episode_length(self): experiment.train(episodes=5) returns = experiment.test(episodes=4) expected_mean = 8.5 - expected_std = 0.5 np.testing.assert_equal(np.mean(returns), expected_mean) + hparam_dict, metric_dict, step = experiment._logger.hparams[0] np.testing.assert_equal( - experiment._logger.data["summary/test_returns/mean"]["values"], + metric_dict["test/episode_length/mean"], np.array([expected_mean]), ) - np.testing.assert_approx_equal( - np.array(experiment._logger.data["summary/test_returns/std"]["values"]), - np.array([expected_std]), - significant=4, + np.testing.assert_equal( + metric_dict["test/episode_length/std"], + np.array([0.5]), + ) + np.testing.assert_equal( + metric_dict["test/episode_length/max"], + np.array([9.0]), ) np.testing.assert_equal( - experiment._logger.data["summary/test_returns/mean"]["steps"], - np.array([93]), + metric_dict["test/episode_length/min"], + np.array([8.0]), ) def test_writes_loss(self): diff --git a/all/logging/__init__.py b/all/logging/__init__.py index df81429c..fc635bb4 100644 --- a/all/logging/__init__.py +++ b/all/logging/__init__.py @@ -1,5 +1,5 @@ from ._logger import Logger from .dummy import DummyLogger -from .experiment import CometLogger, ExperimentLogger +from .experiment import ExperimentLogger -__all__ = ["Logger", "DummyLogger", "ExperimentLogger", "CometLogger"] +__all__ = ["Logger", "DummyLogger", "ExperimentLogger"] diff --git a/all/logging/_logger.py b/all/logging/_logger.py index 728060b0..70daeb2a 100644 --- a/all/logging/_logger.py +++ b/all/logging/_logger.py @@ -60,6 +60,23 @@ def add_schedule(self, name, value, step="frame"): step (str, optional): Which step to use (e.g., "frame" or "episode") """ + @abstractmethod + def add_hparams(self, hparam_dict, metric_dict, step="frame"): + """ + Logs metrics for a given set of hyperparameters. + Usually this should be called once at the end of a run in order to + log the final results for hyperparameters, though it can be called + multiple times throughout training. However, it should be called infrequently. + + Args: + hparam_dict (dict): A dictionary of hyperparameters. + Only parameters of type (int, float, str, bool, torch.Tensor) + will be logged. + metric_dict (dict): A dictionary of metrics to record. 
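For context, this is roughly the shape of the two dictionaries an experiment passes to this hook. The values below are invented for illustration and the metric keys are modeled on the test expectations above; only the call signature comes from the library.

    import numpy as np
    from all.logging import DummyLogger

    # Illustrative hyperparameters and test-summary metrics (made-up values).
    hparam_dict = {"discount_factor": 0.99, "lr": 1e-3}
    returns = [22.0, 34.0, 18.0, 31.0]
    metric_dict = {
        f"test/returns/{agg}": np.array([getattr(np, agg)(returns)])
        for agg in ("mean", "std", "max", "min")
    }
    # DummyLogger implements the Logger interface with no side effects.
    DummyLogger().add_hparams(hparam_dict, metric_dict, step="frame")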
+ step (str, optional): Which step to use (e.g., "frame" or "episode") + """ + pass + @abstractmethod def close(self): """ diff --git a/all/logging/dummy.py b/all/logging/dummy.py index a7430105..c3db81f8 100644 --- a/all/logging/dummy.py +++ b/all/logging/dummy.py @@ -4,19 +4,22 @@ class DummyLogger(Logger): """A default Logger object that performs no logging and has no side effects.""" - def add_summary(self, name, mean, std, step="frame"): + def add_eval(self, name, value, step="frame"): + pass + + def add_info(self, name, value, step="frame"): pass def add_loss(self, name, value, step="frame"): pass - def add_eval(self, name, value, step="frame"): + def add_schedule(self, name, value, step="frame"): pass - def add_info(self, name, value, step="frame"): + def add_summary(self, name, values, step="frame"): pass - def add_schedule(self, name, value, step="frame"): + def add_hparams(self, hparam_dict, metric_dict, step="frame"): pass def close(self): diff --git a/all/logging/experiment.py b/all/logging/experiment.py index d4fc5279..7c08bc64 100644 --- a/all/logging/experiment.py +++ b/all/logging/experiment.py @@ -1,8 +1,9 @@ import csv import os -import subprocess from datetime import datetime +import numpy as np +import torch from torch.utils.tensorboard import SummaryWriter from ._logger import Logger @@ -25,25 +26,26 @@ class ExperimentLogger(SummaryWriter, Logger): def __init__(self, experiment, agent_name, env_name, verbose=True, logdir="runs"): self.env_name = env_name current_time = datetime.now().strftime("%Y-%m-%d_%H:%M:%S_%f") - dir_name = "%s_%s_%s" % (agent_name, COMMIT_HASH, current_time) - os.makedirs(os.path.join(logdir, dir_name, env_name)) + dir_name = f"{agent_name}_{env_name}_{current_time}" + os.makedirs(os.path.join(logdir, dir_name)) self.log_dir = os.path.join(logdir, dir_name) self._experiment = experiment - self._verbose = not verbose + self._verbose = verbose super().__init__(log_dir=self.log_dir) - def add_summary(self, name, mean, std, step="frame"): - super().add_scalar( - "{}/summary/{}/mean".format(self.env_name, name), mean, self._get_step(step) - ) - super().add_scalar( - "{}/summary/{}/std".format(self.env_name, name), std, self._get_step(step) - ) + def add_summary(self, name, values, step="frame"): + aggregators = ["mean", "std", "max", "min"] + metrics = { + aggregator: getattr(np, aggregator)(values) for aggregator in aggregators + } + for aggregator, value in metrics.items(): + super().add_scalar( + f"summary/{name}/{aggregator}", value, self._get_step(value) + ) - with open( - os.path.join(self.log_dir, self.env_name, name + ".csv"), "a" - ) as csvfile: - csv.writer(csvfile).writerow([self._get_step(step), mean, std]) + # log summary statistics to file + with open(os.path.join(self.log_dir, name + ".csv"), "a") as csvfile: + csv.writer(csvfile).writerow([self._get_step(step), *metrics.values()]) def add_loss(self, name, value, step="frame"): self._add_scalar("loss/" + name, value, step) @@ -57,85 +59,19 @@ def add_info(self, name, value, step="frame"): def add_schedule(self, name, value, step="frame"): self._add_scalar("schedule/" + name, value, step) - def _add_scalar(self, name, value, step="frame"): - if not self._verbose: - super().add_scalar(self.env_name + "/" + name, value, self._get_step(step)) - - def _get_step(self, _type): - if _type == "frame": - return self._experiment.frame - if _type == "episode": - return self._experiment.episode - return _type - - def close(self): - pass - - -class CometLogger(Logger): - """ - A Logger object to be 
used by all.experiments.Experiment. - Writes logs using comet.ml Requires an API key to be stored in .comet.config or as an environment variable. - Look at https://www.comet.ml/docs/python-sdk/advanced/#python-configuration for more info. - Args: - experiment (all.experiments.Experiment): The Experiment associated with the Logger object. - agent_name (str): The name of the Agent the Experiment is being performed on - env_name (str): The name of the environment the Experiment is being performed in - loss (bool, optional): Whether or not to log loss/scheduling metrics, or only evaluation and summary metrics. - logdir (str): The directory where run information is stored. - """ - - def __init__(self, experiment, agent_name, env_name, verbose=True, logdir="runs"): - self.env_name = env_name - self._experiment = experiment - self._verbose = not verbose - - try: - from comet_ml import Experiment - except ImportError as e: - print( - "Failed to import comet_ml. CometLogger requires that comet_ml be installed" - ) - raise e - try: - self._comet = Experiment(project_name=env_name) - except ImportError as e: - print( - "See https://www.comet.ml/docs/python-sdk/warnings-errors/ for more info on this error." - ) - raise e - except ValueError as e: - print( - "See https://www.comet.ml/docs/python-sdk/advanced/#python-configuration for more info on this error." - ) - raise e - - self._comet.set_name(agent_name) - self.log_dir = logdir - - def add_summary(self, name, mean, std, step="frame"): - self._comet.log_metric( - "{}/summary/{}/mean".format(self.env_name, name), mean, self._get_step(step) - ) - self._comet.log_metric( - "{}/summary/{}/std".format(self.env_name, name), std, self._get_step(step) + def add_hparams(self, hparam_dict, metric_dict, step="frame"): + allowed_types = (int, float, str, bool, torch.Tensor) + hparams = {k: v for k, v in hparam_dict.items() if isinstance(v, allowed_types)} + metrics = { + f"{self.env_name}/{metric}": value for metric, value in metric_dict.items() + } + super().add_hparams( + hparams, metrics, run_name=".", global_step=self._get_step("frame") ) - def add_loss(self, name, value, step="frame"): - self._add_scalar("loss/" + name, value, step) - - def add_eval(self, name, value, step="frame"): - self._add_scalar("eval/" + name, value, step) - - def add_info(self, name, value, step="frame"): - self._add_scalar("info/" + name, value, step) - - def add_schedule(self, name, value, step="frame"): - self._add_scalar("schedule/" + name, value, step) - def _add_scalar(self, name, value, step="frame"): - if not self._verbose: - self._comet.log_metric(name, value, self._get_step(step)) + if self._verbose: + super().add_scalar(name, value, self._get_step(step)) def _get_step(self, _type): if _type == "frame": @@ -145,20 +81,4 @@ def _get_step(self, _type): return _type def close(self): - self._comet.end() - - -def get_commit_hash(): - try: - result = subprocess.run( - ["git", "rev-parse", "--short", "HEAD"], - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - check=False, - ) - return result.stdout.decode("utf-8").rstrip() - except Exception: - return "" - - -COMMIT_HASH = get_commit_hash() + pass diff --git a/all/policies/softmax_test.py b/all/policies/softmax_test.py index a543ac15..1821f2a6 100644 --- a/all/policies/softmax_test.py +++ b/all/policies/softmax_test.py @@ -23,7 +23,7 @@ def test_run(self): dist1 = self.policy(state1) action1 = dist1.sample() log_prob1 = dist1.log_prob(action1) - self.assertEqual(action1.item(), 0) + self.assertEqual(action1.item(), 2) 
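To illustrate the type filtering performed by the new ExperimentLogger.add_hparams earlier in this patch, a small self-contained sketch follows. The helper name and the sample dictionary are hypothetical; the library additionally allows torch.Tensor values, which are left out here to keep the sketch dependency-free.

    # Sketch of the allowed-types filter in ExperimentLogger.add_hparams:
    # only value types that the TensorBoard hparams plugin can record are kept.
    def filter_hparams(hparam_dict, allowed_types=(int, float, str, bool)):
        return {k: v for k, v in hparam_dict.items() if isinstance(v, allowed_types)}

    example = {
        "lr_q": 1e-3,                   # kept
        "discount_factor": 0.99,        # kept
        "model_constructor": object(),  # dropped: not a loggable type
    }
    print(filter_hparams(example))      # {'lr_q': 0.001, 'discount_factor': 0.99}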
state2 = State(torch.randn(1, STATE_DIM)) dist2 = self.policy(state2) @@ -37,12 +37,12 @@ def test_run(self): state3 = State(torch.randn(1, STATE_DIM)) dist3 = self.policy(state3) action3 = dist3.sample() - self.assertEqual(action3.item(), 2) + self.assertEqual(action3.item(), 0) def test_multi_action(self): states = State(torch.randn(3, STATE_DIM)) actions = self.policy(states).sample() - tt.assert_equal(actions, torch.tensor([2, 2, 0])) + tt.assert_equal(actions, torch.tensor([2, 0, 0])) def test_list(self): torch.manual_seed(1) @@ -50,7 +50,7 @@ def test_list(self): dist = self.policy(states) actions = dist.sample() log_probs = dist.log_prob(actions) - tt.assert_equal(actions, torch.tensor([1, 2, 1])) + tt.assert_equal(actions, torch.tensor([0, 0, 2])) loss = -(torch.tensor([[1, 2, 3]]) * log_probs).mean() self.policy.reinforce(loss) @@ -64,17 +64,17 @@ def loss(log_probs): # notice the values increase with each successive reinforce log_probs = self.policy(states).log_prob(actions) tt.assert_almost_equal( - log_probs, torch.tensor([-0.84, -0.62, -0.757]), decimal=3 + log_probs, torch.tensor([-0.84, -1.325, -0.757]), decimal=3 ) self.policy.reinforce(loss(log_probs)) log_probs = self.policy(states).log_prob(actions) tt.assert_almost_equal( - log_probs, torch.tensor([-0.811, -0.561, -0.701]), decimal=3 + log_probs, torch.tensor([-0.855, -1.278, -0.726]), decimal=3 ) self.policy.reinforce(loss(log_probs)) log_probs = self.policy(states).log_prob(actions) tt.assert_almost_equal( - log_probs, torch.tensor([-0.785, -0.51, -0.651]), decimal=3 + log_probs, torch.tensor([-0.871, -1.234, -0.698]), decimal=3 ) def test_eval(self): @@ -88,7 +88,7 @@ def test_eval(self): decimal=3, ) best = self.policy.eval(states).sample() - tt.assert_equal(best, torch.tensor([2, 2, 0])) + tt.assert_equal(best, torch.tensor([2, 0, 0])) if __name__ == "__main__": diff --git a/all/scripts/train.py b/all/scripts/train.py index d16e44c7..5d02ab4a 100644 --- a/all/scripts/train.py +++ b/all/scripts/train.py @@ -32,11 +32,6 @@ def train( "--render", action="store_true", default=False, help="Render the environment." ) parser.add_argument("--logdir", default="runs", help="The base logging directory.") - parser.add_argument( - "--logger", - default="tensorboard", - help="The backend used for tracking experiment metrics.", - ) parser.add_argument( "--save_freq", default=100, help="How often to save the model, in episodes." ) @@ -65,6 +60,5 @@ def train( args.frames, render=args.render, logdir=args.logdir, - logger=args.logger, save_freq=args.save_freq, ) diff --git a/all/scripts/train_multiagent_atari.py b/all/scripts/train_multiagent_atari.py index 255f8c90..33186f2b 100644 --- a/all/scripts/train_multiagent_atari.py +++ b/all/scripts/train_multiagent_atari.py @@ -34,11 +34,6 @@ def main(): parser.add_argument( "--render", action="store_true", default=False, help="Render the environment." 
) - parser.add_argument( - "--logger", - default="tensorboard", - help="The backend used for tracking experiment metrics.", - ) args = parser.parse_args() env = MultiagentAtariEnv(args.env, device=args.device) @@ -59,10 +54,8 @@ def main(): experiment = MultiagentEnvExperiment( IndependentMultiagentPreset("Independent", args.device, presets), env, - verbose=False, save_freq=args.save_freq, render=args.render, - logger=args.logger, ) experiment.save() experiment.train(frames=args.frames) diff --git a/integration/validate_agent.py b/integration/validate_agent.py index 1b786efe..6d56fdac 100644 --- a/integration/validate_agent.py +++ b/integration/validate_agent.py @@ -10,19 +10,19 @@ class TestSingleEnvExperiment(SingleEnvExperiment): - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): + def _make_logger(self, logdir, agent_name, env_name, verbose): os.makedirs(logdir, exist_ok=True) return DummyLogger() class TestParallelEnvExperiment(ParallelEnvExperiment): - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): + def _make_logger(self, logdir, agent_name, env_name, verbose): os.makedirs(logdir, exist_ok=True) return DummyLogger() class TestMultiagentEnvExperiment(MultiagentEnvExperiment): - def _make_logger(self, logdir, agent_name, env_name, verbose, logger): + def _make_logger(self, logdir, agent_name, env_name, verbose): os.makedirs(logdir, exist_ok=True) return DummyLogger() diff --git a/setup.py b/setup.py index da27aca9..257f6637 100644 --- a/setup.py +++ b/setup.py @@ -34,9 +34,6 @@ "sphinx-rtd-theme>=0.5.0", # documentation theme "sphinx-automodapi>=0.13", # autogenerate docs for modules ], - "comet": [ - "comet-ml>=3.28.3", # experiment tracking using Comet.ml - ], } extras["all"] = ( @@ -45,7 +42,6 @@ + extras["mujoco"] + extras["pybullet"] + extras["ma-atari"] - + extras["comet"] ) extras["dev"] = extras["all"] + extras["test"] + extras["docs"] @@ -79,7 +75,7 @@ "numpy>=1.22.3", # math library "matplotlib>=3.5.1", # plotting library "opencv-python-headless>=4.0.0", # used by atari wrappers - "torch>=1.11.0", # core deep learning library + "torch>=2.0.0", # core deep learning library "tensorboard>=2.8.0", # logging and visualization "cloudpickle>=2.0.0", # used to copy environments ], From dc295ab5577022a6d9665d1bf46e9461303dcca2 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Tue, 27 Feb 2024 09:07:54 -0500 Subject: [PATCH 16/26] remove env name from hparams tag (#309) --- all/logging/experiment.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/all/logging/experiment.py b/all/logging/experiment.py index 7c08bc64..6a9a388f 100644 --- a/all/logging/experiment.py +++ b/all/logging/experiment.py @@ -62,11 +62,8 @@ def add_schedule(self, name, value, step="frame"): def add_hparams(self, hparam_dict, metric_dict, step="frame"): allowed_types = (int, float, str, bool, torch.Tensor) hparams = {k: v for k, v in hparam_dict.items() if isinstance(v, allowed_types)} - metrics = { - f"{self.env_name}/{metric}": value for metric, value in metric_dict.items() - } super().add_hparams( - hparams, metrics, run_name=".", global_step=self._get_step("frame") + hparams, metric_dict, run_name=".", global_step=self._get_step("frame") ) def _add_scalar(self, name, value, step="frame"): From 9d064823e624106a31118603c62f0cd36e1e09a1 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sat, 2 Mar 2024 11:45:45 -0500 Subject: [PATCH 17/26] SAC/DDPG tweaks (#312) * sample actions for SacTest agent * update sac default hyperparameters * update ddpg 
default hyperparameters * fix add_summary bug * use more standard hyperparameters for ddpg and sac * remove time body and adjust discount factor * adjust sac default hyperparameters * run formatter --- all/agents/sac.py | 3 +- all/logging/experiment.py | 2 +- all/policies/soft_deterministic.py | 6 +-- all/presets/continuous/__init__.py | 1 - all/presets/continuous/ddpg.py | 33 +++++++-------- all/presets/continuous/models/__init__.py | 10 ++--- all/presets/continuous/ppo.py | 45 ++++++++++---------- all/presets/continuous/sac.py | 50 +++++++++++------------ 8 files changed, 70 insertions(+), 80 deletions(-) diff --git a/all/agents/sac.py b/all/agents/sac.py index ef537c39..2143a491 100644 --- a/all/agents/sac.py +++ b/all/agents/sac.py @@ -134,4 +134,5 @@ def __init__(self, policy): self.policy = policy def act(self, state): - return self.policy.eval(state) + action, log_prob = self.policy.eval(state) + return action diff --git a/all/logging/experiment.py b/all/logging/experiment.py index 6a9a388f..ea7b936d 100644 --- a/all/logging/experiment.py +++ b/all/logging/experiment.py @@ -40,7 +40,7 @@ def add_summary(self, name, values, step="frame"): } for aggregator, value in metrics.items(): super().add_scalar( - f"summary/{name}/{aggregator}", value, self._get_step(value) + f"summary/{name}/{aggregator}", value, self._get_step(step) ) # log summary statistics to file diff --git a/all/policies/soft_deterministic.py b/all/policies/soft_deterministic.py index b3075e3e..74656f08 100644 --- a/all/policies/soft_deterministic.py +++ b/all/policies/soft_deterministic.py @@ -36,10 +36,8 @@ def __init__(self, model, space): def forward(self, state): outputs = super().forward(state) normal = self._normal(outputs) - if self.training: - action, log_prob = self._sample(normal) - return action, log_prob - return self._squash(normal.loc) + action, log_prob = self._sample(normal) + return action, log_prob def _normal(self, outputs): means = outputs[..., 0 : self._action_dim] diff --git a/all/presets/continuous/__init__.py b/all/presets/continuous/__init__.py index 7b0a9fc4..91b40db6 100644 --- a/all/presets/continuous/__init__.py +++ b/all/presets/continuous/__init__.py @@ -1,4 +1,3 @@ -# from .actor_critic import actor_critic from .ddpg import ddpg from .ppo import ppo from .sac import sac diff --git a/all/presets/continuous/ddpg.py b/all/presets/continuous/ddpg.py index 62c130dd..4762d252 100644 --- a/all/presets/continuous/ddpg.py +++ b/all/presets/continuous/ddpg.py @@ -5,7 +5,6 @@ from all.agents import DDPG, DDPGTestAgent from all.approximation import PolyakTarget, QContinuous -from all.bodies import TimeFeature from all.logging import DummyLogger from all.memory import ExperienceReplayBuffer from all.policies import DeterministicPolicy @@ -15,12 +14,12 @@ default_hyperparameters = { # Common settings - "discount_factor": 0.98, + "discount_factor": 0.99, # Adam optimizer settings - "lr_q": 1e-3, - "lr_pi": 1e-3, + "lr_q": 3e-4, + "lr_pi": 3e-4, # Training settings - "minibatch_size": 100, + "minibatch_size": 256, "update_frequency": 1, "polyak_rate": 0.005, # Replay Buffer settings @@ -94,18 +93,16 @@ def agent(self, logger=DummyLogger(), train_steps=float("inf")): self.hyperparameters["replay_buffer_size"], device=self.device ) - return TimeFeature( - DDPG( - q, - policy, - replay_buffer, - self.action_space, - noise=self.hyperparameters["noise"], - replay_start_size=self.hyperparameters["replay_start_size"], - discount_factor=self.hyperparameters["discount_factor"], - 
update_frequency=self.hyperparameters["update_frequency"], - minibatch_size=self.hyperparameters["minibatch_size"], - ) + return DDPG( + q, + policy, + replay_buffer, + self.action_space, + noise=self.hyperparameters["noise"], + replay_start_size=self.hyperparameters["replay_start_size"], + discount_factor=self.hyperparameters["discount_factor"], + update_frequency=self.hyperparameters["update_frequency"], + minibatch_size=self.hyperparameters["minibatch_size"], ) def test_agent(self): @@ -114,7 +111,7 @@ def test_agent(self): None, self.action_space, ) - return TimeFeature(DDPGTestAgent(policy)) + return DDPGTestAgent(policy) ddpg = PresetBuilder("ddpg", default_hyperparameters, DDPGContinuousPreset) diff --git a/all/presets/continuous/models/__init__.py b/all/presets/continuous/models/__init__.py index 937ca44f..3d815e69 100644 --- a/all/presets/continuous/models/__init__.py +++ b/all/presets/continuous/models/__init__.py @@ -14,7 +14,7 @@ def fc_q(env, hidden1=400, hidden2=300): return nn.Sequential( nn.Float(), - nn.Linear(env.state_space.shape[0] + env.action_space.shape[0] + 1, hidden1), + nn.Linear(env.state_space.shape[0] + env.action_space.shape[0], hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), nn.ReLU(), @@ -25,7 +25,7 @@ def fc_q(env, hidden1=400, hidden2=300): def fc_v(env, hidden1=400, hidden2=300): return nn.Sequential( nn.Float(), - nn.Linear(env.state_space.shape[0] + 1, hidden1), + nn.Linear(env.state_space.shape[0], hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), nn.ReLU(), @@ -36,7 +36,7 @@ def fc_v(env, hidden1=400, hidden2=300): def fc_deterministic_policy(env, hidden1=400, hidden2=300): return nn.Sequential( nn.Float(), - nn.Linear(env.state_space.shape[0] + 1, hidden1), + nn.Linear(env.state_space.shape[0], hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), nn.ReLU(), @@ -47,7 +47,7 @@ def fc_deterministic_policy(env, hidden1=400, hidden2=300): def fc_soft_policy(env, hidden1=400, hidden2=300): return nn.Sequential( nn.Float(), - nn.Linear(env.state_space.shape[0] + 1, hidden1), + nn.Linear(env.state_space.shape[0], hidden1), nn.ReLU(), nn.Linear(hidden1, hidden2), nn.ReLU(), @@ -60,7 +60,7 @@ def __init__(self, env, hidden1=400, hidden2=300): super().__init__() self.model = nn.Sequential( nn.Float(), - nn.Linear(env.state_space.shape[0] + 1, hidden1), + nn.Linear(env.state_space.shape[0], hidden1), nn.Tanh(), nn.Linear(hidden1, hidden2), nn.Tanh(), diff --git a/all/presets/continuous/ppo.py b/all/presets/continuous/ppo.py index dbf73568..6ba761fd 100644 --- a/all/presets/continuous/ppo.py +++ b/all/presets/continuous/ppo.py @@ -5,7 +5,6 @@ from all.agents import PPO, PPOTestAgent from all.approximation import Identity, VNetwork -from all.bodies import TimeFeature from all.logging import DummyLogger from all.optim import LinearScheduler from all.policies import GaussianPolicy @@ -15,7 +14,7 @@ default_hyperparameters = { # Common settings - "discount_factor": 0.98, + "discount_factor": 0.99, # Adam optimizer settings "lr": 3e-4, # Adam learning rate "eps": 1e-5, # Adam stability @@ -112,35 +111,33 @@ def agent(self, logger=DummyLogger(), train_steps=float("inf")): scheduler=CosineAnnealingLR(policy_optimizer, n_updates), ) - return TimeFeature( - PPO( - features, - v, - policy, - epsilon=LinearScheduler( - self.hyperparameters["clip_initial"], - self.hyperparameters["clip_final"], - 0, - n_updates, - name="clip", - logger=logger, - ), - epochs=self.hyperparameters["epochs"], - minibatches=self.hyperparameters["minibatches"], - 
n_envs=self.hyperparameters["n_envs"], - n_steps=self.hyperparameters["n_steps"], - discount_factor=self.hyperparameters["discount_factor"], - lam=self.hyperparameters["lam"], - entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"], + return PPO( + features, + v, + policy, + epsilon=LinearScheduler( + self.hyperparameters["clip_initial"], + self.hyperparameters["clip_final"], + 0, + n_updates, + name="clip", logger=logger, - ) + ), + epochs=self.hyperparameters["epochs"], + minibatches=self.hyperparameters["minibatches"], + n_envs=self.hyperparameters["n_envs"], + n_steps=self.hyperparameters["n_steps"], + discount_factor=self.hyperparameters["discount_factor"], + lam=self.hyperparameters["lam"], + entropy_loss_scaling=self.hyperparameters["entropy_loss_scaling"], + logger=logger, ) def test_agent(self): policy = GaussianPolicy( copy.deepcopy(self.policy_model), space=self.action_space ) - return TimeFeature(PPOTestAgent(Identity(self.device), policy)) + return PPOTestAgent(Identity(self.device), policy) def parallel_test_agent(self): return self.test_agent() diff --git a/all/presets/continuous/sac.py b/all/presets/continuous/sac.py index c33aeddf..460ec2a8 100644 --- a/all/presets/continuous/sac.py +++ b/all/presets/continuous/sac.py @@ -5,7 +5,6 @@ from all.agents import SAC, SACTestAgent from all.approximation import PolyakTarget, QContinuous -from all.bodies import TimeFeature from all.logging import DummyLogger from all.memory import ExperienceReplayBuffer from all.policies.soft_deterministic import SoftDeterministicPolicy @@ -15,20 +14,20 @@ default_hyperparameters = { # Common settings - "discount_factor": 0.98, + "discount_factor": 0.99, # Adam optimizer settings "lr_q": 1e-3, - "lr_pi": 1e-4, + "lr_pi": 3e-4, # Training settings - "minibatch_size": 100, - "update_frequency": 2, + "minibatch_size": 256, + "update_frequency": 1, "polyak_rate": 0.005, # Replay Buffer settings "replay_start_size": 5000, "replay_buffer_size": 1e6, # Exploration settings "temperature_initial": 0.1, - "lr_temperature": 1e-5, + "lr_temperature_scaling": 3e-5, "entropy_backups": True, "entropy_target_scaling": 1.0, # Model construction @@ -111,32 +110,31 @@ def agent(self, logger=DummyLogger(), train_steps=float("inf")): self.hyperparameters["replay_buffer_size"], device=self.device ) - return TimeFeature( - SAC( - policy, - q1, - q2, - replay_buffer, - temperature_initial=self.hyperparameters["temperature_initial"], - entropy_backups=self.hyperparameters["entropy_backups"], - entropy_target=( - -self.action_space.shape[0] - * self.hyperparameters["entropy_target_scaling"] - ), - lr_temperature=self.hyperparameters["lr_temperature"], - replay_start_size=self.hyperparameters["replay_start_size"], - discount_factor=self.hyperparameters["discount_factor"], - update_frequency=self.hyperparameters["update_frequency"], - minibatch_size=self.hyperparameters["minibatch_size"], - logger=logger, - ) + return SAC( + policy, + q1, + q2, + replay_buffer, + temperature_initial=self.hyperparameters["temperature_initial"], + entropy_backups=self.hyperparameters["entropy_backups"], + entropy_target=( + -self.action_space.shape[0] + * self.hyperparameters["entropy_target_scaling"] + ), + lr_temperature=self.hyperparameters["lr_temperature_scaling"] + / self.action_space.shape[0], + replay_start_size=self.hyperparameters["replay_start_size"], + discount_factor=self.hyperparameters["discount_factor"], + update_frequency=self.hyperparameters["update_frequency"], + 
minibatch_size=self.hyperparameters["minibatch_size"], + logger=logger, ) def test_agent(self): policy = SoftDeterministicPolicy( copy.deepcopy(self.policy_model), space=self.action_space ) - return TimeFeature(SACTestAgent(policy)) + return SACTestAgent(policy) sac = PresetBuilder("sac", default_hyperparameters, SACContinuousPreset) From 08718826d7e7cf45826d5732e17058de5ac291fc Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sat, 2 Mar 2024 17:20:57 -0500 Subject: [PATCH 18/26] fix duplicate env handling (#314) --- all/environments/gym.py | 21 +++++++++++++++++++- all/environments/gym_test.py | 32 +++++++++++++++++++++++++++++++ all/environments/mujoco.py | 7 ++++--- all/environments/mujoco_test.py | 9 +++++++++ all/environments/pybullet.py | 1 - all/environments/pybullet_test.py | 10 ++++++++++ 6 files changed, 75 insertions(+), 5 deletions(-) diff --git a/all/environments/gym.py b/all/environments/gym.py index 994fcfd7..d1da1a3a 100644 --- a/all/environments/gym.py +++ b/all/environments/gym.py @@ -25,6 +25,7 @@ class GymEnvironment(Environment): name (str, optional): the name of the environment device (str, optional): the device on which tensors will be stored legacy_gym (str, optional): If true, calls gym.make() instead of gymnasium.make() + wrap_env (function, optional): A function that accepts an environment and returns a wrapped environment. **gym_make_kwargs: kwargs passed to gymnasium.make(id, **gym_make_kwargs) """ @@ -34,15 +35,23 @@ def __init__( device=torch.device("cpu"), name=None, legacy_gym=False, + wrap_env=None, **gym_make_kwargs ): + # handle gym vs. gymnasium distinction if legacy_gym: import gym self._gym = gym else: self._gym = gymnasium + + # construct the environment and apply wrapper self._env = self._gym.make(id, **gym_make_kwargs) + if wrap_env: + self._env = wrap_env(self._env) + + # initialize other instance variables self._id = id self._name = name if name else id self._state = None @@ -52,6 +61,16 @@ def __init__( self._info = None self._device = device + # store arguments for duplication + self._constructor_args = [id] + self._constructor_kwargs = { + "device": device, + "name": name, + "legacy_gym": legacy_gym, + "wrap_env": wrap_env, + **gym_make_kwargs, + } + @property def name(self): return self._name @@ -84,7 +103,7 @@ def seed(self, seed): def duplicate(self, n): return DuplicateEnvironment( [ - GymEnvironment(self._id, device=self.device, name=self._name) + GymEnvironment(*self._constructor_args, **self._constructor_kwargs) for _ in range(n) ] ) diff --git a/all/environments/gym_test.py b/all/environments/gym_test.py index 29abcd34..1107e3ac 100644 --- a/all/environments/gym_test.py +++ b/all/environments/gym_test.py @@ -1,5 +1,9 @@ import unittest +import gym +import gymnasium +import torch + from all.environments import GymEnvironment @@ -36,3 +40,31 @@ def test_step_until_done(self): self.assertEqual(state.reward, 1.0) self.assertTrue(state.done) self.assertEqual(state.mask, 0) + + def test_duplicate_default_params(self): + env = GymEnvironment("CartPole-v0") + duplicates = env.duplicate(5) + for duplicate in duplicates._envs: + self.assertEqual(duplicate._id, "CartPole-v0") + self.assertEqual(duplicate._name, "CartPole-v0") + self.assertEqual(env._device, torch.device("cpu")) + self.assertEqual(env._gym, gymnasium) + + def test_duplicate_custom_params(self): + class MyWrapper: + def __init__(self, env): + self._env = env + + env = GymEnvironment( + "CartPole-v0", + legacy_gym=True, + name="legacy_cartpole", + device="my_device", + 
wrap_env=MyWrapper, + ) + duplicates = env.duplicate(5) + for duplicate in duplicates._envs: + self.assertEqual(duplicate._id, "CartPole-v0") + self.assertEqual(duplicate._name, "legacy_cartpole") + self.assertEqual(env._device, "my_device") + self.assertEqual(env._gym, gym) diff --git a/all/environments/mujoco.py b/all/environments/mujoco.py index 88f609c4..be7ec547 100644 --- a/all/environments/mujoco.py +++ b/all/environments/mujoco.py @@ -10,6 +10,7 @@ class MujocoEnvironment(GymEnvironment): def __init__( self, id, device=torch.device("cpu"), name=None, no_info=True, **gym_make_kwargs ): - super().__init__(id, device=device, name=name, **gym_make_kwargs) - if no_info: - self._env = NoInfoWrapper(self._env) + wrap_env = NoInfoWrapper if no_info else None + super().__init__( + id, device=device, name=name, wrap_env=wrap_env, **gym_make_kwargs + ) diff --git a/all/environments/mujoco_test.py b/all/environments/mujoco_test.py index 26bbe544..1fbda626 100644 --- a/all/environments/mujoco_test.py +++ b/all/environments/mujoco_test.py @@ -47,3 +47,12 @@ def test_with_info(self): state = env.reset(seed=0) state = env.step(env.action_space.sample()) self.assertTrue("reward_forward" in state) + + def test_duplicate(self): + env = MujocoEnvironment("Ant-v4") + duplicates = env.duplicate(2) + for duplicate in duplicates._envs: + state = duplicate.reset() + self.assertFalse("reward_forward" in state) + state = duplicate.step(env.action_space.sample()) + self.assertFalse("reward_forward" in state) diff --git a/all/environments/pybullet.py b/all/environments/pybullet.py index 48afe200..1d792ac9 100644 --- a/all/environments/pybullet.py +++ b/all/environments/pybullet.py @@ -11,7 +11,6 @@ class PybulletEnvironment(GymEnvironment): } def __init__(self, name, **kwargs): - # import pybullet_envs # noqa: F401 if name in self.short_names: diff --git a/all/environments/pybullet_test.py b/all/environments/pybullet_test.py index 7dcda1dd..5312a131 100644 --- a/all/environments/pybullet_test.py +++ b/all/environments/pybullet_test.py @@ -1,5 +1,7 @@ import unittest +import torch + from all.environments import PybulletEnvironment @@ -32,3 +34,11 @@ def test_step(self): self.assertNotEqual(state.reward, 0.0) self.assertFalse(state.done) self.assertEqual(state.mask, 1) + + def test_duplicate(self): + env = PybulletEnvironment("cheetah") + duplicates = env.duplicate(3) + state = duplicates.reset() + self.assertEqual(state.shape, (3,)) + state = duplicates.step(torch.zeros(3, env.action_space.shape[0])) + self.assertEqual(state.shape, (3,)) From c2d02ededb9dfe5bd80434a97ccd9e0975eb9e46 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Sun, 3 Mar 2024 19:17:48 -0500 Subject: [PATCH 19/26] Upgrade dependencies (#315) * upgrade/adjust dependencies * adjust required depenencies * change maximum python version --- .github/workflows/python-package.yml | 2 +- .github/workflows/python-publish.yml | 2 +- setup.py | 46 ++++++++++++++-------------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d91aa04e..f51b86f9 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9] + python-version: [3.8, 3.11] steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 8f0f5ca9..b69fe933 100644 --- a/.github/workflows/python-publish.yml +++ 
b/.github/workflows/python-publish.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: 3.9 + python-version: 3.12 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/setup.py b/setup.py index 257f6637..e47b2171 100644 --- a/setup.py +++ b/setup.py @@ -1,38 +1,38 @@ from setuptools import find_packages, setup -GYM_VERSION = "0.29.1" -PETTINGZOO_VERSION = "1.24.2" +GYMNASIUM_VERSION = "0.29.1" +PETTINGZOO_VERSION = "1.24.3" extras = { "atari": [ - "gymnasium[atari, accept-rom-license]~={}".format(GYM_VERSION), + f"gymnasium[atari, accept-rom-license]~={GYMNASIUM_VERSION}", ], "box2d": [ - "gymnasium[box2d]~={}".format(GYM_VERSION), + f"gymnasium[box2d]~={GYMNASIUM_VERSION}", ], "pybullet": [ - "pybullet>=3.2.2", + "pybullet>=3.2.2,<4", "gym>=0.10.0,<0.26.0", ], "mujoco": [ - "gymnasium[mujoco]~={}".format(GYM_VERSION), + f"gymnasium[mujoco]~={GYMNASIUM_VERSION}", ], "ma-atari": [ - "PettingZoo[atari, accept-rom-license]~={}".format(PETTINGZOO_VERSION), - "supersuit~=3.9.1", + f"PettingZoo[atari, accept-rom-license]~={PETTINGZOO_VERSION}", + "supersuit~=3.9.2", ], "test": [ - "black>=24.1.1", # linting/formatting - "isort>=5.13.2", # sort imports - "flake8>=7.0.0", # more linting - "torch-testing>=0.0.2", # pytorch assertion library + "black~=24.2.0", # linting/formatting + "isort~=5.13.2", # sort imports + "flake8~=7.0.0", # more linting + "torch-testing==0.0.2", # pytorch assertion library ], "docs": [ - "sphinx>=3.2.1", # documentation library - "sphinx-autobuild>=2020.9.1", # documentation live reload - "sphinx-rtd-theme>=0.5.0", # documentation theme - "sphinx-automodapi>=0.13", # autogenerate docs for modules + "sphinx~=3.2.1", # documentation library + "sphinx-autobuild~=2020.9.1", # documentation live reload + "sphinx-rtd-theme~=0.5.0", # documentation theme + "sphinx-automodapi~=0.13.0", # autogenerate docs for modules ], } @@ -71,13 +71,13 @@ ], }, install_requires=[ - "gymnasium~={}".format(GYM_VERSION), # common environment interface - "numpy>=1.22.3", # math library - "matplotlib>=3.5.1", # plotting library - "opencv-python-headless>=4.0.0", # used by atari wrappers - "torch>=2.0.0", # core deep learning library - "tensorboard>=2.8.0", # logging and visualization - "cloudpickle>=2.0.0", # used to copy environments + f"gymnasium~={GYMNASIUM_VERSION}", # common environment interface + "numpy~=1.22", # math library + "matplotlib~=3.7", # plotting library + "opencv-python-headless~=4.0", # used by atari wrappers + "torch~=2.0", # core deep learning library + "tensorboard~=2.8", # logging and visualization + "cloudpickle~=2.0", # used to copy environments ], extras_require=extras, ) From a12a828ede58d289cd8d3789264713f11c263a37 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Tue, 5 Mar 2024 11:39:38 -0500 Subject: [PATCH 20/26] fix plotter and log final summary at end of training (#320) --- all/experiments/parallel_env_experiment.py | 2 ++ all/experiments/plots.py | 2 +- all/experiments/single_env_experiment.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/all/experiments/parallel_env_experiment.py b/all/experiments/parallel_env_experiment.py index 7e38139b..33d975ac 100644 --- a/all/experiments/parallel_env_experiment.py +++ b/all/experiments/parallel_env_experiment.py @@ -92,6 +92,8 @@ def train(self, frames=np.inf, episodes=np.inf): returns[i] = 0 episode_lengths[i] = -1 self._episode += 1 + if len(self._returns100) > 0: + self._logger.add_summary("returns100", 
self._returns100) def test(self, episodes=100): test_agent = self._preset.parallel_test_agent() diff --git a/all/experiments/plots.py b/all/experiments/plots.py index 400c6266..579b16a6 100644 --- a/all/experiments/plots.py +++ b/all/experiments/plots.py @@ -23,7 +23,7 @@ def load_returns_100_data(runs_dir): def add_data(agent, env, file): if env not in data: data[env] = {} - data[env][agent] = np.genfromtxt(file, delimiter=",").reshape((-1, 3)) + data[env][agent] = np.genfromtxt(file, delimiter=",").reshape((-1, 5)) for agent_dir in os.listdir(runs_dir): agent, env, *_ = agent_dir.split("_") diff --git a/all/experiments/single_env_experiment.py b/all/experiments/single_env_experiment.py index f4ad1cb8..53e152d4 100644 --- a/all/experiments/single_env_experiment.py +++ b/all/experiments/single_env_experiment.py @@ -49,6 +49,8 @@ def episode(self): def train(self, frames=np.inf, episodes=np.inf): while not self._done(frames, episodes): self._run_training_episode() + if len(self._returns100) > 0: + self._logger.add_summary("returns100", self._returns100) def test(self, episodes=100): test_agent = self._preset.test_agent() From dec247d36c0fcc97be734e28a3f483e688a3db63 Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Thu, 7 Mar 2024 09:10:15 -0500 Subject: [PATCH 21/26] add swig setup dependency and remove unrar/swig from github scripts (#321) * add swig setup dependency and remove unrar/swig from github scripts * remove box2d dependency * remove box2d envs from tests --- .github/workflows/python-package.yml | 3 +-- .github/workflows/python-publish.yml | 2 -- all/presets/continuous_test.py | 6 +++--- all/scripts/train_continuous.py | 2 +- integration/continuous_test.py | 6 +++--- setup.py | 11 ++--------- 6 files changed, 10 insertions(+), 20 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index f51b86f9..3e933d3e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -25,8 +25,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - sudo apt-get install swig - sudo apt-get install unrar + python -m pip install --upgrade pip pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu make install - name: Lint code diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index b69fe933..83c81f8d 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -25,8 +25,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - sudo apt-get install swig - sudo apt-get install unrar pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu pip install setuptools wheel make install diff --git a/all/presets/continuous_test.py b/all/presets/continuous_test.py index 5f3278a1..07ee1710 100644 --- a/all/presets/continuous_test.py +++ b/all/presets/continuous_test.py @@ -11,12 +11,12 @@ class TestContinuousPresets(unittest.TestCase): def setUp(self): - self.env = GymEnvironment("LunarLanderContinuous-v2") + self.env = GymEnvironment("MountainCarContinuous-v0") self.env.reset() self.parallel_env = DuplicateEnvironment( [ - GymEnvironment("LunarLanderContinuous-v2"), - GymEnvironment("LunarLanderContinuous-v2"), + GymEnvironment("MountainCarContinuous-v0"), + GymEnvironment("MountainCarContinuous-v0"), ] ) self.parallel_env.reset() diff --git a/all/scripts/train_continuous.py b/all/scripts/train_continuous.py index 981d730d..c016e622 100644 --- 
a/all/scripts/train_continuous.py +++ b/all/scripts/train_continuous.py @@ -9,7 +9,7 @@ def main(): continuous, GymEnvironment, description="Train an agent on a continuous control environment.", - env_help="The name of the environment (e.g., LunarLanderContinuous-v2).", + env_help="The name of the environment (e.g., MountainCarContinuous-v0).", default_frames=10e6, ) diff --git a/integration/continuous_test.py b/integration/continuous_test.py index 10414703..cd614b1c 100644 --- a/integration/continuous_test.py +++ b/integration/continuous_test.py @@ -10,16 +10,16 @@ class TestContinuousPresets(unittest.TestCase): def test_ddpg(self): validate_agent( ddpg.device("cpu").hyperparameters(replay_start_size=50), - GymEnvironment("LunarLanderContinuous-v2"), + GymEnvironment("MountainCarContinuous-v0"), ) def test_ppo(self): - validate_agent(ppo.device("cpu"), GymEnvironment("LunarLanderContinuous-v2")) + validate_agent(ppo.device("cpu"), GymEnvironment("MountainCarContinuous-v0")) def test_sac(self): validate_agent( sac.device("cpu").hyperparameters(replay_start_size=50), - GymEnvironment("LunarLanderContinuous-v2"), + GymEnvironment("MountainCarContinuous-v0"), ) def test_mujoco(self): diff --git a/setup.py b/setup.py index e47b2171..feb7c15a 100644 --- a/setup.py +++ b/setup.py @@ -8,9 +8,6 @@ "atari": [ f"gymnasium[atari, accept-rom-license]~={GYMNASIUM_VERSION}", ], - "box2d": [ - f"gymnasium[box2d]~={GYMNASIUM_VERSION}", - ], "pybullet": [ "pybullet>=3.2.2,<4", "gym>=0.10.0,<0.26.0", @@ -37,11 +34,7 @@ } extras["all"] = ( - extras["atari"] - + extras["box2d"] - + extras["mujoco"] - + extras["pybullet"] - + extras["ma-atari"] + extras["atari"] + extras["mujoco"] + extras["pybullet"] + extras["ma-atari"] ) extras["dev"] = extras["all"] + extras["test"] + extras["docs"] @@ -75,7 +68,7 @@ "numpy~=1.22", # math library "matplotlib~=3.7", # plotting library "opencv-python-headless~=4.0", # used by atari wrappers - "torch~=2.0", # core deep learning library + "torch~=2.2", # core deep learning library "tensorboard~=2.8", # logging and visualization "cloudpickle~=2.0", # used to copy environments ], From 379b72a45e0af209aee4372f2a7f893e22c7991b Mon Sep 17 00:00:00 2001 From: Chris Nota Date: Fri, 8 Mar 2024 09:46:19 -0500 Subject: [PATCH 22/26] Feature/benchmarks (#317) * update benchmarks files * merge * use humanoid instead of swimmer * make logdir match file name * update torch version to minimum version supporting global_step in add_hparams * update torch version for add_hparams update * adjust slurm usage * clip sac log_std * adjust ddpg hyperparameters * lower python version for deployment * revert benchmark code to include all agents/envs * rename benchmarks * change initial sac temperature * change pybullet logdir to match * run linter * add new benchmark results * update docs --- .github/workflows/python-publish.yml | 2 +- README.md | 7 ++-- all/environments/pybullet.py | 2 +- all/experiments/slurm.py | 6 ++-- all/policies/soft_deterministic.py | 27 ++++++++++++---- all/presets/continuous/ddpg.py | 4 +-- all/presets/continuous/sac.py | 4 +-- benchmarks/atari40.png | Bin 947647 -> 0 bytes benchmarks/atari_40m.png | Bin 0 -> 248090 bytes benchmarks/{atari40.py => atari_40m.py} | 4 +-- benchmarks/mujoco_v4.png | Bin 0 -> 159674 bytes benchmarks/mujoco_v4.py | 34 ++++++++++++++++++++ benchmarks/pybullet.png | Bin 874012 -> 0 bytes benchmarks/pybullet_v0.png | Bin 0 -> 168968 bytes benchmarks/{pybullet.py => pybullet_v0.py} | 16 ++++++--- docs/source/guide/benchmark_performance.rst | 28 
+++++++++------- 16 files changed, 100 insertions(+), 34 deletions(-) delete mode 100644 benchmarks/atari40.png create mode 100644 benchmarks/atari_40m.png rename benchmarks/{atari40.py => atari_40m.py} (85%) create mode 100644 benchmarks/mujoco_v4.png create mode 100644 benchmarks/mujoco_v4.py delete mode 100644 benchmarks/pybullet.png create mode 100644 benchmarks/pybullet_v0.png rename benchmarks/{pybullet.py => pybullet_v0.py} (52%) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 83c81f8d..246c13b3 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: 3.12 + python-version: 3.11 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/README.md b/README.md index 200979d4..88d7c127 100644 --- a/README.md +++ b/README.md @@ -21,10 +21,11 @@ Additionally, we provide an [example project](https://github.com/cpnota/all-exam ## High-Quality Reference Implementations -The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms which can be adapted to new problems and environments, and `all.presets` which provides specific instansiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and PyBullet robotics simulations. Some benchmark results showing results on-par with published results can be found below: +The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms which can be adapted to new problems and environments, and `all.presets` which provides specific instansiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and MuJoCo/Pybullet robotics simulations. 
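To make the agents/presets split described here concrete, a minimal usage sketch follows. It assumes the helpers that appear elsewhere in this patch series (GymEnvironment, run_experiment, and the continuous ddpg preset); the overridden hyperparameter and the frame budget are arbitrary choices for illustration.

    from all.environments import GymEnvironment
    from all.experiments import run_experiment
    from all.presets.continuous import ddpg

    # A preset pairs an agent implementation with tuned default hyperparameters;
    # individual defaults can still be overridden before training.
    run_experiment(
        ddpg.device("cpu").hyperparameters(replay_start_size=1000),
        GymEnvironment("MountainCarContinuous-v0"),
        100_000,  # training frames (arbitrary for this sketch)
    )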
Some benchmark results showing results on-par with published results can be found below: -![atari40](benchmarks/atari40.png) -![pybullet](benchmarks/pybullet.png) +![atari40](benchmarks/atari_40m.png) +![atari40](benchmarks/mujoco_v4.png) +![pybullet](benchmarks/pybullet_v0.png) As of today, `all` contains implementations of the following deep RL algorithms: diff --git a/all/environments/pybullet.py b/all/environments/pybullet.py index 1d792ac9..db630dbb 100644 --- a/all/environments/pybullet.py +++ b/all/environments/pybullet.py @@ -5,8 +5,8 @@ class PybulletEnvironment(GymEnvironment): short_names = { "ant": "AntBulletEnv-v0", "cheetah": "HalfCheetahBulletEnv-v0", - "humanoid": "HumanoidBulletEnv-v0", "hopper": "HopperBulletEnv-v0", + "humanoid": "HumanoidBulletEnv-v0", "walker": "Walker2DBulletEnv-v0", } diff --git a/all/experiments/slurm.py b/all/experiments/slurm.py index 7e4e0903..21f029c2 100644 --- a/all/experiments/slurm.py +++ b/all/experiments/slurm.py @@ -89,10 +89,12 @@ def create_sbatch_script(self): "output": os.path.join(self.outdir, "all_%A_%a.out"), "error": os.path.join(self.outdir, "all_%A_%a.err"), "array": "0-" + str(num_experiments - 1), - "partition": "1080ti-short", + "partition": "gpu-long", "ntasks": 1, + "cpus-per-task": 4, "mem-per-cpu": 4000, - "gres": "gpu:1", + "gpus-per-node": 1, + "time": "7-0", } sbatch_args.update(self.sbatch_args) diff --git a/all/policies/soft_deterministic.py b/all/policies/soft_deterministic.py index 74656f08..9d6b3fb2 100644 --- a/all/policies/soft_deterministic.py +++ b/all/policies/soft_deterministic.py @@ -20,18 +20,32 @@ class SoftDeterministicPolicy(Approximation): kwargs (optional): Any other arguments accepted by all.approximation.Approximation """ - def __init__(self, model, optimizer=None, space=None, name="policy", **kwargs): - model = SoftDeterministicPolicyNetwork(model, space) + def __init__( + self, + model, + optimizer=None, + space=None, + name="policy", + log_std_min=-20, + log_std_max=4, + **kwargs + ): + model = SoftDeterministicPolicyNetwork( + model, space, log_std_min=log_std_min, log_std_max=log_std_max + ) self._inner_model = model super().__init__(model, optimizer, name=name, **kwargs) class SoftDeterministicPolicyNetwork(RLNetwork): - def __init__(self, model, space): + def __init__(self, model, space, log_std_min=-20, log_std_max=4, log_std_scale=0.5): super().__init__(model) self._action_dim = space.shape[0] self._tanh_scale = torch.tensor((space.high - space.low) / 2).to(self.device) self._tanh_mean = torch.tensor((space.high + space.low) / 2).to(self.device) + self._log_std_min = log_std_min + self._log_std_max = log_std_max + self._log_std_scale = log_std_scale def forward(self, state): outputs = super().forward(state) @@ -41,9 +55,10 @@ def forward(self, state): def _normal(self, outputs): means = outputs[..., 0 : self._action_dim] - logvars = outputs[..., self._action_dim :] - std = logvars.mul(0.5).exp_() - return torch.distributions.normal.Normal(means, std) + log_stds = outputs[..., self._action_dim :] * self._log_std_scale + clipped_log_stds = torch.clamp(log_stds, self._log_std_min, self._log_std_max) + stds = clipped_log_stds.exp_() + return torch.distributions.normal.Normal(means, stds) def _sample(self, normal): raw = normal.rsample() diff --git a/all/presets/continuous/ddpg.py b/all/presets/continuous/ddpg.py index 4762d252..60d0e7eb 100644 --- a/all/presets/continuous/ddpg.py +++ b/all/presets/continuous/ddpg.py @@ -16,8 +16,8 @@ # Common settings "discount_factor": 0.99, # Adam optimizer 
settings - "lr_q": 3e-4, - "lr_pi": 3e-4, + "lr_q": 1e-3, + "lr_pi": 1e-3, # Training settings "minibatch_size": 256, "update_frequency": 1, diff --git a/all/presets/continuous/sac.py b/all/presets/continuous/sac.py index 460ec2a8..a46d8fc6 100644 --- a/all/presets/continuous/sac.py +++ b/all/presets/continuous/sac.py @@ -17,7 +17,7 @@ "discount_factor": 0.99, # Adam optimizer settings "lr_q": 1e-3, - "lr_pi": 3e-4, + "lr_pi": 1e-3, # Training settings "minibatch_size": 256, "update_frequency": 1, @@ -26,7 +26,7 @@ "replay_start_size": 5000, "replay_buffer_size": 1e6, # Exploration settings - "temperature_initial": 0.1, + "temperature_initial": 1.0, "lr_temperature_scaling": 3e-5, "entropy_backups": True, "entropy_target_scaling": 1.0, diff --git a/benchmarks/atari40.png b/benchmarks/atari40.png deleted file mode 100644 index 4e2d8e4528883755c5228f5f32db81fb71eca47b..0000000000000000000000000000000000000000 GIT binary patch [947647-byte base85-encoded image data for the deleted benchmarks/atari40.png omitted]
zoso4IKEb?=IQ_QNKQIuD2qO+qR8dNzTr!b4khvsNQd07KD+P3BW@j_)H8P71@fc|N z&{b-rogp5Q&Bhj?e;Dmn`cgCC<;2Z)982xLg7SCOjXoH3r zPeJQZneYVw-|%z9b5D2i(UT{mAbaY?b_+)lDHW3qUy8SR!yDUE>b9Ya!Xi{PEnxaL zY1)wDAGejSnEd?_^Cdtwpqy4^^o#{x3&EOfL7$^ZJEf-H)MxbjTR_{u^xVTwA_(k( zaZP3taWmVZ`C%&yb8lO@+c_ z+4ps!p?+F;8St)&I1ay222#a1Kt(oVgD&G>DB(`D+IN(db+%-616i+7kCm7_ut38Jriit7pZvi|Gu ze?T@#p)SSnej;f>-^j&C93%k`=B@#fQIhbah7S`sNR76mHH4K;N9?Tge8ETQK}*1)&PBA<^NaAKGQWPKxQ=%`byWjr4r;>F#P<1M$#b*8+9mlQyM z@U^VFAQ3=GydVtY-uz^DD#6kqjJ%?N=;KYhiygi_+)C!FV%y<-H!LYe(E>c&(S3 zc=hSi)d4FNH8s+(tk?)H`e=2@sAL=6kEMBi-SH3XvEISK%He_=xUl5+-+xbHUS3+- z7bl`LV3~dQP*ikufW{U9vrm{~`-4nb(A~KL1G16g>(k=!4qvNR1)v_uRG{rARHvy|O{kG#bFb%2~i zL_m9Cag^!b=qax&WgDF@!;>VUiqg>E*Mlu*fS#M1%O8!!G!YuG*4)_)%tDm!RkX^0 zZ($N1ldel6r%^H@KQF(p63zI#YlTeVohT=u2-Qa1wT4))pX@%G0-TVTi!uVXFqTXc zK^h~{C@g`P^3$?RldLW~P+hUeeCpIG0}ydiryIRex~uweHM*2tpzq~gfP>mD6P7$J z(0*o@k?|78=a!1*Phe}&+=qcDB<>^k$(FRpn{Dj{wy9P2#JSS0a>Lvxm~%wns*=LS zR`vSU*B1%E!N$+uGTVVyJf0H!#|-E5>Y``Rco`0()giViP%HY7NQoH=U34<2sfzOQ za_UB_yok=x+cj`D*~9UPxv;`?;ng&(b4l{bW^H*)_2zJJZ}BKYrZW!FDF# zwI*2JQ8aq~0i-IL#Ja9m7=G(y=q8nL%dGZ|R zdQ(jtmW|nU#T?hv$DSu@j!D6Iw$p1lJgyl0D6n8RxW~t0B?ue%js?vY=)~77#mGZJ z)S<#{_tG@Vt`u#;1i}+~CWDM|REZJLJL&G2j~oGk(tsi`2f;xfj!6A5tlwO?U3<^fD772p&wKfG`~Wkm%~poHq3NnUJ4_Y zAqg`MorI02`_}f-awq0Y1M!Ih45SH!o+bZ{M@B%50cpj3TEAJn(7)WThBk z>t3t(O?Cj`n}~wZwh;n3JZ^T_Q;bW3vPxgd$zRu>g5_qAIT88=qU7?Rnh_98c2bq$ z3#N-Yw}u^uLTm{;pXZx~NtjQsI1=D`icz?GsyWN&<<~k9wMnv+iXQrG5% z;-$39MBid8XuF@T^%PFWWDV1zIp0v{`TK6aGhJL-&YH9594jd;RNfs{2l$~6+pi)F z-5Q#jS+x9_Of9sZxfPy+6>zvp2!LQ2a03)0RA*kq_ueRe>2^Yd zY9DwaI|jzH8O2@8a<7HSnjxRDdjmRg;@3@7K<)^&9}GBV?6;9~FV&fANhtW=ns&57 zigsBZ=Sx(;S`l!J2|z&^d38?@0C_@7CtP{FU2Y1XMa6k(*4#ksa+;_>kt+ZMHKikw%9A^orgJqk^gsRHOCdXX{a4V}dzPca#>NZ-{EO$&s zc3a&U2AI)O8x4RwJ{P01>t2+ZGyHxZ; zi7lWk&w?az8$};45phZv$u0oJ)l@LT2)#x=q;{q*nU|Jai)hp(Qrq_k@V#ztdtjCo zJQsRo@V5cSD8k5rSqw~?_7$Tbg&HD28v6!|UV zn82lb@ViAhldXSq8KBSzfEjzE4F`;rm?{qJg`Y7;Y>r{@lRcX1AWO;Y=I!Zn8*nZI zc$ack_d{%9Fxp_q15#NGur&sn`B_V|8`2Qmgrn^Jdfwuy~U_=rtX#czzP&g73bVT(ytH4@7 z<{X~}&GZX1UJLzctAZ}kNDLnlaFdDehsNIQs(#_`nENbA*l13pZt4a%3w?9h0`GtLwO2i!9|Nh60PVPDw~>?XoGWB z8z^?&`1v!E|6FRS1&Y0c4{Po#`;iXjr!Ch!5)%-=P?QkwmS+U+S3efU{k35fld?*t zuk`R%Rl95JB5eWMAK zN5kdd`T@yl#88Gi8D@umORGAdJV%3h)Wl z1%|GS%>i);D4)V`9~Tz|jjwIO>I#_HD(?qlfc~6~|8S>nh3V{EGG{QCuGDFCN0Qvb7gA!aXl%Qf`0Ze&zIn78?oX~m_PFKtt=R-6P_6RQC zhXg+o?j20qlq%;%lkvD!z4>4N97M{4BII37cMwDE5rV-1(h87?0DTM@X?2lq0D4BM zlQ3-0@uOAmc_d2cbIMa&7=m@(2}R)jYwo$9fBxD3cN4;Mfl4rr$eB|keGvR;&T?s} z@w%>q;C2mJxsYhx+OSKl`oGt)i7X8tZ}?Maei-Pwsqg!DGmP;!_Be>FuUNWOem}Un z4hz~}?gEx-0Fv+SZs3`1csrQ4ea#YT`FflY%<)~QxUxpj4+R}&^IMxeLZz4k0V}AihH_7QXl|9Qfx78g%-^>wS^hg&I3xASNR^Z0s}dqlS(H?L%n`o1 zGfJv;Revt2?{T4E6fb(OoI*$TE*HJ^|}X{d156)#YBW@AD{zOg4t`5QyXwhAcC$ zXSnj9TevkJW9B!z1Y^V#F=JJAn&|+nvX*+H?Z9los*x4Cv+2kvK^ly#GMIEnywd(7 z;eGICp&02Lt1-Tu3*q?s3ua@+N*r?mXB|0i;e>PHu_4b`loqa=vn{e^Rz z{l9|c#428$jb9F?F+`l~UWI)7ij1J2pS{7P7YZ2Qj6vT z#a8Wr9waqWF?HQv5KcxHViHUy<`>N2q?JSpc(ch@k50{H+mUW6ci*!^JQY^= ze*C><$1GAsyQ~Y^x0n-(^#k@v!P_3*evL;6_|!F26Sgso`2G8Lp-LTXZGE&+UmZzJ zSpSpEq#{C+HOPD^&;sM$y?rtv@VdLMFNeVasqQdA{5Bv%gA;5qEJ=uyWdt>q2HH7$ zBg5)=C9)Bx3`z_1!qo*eGCLbl?0f2eg%r*%NB~trR1O?3W0@a{y-g@D#EaY%p$waq zOs#W^akRn+Ch}m=C_OieaYFwk)mSMe3=v$UTEG%{p$uJHESy_T?GobRbx*! 
zRX<1GcwzAU+k2@j_ElSvNHX#Z*Iw|NuoJl47z|2PZEs^obUJO2mRpMeW&+P^K4Ae6 zwIzpYP%Ur)!WGLJ0m%}0k4|PtMh#pdkIRZ(vU1)9sv$rJVe{tDQ@6>klj-lR%yyTa zsPi|8`fvI9=N9EV7xe3+j@gF^Lo@(6s90A2A0IGisUam1OuC?%x7C}fYC`s#!_b~m z+0N2h7&gYNt~-m9$=)gop}HQt865L}d(NkE!AQx&m`p^Yi*=$ETilz+%MO_cJ5D~2 zS5BB;IpR7XGYoAIu0sbbPo?=rA-Bt8SLP(H66@E7Q+2%cFEg0OaV*7B^D0`w@qF8t zkhK7N7>Bt(Vvtus7{_9i=&O_2ng?DuAz+sz9I6biJ&*TuTl9YxD%U565eL4w%trZ)pNLNCaiBPzG5n3PvwBR#sN3MSG5^Jdj3~QR@Y9zXetSgctK6 z$kehpP=}C?WMo0*OBR{V&~J!|M{Za`n=xI8F@`*Hu!QUw1SJeXC3o5N?J8*gj8j*D z=?p)tAM`XKG!4tsC#qy>LNo>V+Vo?qMt_~AT1-PJL3b79FTe5~& ziQovcZ{7p_3d{}-%b1hjGXvf+5!sbY7ZKA=fYgaf7O0Bk#2_S6BFJNWhfHnfsk%ZY zG5{GWm&V4Ib^X}^b;YqiR75r{;{7$Tr51V>T@nFTkRn<2xf?%927RT_`K${wL&Ua( z7qj2Nk+bmo-^JiB6SoM1^(8hF9-Q}ricqbSN2peCxn!Izs*-OVX3}5-R|E@`4q2PC zP$n^()IeNueyR@Qd>v+D#dG9bgdO|N5&}IH>~0){B>IId2spFr>S!^P#GPvBckLn! z@P@{O^GnJbGjyd%gPZx=kUQ!b4eq`Qsjjh!2@cFTHLy57uEJam(q$8J>KtC7b~(%< zO_*vVgDWH+s?t&ZsboG2ypC2mMr424&71_oC9&;5edG2ab(XmJQQ+8wg*!*{;J1fI z#AR{50M9~vp_rWwWx5Cv1mkq^uo1-J=e(7~d+vbUOuWM4e*4w2@PqBDQsnE1U2(NV zBTWy?6BH(2pzv`sXR@8$KarDoFl(gfxg)yLJ)|t+)L03k-qwi`5k0U+}4BGHi!;qvTnDssB0beqv!EiI^Nj^61ge*REdmml6-- zH4Y>aDxy=TyrZMTeVm7U`57TxyNR%3sPjr?3po1%o=w8R*J4yv26UP-?WUw;6XvnV z*+6H6ggR!O(7I&WP4zmxmL3?a2(ALTNZ2kS$Pu=n$`H*=1Q|$yg+I%k4QzYNd zI7QWphu=6AGE48BWrQmgh9~knNbRKdL2M4hO~t}RQ3*z#=(ecRSx`)Z9F57BK#gFp zvBC0jW8{C=QIQT-_=!pqk$VoE`|ANd7K^V2jW&v2hEyY9SRJ2!8#Ev!vobk&$`=BZ z;P3pJar^)y%uZyAB?|p@BC=Zotmo*uwbhUB>ZDzT;!#ta4au7L z-lm0K^6)`2tR+4~K_i^A7mY&t9H}lEq!by#CI?HAijO}~JXxFwb)1~ig`afy%&*@L zs{{Ne6fuT6?m)mH#3u2&An}nCV+O!sLuw!sBxEHZ|4FX1Tf%&U>T_$Dkzi%X5w*Nu z=q~MPP1C#)`ZV$czE2z;E7(E4%pD^9a}b|2#E6QVDs(i%KnjgWsQQee=mtP=L?NFf z5c!}f!3W#r4^dH3GAW7qCvwwSU{VNA6LI1y8HOaqh)kp@U;qZESj3aVP>|8coHd4Y zB9Y1sP(sODO5SHdtZvv^S%#aoOb=mV6Ier)v-rmwStp=Q;wi3*(%M2ScObgR@m_L= zH{mE567cX;s1n&7yR0AbFG!pb@FF-*hL&#nL^Usdt^PKrKu0m=r(B>(kiS+}S8p{3 z%ET6?f$iQH`GYb|=s+|Bn4~8DHMCb`5}sHD1hMHJc9VnNzPWoH*t;*rYbkUUGi&h# z45fiNBL`uSlf*Dq8wc5ploNcc0qdWoUiwSo)!MzMBx;e6$T5hF!@_Uf(DIS-6k=&c zk*|4P{^`@GuaZd}7|RscMB2~7f;dS$Gi@MhZO(#3Aw7b~V*g%(oDpQQ$X8&8$v7Ti z&|`LOs~_;(L!6wQs{k+Q8$+vGSmDf&#R?O;7un=b$Gc=giVV4v^67Y&oXcVjK3gzr zPXwes9O;z4)M470FN#W{{QNp#rDwEb&x^I!>eo{NN)fg;Iyol!%wxW!0ON5qpR9F=y=mcNap> z`zi|bBVeE`8uNUzoR}74#kRm9IwUorj1vnDyIQd_=D&DpsC9U2i59}Yeqtu65`DT4 zq-!itJSt;8%qxw^wB(dB!aw7D$_-ZgYLUF1y}dq2eBL2lqW~QHfKW;r{B%Me>p|Qc z$f(4&n}BOzO}y~7pf7j}cas1oy@Ww%LX;3JAf&HtQtl(;f5>?#E>hr^z47O=0c?HB zc(fJUU?KbZk<}B>e(DeaBW41?rQ|3}WR47sgq~38`ez2Jb&IS^ZTMxtEzG3(W zkwx+=W~?;k$lwkdxd_}DgQDa5HJllQ$Me&F{++~^W&Rqz<_u!4m+`0hDrA>=G8|g*%8$Mt@SXBTd>n`m=xOLp~bHLp^_g5J)b{sMzm+_PS0|~ z=?w!SfoAK-_a-jBk`_mEED1guB7o<1uIsr9E2e>t46Mb5H=CBGRjF*y`Kra~lS zBPcme@|-#=adJwy0CSlRkgdq+42or->#}f6AS@w&)J&5SfRGDOl<7}m6R&_bF}Y3h z=FM8j@+qj!@hIpq78sEO!{JE={u_P@B{BizE}N`wu&e}Okxf@Xjt^{xp_i1&L_1I_ zK(MiZpaY7$2!4zlTpVjxAcBTh7Q;#ZIw;^cw2?@pv{Aq~a!%oN5p+Oza>ZG&uD9}o4=fr$A-q;E zjVsb!tAS)mOyodcF`&9}Kx^x$J&t@t22x@=X7B=i)W$(bz+5{TOECLK=IIFvI-w$B zHb7h+`1C8&`#_ni=sbz(xRZ=>>H&-;s1#(Yf;t@AER({ImRf(asXnR5#KD4mA2Wp9 zCaHN*9Etc|oh@k)ADaMg+`{oHcuzY9E_M>1JMi{!O2LFh{&S#7;`t#t82pKr{6SPQ z((|Ade4Ut>*lAo9z;y#lWUi+Yl9Q8zL4xtIwCk7Q<=l7#gbf+gO)Qd{x3IO9M`0b$ zpU18w&NXsUr1MnSe%{?eY4Ql5Mx4(STd9@^7QolgD;7uMbmD4KR3=!X&{m8f+0`Kl zj&aDtRYU}Fl28CxhOCyoW?|7_1D8$uW6v2&3UEHj-UdiJ4}bqloI0DQ(JCjHUk;8hEPyHV-WU6*OTPQ1-%6MI-!Q#a`i4T&KQ;|kp-llOWmtCY&fzzjtR~xWO8e90Wt!P{-k- zl2^6Ah972$4Q%(oD z^tJtsI@2I`$kd4C@Tx&;dNgr85K_8lK8087^KayxV8opB&VN|cP)tt5{9o+7WmHws z*EdQ_3QB{bbb|p%gLHRyNC`-Hhcu#a2o@28Et$`mAnM%#Xo(ugT%0y70H+iHZxdh%Lj!C1;(7ufxTF)1 zVFu;CPyq{+)dpq`N}>T8y)U3v6av=)GHD4B$njjqQbD~{Us%MW3I3>s6e!^iIHm+4 
zzgU=bzyYGEsKe4yAI5F9cp@C~|yZ~&S(&}tJWaRbJdjtrQZKhz*VXB6u-YXK)h0W?B9{z(RM z!=e!93`Amd{|UcN`kJ~r;3-oF$%awjNuMSwA75Ok0q3e3(84tXs0;KdJLEqGsaa^$ zptB$fg3W3m?K7)s9TMJ7)e!s!4Eh8(9Z)*XdG@G*BE1HXK4Sxd0fu5U;|&*}98BbL z$c31HfuKT6$j*`Lo`5nA*jT85f!p;c9h{pByrG%-1D|e)rv&)-NkdS%#Z(j&6fBtngcatX8SEdqrn33I zeqkC!uDJ`;9n>udfra^ook7mU!cKs*7F-5`%NUrLBG%LaJUd-a#1(Z0TsMI&073z^ z7uUHCiq=6HLpqoMh)M}Ka8N`r0pesgS64i#MW~Ap+@b{#1#X~_Tc+Yca0Fy@3?h;M z(2O?lMx1LQT3^U^LMdD3ofa*if7k(9#0R+YX2MaR496_07syO`5{^uPSPSrl^?CDx zg$rdV5BQ~Pe0(6zfj>Q(Ju_3oP{0jb;rsF5fyobpvVvoWMnF}nVH}lefE3gp_z}wM zG&t|?C^SOt?*Ltanh3W*nKMTzNX(9aR7W8IVAwrY9Sy5lB|%qS;!Cp+P<9%CDm;ra zS71&-EL{Jtew_fKX92-&%`WJcv4n_NPuU>yqF=SKLC*~hz0(F|?YxFsj6k6^R96Bu z!&_1;0!rE~;F&`FHwM*p3!lp~D~%hH^}NniA?S_)s-6T{)VIKusgDEw9c56H3rMf6 zbLHL#71W?U0m2}mg>(OWAP&OZ!Nh6-Zm$wrduS$xcuyl#GfO}p>^6}5%7(#_ZC?

UC zo`!8s3Q&L$mkbn9#mB3YH)W23Jyrl~hBz2gW$>6Fvd9Ki(hxBdh+v^uH2lqTQGnlo ztlG!lJfJaF9kiVSHc)9WZDqrAkQMU1`vN8l!iLq#p{5zo;3STA4v|(1Duw}996%tB zAbIhRRv*L%Kso#bKAz0I<|d%0A#XOUN*;v%5EukvA3R)KN?;s7l@DY9fjd$y96ZU{ z@n1~6>R{r*=$e#kRJ>{eNQ^ef)&Hpp$%YWQ^eX!eL2>W~PH_28sSEV>@5^^r2rNWE zV!W}}1ELCOY(d|czJD|1v?@G=a=sAQfR_f4Z8DF&^1m$4zR`kzw}GM4eD8z#$G-pP zf^e`G5MHhdNUA|Om_TnSIN-ZG4(9C~l%OT7K&}MPcM5+AuK%1>cl)Q(`M<6J*T!xL z{6a|XLAmH2i!rEg2LXQb*{F2xCg847O_+zq*f#PrK)0X*WBf{x&0{nF2x97?hdTg8 z`TJ@+5BP7}$&{c$1JU(5HvuY~e_Q2A?+ zp$6Y2K!sxd*PcA-A^(r5$bj?=Sa6`XaM^YSIMomt>^PwCS>M^wLTdgq6U>Fc#{wow zV|4yMHwHN7|DTrvmIlJo!H8!;MYN!FRS~$z09eZb*#nRr0(IMvDghV|B>ND$0E90e zSZ0WBdprKWuKnlDl>iH%QlOsyLyiBxANsFPIs!V<4bUzAPp`U`|8|%E?Ti0sN232W zApiC8|8+Aadx&Px_6L9g&Hx}~B2e;|aZ9QU(nNJ+pwfFA_(d~Ojez$Gl9hglw&j zfF2^=@fA2?o1zV58kAdTgS9Ay555M>wFS7=%dc;tw^-nMrV&snj>0HWA$a@eS4f`e zA%fm2OGy`S;D3Bg5ke;Ie|)8D)W$+IRUp3XeK(? z>)~pRM=<#NKt2E40}0-Odjoo=?ztM`2{;7KqxJ&>qO%D0lCc1e zF1g2^deFoDeMYzcpPupm?BMl3EP(%`D*lIo`Tx9cTdc=Fxik>yG*VP^tYYfzwZ}sJ z=uvM^Jx>k&sxOsEwp8eYXw2Ty=(T0tQE2xJTM2imis;pqwVeZ+#fsPV4rZEvcBp`Z zD>5;CLwBuIL{%U&V{d6fL|=6 zJ%Gwj_bNq(8GdvgOU z^^z~F3#nUN>|0@fdB(K701w!#9GD?8SMWNCCnhaCTkT<` zZSCuaeM+Q|te~Qfw;h6xEpeL?DgEH0zgSyeQ{R@1?*3RYn$v$4eef6zoopi{Cl}1GiJe%(teMP{;rJqU5%KW+m z6a27JzK|IghwjN8TuxUHJte!UA2~i(`QNSombl}iZQUwew3MQiaB=Z)bY35%>V-Aq zl%)5mu5BDBcKq?3d^0~KCR0I+NzK%mg2_dm)$QiGcWh34ySGzaDdi>xfz18+SsH8n{WNMFO1*lbHGjW2)i zEoCC0zx@G*8~kzV<9L@yx2Rs-ufsynA*E#RnY@z4+7}%Q=73E!jVB8hcon_F0G$u6 zuc2ldG^(sbCAr-T`lN-A6$xWMe1NFxD=0_keGW=L_elU zGEZKSv>o+4GxXDfN>zAHDt%J%FTfsSc{x>G(U8|z_kk!#af#br+IxL}mv*tuJ~}&M zTm?Lig8SPqZ-~NLi3;kzVN_^vX8&E$OYSibeZUNWqZPM@IP#s(U^w^%fL7VW#FXKJ zSF#NHsb7~mlBiZhL8H;Ev~3Ky3Tph`w-99}=%$ZeVc8yie=604R|rmmmKUq8()e~v zSa?%9;9`5|4ekl{?mwP(+nw2)U-}h=UEg1cFo&Y`MTA9vGWn#VyS{S3LdnKDx%JgTtlBmg_;%KDRAliqu{ar>vurb=zJvwGC%>qfZzP^4H zEv3xS=!fgu`;;~U0&cEujNq;cLPJk3PSpi%k?ZRk;*J;*fb9GW`(O_>Lqq#GUF5{X za^Gmci1BlZu8<_73u|kC2n)f8afSbdb?-iJEA2_rQ%b&|q89&sC!#@SLUk9W)E^qX zM-B&f6$P!!?c*@HP&FwvDQDiXZ{yvtaOfk~p*_Q)vL0Ends4Lm~G!X-{ms0y@$tR-^RschoV;8xPr?h{-X{#J zh2vdFCZ^eiR^1Bq^)VY@brc@oMf{n+4lsYFZ?a#ois^bMr{?yDplXi3q>_BM-q%~K z!v)eNus45qB>M@Av*o&urB<$ZBZ;<|bC&&m=o%K|$+%^&#st)DM~q3Yl9g7Gk&-dY zu->zD`WYs5HZ2`LvmV(yGKO7MHN6A9Fu6!InYR&r?y;(6@@2g9yw;NzHYbIQI$&L^ zvq+FFEJ%e#DyoSEtx&RltF`*r)CW^ElBpV1j>cEWSYdDGoGR0R;oug-t~e!?OvM_a zS6K>V3^eP%E0-^Cs+1_1xVd`7+sglyS1Bcsj(ft`?& zl~Mt7UGd^2%#mKrqPO=1o1wJ7imf}DC#wQq_;UaX+!Djo!4610g0+NQmYv7Ax~Hib z&ANy*5Ei9ussei>!Se2%eW`|G2|qve4{7Lob#l~VQpcMtcRZA;QmFA2FQ^FBdt0N| z)_tzI{)G836wNp$P0a9$2qrKY-@XrkeGu2mB8o6Y^TZfA*wqL$sU$zg_3w45Yl_8D z(a%ShWJgfN+OcL;ESRkC9~4hlX>{jyGYr2}Xy8+7J&|{L( zE39G+>7K~-9&Yp-zqn$Hi5N;>m`Ih?UX{MY6t?L7+$ZrpIyf(h*XzgH&Q}^k=t%{8 zfnfK`c{;88G?z4BukCEm+%2JpMTM!TndYu!u`oQX^M9eFjPJ4B)TfG0jRx60UrRNh zTLRm*>3gGZS;@=t_Js>dY)_9q+RWIW9Ahg$qW5qm_H%0PRrrGw#wScJZVWWPiVzqA zzx4>De}hW)rx5-IH-c!rVd2WaNJlV*!(t2i!xj6$HZ+1=)iT8O!GYhp2a5kor%J~; zZdbwFw@+5bUFnzosf6uMKO%gQIGEBC0>-d}nm7(K$1I0&EV_Vc)!wL3M%vWbpE2A} zF^+ooPQdI<;*PD0ow~1??=QaJ>$}U0goN=F+h%V~zv!${V86n4t8@Q0(S3~N)l1w9 z352J+r!y1ZOI^EdJ1H4?SjKm>SA05rYlrKlXGP!9%A#M!#3m;&*|^xmo--7_ATz;> zC(Ell<3x5v%5Rc)tIyR;mh*OcpVZih;4ch|#wmpby5>;R(!b=B^?tp+ez06n7lI*m zejejA^>#8@p554UCq?mbfkwaP<0tS-$<%4G7P};H#q0|n z1meLF&X*Du5!Wf&G8d(vuom53>#s~qsl=UEeXY^`ri(_G6QV3U8zu4cfmlabAdV0! 
zIryguX~G>@P+OhTQc>#D8B0)L5}i?#$NTjW^1&LWD0!hWsf~Ls(+%#w>ywjk|DF=E z*~=rIQVO5XvEMv;8>7!U`E{5yGPE+{{P_8xCc(XXm@du|9$s<5_oj0V>na(0WWad~ zrwhdFga3YYMVHDos@aQ_C&^`~F#6&EzFkWp%-n_W<>#g!Lg>(Kq1yFoEh^1|E!NC+ zrCATklX^uxj?v#TCU`xtLWb8^K|5L`mwlBJNI%LYzD`&q=v@7xn34=@&zfz?PNBzvZ z87{WeWW!J!2zq^fR z#vC5dc-Dhku^hs!KL$h2YGynO%F5HLWeuy0TVK{qtUyXaS1yuM=#7{_EDfY!?e5Lu}EM2Tu zg1X!*BF09t%9@FN9WYrUHJRC{XP=6M|BNp9hgGaz2Ts9;#Dlh(Uu)4v=j}?u$O2hm zmaxuT1vfvm26E{%9{n(>@KG(n!uWgEf?}Vl3nHWT6z80?U)%3n;{~TyIyoZ!88be< zTm2pz+rjf?CzRtQ>R6=^512!O z53xU@jiDWe6=6i10rt;E)@oO9YS(uhp(Q!*U<*F4ElzaN?5r`tB9G4Us!_I*jC0+( z3CZDDtcwRP4?1$K%IcPm7FPW!acmcs0}i==%~+BrqnOd-HRp>99E?TXq#KSfX|Q47 zg}vrAfJG2dQyM)Ms3Mx28}piN8Ys|lTI7t}zi^^_oKgK1CA>N=`owRH7wwz0a1I}J?x)bEmR|a4 zj@Tx@8ZQ>7PdGEW2%>LZYgP<()2#lH7+=LPER8uuOB`_4tnD;#BUo-ur;bIAB;?yr z2HQpPOh-Cn4Ov;*NmE&Y=TDsR}omNCZ*G#$0k!6Qc+?ejg54YyM%wfawtfSB&Dr1?O(C~^YT40lDUrXI)m2Y3rSiyPm62n znlbB(n;Q`=7VVIDDkP10zw?v>H|4e+0@r&|ombpco$=aYU=goSn|pnaoWMfY7W6e4 zo1Y&y7Y2<fuL#wyMR;d~2*zW9q$53eubEl5ThDU2TMA>v%q#h9w4VK{N~4ix3pd z75MT{&05RJ{lvLzYj?#cyBDUgh!7Q#cc(olda*Ux75F)f$L(n?xv>ZSttujKz}1UZ zZV)Qql3jp$3wQU!xDS_um-VKL>N8GK&pioIZll`T?`jR*6`DXTsaW@poS~pZ(qnla zD7;p_j~6LluSEK_7d2;mPOFtR)?t?-Pov+i#>CE5kmJ$FlSqp&|E$`Uesh<2ecQwW3I(h#x;U%_=OL%;W0s|2v+AM* zvMN0O;qMgAU(x0Q&d$908?4!-*k~q2%E_hJ6pe;|rd0ERW$|Yjx}GN|OI!3kL`|Th zahEaPqmv{$)HQT-l+d0kGVO_QzQ~`ngmD{qn%p77bpBXCRPt0=ZPn>qa#wj}Jh<#A zRImu8=ux!iMC>};`Ev@Seq@-`HQsAG+rXl^QSU`jQdb0;#v`hV>cPJixXHvR?y*bo zZbTL3So8xQCl%*CsRz3C{=|`b_Vj;snydX_<5*i^CNrINXpcS-ZJ^-(TLx{XltLCH zL@>ELGxt{=`-@V`->T`5>hNHFeYo=8j$p$^X#R1fV*gVCBm@!q2z|turf~|$JhBRp z5HwZ0V~{c}c-#*QV3FA&Lw~Qse;qjs`8AX8DL_x~1xtm*hHEFDjX?M@-<`ZFV zG2W+x+>F?k(ngo7>rG%EaTt@7=YCctsVP4M=IrKdm0uI4I2&ETT;EtXGr-xRk+Q*d z)aP_lL;G5F^Ls(;SGQp7)Y*ygaa(yBCP#S9PX`F)FeN-|yN3G_91#NvsU$d}4@M}3 z-jj8icRJS$P^H+~dWM@I?mE4$*UK7_8GQ^^{dI*=OOt3^*B*Aw46#hef+?Kh>=T?k z;%g%@kFV)0@!KYP5_0W}`U^l){9E3m?*X``nn(i9r1+CJLY;d<^!d$~Do`nyjY&fT{gEP?6Qh1gkKUCKSt~Gh*H_jEhhQPl(fP_OXX=s*;Y?cY`JxmSt03l6CE2D%Xh9mv48J&CbH_Z~u~Ue2y1B?3LOc9DLSj zf)JwYkl_*My9ER!s=#&ocm8Yy1uWZWDQQisqB`d z@fjK)(n%8UACzmV^dSo95jWf5kpnWj#PT|idyUPVwjP^G8<3T6QkH;=`)3k(#rcOU-u8W#@Dih10AGzh{{uDI+M|f6JGVHKiPI!uDC%j6z4&@Nx8PrFnb1jp-J}8Ctzsbvh z6JLL@>pY-WbML_qjYR{!p?dy0ISN1F6;X3^;0)>@^0)mQ)WlKa4jNHakSFAY)Ibwy ze<4@xi8c~Kq;;M4kDPO6DSCHB?9Im<8p$q52rQh*PS{vp?y#7Tg&I9(Dce6kdEZQE zJ&gI75Xf9pw&1*6>?EgHS}=8M!2R zZ|WkGOcQ_-csY3dS1enWW7REu9xa;s{H`QZ!^6pPtnBo_{_ngzaT6xiN*5JRtRj5S zgTAjVRKJDrHpCBZHa+4sF&4KFapa4j8nnRAhK0=t2O)_M3Gt-Wm5Xei%LPZQK2Sx} ze|Nu*UBL9!oRQfx^6y46XG;o+V^3)g8=uh}lDqn25XFojNjK`@+!J?jl$qTfA^lJ$ zrPy*#5O5r5^^79WxBC8+hR$x;AK<5@gtN6oK3$OjTK+U zJY}uEmUo&LMht`rS@Y+`3`t-6PNL)EC6=KThbh>hD8B5a(o!#xafY- z!Wi%5YF57Ba;g*tSET*D*eT$AN!VakLuylzBvz;@F(-{~YeUR^_uUTyl*^!$@JcU?crcJ^$|^eOrF zvxIoR&dm1rf*fIJ1uNzg={irb!r18I3WL_aEKAp_V~&Otd-rFjeWW45C9P7No>Y7# zOISgOkjf&$j1)!7Ym^|a?tYr|<98g5q)c=QveEWEq_Y%HjvBk^FFCsL(@hj~6S^4E zV;FOL+YQ`z^~mAqx*_T_?gX*Mikd|NUbw%f%f_OJO&(}jR*ldVziP!wvJh>!!Ey^r zXZ~vzyT;JS6$rT&xT>BL;bvr^T1V^!w@N3oUDP;5R?e4ejqVB@->kA3*H!lHTz$-C z=rtQ-L9(nDwCFZBJFJIk|Hn|O9;MpPPs9|5Kl07KTk8>)r;t(tMiB&~hI2t6W(?>h zn6VY%Pcljh>W%e+#r+HiQocY+1tMSGkwv%cOYf* z1iICb%lsuiSyd%Mt{N_jtA%}1hKb*KoEBci99X6Ddg^A8sjR$tfD1M?%@ttgFIzizgY)b<6+->Nd|FK#-tU}TiX4c8!9F0a*B>(&izx0G|wV~ZAI zbfSLLlzz(`lTOEnSCVstkL4E>jcl-W@oAs$bqR?m!R6Ma*qfOIJ)gkd(Sdq>>HHHG zydJG87plnzJmN!K{tmia)iiVgs_=Qdae~7i#fUY3+_Tgi*wm#{8OsbnJS-@tWaIjz zA1(HQ?!GxL?&$L_#HFGN%Q@FC3`;uc8qAGMnX29Cy|$s5VbW4eCK|Tcc*0}G56?rZ zmuXz;-y3BHStXHMWHuTH;IIV#;x$j=`eInaRdlSUs7^2&B%*#c?%!J`&l<{U8Y;9J 
z;InmhFyWP|I~@NZlO@FJ^mA&EO2II)xn_7}3r6Y*K`SwVod0LJc+pSi9{F9ByhUU$ z?;IX4X^CqxeVc%(ItI7D34D%{9ZM8GJ_5E`sK+Tz=!|!OMJ8|6bt>Mm;)nAc|-B z33H&V;Tg@h!Nvp5Heo>>Cpmp*y+$<$liZkQo4{pt3I7~e*=3Kb4<2qtsc zMJk(PyH&yHTD6HPyQdvD<%_tks)XqWGT|Q@Ry_j{=T>ybV#izz_4wg#seXiyEf&&^ zh)4!XQpjXKW1tEB*<}|sefDh>_PSKQ$jEcQ+%1&V(o^@rPVP-4|8<}c=lrB&VtQrK zt~k2m7C)u7Uvb%Fg*ig_SLMdap~-8(Rq~g^bls0as*QLita8v#Lmci&F;*cwwpeNR zZNF(n!0*O>tp8U79J_*8@X?QMj&nqPbuHseqQ}<$LYj^?4{6_k4E7gq<&!1tWnmNSg=mluODYXpSdd@7-VJ7U-&`+7g6QX6>QX z^NJLc)r)_Y=&OhNx6PM36;Jj1%dNi~SSE4nzqBT;(f4>$V zcO`<_rOr?Z#GGdGnQe*}GRx-O1~l1?)dt|cWkwYK zE==%XP-Vygfo%4Ws=%?I!j$XW`}C_X6jg~VhPem~umhnl2qcB_2!)QXbP6hm886pf zOVX0n#uxjwygJhjstu}O{+5*d1AlzdAK@|e8_6g=?(}%fZ_)5l7UsUWZ7dj8w!<6w z&R8w*__(9JG_W1`8_$+GpAQ_m?%Z;bS-DJnvg>I4A4?n8KSWSoU_`vLTZkcK4ivOv zSTTQ{Eb{S#`FX;u80X^qr<#@p($aH+%G^(%sd&{mi7hk4m87FFpE`H@MetMnNp;Roq?h{Cub z6Ph^V&jg8|y67$`-hVV&oaT4`4GH=ZYKW>qm0K%0PKu{B2OeQ+GLQ2Kj^O*@V#J2@ zEWouiN+OTzMzW5EuAq3ybya!pQbDvykM|&mQ$*zT12^F{EZ5E-qAE#4&oBMzcf;N| z8qoT8Aw5K>52(i0Fc39J=Uwem5Pwj}Yh<8Rqv3B^W*uqycu>1(w+8u8h|qpR>xkJ^ z`p6mA86P#UvXi61pd_XkKIb+451I&B2m7KLO$DDCr}qPmjkHPDr*FP2r#{^sTW$93 zToDfMSw7}mwM4WeSq>sT`La3sJ*Agshki&J2 zDM>SzP$BHfm3w92LxLK78MvefFN1d+c^7p@d$V{Y`yW?E4%N@-xf$Pn$5^vNm2Bh) z3O;ca$NYe8(_u$>qPV=gg>143X03Aw>osqUD~GJ=%52;cpVLSRtIVQlN>BEbS>0oe zq1K ze5BJ{5D325%2MTs4|UyGP6@ynder|5W{v+=tXB2P=Ao6#rLBi+pms6ba5EVQycnW| zpa1ogqQkRyf4fd@(QFah$$Bq9ViFa(uC{aW0O`oszVag$U3vuQ5|Zc>|KG>-VVY^k%4Ug7KkL%+ncXlnijV-&MH*UIFy+5U0g{CcS zdLL|>C*EkY(C1v09R6hSKB4uA7P#%XyLvewje2YpTeDU?p6%wMaSp&hK1?~-{0kCL$L4W%Nd zr#@L|p6cMs(4zksT`nw^#q1}zN8y;0UeSh7NRUtUx*H{jNsFD%YFtHgSFOR;`o9T%9X89J zRoF%rZyUZO3Wm4iv2~M7@gmg z-|F~wWJMq4HvKMTQdi^CLQyHkoFSpJLq%)h;67nmAfH9?tK-H7jC}MD6yw+lBcf)F zv$`L_28<;0nXnF|_bz_gIi1!W-UvbdA^t4BTiKXi`^0q57I4|A?LOnGavkTM_9;Zw zXx^{k+_6^v5)#%%;fvi(iJ};f=%Ed zk7XKE_(WQ9dJ>1zkfrOqBk_nw#(73VA-Cu)p$RZEp{Hp>`IC%z!)aFKsbpDIxdN#= za_xWdMw}JmncR$G1|d|oB)FnlQq*5;?pY6LK4{8E*;;>h=Qgo7Ndvirs>;R!Qb->( zc}Nyo{>K|aB1hVrZ=B95S$524+-rw#*(NbhNO{~pIT=72`sX1+Tq6I4X-~x&9MwuzhzV#Ku7JOF-(?IV1UDqL;qecNLfewCiF%)R!)D* zJ?3LYjb&KPn~cdKE~5m@ZE7!uxJL!rR(W%n(e0J@0#YI_G20EWNe&G&l(6v;$dg6+ zc{zS9tB$BC479WBZax_);vQyia$)B5m<-X~9%G^Fwqwt`kR`PBr7@3GOO)s#G7eD* z9LdG#8KsRfRvBFkG&)zbdm?i-Y*02_QeMewX#L)a;Do2nH1af+rWfeiwd>w3GY&&t zek7}{jp<`mVdIq#q05=g(W&NG? 
zR|o6SAB}P?aJP9Ke0TKZ1zA*@dyBxOgFsKzuWwNbRtIOdPsx$Ix8uD-4{r|+H?p4m zrt`V60sA2nQgku0uwjY2v~&$*u`XFllQP}ghY^GeRg&7g48tg5I7Bm7^*!t;A*;e& zWy-K_IL(p`u$g0Vm9n`Qkh_S79b9C|k?juHe3fjAbx2At5@$lZyr;_V+zR@=#E=yO z!zf_eKNa!(E(2@G*{r8ZEz79clx^|h8@L!a$ocRY*pOm{#t|{gsOW^Bbq+l9(i-&m zPTi-z9{EWP6`ed!;?U5OxgVIthE>n5Mdp zAD+H-;VH-8!{VAZj&9PHNz@ljbJ=+XM4FLntH z9o5Wk6Gl5Ix>HxVueEzhs&#CbE#_VnF}kZtYQ<8izW%|>@5l((ZQJ44)cqC!YgqZC z{7#8N+0RQ3gv`7U7nX%xDDUI*2Ku7Ii?&$=gj3E^`O$UIy(jmze)U?NxxGgBsgCQ9 zON=J$t0no`8#wdgBPrw94*K_428bfobB3p`=n{_bO_8kUIYO>@aNsPJamKa7>!v*O##c@VYQ zoYmKu)SG<9npN(?LFD`@TC<;SiV-$>CT0wvbGn?Z$kf}}f672er^bE)8R3;`=p|V( zyWOM*`Keq*#Br<_!9AC9*+>ThVG-3ekcB2xLIFBp#pKg}MMkb-Ef(gB?il*cF0~Hf zTo+eF2)bDXzr7p}yOa0+MMkOrBBMscgKI&`LyMg-;7kDgZzw!EkaM6RY&nT$dVY=- zg)&9vWwOT)z8`Ey{1$g!Cs**RX)Vq}o(CR>rVC?l#YQ&h@Z4fzYIWB)nEHrlvhE=z z28Ksoch-~3C+v;WQmO?xHGch>=%yQ!grDUa_&kWmL~iJ8TB_#RSKb*BeKcA_-6S(J zY&qe^!6+9Sx~XltO~e$rMQ4uU%J>5`c%IKwl_&x4;kxlu2L}f%Um_m1*U}v}d@OW` zVl|bV7Z_4){5ty(mzTx0YLsq*-S5&7Yj;kTF6b3ndlyoS@1(iOvpz>ZjXr}+GHzlV z87X55t(>yPJ{K*m#_Aj_d(*XfS&(($m9RD9A~w;GjTN+{Rdm{1m79()-t|&bThDZwSD9#rYEQ)Mwgn(+UyY_&t#I)V6IPO-Hw;8 z`GT)m-nEugQgSrABojD=VuPti>K}^wH9a$dj$`XvdUEG<+<0|Y4g8$7d==K0{ht~b zTDW4Azg&fgB9B+myql0$pBr+D5Q{G3W0cY((q(O1cH`@REW$Qu7Auw>XAmzYc$Sut zNRqtkKUc)(Rg^5uNAq-NA64)%@uX%4cci8nnpfz%-t+-$pi@t-WmBoycmD?86s(-Sz@L_%ZF5wDL&Hg^ZXZTtn8zL@&|(yBmvsr?HJAQ26(hx zl2UWyESg%fU$7?e*<#;l7=j77HK%K2aT|??cCsf$uL7obhb%U|2YV; zH3bEy$qnDi&$~_A1~q5DWEM!xerY&LR0`w#E-^}G7nh;m?j`z!*Co6$|8?5yt(T>n zRn{^c99gbibn`kQt*jxtYrWe(m+!4oc=D(Uom;aWY|vb7Hk0gqM)o4) z9w+k4Ntx*9S#=mI?uzXOmn@d9D_@T`^M11(dc2<|IWay;n!L0|uGtaFZhC+8@gv5T zKYbPaV<#_zQ$K}je9=Y(UQuNO-2utHcqNo#JS}<{2N9l@B^yaGTLS8n?sQ}S&~}j@ zFUBN+%JrX=rH~{^V!Ila)Ww#o*yDIkBwDFu6f4{^L;jBV2)FT+bEUG|tDj zmaP|-zmBl)o(bHY3fzgDUj5n>7=qjfo$1`xWzrVEw4ViOEAqcW1?;%@#?o%td{%9K zVcib2UcbugyuDq#yYbv~@upq8`@ML_6iy#lM@5eG%gB$9KbPMA-J}b>j%Yq(?sp!U zU9R+RD*Y`p9b3c~I?rgrWF^*3H4Jc_&wX(o*HDH~r#nWK#%qHiujMtnWf@ms<+>6&tej5j8x#4!(`;WMYA=5|g&NK| zb&7jqv?7SM@OuyGP`pLS_&WMa;|>N7cO# zW5j)gGBxfxm6`vnfhzQM51Qp>8>5QqN4B{3htUlkYxueOAz?8+)VW(hpTEE(zou_G z`1#t>e*x(egYa-rI-a8Hrw!zv)cc3|6(Ua_{dmP|9{KpbWDjmo0Dp7t*9qzp>$+lu z*YrWEowpTjyl>Kexv6ODFmfv@rjy zCn2Q$;=Wy?o@*3GZsYBB0%lgsv!b`nxh(c4*O+F^AZgO=`Jwj1aqMrx**1-2`gmRA zLxx4Ri-U8P^9E-dHVP#9daY>I4G}vbSZ*sNP<#@#`yn)9^_qfI8AxYPNc8W5SP9`^stRLcuXf+)foO+ z27sx59-u1+ZDC9W->E#81-+?xI(>AKT~>8e1%I%HQ12xT#o&vTBJt$hQb$qiU1pHC z9dDrtY^zFEsz0E(1(hgIxm?Kg`RJ&&PN>d^Zys-)C-^J6 z6($L<^?59vxzf!FCYCWm*wX6fo724#kf-Ic_&eq*V~Gq0g`$i4dg-3Px5ZycJ~5C` zXHM<%**-GHT>L%1IM5?-Yv;Y!;~nAiE9&le+w$(G{+%WN?QxVoYMFaN>v`=h!FBE@ zj5cxplLxoEKEF#NdR8%3z0TEr*ddW0(!c%gbKTLz|8pejB=2@SjUsGqvB6Z)x;)+Of<=4Gk5z*$0xLHI8{*c1QqEObsb_8PoM(7x7MXBk7!aModf=3 zUD53a^T!`4mpI5Cs?? 
z(>eDJ@djn8p0@gQ?mzp1Od>G)0ofd+I+-!=Y&hDNhW&yv16azaGfa|}uv zfK!kZs%njD**tIto&yLfQhIpAb=St-OIEWs|t-LCbS5wY$d)SGv$tZi2-Yd|E~vJqAmIHXFf@St2RF?`v`mY=Q2!}-`P z9%jo`s1rha93yZY<2iKci7k2Z#z#Nm`LF(=Uzn}eNv*v$zrNZe-@lvg;lG%ME0Xs< ziTMS@pH)7MR*p@yZxPiIRO$#s^im%DiGU5zI^9NM|pu}CxO}^PI`&<(`bD?pZVJky(T+# zvDq8dOU^l7xkTzgDkiZ~2eV7z{vI2J^HO2Q91J{!gW~(v(WU&1X^r8vwZ|6(3kyLX z^&e^GY;AcZ9bm!gPug&v<|x4={We{UKP-J8J9^6DC)5t;yOuT)s>q0na$bwlHKTDW zb;{O_#CT!&>#xqkSKZsXVENLF;T%Mhj-dNYwF>*#NyszuqYkae+>n_lukR>j1lj9f z_?zha3i}Tl_ynu(nR|rTb6q`Lmqs0+jCz6?hI6lK!Jy6V)R)be@PP%+CIqmv5bUkBec%3(4UMStR&#CMMi2|)lYhuy+QFt)Vd)QQly zS`VlRw;Y=e=*zqqTo_D$Pr{LJCwnrsP(Q|gQNg@FkLalipDs?B+00kJn+#*ciGG-(psPp8b~LEd`WPrk22f6Xz-74Sd42yeX#kMdd^y4@SnLv{Q0(r07n zs9OJ~I?7W%&g9p5>+RHaxX&?b>irua)-=FbYZ82yVvLP_sfPFZZ}W}#{9>m35z$sx~GJEo&2;2vAqu+(h(G@S*^Se z+4V%Lj(sP#uuQdme6J^iHTfuxb`)V@3Z{7&9d6%b-S`v)uBhRxewaPJY#c7WC)d`& z_qsv;XNn^5uuHSDriN%X36MO(aio9&?b~a8(L*P7sc_b|*Bt1E{oLJ^+@Nnd?6o%r zDK_0tb$!>*WYWY!_W1FOYq?(Qz>2I&p~krv6NyF*yIyHP+yK)R$Q7Ni^L z?ha{Ky5l?Edw<`5W?_h7V>tVq_kHRZ27YG__}$ss$k+KW)ln+~twb++>SS|s`u80I z_;rhu%{L*HE~B|Abo;Hh-* z^$}7-!dg%2%)Z>_u!V9Gs$x{NmYk+mF=30EjrrReD%aUps#Bg0y=&m+oob<;EnA0f zS8yD5)?2xwOOL3LGL3HrEWO$gImv^+J8qk$zNGJ_dbxS!WtIATh%LWmLxV4Bp+9mh zfB8ueNAs=<>K`7!doC>!;c)mZiQDQbZdTBXT6bCr?$chH^rOXw4vaoaT}%_YA(JCV zlJ5y*hjNggc@7RO`|<>x31_o)TeST`luJxfLZXNn?LM)D;L7>sxV`oC(XV7C|8X7r znI`b6>RIbt1^%1y%a!pi&~Dgry=bqdXGhyJ^>K({1mx@pCj}qS+Q&hZ@+W=i1Sd$# z;@gc>p{NDxpLaE~1xBqfI#=ObxKGCIA$K!sd4mda1ksUagDx!>V2Yy`+(8HI6su2b zW{qtFF8-mW0m-)(?NB_#j>9m46Z3otN3opv`+1f2cw}r6iJbTcuVf=1@2oPV)%IyC zxSbg+Y#*&gW^>`YnEu4u7_`B0##f8me+)sTDxKw?Uu|CmYF8<(o|=b!JeiXiqDM#s zKp5#95rEs#Dq~}Rt1|YfjC`CBWqTk*&Z7hP18->1U6HfjaJKUE8dr1Fe=y5?kZ0oU z9@LPE12^LXge(99aZ!7{%A)UcynS+czZVp(Z0l8~qaM|&Wn%+2VEzU<82iQx4Q2TX zuMQ1fp~-YWI3W9@{e@3V?Ey^x0VdSq&yMabJ&w@+mB#(uoOwVRvtraH$r6OmQd)pQ zqcZg$fp*hS$?opIL`{$Rtyu9d*W_^sj@Y$V49J~HKr0>3xHTq8boifx(MCE z!OcZ>Yz8hR$$X56b{Ej?J8v{FZArN+W`*-Nt=AqtqQe=&KNLw<$qO#g> zKc$9CdHowZ7)AmZvaD8Goy|b+mg_v;p3p zruds$HLb1wYt_AGXXC)=6L>%6cjA^U>Mucp0k}?X6PVKH1{z=fv_BH(pl3ebygaX6 z{GE23^S5Q9m*0q(E-QQJXQHwQnVIYa88})<`qLF04icYxQl0UMix(#!^ zh)i(35ZzaFe>F_^EC@1dyV6=-c_G58^uxc-`ZV$9Hk3H4cqV1yUYp6-X zO-{AObq7wFZ{*CtCxQKtip+i7ln;t{n_`K@323~cLZVjw(xuE$xJn<-v}>qCS7lR9 zS>%tc5D`uxs%4)&A=@SEd570Fj2MThGjt&HEhc45aufUaD39`NFJgC@-E7V0Y|eOV z8TvKw@!K`>d~YV}*g(Xq1HX9BlW?lCzlS1MOlI`IbiT}A$B+p&_D^I(*|Kqg`DV{E zFnrL$V%O18i#}%0KS%m1Ef9GF9fIafXggfN!+A!Z z)TfPNR;$CooR9WKo{6>0=1jizH*R0v$0gzBTU;tn0nn-eeq*boeHEi2vRN%gFTnG| z`xz6QEqAD&P-gydNlQviWw3sgpYB3{^QjR@+cSql0G}&Y2npJ>yHIeL<(k3l*juwt zrcu5~Skb_2l9Vt}b$ZR%)7%R*r$fmh&CL(vXMu+e0f$`|&QG(>3_dXtoPkG)Pd}$% z%8^ynP+ad8K@BLgxTR>mUMR3BJQP>(5=*kALX}uSoR4;bt3IybZdlsFc5QDsv`g;n zri}A;(5L-iV+rjb^1?5hgIAomL06}kCk9VMSl8{}L|P>;PfZUTB1M)qY@ml@nDs&M zX6t`eCbXkGYottOn@2#P?46zM*UMyqSoUN-BcH0$0`RFN9<3@XC2tM7s*75e`~q$o z8pWT*YF8ZxE&TT{f-Y$FZlKn91Y15|zi)~(_u3zA&n>VRnZG~I^=?AD3zj0R_~sZ= zLDVYB?ko81IzhJxJp@fnQ2fugr7_r-n_{2re;p`Lfy4bX+gq)Aqt9GKQc{3FrAFmG ziR?aI$jZ+$d*+N5;;2VK#k;ujYJhjg)9H0vjhZuG0E?xspnV1zXf6z=9ib(&jC5wjAG2)t$rWD>`TlN;lZyYBA&{~m zOV1EI^PYoQu1Iane-(nwYz|P((n^Tfe;3vk^!iH6nabZ86vPzC?p@kVU|Z9-5?&`e zp5JCQbI@r-n6YgPhH8ub81`-eU~OFT8cY~!)2x&;p+SE)Mts#{U8Mi~|O~7E=f> zMrZb(kV(af&?D~9YEZ9dez7n$khV{|L|u~YnhoM!xNL|Fb8T=gDvj{jKb3>mQUd9^ zPbD=XnUjB5*=)6135*w62+@@;(D)gmn~#V2Gx$wD9=ynUtLe|CF2A2@D`LG8>*jJT z_@xAh?OhGdxFr({RPW+xFw{2|WK*~_xe+hfK8(rEgo3djYY}6;H8*@;`#DQT*`M;* zO>NMn@G}#G6aB+J@`RD7hJ+Ag^zVugHZe02(vVd1gb@=26^XsUe1j=s+o5nZ6SQmyOQ=tA?wm0W}_zBAgJ9VUG z)~+hLWnQx{Rd*jB5uKDJx<>}AnUntb|6xhy&u+O!mGC~sk1I25JJ`#(JKbogHZ=6# 
zw2ZlDuwycTG`mq2Ip_)+Z%^$*&jvP#g@$*=T2`~Zp|RJ#N?pJ;UcCE0)!C=uKs-0k z(j-;BqSC&WFi~1?9GKPeVoB5rQughqC{%K?X884M*`-HxWS0}{^WX3?1`w5&MsrOw*+I zmH)^;Jhq%Yke4~#H{~G#-@MLo87|N`A zn+!F79dsT38WCnqY@BTFqW>E7jsPS8^K`(pb!J>Pr^_D0$5q#cMy36C zDBNY%1dhb_BC36HlM_mu8?l0Kk@(tjZP0IVz6NUhX zmi}6eQ}b4tG#?HB*lLa;%9%yLwRPZi*M;O?M(0)k%Vn{o|Hd6~{noP&7tNlJzQs&o zjbd%K)eX+@CdHb5Q+aWn$8o=wR$)z?bUw66=QrX(Gf(Eqnibx>>;}rNHNxxT))3Ux z9p2Q`?zXyh*Ah%3@Edz0#n4LvlsyFJ<%ou!+08=s2~)NdvrusTsq)FH=c!EhXvG z{=-5jd6_TU3(+hL8o&ZgI!Iv8>qDQi|w(tf+;zu}s zDpQF|TA3QU4#puCBuS|Sf&+qdf|(){P4%oC5DHW|MiDyfPDw}mZt?tn zpw!dy;nwlYqo|YUZ;l6r%ocbRHp@mQf2tTUCu}I`@p!6}VN`+V3@Qo(R&-4`9#UF`t6%vRDm(}1I7FM3~aUi&Jj0f8OwiO(h($H zq5w~F^i*p824+HpASVX3u-Xs8YedA4?UqKvYixf{pMY;vZdDXmz+pbz-Sa)#aWBx3 zw~5T5)z1QT62GlA0S|rl7|QAtc|71917d%5zJJ8s%&GS(y@`hqdp`y@q&f&$)OjL4 z#R*=au4+~^lQYirDw;!|eeG8Mt#Y7UU64TmGK0)IO3DO){Cp|@O8N3OgGuXpZs)7H#hg$1>`A^VCQXU!2F zuQZ3vhP+QZb{w8f{c!a6mGp>aU8S~+sL56txL9)`l^Dpiq4s_&Fw7syxEtX_DGLmD zT6|iHpc%f(%|@}ljkDESG(=OJf~ld_*#((m~8b8SHF93C10hA z-ci~ap?O1cMhTp|hb+o)-5@fB5>CUap%hc!WhM~+K%_6~6sOxtZP3hx1+P0w{sK1D9S~quQWqt=xF?#J9XP8gT3TrHIy!>2=HM(n1yMl+xax2< zc;0-Koo+&wmY(deL2O0rx!R|dAttET!{UPt2SR`0vuIGUd7@0&UY;KJPVlH{ivnVz zMk!k{Y@9p;DyziObBgvJy__ZKJNr=`bZmq=Ls!Q$mQ}$Q!_|g+`fd*oCsM)&U*iGd zh_&At-BtNBY9AfWg4{6B+;=8g$-px*AM)ZD!%(uBZhz70*B1ngc=#kg?bRBbWm^MS zGn_c_t35E?S(Ady-EDYq?AI~fvvuk@n`@tQ20?6yQre6n^K_Np`pb5iw$AN|<={65rqwtLk1Kr^D(+1Vc>b@#OuXy*jM4Ft~&b;LZAnZ ze#P8;k>%kcc=+}5cr7cSU9VhSjqu;NbNlUl{2wC{`<|0HWvH?(%pL@^p!f+ejQRkG zQPX*v+n?~~BdXLFPcT}N5w>F%hs`Z-uWbyVsVll@C0!w>lokFXbbHPHpMMR;&v0tB zF_^0viPK4(eWVd`K8 zC{S)p?Xtl1QsM0C=z>Y-<#xyDjIl;j6VvqjM=XW&aroUrQPB_9^ct~N{HcNSv2@|4 zuX%JM(_BF*y0+IozCAn7Nclgb7kEL+U15`{+m5PRWN1qj#k&2M`4XXCQe~io(pOe9 zaTN}+8e%>!r{hZRo6`a0aHtl73UMad`p3SH`R)*Tl+=~5`Xl8%KtTfgiH%=(OIGc7 z$Jb@n^o*t??Kxse`u%%j27XlC~qUF0P-+0{g&H=HE?^hRh`N7;@$#e{2tw zmVC^ZDkr~2ez4eC-UV@(_!Ea9ThmG+G@!P=RTy7t!)d!JCmKl_)C$7^xO=hSw{mew zI#sygI8g1W{gn30ug{a=rfWC;YyH4p-5_!9e|UA@b176{nB3 zVqsxri6|esWj`#x9?^`_`6epSM~vtdxq>kF?JO<7pvcaiA}hZL{gL{cgzOZedV1mu z+L`17VoG~UP}*+qQ}CSY+H?J_^DWxjQ7tzfQd-n?@@j@b?I2`Oj)OFD#Fr4qAeBAm z=~q4GjobcL>rMV;>tbHWqqv!eWb`5m8VYsVTP;NnucST~Wv#;*NiSKX#O@Mfk<~F^ zBuIfiybkjTI_DDLV+tellS47=S6nmp3lJy|Qx|j5x8tUHj*yHy!L}h&J4LF16T#eL z&kUeKkYXE0D%2t-14HZZy4TrO)h;@3G^s|%8=P=BtX4c_e{jN-e$Dj}-?J<+qF#aK zN5Z>TMf!xqjKSKFDS-4cu}O~A1f?tZ8DqFZQCeqUR&KONLstV_D@!{Smm477iZZ6Syz~2bC+#f@z01LD8Dlaq z?|>{=5#9W<$52>&U2OJer~ENJ(^KAN)0coe^=ENnV4eL7uaaDRfxBf(^G)NGyTqUM8@ZYMv2DJ@0(cl6~F3R z2C^2q>Vnsv&M|?-=W^==&ixeFvj9sXU445oY#;#>-=aoA@aim9yknYnKLQ*OYdk)d z5GZ-aBKJ>qQe7pmd9gSu9)bWNMXNTC_=cqS6}O84M=e2}ahO5FJ zNkR_ET5ddO+m>nSS~&EayP8u+(8$^KSs3>bp5Vw5+A&t;8nyGYE_Ud8lFwH!cAuBC zflED+7k4uzqe?zaNj}Zrtda+Pu~fSupYWCXbnDX^ume+`efKSUgDLQwDchDzrUi6c zn%C+}4CCvkTunqMN3fNm=Znx``nq>BTB}Eu4;C67@Jt;2^3-o^H9^hhfa$V!&EM$! 
zK-RU|9qI|6qm0HVP8^}-*i}t2Uqh}T7v>JPvmY3$DqGRAhM*4~3%wx@O{MDcwsF+i zuXEx@;2y*^?*uzn%zX`f7G8G^WwHAhMK;<@CZmsEC(oi{gU{@%ew}*&PCUG98_$oO z!Z4vAL7ymcrg%d{C>%xkh6)ecHE4e7vC6o6>}0YRUA%ek3CeWW9f(s!mrf>w)+wBM zF%Jn}pU?+yXs}8J;RxjF3f0ZNQAGO;n?JFblahyg$@r>Er!8BGhL59%N4~xFNt?Q9 zc%jIHufExqq%wp>|v!YJoCP=sOxk zl+mq4q8Uh#LUK-2vx5*eWBi0|L=5JhldgK!_e~^d#tRk{^eEtam!Z~5l{bFgdH1pz zdhsIXc<<-^ka4?t((uvqa$`!07YYYKv>!{AMjZ0EBASaGJNLO0t|ZKU<@!f+>_uAa zbf11F(26uWu{o$LBO~A;hO&5C{~|jQ{`ul`Kjba7)wa0vZCIIIPu7B|XwxY36AYt2 z<%zK8&-Q&8lod6Ty#O@OT#Y;1tm!~{rSYw>R$5XdL!_xnzL8&Jo(zx1l0%sxWQG%D z7wOnK1X(mS)S-YV#7#+1pR{&R-C^tW9)#Z% zq~=V1)-lK}g&XNPbH!z(g#`eS{6a=KM>2mR$IkL}jplTK8eL%jR)Ei_NE~)Gd%yvd z20g7xbQd%qe!fZtINVef&*J$(Z3+V9;)~@$$h>NU+U}XuY^{0Za@Huz5Wk?XRda(1 zktLwAn}HIez86*4jS60By~8agRh)e_%A4xZTO%TG&b*YQ$NviFt#TJI)=gM?1^)h`ckB8-!;;|5y-!$2JuUL)lSjXao4(3X9m^Ew`HIz~CqCEk!dvAF*{nLM`?= zX8!oNErqvraFW?FB~Eu%3BaKqdVm#LdE^9^NM8rt#U&5U$I_CW?IH5%eH>+F-8IvC zuUN21M@H0J1t21+=l=8jo|*DeDJZ^*j|w31Zy(I!E8 z7L3Xc1}@MsXy=L|sD-7(Fdhy7uM5tW!?c_6S=pA@AG18J_fZ5Gv_?7Y*K^NgFK3d^ z{pj7#6O#7|f3F>r=}>fD!h^@3I#? zje^PfM0`sJ#X_uub5xi2^c0-IEx~1uMXN*?i4pL$0ULfI+@L41y_iQ9fLDdDYN}?t z`R!7M9=Do8d$99^e~@HRpz9LD5njdN&aZJ`H;?P|k4qR_n^s_&v+mPRCEZ#>DXl5c zW*>5os8ZhLw%0B!q5Blkj9U&mH(5$>4vku_vJ_&}C|o5sQQ=tlA_-zCX!qSAG8KqF z`;I-pe@sxcl$=SPlPGHG64DgC zv@6kpTaBI4LqhV7NaFQ9O8@@D zxKL%P^SS<+Ugg*9E&VfZGh@KaFMhfpv?J)l(hk^Af(6BSXiu-eBMn~(=gPO% z5^rwf5i)(+`Y~7$+J3tFX#%oa08r!->C%dw10)4`GQ>90I8P=ez?0N%?U> zJ{cU4W-j?gp#*_Xhr*gz!tRk2i||-0u&?L7CwgnMkcwJGQq~SR)eR*qSc0Iurs&}t z+t#gYfeoMROZ!!{|AC&Un=n8vS``L{EcWL6k3gni2e&`JDjn!}fZ77-C!i?Q+pELn z1`U_H8JrBz)}hDWi`$SMOI>E)v+u0z)(WrIsHptXpZH+CJD1GYtrzQ-`lpk!uURK* zGG+f5ut2rSF9x2&lzp6WM0YgYXjQORnLIQ;3HjGHN!#N#nSwauqklD*TQsl6Nc-1D{NJ3n2Vu zR&vL7XiJE3SIUW|y!6p{z~BbZRV3JeTl?+s6k=2hB`pH*v#nBe$wt9Pd%-2S_CeY= zqq+XEiIbJm$LM_T8}%GT>oJx>(tyVhrLA$)Vz9HRl!&j&an12H=-M$n1Dh(@j@7kDzl&Wvv;6R zt?izs^9qvp8Ud%sfv3@{*(+{eZ;NcR?4H5?fi}IZQv@=netXx&{mNja&v=FI$QifN2_F(^I6uSfYtuMm zUc6M-ID>ZdOUjtK10)bhgFW75-lb-6>K841q5c{7EFzs-dv_GuK7101&=K#`uwDmY z5&D|Xyy{F#MuX{Y z!&_!hic_--s~etAzKcXi3HqLlwA2P1Nyo2L2&Rzzgc=FTk)X8h$uf6g(=hO%*8h-v z%9dyk@c<7X_RNcFyLsGb+5nZsICI#yL2MfZo$SU(@9y(o z2Ir`sbW+ba{hR&ryd|Sy{Ad$y>)E&Fd=1xUju4TIVEsyL@2+b$mxf}Tf3W(s^b>ou zCv%FE61Do;b|5)(mkDEmkOz|l`|s4ZI#hafUoL%G*Bp>mK)PUZ zo1}Doxyfh#S5Ke79n~lDgmTX>(vf2f{H?F~8vthx*fmK02w}(`V-f7MfPqx9%qlIq z36{O}5%=ll(MVeVZVvW~as>TT0PU$*Lv2MMk|B(?9*{SmNMRU-r0sood;G^U8uQ`sUr3Z`*`41qUljFh(uYC{TYZDp zJ`nJ1@mP5hU$I8Q{&Ol2lSc_|n04L=0m|Ni8ZH`~{Sx%Ln0pJ22yL~tvDZ<^Ku;<# zUzKM8%sJqV)8I-2Cc*t}zDLw1n99)2phYloc;vvR*45Q;Z=c=jj1D%?Y?#X|n%HfS zJ(x4VP+3?bjPu(rlC8VEl&>cf-4^FB4!?b87SS%gSwSir>^UZE*TDfm`Ov<(>QR-Wu!pkm?!(8)6$_F zX=|hB{)O+t;^H^~Zl|c9dgJv`!aOa|%jo6(d?)#``*@r9avN?c`PeNvymluVxEo!} zLt*q7Us9?N*}q)IKJ*Z=B0AEr)ej_DZiA_BgNuE}b|r3v+n^6^&~LjOJ!M_Qh#s2J zQ&g0=<+2Q@Q?x0SeOtmd&E`S5Cp=l&^np<8i3V(HVZ#T7b|gms5)iZf2d%t8GCOo| zY$?YymR;<5mI>gh~?gi0f=R*ZPm=8cw zchO~t<96g+>2c>C-LJCcZocuGG3g`P63BQAc_;9+>-6j@^Cobp)3$o590RBob1*To zCx|GA{x(yZ=EI>YWrZ({ih#dVT7+UdDm_UC)-DPA5y|5RUg_jZUVh<=-`svRiECqN zo5A_WddS3ViNdU!AOxQw#fm-pu?Z8Q>a?=(EcK0o4VdU4e_F3PQF{fX54|mTF_a*e zP(eARebMr(c=5@@!9BY37B*NKc}uF6N2_X{M=!ylnPQVCB(2~|O!1}zGCpg#u_!V= zi_@D`cFm}>+dlTZsX2W7wb6e4b)O3XY0;vK1*SsbP6-bMd8j{mHh4!|=Zm={_(&n) z$)Q8b`j;K$!)PvC3!x{h4pXoW0I`u1`DGa#@f%+s!~=e<9ls=I?q-z5Nb1k{+3THY zIS$>`;%;5v{ul}{f@vdAc=SmE=2+7xFCGnkDC!cO=!lqX5n_@z@)XZz^~>vmxnTlx9anovJ=#s@wen8 zr6-(Y8<*`0Z4dNJl@FLp4)RR7A9q75sYDP9qs*hWPIe)V9Scr<5Y+ji3~d6rqsFqh zV+mte!-`&%LG_PE*uSrmrsHUBkNEN4(lJ%W8F zD1~i;6~~e2)qv5_^}i}c&(Kb}rMEkb)Gz%k0Te0+Xv=m|tG`Aj&`dq(iL=B|?YXIeqqYyxC)cj&op?Du!X#|VIw7-uT 
zXC$XME6Wn@nNU;;RR8}Q&Z&w0TP z+7A>KPkoH(H?K1lT(WFLU37JfgFxRE zWK^GdP4n3l--Z~|5oI2dW3b9F!aGa56H8QyJVq~edOT)fG-m4;e-kX%N(1vVfus*w zXmRr})Wh z;B7B=ulZs;eXy~%kwb8;7RYq+a*f=nRU&}AmKmR8E+Yn$3|3^O4Z1R!c3b0uM^Om{ zRdjefbchU%chjahtulR9NPG;x$g=%?X93PwD|0>{xMy@T7W>$yY0O>GKKxSJ?kD0< z@LD46XhfkF$?ck2{Z3%Rh?MS~J@(zr18hKDEDyh>MC) zraIXUH`rMjEspGz@b#cF6P64i6~aB4zHuJc3OvWkgtwPQ3>Szg zQ4)tI0MF#Tf*+J|vC;Z`RU7FfD);+N?j^gC(#aVOMqwvtEKTja|0;W(e^mh_>M)g; z!W1J-8nv0m3|O&11wu2Ik)*b>{O=t>C5J%`hBoR1i1&B64;N}@_xGAD+J5Q7e;%8q zvtQ;$i5l|Ruvs>0f7I=$RCvD*X!ArcA|2tvynmd`5lvY1NJvbv4%VfvzW%(n1~Xi@ z;{iw`yvs3pQ~eG3Q5hONgDvwJ>6h+(hYR+8T2Qvzm?PPvd&)c!rqa~M1j7sRiBEDK zn8g^QAs*|FGzcYwQUtP@9-Z$&Rv(jnZ7MoN z!e?Xn4kcb&cS6<;JDR23#%wVv2t`7UOU4qO*TZ>P5C@s*ros(vF&TY2&g@Uu_1$ec z8+ss!Vk8|wYH(j1Cz|MZVuYee;`m)z1v~$3Nr!R_<->>E%oi0IfTfi}D~_j-I-ws7 zVWtU7uVT!8<@|VeBn@;(M+%MTQc;cnb6E@mA% zH|tROIOZ(#;W!KIy5`Y)@3s85$8TzxEpe)osdz@a>1^C4GrY5(d=6qYo+3Epgt2e^w?h(2_WzsLywqF{Gp~0rW$bCaj?Q?1 zPWU|yJ^7aQ#H2y;zpQ~5v95p;068#Nv9je2Jt8~rUe3veSqoJ|s{z#0cTjWqS~3t1RdxNWT&v}(;3 z*imV@NBf2kNLPIU@&F)87spo|ZvZjH74sbnaFqj}vnOfIFf^g|Gl1GOTF$RV4Ci>O^3JMes+#REn|pmQ(#el~RM@~pXzj|e8Ys2trvzND`+K;W zzGo@+6IbaaM&NjDSU;%+xT$T!LvZr?rUdQic%?iszlLycFT8;O-J5+kHe(AqNUdYS zkoma)(=1P@MTfL&2VjJa9$69%a9RDyj>VEF8G6IfNK`Ljva&t>PUEJ}hLT5_$wEYq zpUvju2M}I8A!i0o5|u`M2cNJtD}ljJC%2CovU>rs1}-**-{?VK$y#<~JA`e-g@~i#>6DBNIzl~TL=s(8 zWrH9Bd^4iWpQF5PZg2Y~N3kC}uuaCERGzY+cT~^4?YG2f#SX*n&cCPQ$zKA2YyYg) zlvhGujO{DLzn`gso#Vx|Jf|>MFgV~$9n_^@LnGc0v?19>OXdfR$a>R1M!`qC&AU}< z`;_gSW{e;gjYrx;glOqN7dknz%qKpfIRfr?8>=hke|(-VKB)kKoo8D4b#8nlsESp; z`(IaI|FYKgy=}P(=1>VdUGL;iEG`~X^NQRRpV-S+_~{L+pTD~c>$2sDrAb%@24@Ek z;B-_rCw_Vy!+sU>vHR{9b56^#kM*=X%q!Hc|3@LTG+lrmpdXrczg8{=EefKkx8=0bVc}3;L4$IWnbb% z^+p=Z$q^lg^Y(ql_nDTdk}=9T{~(>^n-_i55LEIq|NDUbYOFf2MG=~CIr5B`4*`~y zBbbZR4A%ZvpyWsZJkw+| z541n#Q1V?qd|<7Hprx}aMb6B`Y0*r0tK!IS+s3#NRq$(&(9r*8jYVhK$Pw=`zIjOq z78U+&|IPAwadk9#IoYe|As~tShXk+bTapL_)0tWBB>Jl`?wDEny71gW%jP&P$&TsA z)}y^;rCMD~l?Nk|^Xww)Tf5_<&{te?O7G5oScR(blfw+#jMOTa>gKxHcU52^!YaVY z`M`EvClC)sMVn-vxnxEIHxq}E7ySCswIX3v+x`K*y{?TqS)tM!S>4vTSd2K2y_&_M z=wwzDv{HYLrBF%-Y!4UvCx#;~>aFHG@nz#>wT;rvUBi#qbA#l+v{yQG=NaD#;9J%6 z&?caN`ABAqUe3aN3=_ui2`4N`3Y8v@fu8%`DaK9|-~mPUoVfwRGO>URLk>?#CIDW;4KdPr0} z0}=`r@>Z!{9p13i#Qpqs{AS=AH5+91lSeZ~^AKBq^Y?mPYaRH?1WBSDic#Urdh%gc ztl)! zu7Z>P3w9~aPCvE_HCFWD*MOzJFB9tS2b22WU3n+kj|yKNQTRAj`WS5Q zEi9D2#ZcecDj67X(~mp;=ixkmrW9?fzu>@pFYi{z#+BioN|TOpGiGNE%OR|{iBR#8 zOH^Hz24T)l6sd~GzWuu6(vQd^e8?>qJbcRDp`TzP16 zY<3#hhue%yf=tG=^ZKzrk>ceuSjPwFS(gWeY8lt)~`J=c@gD1Y|G)}qjP z>!f6nMK?sNrFR;|qFxlFY?3b`P#TP(AxEus>B`#GqoO=rQaUsClm4xZf@721*FS#W z)Fa+V_e^YPc<_5V?zuvB%Vb>URzMPs5tdy5ENq%q;IE|K-Fc(FveDbzLjeegtTw+hP z1X3r3>cC9#!~`K8c~K!3&5p%NiV`R;s_X0own6-GLQlIB7o!bTet+0n?q{kPhqU2! zvu;{;&;E8DznPc~M0Rf6h~Q7WIbLcg+*hM`=_7Yu{zxu11{%6V-U}~P*BBewj0=HrPEe@>L;~e8@ zH6pY^{OuFsz*lBL2U|KLU$4)ESb*U^Xl>O#Xc&8QHoi`i5I^@~FyP>N|5?Cxt;h53 zXdaG*G!tykhnF4TkJS$ec>{_1{kYB)Ug+A2t&}xy`WZSmWbzIIG8t5kKSf z=vh)j`&Bxy9hX3^h{_wnjkk`VuepL2S+BMJWmeq+%bJ#4B##76BWSdY6oKb`gaM&p zx7GxI%W`W7Lu=UJVb6z7kSRM#cd0)sP6<#-#gA4eavi9P*{F}&EFzG=(Me!}Hu&-F z&PSxY9Cfk;|EIDNeOkgG7ZeZ;?py26v$U>dH*Zst!rldG2vMpN!BLc7zWI*8A;ngJ zhmb21jz_3ikVNwV9ICN2IFlhN%)Bh35S;%h)j%q`d%$JHITSA@497wFwVkwoNHx$S z9tipu6~{!W<7O{+cDb9%;WGM0j}hhKIQO}rx^_M>V!x&}OX9an! 
zd-4ez+2V=HGHW7pSssR+Qsk|?l58lSN|-aK(NE*@ox0OQf9KLM z`B^s3c`aIulTZ9u`vHDPteYehC#1whiExi@k(=i+se8`(xkbk8;wk>ogv^gtHk&#IGe2 zYq4?(I)^bPq1*qSEL^*U`=6f!;$B;bn6rT3Ojy6sX`k5^Nr=jzkk!ar&{AIG6w0B< zDuT5btl34z6moTX)9KuUOS&rM8@AM%R^GDN(S{c!gGo~0Gu@Qw@r5>+!2^B*iC`iI z0?m3B8SuwXv*GlG1(Xf8avb+^*#QgvLb6QIagHXw?t^ztRs`(sN6+rPvoP=9!r|r= z$e?PiD9O>l!I2t>NLP*Ko=dPEYRf@hEgn-=}gUjP+pG%kd7BbdVD65d7^PLs+=BThD{fBh%x4GR6HI ztF;EXiROq}IwOwnknX+~gkJkyHTu8LYaN5pqEh+*D_f{3}a#Nw~;v3jtW=vGuLT`&Se5 z`!ha&A(b`szXoZtCcAp6Oy+7 zP4n8y*E`X3C2#1+#Djs)gDy*`Ea1oMi1T%67-dh=*UHi;sZ8b(ki=eI z5c@{C>5VZEaUJu!O1Y?A>SuQL#ZsmZf+yYVLMka>?P`9T`Ex#i+LL{l>r;s~Igf~j zmOQSV860k<_zDBRaG>Wl^<74{v6V>;zP0ZFs+`pREL76(?qF$QrZpkj9a$rPk&(ns z$(E9+Kc`%TCSKz#ks10( z&e@rM%&{1BG9i1q7@w^d87C@R_|5t!lwY_8_?vzf_i=mL*%DsYydrBC6(c@=B0RP-{<>sKpSSmyZqAZ1@Er=(bgF`w zZ`Gy4z(j`>M4@1;VtOQ@eZ!NI2z}6sMfCdw=J#*&apZ#B@fawj*HJ<~@|#b)cG{WEF`vGhjp|}_ot6pbzc0*77X46ylFYa@vE7G$_V4?>1-(a4 zY4%uJ(K#zRty zO{=lE+rtHZ0pLsMD4w^7g@YAxrwFl`h|xD|jIW{={l{)|meRA(WNhT(w3?cQ-Nd{-&M)^Bd9E1D00vj!M2zcf>@1}8(Cu3aW(_xjN;LN51Hu@AaAJFz|VEF z_(tcBRP^Q>mIWN~EVl;bhbUI060;yo`@MSt)$h&E&u@5xsqEf?YSK7(4Ku8AkOAb{ zI=gL}+GZ?H2Jamn6u9Op>8_4_T$TL_zv)gIEEf*mympcSr#t9xMUfkUsin|qecZXsv?ykj4 zakt{`?ogcK?(W6Pd-t64`+v`TGl3*CxhEvUtZVJH*53PUchY#(MxnlOh={-bQ(9tW zf<9|wtMv}c{V)5wM>R1ep;7>sEkajw2_T((0uKWU(b+9g4(X3SL<|sz7+;}e(#N(Gt z*s{(+(IpWaJk9op#QM(14%NvqgQm>u;w*#)P#_ErNBLA@R5xA_y(|rqU8zV_sOX3! z%953TJcu;j)Lt#Wh3b%!8#X(=BPx#YiR{A%fs=db-J(FNpZ)b2+eN=+1YEY^Dx=-6 z-*|7O2xwxnII_M{X1Dx#ZQnoVe#cqf=o(d)&B0J!ktU32qTT_vP*XaR^s%L-7?Q<` z3sG-TejEF=Iz}z|M!Z&Kr$eye!zZ|F!x0zr3;rU^!Sad!0grQ&bgQQxx` zuR7JLu6`zDzHHC13C*9`KLu9|1^T}UaQ390954+x*(0w+n}27O#}}i5sS7aCfMuZJ zMMUQF!syXr84ON0V#_dNOE)6JH_g@C+}g@N9Yf9G26?uDDFW9ch|g%GT2H4_lC#9kOl%9B!``hdVJlYy8KdNiZs#cGVPN3p_gfi8Mv_XGeFx)a z?$||e&3CZEPGYNDu?+>*+F=z%zaPkhMgjAXQLvIK!URj3sbl%BrY1pZB)n9!l|WTF z0i|+l!n-E?WROg+1=bfVE@k6igdS7oDU zsS-8z*i4^zEUVr)&coPSxyB2JiTk&#wGxhMP60LF536CP_@@ z)QNvH=b)mR@|fK2i80*q<}sQoHE1)9WFSBJOnWnbyG{}jI28W#s`Zw6##RP1b@Z22 zCscb*00t{RX3y?vnNDfPDyq7sS0Q+RS}=};O2kPM7|K$@`_T*G#&CPgp*go!p`#@R z>M?1QwqUERrwff$>u+3cl;a|h1&NT`D50I0@sYEuw_za`mN}J=QEeV$ntek@#z8M? 
zb>?go->0n=_)|6v^?tranQ#{_j3Kx?1c6Gv8y^78C{msvnlP9>u$h0vadUMngKFNn zh^swDVut>wZku;7i6ay)M+70XWt-(@82|y)8kh^5H@TCy1n?8OqczK8U5xod-~w;2 zRf6j`Y1n?__v!4_5rICox$@9T@#P4;M$%t{5}<2)7z(YxB&+9zcu`^q9Wv@3_*xZ!5Vl?EYpOLV_R zb-0k5`LbAZ2-B2*B{w?s<-l_CSq^C7?uJ$bOXh*FkP*c+5b-1Ou$>39DQ#i2h5B_M zQ6uGzB}KI8s)Hj2p+elPR71p&+2lIhAAQ|B#>PzJJ`BnkKB-t(T55*syG(F;Kh0)a zcs`vzL2|EuAzw7`Vnq9J3x){l@n`mX9BV(6VCFlk2cG%;4)fFUU_V2SOW=N z{S_Basw6^2y!uHeS>tADsml2|mwAfz*>aUCVt0Ap!NI|>Pj#qr?`fatiEnRu%a{m{ zV1K#?xFfP)y0G(Dhw5>=qi^Q*z~F{kWUnj+iIz~S8*W=#81y+3d!y~~L5d*71bN>b zcWpiQ;w0!wl@X#232VNV?tl8OwxBlq-oD&>Nmv=PyhrmO1cFv-OC4%;Ti|{=X2WYM z(UBsZXk)d4lHOWNRs>h0FOs8=CkivLSp)XRU_&s;>Qm`QnGQ*9Y-s>(cRO#ryd5qn zWk+W}dLw>IdRZ(*5L~yrB;LnqDW%~e(gdxuFYNp5*pDjQs#6965@V>%6kYr+jMqnfc&-`^WM_rLmL}^QgIN)EV_Ht3I=YkOd+l`*R~dp9C?PAYKIhwQ`4| zrt3G;9`#&M8i+a#kvdIai;F1s2OMSQ=0GBIPJR085+so#@g4xs1jDGAP5!(kf-Y^_ zgnFu7=^gpqSqytI!oW8>5TZN!(e+gWr@F$rC2fK-C z$3L)FGzyE5xQLRX^N9?y-dS{q3K2_I1sfz&k=Ma~qo>J$%OwdHj>$LEf;A{j)9Rn+ zN{l24|0Cm9}Td{QGx6K@MWt$kxF)-ePOowQ_tCgXN>2;$zEXD!jaU9S3&=R*XN^AIG-5@^#Xn^oq}-}*OZyGG>YhVt#wkgq0l-jMPDtQYKZkQ zZ?Vfwhk_j70fI?;Z2B?7WnT?EwalfkaPe0qHKX-x%xRglWdIkv5&_^|RB?~Oa{v(M zT3^43fvmg&$r(HJLE9#7<_6PNi5?ATidhjVbg-*g?L_nbZ<+ zAIUUpCnAMusFq}`$n@PEi(rL0A}hm%71oX#Z3}n4lN}j?*QiPEN>=;T91-(9$3=}* z{1CZ9(p;YgF3pSpM;xXP){Iq{yU6MIRBRCni{-0)bvzRb4SYXa5%xR}AJ1skJ`vE0 zIKodK0MHXRSs`7rLn0Brp!#%;Zz@qb!z?0;MO;|Euu27xsMR`3W{`8o!xVWN$uQGo zq#tHhRq(?!So3&AFtT4-kT9bcp^{428C;&UAJXPpunPe!4;8A8Qr)t zxs=PGBqdyU9~bBNzM91Wt!lMg#^kg1O*1obH4PDn{dSL^d z=(gLgp!Ttit-i3w9^w}fR05uAHoJ%OHShbnY;)lLzzna-_paPyv2DHK`uE>+eKdM!~^|NmCV|zHngyO!`p8k4+7{g=@Zb;nibgX5)xg^ zz639AF_>QA0%w{`^&elkV$QwXvMxuyv0n_(q-pQpSy|HPkC?Q`PaUz}l~!ff`aBhK z8yk2Mf+C`*jq2qs8JOhCi4ROmC1urpY-r1GNof^VhTJFBZ&=_oU#DglX&;iMYBcJ` zxPEP=5APh1m-#f2Axkw`QSPaLA_$1Z09jL^p3~d+&wKCN%{Swk!>lK?+JFxY)(#FA zd;2vV0)=jzckc|IBT5Fu=eAR>sDB`zoRG$$#$FCrBMx|$_@(7PzQ|nVK$2<>E?!30 zyl>9^M98m}IE+}Eo8SEd<_pwmbdkrz?%40m-i^wYNk_XkfrnzRv~4;FHk3iW0)JpO zn=Y(Vkyy}tyrt0(3QRfyxv!oq2sV2@0O1dX4th_j+b9;!z zI0tHlZGuvTn#pOUJNETXRP*ljr}@(#hA67)By*jvZ?Yr{xi6R2WbPk}f=vXh*omUq ziDFraqJOMjL^matM0p_(^Yl#>AWwkLG=w4oKvr5J3aM6PDLa82Nv@eBB!$Vr&1^lG zIdU;nfhy=DR`1U-Vr} zFle*2e9QsF()i92djuGh(FPBk?h6{vwGg%_k4C}^7eDkaWk4c&j`1A2C7ai@J&3&} zh&?)KeO3ye9a0wNxO#a+aOR)S0bfba(O|sD^B+G709JchM_*Kb0YVjvd5`p%D{94G zH&@%9iDU}2iI%h@5RecMGU6g?q0o9>$ojiU9+J4v>AZj*adGYTc^i8Tp+9Z)pXR#$C5wug zCXAMEtlHC_w3^Wk%QG3vw$dGmE4Ao`l_I>IkF=pKc&gSp>cN;gr#jFL~}I)D+|Ch zPz9J)C&RKdh;qiKqu$WGqwTnB6>j-C^kJXWo0A*fH=X>xhgDJIcGLSc?+HUuLxPB9 zeKd?HmrGd4N|9TDjZpuI0fj?`#+oW&lcUPZn!s9!B$Gz5Mhz(cKtL%zoKLDITef{=0uuoBdS5(Q}s97QpE;=fpYyr=uttRrf>ZBT_$s zRnr1{vB!&V7>c4e@~O^B6fnhsFBG`V1@QrZeOA&@M66FF7){SvIXoSO1mQYF4wu($ z{EacCny1m{gtZ?|CP7j5R_IOe^x*;$hUr`I^nD;Z_Uw_rpbrGNR#pWg@EHC*NBTli_1`vFL4C-bwWyuF8nw7OHQ?mOPzY72^IC^9X~>`Fm=)Y=#+K z-@ABzdsy5Q&NkigBpQGJK6HOQSJSaN*BLre(ouK>nJ$v~eW+}zqL=5M*_KWB(&zyM zUzZOe@WM~zSR*{OE-W>e zMk6w%{va_JK1znLU_Q#Z{AV`GDX~0fX-3_G6vnT+g#DkU`H)nu-?EklH5K3GVd;)l zF*975zLt^PGNTf0p$)FIJ66O94scar1-@}E?6b%4bqRJ2N6kbB1#Ew&qWJl=> zIKPWE%e+%MQJnF3-w=;^wXl983OcsA8JLGSgo+dd^w0IE^qhsP6zs9^llP0xK07)P zDv8%tt&Wsn+gv?S1nm$VLL57{7ay@qaNg%mLb)*h0k%vRGcHpr6}C2f0w(l1HPK9e znxEa)!Z~W0-=5r`@(NV%N7(xE#!DAO;4;_;`=^TX6v90xW#KhIGH1a5{2g^j2V+QL{e-+~FV~?yf!8V>Qv(i4?J8*GX6$vy+tnl}9Fb!pqrCC}it_WiLt3RaXuE&I4~K zYqdy`5H1|b10SJAfxaoMEy1qMNXFEVpUPjmY8(F}l@%*oQ1wdxP#Q7ULDWQnb>-o>}cB|{*kXVkW~7LSx^{@x9~T@Mf>_IkWw{i~|@zhWT>)@r@%J*kEi`Ro6AZJH-5U&3v;NgjR{X()O8I z=!RX$MR8@%BQ<>{NnpVT`HbV+Bv4bd&Twa!{@irBjU-yjle3luLJtnp`Us15p$eg% z!_`6*V91>-9sqx9h58vjLxyE#Lc@FHX-UgERVy*6He-aCy_}#`t#ILlB{_a!I&YN` 
zj_s||cs6U?sOqgs5~q?$);J=VOtWe0Zk;!xKj}U$gQ;F6VUdH7z4o}Fc^mW1d`I5m zjEuGnOT>Vk(@g9Mj_{|?x7X9(=vm))sI)iiIrn^D(91U_#IV{Yg}UCeWoJC=mzW;u zu@`%A-Cp{ADcO$y^H%#L>k2nd_qXs*E#I%69EyMeTK>jITJnr*t14)ih3*V{v0vss z@~}2bfqKT`USKX(?hHqI|Hwq0q!Ib8JnI_Z|DjnkzO!lENhVN)Ng;s=>%ogsi?47I zELT#?&2u*AG{RgxVlex>QAhQH*nRhW=u7;1m=7_B54o~}=as1K0D4f?q}!t}RibE~;>uDRT36ASqYT8e5LzMk0NHY{V!Vf}HZw%dU3!Q3P5%`e1EN zX?e=d7VjK{lgEpN;qt%dxu!tEmeN-bok>NQi|fyqwm>M~i)dz_wN^}5To~oRK=T*K zi2i5{+WRENea1fiWQJ_bF@!L|V+_-4CY_luhb*E5e{95o8Fb9)UWB;mO!>i>COKF+ z-b8=i3JwM8o3L6?PAszRTp;#cwTuEMX!nuVjYZ{(M#+`{!v0PcSHnUDF)>$Mm=RPG zLspYfr^PS|Z}49QA7>g=C^5>=rz7TwsfaaiLosno#$>EwT3sJbE0+>Sl*=lpIAg_j z>PizzYs$WYkQWozkSw9-hnXXI%3%CMqSW8h>-42+l~Pew#>K>D!ge+DgXoWWNgP;- zl*;Jxz<8ZH3AHN2Y9ztTk1Djws+5+Te}BztRPhsvyMJe;P*n zSPCI3Eh<4b7UkwtR8(3aDg;?1sL@d>YrBB>Kls#cI;>@(gUEhAnfrq(iFEevXjbv0 zVqh1xh=#TfIq|?Yo7)}pi@MP@)Mve=R$-Ezc#=xdDyGHqHP3_af7J%HxE~UG`r;m^ z6rTbVbbeJE6cPcS&B$g4ya1XpuZH8)>ko00pOHL{F7X2CfA6NS#7W@)CeOy7A)z|~ z$jc1mGp>-@$fe1&vY=y;ObQ|?J_Qyn7=mF4{qM^vzmu6LB;}lPZXira*(o)oE3gX+ zyJ1P}rR!)x^AW7?-oJy=;gk_-+ATDVf1i;!XCflRZFK?$A==ZjMNCR<88h{RqR{bU z2D8BSxstgW*S|U-Xa{I+ZTg-bT*#EB%H(8Gf>w>}+_ICWq|2PDMT~JVsXy8_2p6+X zBGw16A+hMFiTKOzOoh7@K`V8`v65tcp6mdkP5YH?=Q`}C6OO*@H#axl`|R0EGwoRb z7_zalej5F>eii%qa?$T~5jji@;=J{Q$>25*=j!}(*LR$*-vjhJA1=G0U!MKmE*=l1 zgjy}0{<2llThedqJ86ySL#=Vj%Ly|{1cU?wCZ({PzzG8)1%n2+S%I5M=FCJJ@?E^V zB@p@$VVH_tsqe+6N&C6~?dQG>D)TfK>xeTTLseiB*z;$&+)l)2l?tY^TSvFPeRW&0 zH2AVc^CBAqAf`~9J5Rqk9!TkoRlI)Maz5Y_kEJB3mOCeboNTZP39Ly{?QO(*)9;(} zMebL8Y{i?77tqF7T=xt&BuJt=`R;7DaZ{MD>BwV%Z5xX}t1s@UmX`F2GEbWU)rpeRccP9j2?`>tzGwlYW+ngRJUn=@7u$=4&*6 zXFl?Fi2KK`c7#|$Y-Scl*GCtbdw!5U@x#mPH_d852PnN_#?Q|^3=nki6a5uT66%aO zS)h7o3Fa(YDOBQUH<(JE-c(%tUIhgaLo01RIc1(SM7c?au7s987X$~1kVKg@CQg%H zrcl?InZt#kG6fSVTT4O+8lspAAJhUJ;NGixI($`3Q?>xPaII0MiO z#1ESNylBBdI3mNOd6o42Egr*XjVw9*iKs=Ef8H3t$aaN8W~R*wiBwqaE_h)d?-zuw z4lf3X@^P2wX&R{7txW_%kC;=~^!C(1;T<{k2j zylp!EQMBU>3u$@hgkXfJPgv%}g2Ce)7V*>+V?4I35)rhK$zJ6jnNI5q0qm@W${*wW zi9n064dO)X@D$KHTHu!hqEbX6tuT`TF5%AMqn<2?V$Q}W@0zwmAI5@k*V2e!KcS%S z#S0&#E#oqUBZNi`oA^7}^_8+CnE`QFypPp$YiL0d(UTFMXmYK@6-@BR++t<<)wj5T zeBxcHLKo2_Z+(b}p>ciwuo;$<5o1{qq)0k#hANG@H3gJ&1k%lq`D7$3DhyfzYhyfY z>=TL@t@vc@gKjg@RwXNFiK8icQV2vP&|bKS907mcu>Pgcqh~L~U@{qf`5hZ}tC$!j z==iHRZ5aa^QXy>S2;JrAGTm(%NXpSX3?wIRgkFumPYEmQj}PmmZF;CT$9bP6E%^5M zGn?bo-qC9XZ{j4D|6J>xNki62Wn1iD^0|9g<@aGyedB85X`$)Mh@3(1?Jb9 zT^#|BlhNLz=|5YYiQle>h4LFc*7txms?K#m=kNJ3`$t*`q!O8MAx zQyQ~?trf*Ih@VnR3Ec2SfJ8|V6AQVO!zGJkqo$z078I#M|jt=F%HRc%`tJIrl{ zd-v=6eN|j4d=p|pLEnjlFd>;Y{@D4tE+Scl#nc)6@nk#G!VfWEG{-(6iD$vCI(w0y zqmX(EHAc|jPk_Ki8oC$HObR+?z=R%AKeuq;yM{Vnx*Bjx^+|o^NZfL`uP+LpJC={; zf+5A#%`=&pKRwv*`|K=nK(uQO^$mjU-6b`eA2HbRY(SVL)lrY2g^|eVWW0|W#Sts2 zK2oCc*xPbe4jmGF*9BZ+HTNG_RgG=?p99RM@)ffBf=X~o2+iq$T7}ubv zKS!IfbHG3LqE|e2B{!xP6Ec`Q&Rv-!^OVkme*Q# z)3@RqZ&{xe+Md+Z5>pwN7HfY10pk|_y?lkQhv%>sahPqu#z}QGxVq*K&hU~G1a`#_ zLAA+1_WYqD{GM2%>}wa&vI`kJHRAx40OT6sB6#Epe?1%$6rPCcc(76`dPI2Hkz4E> zFN$_Pmi*WL9i{IjqU8{QF!-}`eQ=xyhnPqVI8ps{#+o;Xo+E5XYp~I*w{rq?MAR1T z=}gi&Tmcha$g|o{dl+y?DtW;L=rOa>MPHTQsfm+_4d%({Z2gjXj%4DZrD(`ew8vCC z%*d~Y#g?NWC;sAwprxa;eZS&(@OPj1@6`jrp_iAJ(4A1@;c|mJI^V}szi+yZ9d{Ke z&%@+#myJywF=#%AVweLX2y>z!hmJdq^*{GbH+eW6hta7!89TseSKyQ!SzpRr4T0iAH=y%8$Pm)PeHbGqiru z^w*gZV1#iMv$9GgQ2-fP!2eoau}~Tc(IrB1tsP|1(t*iYutOKuUEBA2`L}3)%=!4W zsDy>WfdkY#^bo=o0_GT`j+gy<-^z0!jj!-V^j{=R;37f80?)b%sTzZUkdiacY$uBI zgK`N8Nwmo*8TG|OXP=!Az%!Fv6P{e|6ggCz0*>vZ~$ zb*@StX8^1-GkVJmBpL`oO+i5Kf&)bj1B!5gI)E_!x&$6gY93CTi4-XXbCiTksbK@% z4rCWQu&jJ|LbNZVI#Rr4vs?=?4*75VVIx?4O*}}RT9jO+{YDH0k~>LaY5u_=QjW<0 
zMX}zer|f6d(jh2XilEhmWb6}|2H}J|@=**~(`s{wR=)ZI(I7-ZWtf0G#5Fo$+bCp4 zHWujyU+@qtIY|V}ryOZ15v>sSq(Kr`T=MFW-XkN*(^2aX`f|4<#NKqOc#>pMb;q!~ zZ%7R)S)V_>?0)=1G0D4rTg3gf=xFq^rG6b_=5^Y0KyzaM<~C!n)*9 zNT>UoR%-QcN{jb@nG*H6J9kSpF2--pOl&qQdot!8?mTAdeLR`J2U$DI7AUZ8-JNUe+KO? z-N7-qfraKr85C#Cto)sY4DW#QMu)l6xsl4k0q%<_Wp zIzQ=ex`5_}r&ls)kZn8~35YZ;n60YFrL~ngR2{6LwBiX)fwoh&cl_pzX~T)wyo{j? zs~52oSgBTBROgsU-h?N%M%v`j00SA!`qfS)k@L7LR6Mq~5g8#eMH-#<>n4qaT<9!Rl&an94P%5cp^8z{AZi_di|mE^ICP$U6T38S<}Yk z-=FvEmvFup5595kUMF26#7|RU&1a1(xTs<8V*wDm$$k@H_PvVo7}H(vb$q*bbbRz# zKXUzYW@|WBEcAM$8rxJ|-Oay#g(`R%fOc6E=M(+iq19S$EC5&_@lW0T*&^j2)uVt_ko(8p4}Y(Kf5sZQ6(0iq40INzS|;Pp*=!)H=v zxRj6-ED)_Y{rTuB3QDku+C|X9)U$1fc(34uTIvl^^S`uID$@bM+M{+3V$y;{066gJ z*Y5%QUz7j_mO)gwO8eMlkSVaXio^bCcLY%E;(F)*m&0$fOY_YJBG`JRJ=(V~0!6x5 z3BwemY2QSCJHh>fctk@PE(CZ4r1P}PROfIv0Ns&ji0&X5K<3&LaZ`LD4FyXq3XV^wEVOBh$cZm^ep_KnGWZj$Lj8 zq(vLnTOzN5XcWSdLexJ23IRk2g@_e|%s@At-*ioR?R(!Ib?R2-MflTW(&cx| z#%~nCmn}3^2XO5pa1e&S{U&}RZ(8>}fpy3|(AR>0hCfPuEsGNr5H&`E!mF;Uc}K~; zk%}!k6`c;aG$FKy*!n;x0@+#OF)30XX#_o96g3E);TW}rURk;*AEBQ`!uP@JWO7CD z3{lS>m_SzSg1T}NSQ9Qxs;mAr+pa+owJY%0Qi(QLB^<@QL&wtNg4}Q7V4q3vcQQtT zZqr(?t$SCv{nc*|!}Z=Ny@CdtCNZ}icbTlMD)Oo zf)SpsZiT6~r8uI66$%?4Ew&KQj%JK3f^3c`(S;Z_nj(T)b`_)=OvKxS?{0p5{3i(p zqz^rBJ6oy zpdd8JlzenZD|$a1e|EQqeCHo>O7kl);>-KHoDt_5X=^0D0Vqr)+ahbp6)TvD7CcfRTb}j3zJL^CNu3eIx}k-JWgwdvrYd?3Z;-}5=^Yl!ouWSxnTGB=(5(syh=Pq}N{oHwzx$806wtiQs zz)Y>6>U$XG8_s{x?lN&n6WRQ9TpqEL7Dnuw)?-wCz1oK+G%jawjnVnC$LzA^w)i1; zapbRr*3QnZcdqnrD<%^}EQ?}EgCa3LzBsz+>Ij_AwFq(M0S7fTLT~uw*S@)B z`$*+Y3$8;|v-wYL`@01jT$fxFd!>EP|MhkP@*J%7XFPpOEmK^k&k!XIgGvE$Lp)Tn zHXzdKK_xRhhxhuVU zR}z8XHxv7B32JN2WwxRJN2+d%C#Fj!*_;E|82ujY3rn@C0CiVd8NV^q^{q)I;Xq)7;toPG(h) zO=305?9&PRgqj#~xT+zd$LlZA58JC&_Mo&)?$PV1C>qD=MrcI&_cLDc}B-r2TF}^RZ!)!cj<+CjM;ktT|^I)uWu+GhW|b^;ynQ`9lY6zkSQtoQ-+H`?({(I^&67mE{ZTA!J#? 
zX{=-tN-Im7po8Mi6A@us46?4pdG6X!q3`u-1DYs_|zi zm5ez%5T$6lhhPn~V;ZaDL+ee&7$M@p;%cQ0Wr)cMwfLRMf7+1Cza!FCBh$**$4V$= zD&pju3F~u~!lMN4^!_M_p9`Xzs8&$u2kFKxdsnQP-?4?t-0_u6=r=$q_Kf7Q6o3wI8cjZHF*>lm1Y4qVi`AW%k}eYCp4g+(9z@-8*^-U{4}< z=T(gNE;;ei92=46A*n;#StF;2^4~p-e{*I700bo@8$WFL33WY{cZ~v*6sMDo|Hd0U zwvY^mZ4GYH6&Ozdx%nnNhlZB2>)y6&uC=(f<$)FlakMj=h^inu4Y&Rb5oywt!i3yKt1dJ0n0@~T?s{Q|IXuLlgR)&oH2gBBP zLZ+wh@013>8RWR`B{N^4To znSQpd*00zbJdSD_oLYQYmtf`2FeTM3>8%Qt!aF7v1vM@SdV3QAQJmZ~P%Cg0anLk~M9k#PC zd|BIUXLTJ<@oE=7xhmZ)K827kqQc0}&siJRNrMuEr|1dvO-qLoe66h|;!Dc9)NVhb zW_)?dO|AAjZX*B|Di<6O#G)~HM=p*NN;+HkU>|JGN>x&`pFC_)=aE+ZaueDr5o*)x zKP8T>apdXg+@c6_epQVz%OAWR^9x*&h%4O^p$ZF2BOo$k`yieNf1>CzE(|2L*Z2Jz zRxq!7?ThJhe}6q9-hN(bA_*4eqIlgS5&>gKgtEj6g92$tN*&L5`mq{5y!0l&MKUwJ zk{h0T1H%t6BK?aS9k%yWg@kVeLa=-s4X&FWhQ`rg?uKjfbC;$WekuQ4nx3*NPo^=J z$~1@ImW}!_@GK*teyQ$s`hmQBs;odCR>_`69~5KFVvpzdZtq_|IGf}b^M!^Ac zI?5ylWt0yLb1hz#BcE|jFeS;HR(XDS{FzTLw&N~|!KM#X#;t+jL>$Jj!AwzK)6J!^ zQ?y`XWA7$-$jlEWLpYvO50d^7+Jyjfmm@=Nr;L_v8#JZ{Iu4-F2}##QpGDBlPtubqh~3`sSy@(gdSs z^z*A)YJ7QBXZkZhUf z>f#t+clHZhP|xv2e_hrcgW%1#pHqUQ88@Nt9X82{A3<+VhHodFW1fEt!*at{o4{GU zv)9h&^PLYD9zWBK{ho^bMo$bLS~>gjXtrYDP{5v+d=I@tF4D1Jf!jT-o9)77;8gz<(m^|9Or6eg$Web|?N9^<6D} zLjU?npzE(zPfe3~w3bxv6?T+t$XY>_lcmT1-HuCviu$lRZe_5+V^HV%61bH@y1d+a zq#a%Z8cIK#01L;ED!7}AY zn=dN0j)wjNszEVch1m`FfCU@Q4h*e`jp#~ zf|v;pQwsybhk|gjS7d;HAY8m9tCYk@QB;CTj&{9Kr%rG9y8^QwN@4FlJDqB}II@>(-*-H*j~CKUo7?Pm{n+lb?J=K3Y0Y%h8%HVjJ8-4KP_#*R8jOdqyi*mjO;=-#s@c5r#6u|R-SP1YZo#6iS~ zVfCX%T%tP`Rf9HN=z=?q>M);?bNF{aq*{m0tj*|IyRkpda=hA93H)Sh=oGfO-V*P1 zAUbFpdvKool4wgl_#S9aF@@B8Pb07Bz<$kE$ijQxY%fGh!KqG-fKO4|xbLWsbj%UR zHBlY1I?9(_C>&~6b!LH!sx=;~@(YUdsnQ|TelVYw49l?kJ|o5pmGS1_^BJ&?&8^+> z{Ni%rm9TPYVUx4zbRTxnp8OZf)_5EEANjmCwIiBlS+BZSC355Kb>rZ~@Qu&#^;lCg zwCkbc;)6ZJ0+3zmgBSYUlRTu1dG&Ww2sNw(A$jm-*K5<;;9o!sLK6gai=ln!e_nqO z{AV9z(%93)#*4!ECmht@sc+YYS(un^?SM`d0X+VL^mc6cTApnDw~zm^H2aCR*J)?2 zZD{9z@>T1N3+@kANp`KW#;rsPTXU})dFetc1$%P9fMog%_?Xwz5=)7lve^YaVT)XFEdk9|o|g@gNsaCV3QH z(!$&zbJZn%S$wfKIIkhNAas(F5)mVQlr$N73p^w{7*gs?9*goW8nHYM3stLI#SBqZ zdXxhqS2=+EBy3K{ob%_`3a5fXNcf=fZ2}ls7L7!DiG{|bB++e(!wSA=axoW`-*XXW zX27_q2{w;#S=kDWwBEeDcc#~hU-TK0fYCRUlK~UZr!|{pQ+Z?^rABlf49HOm!N#;v zAO;c^bAP-&R|M9l_duR}-natKpy5D^M%(Qox*yYqvj5~)D=#B!VcpORd-3)br>Gqi*HQk9H+G`Kiw&m?C7AX{8KEA= zosY{i?a-yoWi>(N`21fGHk$StGij3_(jM=n@VrimAw>8*IFZ;t?pQt3Ds7CIXxzz} zP+CcTKM~;BQ4*a}N~o;4ZOGpvC=A(~VI}%O>65 z_;ZzA$8*fx;VvnaHq-WvX|_JZqSE+hRL^5U-X?!$2`1z0oCPCIwqS?XU79@({J(!@ zChrr3{$OSVcROL^DpXMQj6Ci?liA*Za#`@?_Hsj5Rg|mv=afnYYC#zrmQ866Kc?=S zS(Y>{qWGr+)eujau!(M9>A9QsbuN6-6hvF@F3#?F4${u{1X@`(zdl07O`_pqhK_jl&sO2? 
z;AzUUEb-i!uen%#d-FLE>~u5oP~8z?6WZ+ZeS%zni;KH5{rPKPEUP_8$JOJ^YE_d2mS^E~d)VO+I)1M^Ly&|{*&w+G-&+@>$2xH7yAVHBz>7jE* zT+%e8B=~FQ|H{0!ag#x7sGBN_O|r(U$z97;c`e;Jgx~{<*Ho4 zU%3F(d}+1fIJ|3H7?~A81I9mcHE(IrNHjGc|`x&x;ZoN-Bx@$rP0kxCno z3SdkL*DB~*6_KQ@%fQ7J?fAZX1c&W(@=k@oTp@8Re>Xl+VQ}gpM=xFgBPJc!A%i$g z8&$aN#7Uk6XUsbuBr3E|MK=ZanJ7&;{!xOu}OcrBLJU#{hdtLp-Z(N*xzB@oVGmhxoN%;m_>2m?ZG7n z>EwasOA#kTT2s+m1u?gHVffjTd!bvu!}RXT2%->1@B+_-Y5v}+#u8I9;P!o;>!Jf*#^i>xoaG^ zEk-jx!YOw7Ew>!QDJ8NHgt#-|#71aUXK3RGe$__rnDqnWA=Mjym+AtmuMP(;YH} z8&&jT%S$8zQ(f%aSs-QdC1*2*nLAK_|#=7wwRP(fkY<@gn8XzAlM(g6k zGKCHEDqiWT_T9wyPf&`>#pbp7T1X^Dj?IA~!=A$NPKF-=l)UXF1+%ff0i1icE4;-f zgx+3;Jxvc-cD6EEKAGF4n%03V#r3uiUDCPX=AsoC+c}aX3t4P9BjJ+2itrkLYIo1I zkOk)ek8Fsvt-shAH)FWp4ZrwH;lLqB-MztOxA|fFms6drI`8rPY&Hq}?%p8NJ9`nd zY;!puvG2)y@z_iLa>4&_Gg^GUE0xul81eM&RRPid|6}VdgW_DfW?|feOM(;JJ-8Fx zU4jQExVyVsf@^>TcX!u8f_rdxx9^@j?|bU`PMz8_zryU9nwqPxwR&~;(k8^_Z<*Yy z4oR;2v%SPN#OZfEV4fvdH@=*R*Kr1AP1uwnP&0#d2?5=-a; z>p8>k%I-bN@AzAQ=7|yuL#?)+e_TH!dAlFf^o@hi2Pn>3x3e{uL4oY#L;xnf$yfaW zhIzgtRVDvhxHNLMFaSO-pToM=Xr{>D20}b83QxIoD3*$L)FjumSD7r*QuuqmLMi`C zLIa{GsR^ALUo^uVL0gWO=ILS#D$YU`Et8=cGUN`^`oDHa;uZMy0F69l+LOn(+hGfy7uskroBy@;OL&+o#9 zZDE-F96+Ehp=+%*!dFQ3nUrOE$`m2SpGM|=rQ$RKsG?-{szEfBRzw4*9Bz?~8<`DI zmuh-@<)zgib@Jj~s-Zxal}DoV^r4HJ<6b&PqodEEFyUx1(oFj|RfvepMM>$vuKqF3 z5+jfrweq-;a(JeSWyG0?4J?T~M8r9832gpk5e9=}Er)9;8={7b!R?HX`6)7K*90kK zLLxv{BDdC%(pk2jl8u{-Oj(pCnXP1Kqpa(23+S4DYn3l#q@kyvz2n{u*43n1t@czqG$_6Takwym`^O#Mwmn1n+!4 ze&6B#)~)?TAGRsn0{dC{7DF4y_p=?M$62?_o>8{%yP-o#kl6=UrI5K2SL8hGk4i9D zNxHVn##1{&t53eDW=;gLGfIed?H2})K2D(2J-=@4(Q7xMfolO0v8I?OfpMFFVlzLG zYTT>9Zh-$o1OYeii(0I0YnqXdS%EgRI!g^Y3$8SrmdZ&9e5@8Cu8J&=mXOWwpT9*H z0ncUMeBuva#y-t0KLd&&@!Vy7fLlen6BtM4!$dD{gCHNP8IBk@wCYiMqM&IqN zk8+-DZ=Y$LjmgVpKI^r%wY3>R%)Q*LgYG)++EWH&B!cz;S|vkguG#?@x+_A@-dzm} zy*hxK8`o7}=y4HO>G6E|z0+HD6w-ZhRKJ0W;n41N*X|SH-T$c*VgTPzfn*oY2tPRy z&f%v4YNhUG=2Z+)Kv_vKULKYSe8t~Vonj?Pe3DzwIx?W)Q6~?a!?%7!L?BjlzItZg zl$j?`d_U_*@j7h|16xH10W+luimIyWiuctx{~iyBj5}<7wOv1s+2=r(AJ9NMx9VBd z<7?`<;DEf=u0Ej%{U