v0.9.1 - Gymnasium and Mujoco #324

Merged
merged 28 commits on Mar 17, 2024
Changes from all commits
Commits
a56ccc8
change opencv dependency to headless and upgrade to version 4 (#275)
cpnota Oct 3, 2022
9b1e2f9
Merge branch 'master' into develop
cpnota Nov 29, 2023
2052d16
switch to gymnasium and update imports
cpnota Nov 29, 2023
8e88660
update state and state test
cpnota Nov 29, 2023
abeece2
Feature/gymnasium (#278)
cpnota Dec 6, 2023
ca1ba1d
merge develop
cpnota Dec 7, 2023
c390592
Feature/mujoco (#279)
cpnota Dec 8, 2023
b978310
Refactor/scripts-folder (#286)
cpnota Jan 26, 2024
3d4c258
add __call__ method to Builder API and unit tests (#287)
cpnota Jan 26, 2024
07cd33c
Feature/episode length (#289)
cpnota Feb 11, 2024
851a520
add entropy_backups hyperparameter to SAC (#296)
cpnota Feb 11, 2024
5ecb76c
Refactor/formatting (#299)
cpnota Feb 11, 2024
73ac02a
Fix key error warnings (#300)
cpnota Feb 25, 2024
1a2265c
finish docstring for nn aggregation (#301)
cpnota Feb 25, 2024
2ccda12
Bugfix/publish workflow (#303)
cpnota Feb 25, 2024
dbcf5ed
Add save_freq argument and refactor scripts (#305)
cpnota Feb 25, 2024
1119aad
Hyperparameter Logging (#308)
cpnota Feb 27, 2024
dc295ab
remove env name from hparams tag (#309)
cpnota Feb 27, 2024
9d06482
SAC/DDPG tweaks (#312)
cpnota Mar 2, 2024
0871882
fix duplicate env handling (#314)
cpnota Mar 2, 2024
c2d02ed
Upgrade dependencies (#315)
cpnota Mar 4, 2024
a12a828
fix plotter and log final summary at end of training (#320)
cpnota Mar 5, 2024
dec247d
add swig setup dependency and remove unrar/swig from github scripts (…
cpnota Mar 7, 2024
379b72a
Feature/benchmarks (#317)
cpnota Mar 8, 2024
67ca98e
bump read the docs python version number
cpnota Mar 16, 2024
dbb0d96
update readthedocs.yml
cpnota Mar 16, 2024
ac4c444
Update documentation (#323)
cpnota Mar 17, 2024
f8073e5
update version number to 0.9.1
cpnota Mar 17, 2024
7 changes: 3 additions & 4 deletions .github/workflows/python-package.yml
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9]
python-version: [3.8, 3.11]

steps:
- uses: actions/checkout@v2
@@ -25,9 +25,8 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
sudo apt-get install swig
sudo apt-get install unrar
pip install torch~=1.11 --extra-index-url https://download.pytorch.org/whl/cpu
python -m pip install --upgrade pip
pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu
make install
- name: Lint code
run: |
35 changes: 18 additions & 17 deletions .github/workflows/python-publish.yml
@@ -1,33 +1,34 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

name: Upload Python Package

on:
release:
types: [created]
types: [published]

permissions:
contents: read

jobs:
deploy:

runs-on: ubuntu-latest

environment: deployment

environment: publish
permissions:
id-token: write
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: '3.x'
python-version: 3.11
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python setup.py sdist bdist_wheel
twine upload dist/*
pip install torch~=2.0 --extra-index-url https://download.pytorch.org/whl/cpu
pip install setuptools wheel
make install
- name: Build package
run: make build
- name: Publish package
uses: pypa/gh-action-pypi-publish@release/v1
24 changes: 7 additions & 17 deletions .readthedocs.yml
@@ -1,26 +1,16 @@
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/source/conf.py

# Build documentation with MkDocs
#mkdocs:
# configuration: mkdocs.yml
build:
os: "ubuntu-22.04"
tools:
python: "3.11"

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
install:
- method: pip
path: .
extra_requirements:
- docs

sphinx:
configuration: docs/source/conf.py
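
For context, the `extra_requirements: [docs]` entry in the new Read the Docs config corresponds to a pip "extra" declared in the project's packaging metadata, so the docs build runs the equivalent of `pip install .[docs]`. The snippet below is a hypothetical sketch of how such an extra is typically declared in a setup.py; the package names are illustrative assumptions, not taken from this PR.

```python
# Hypothetical sketch only: a "docs" extra in setup.py so that
# `pip install .[docs]` (what Read the Docs runs per the config above)
# pulls in the documentation toolchain. Package names are assumptions.
from setuptools import find_packages, setup

setup(
    name="autonomous-learning-library",
    packages=find_packages(),
    extras_require={
        "docs": [
            "sphinx",            # assumed: builds docs/source/conf.py
            "sphinx-rtd-theme",  # assumed theme
        ],
    },
)
```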
7 changes: 5 additions & 2 deletions Makefile
@@ -11,10 +11,13 @@ integration-test:
python -m unittest discover -s integration -p "*test.py"

lint:
flake8 --ignore "E501,E731,E74,E402,F401,W503,E128" all
black --check all benchmarks examples integration setup.py
isort --profile black --check all benchmarks examples integration setup.py
flake8 --select "F401" all benchmarks examples integration setup.py

format:
autopep8 --in-place --aggressive --aggressive --ignore "E501,E731,E74,E402,F401,W503,E128" -r all
black all benchmarks examples integration setup.py
isort --profile black all benchmarks examples integration setup.py

tensorboard:
tensorboard --logdir runs
7 changes: 4 additions & 3 deletions README.md
@@ -21,10 +21,11 @@ Additionally, we provide an [example project](https://github.com/cpnota/all-examples

## High-Quality Reference Implementations

The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms which can be adapted to new problems and environments, and `all.presets` which provides specific instansiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and PyBullet robotics simulations. Some benchmark results showing results on-par with published results can be found below:
The `autonomous-learning-library` separates reinforcement learning agents into two modules: `all.agents`, which provides flexible, high-level implementations of many common algorithms which can be adapted to new problems and environments, and `all.presets` which provides specific instansiations of these agents tuned for particular sets of environments, including Atari games, classic control tasks, and MuJoCo/Pybullet robotics simulations. Some benchmark results showing results on-par with published results can be found below:

![atari40](benchmarks/atari40.png)
![pybullet](benchmarks/pybullet.png)
![atari40](benchmarks/atari_40m.png)
![atari40](benchmarks/mujoco_v4.png)
![pybullet](benchmarks/pybullet_v0.png)

As of today, `all` contains implementations of the following deep RL algorithms:

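
To make the `all.agents` / `all.presets` split described in the README excerpt concrete, here is an illustrative sketch of the pattern, not the library's actual API: the agent class is generic, while the preset function fixes hyperparameters for one domain. The names `GenericDQN` and `atari_dqn_preset` are invented for this example.

```python
# Illustrative sketch of the all.agents / all.presets split described above.
# GenericDQN and atari_dqn_preset are invented names, not the library's API.
from dataclasses import dataclass


@dataclass
class GenericDQN:
    """Algorithm implementation: environment-agnostic (the all.agents role)."""

    learning_rate: float
    discount_factor: float
    exploration: float

    def act(self, observation):
        # placeholder action selection
        return 0


def atari_dqn_preset():
    """Preset: binds the generic agent to hyperparameters tuned for one
    domain (the all.presets role)."""
    return GenericDQN(learning_rate=1e-4, discount_factor=0.99, exploration=0.02)


agent = atari_dqn_preset()
print(agent.act(observation=None))
```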
34 changes: 12 additions & 22 deletions all/__init__.py
@@ -1,26 +1,16 @@
import all.agents
import all.approximation
import all.core
import all.environments
import all.logging
import all.memory
import all.nn
import all.optim
import all.policies
import all.presets
from all.core import State, StateArray

__all__ = [
'agents',
'approximation',
'core',
'environments',
'logging',
'memory',
'nn',
'optim',
'policies',
'presets',
'State',
'StateArray'
"agents",
"approximation",
"core",
"environments",
"logging",
"memory",
"nn",
"optim",
"policies",
"presets",
"State",
"StateArray",
]
1 change: 0 additions & 1 deletion all/agents/__init__.py
@@ -15,7 +15,6 @@
from .vqn import VQN, VQNTestAgent
from .vsarsa import VSarsa, VSarsaTestAgent


__all__ = [
# Agent interfaces
"Agent",
1 change: 1 addition & 0 deletions all/agents/_agent.py
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod

from all.optim import Schedulable


1 change: 1 addition & 0 deletions all/agents/_multiagent.py
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod

from all.optim import Schedulable


1 change: 1 addition & 0 deletions all/agents/_parallel_agent.py
@@ -1,4 +1,5 @@
from abc import ABC, abstractmethod

from all.optim import Schedulable


31 changes: 17 additions & 14 deletions all/agents/a2c.py
@@ -1,7 +1,8 @@
import torch
from torch.nn.functional import mse_loss

from all.logging import DummyLogger
from all.memory import NStepAdvantageBuffer

from ._agent import Agent
from ._parallel_agent import ParallelAgent

@@ -28,15 +29,15 @@ class A2C(ParallelAgent):
"""

def __init__(
self,
features,
v,
policy,
discount_factor=0.99,
entropy_loss_scaling=0.01,
n_envs=None,
n_steps=4,
logger=DummyLogger()
self,
features,
v,
policy,
discount_factor=0.99,
entropy_loss_scaling=0.01,
n_envs=None,
n_steps=4,
logger=DummyLogger(),
):
if n_envs is None:
raise RuntimeError("Must specify n_envs.")
@@ -80,7 +81,9 @@ def _train(self, next_states):
value_loss = mse_loss(values, targets)
policy_gradient_loss = -(distribution.log_prob(actions) * advantages).mean()
entropy_loss = -distribution.entropy().mean()
policy_loss = policy_gradient_loss + self.entropy_loss_scaling * entropy_loss
policy_loss = (
policy_gradient_loss + self.entropy_loss_scaling * entropy_loss
)
loss = value_loss + policy_loss

# backward pass
@@ -90,16 +93,16 @@ def _train(self, next_states):
self.features.step()

# record metrics
self.logger.add_info('entropy', -entropy_loss)
self.logger.add_info('normalized_value_error', value_loss / targets.var())
self.logger.add_info("entropy", -entropy_loss)
self.logger.add_info("normalized_value_error", value_loss / targets.var())

def _make_buffer(self):
return NStepAdvantageBuffer(
self.v,
self.features,
self.n_steps,
self.n_envs,
discount_factor=self.discount_factor
discount_factor=self.discount_factor,
)


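
The reformatted `_train` body above computes the A2C loss in a handful of lines. The standalone sketch below reproduces that arithmetic with placeholder tensors so the pieces can be inspected in isolation; the random data and the advantage estimate are assumptions, while the loss formulas match the diff.

```python
# Standalone sketch of the A2C loss shown in _train above. The tensors are
# random placeholders (in the library they come from the feature network,
# value head, policy head, and NStepAdvantageBuffer).
import torch
from torch.nn.functional import mse_loss

entropy_loss_scaling = 0.01

values = torch.randn(8, requires_grad=True)    # predicted state values
targets = torch.randn(8)                       # n-step value targets
advantages = (targets - values).detach()       # assumed advantage estimate
logits = torch.randn(8, 4, requires_grad=True) # policy logits for 4 actions
distribution = torch.distributions.Categorical(logits=logits)
actions = distribution.sample()

# loss terms as written in the diff
value_loss = mse_loss(values, targets)
policy_gradient_loss = -(distribution.log_prob(actions) * advantages).mean()
entropy_loss = -distribution.entropy().mean()
policy_loss = policy_gradient_loss + entropy_loss_scaling * entropy_loss
loss = value_loss + policy_loss

loss.backward()  # gradients flow to both the value and policy parameters
```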
39 changes: 22 additions & 17 deletions all/agents/c51.py
@@ -1,6 +1,8 @@
import torch
import numpy as np
import torch

from all.logging import DummyLogger

from ._agent import Agent


@@ -26,16 +28,16 @@ class C51(Agent):
"""

def __init__(
self,
q_dist,
replay_buffer,
discount_factor=0.99,
eps=1e-5,
exploration=0.02,
minibatch_size=32,
replay_start_size=5000,
update_frequency=1,
logger=DummyLogger(),
self,
q_dist,
replay_buffer,
discount_factor=0.99,
eps=1e-5,
exploration=0.02,
minibatch_size=32,
replay_start_size=5000,
update_frequency=1,
logger=DummyLogger(),
):
# objects
self.q_dist = q_dist
@@ -81,7 +83,9 @@ def _best_actions(self, probs):
def _train(self):
if self._should_train():
# sample transitions from buffer
states, actions, rewards, next_states, weights = self.replay_buffer.sample(self.minibatch_size)
states, actions, rewards, next_states, weights = self.replay_buffer.sample(
self.minibatch_size
)
# forward pass
dist = self.q_dist(states, actions)
# compute target distribution
@@ -100,14 +104,15 @@ def _train(self):

def _should_train(self):
self._frames_seen += 1
return self._frames_seen > self.replay_start_size and self._frames_seen % self.update_frequency == 0
return (
self._frames_seen > self.replay_start_size
and self._frames_seen % self.update_frequency == 0
)

def _compute_target_dist(self, states, rewards):
actions = self._best_actions(self.q_dist.no_grad(states))
dist = self.q_dist.target(states, actions)
shifted_atoms = (
rewards.view((-1, 1)) + self.discount_factor * self.q_dist.atoms
)
shifted_atoms = rewards.view((-1, 1)) + self.discount_factor * self.q_dist.atoms
return self.q_dist.project(dist, shifted_atoms)

def _kl(self, dist, target_dist):
Expand All @@ -117,7 +122,7 @@ def _kl(self, dist, target_dist):


class C51TestAgent(Agent):
def __init__(self, q_dist, n_actions, exploration=0.):
def __init__(self, q_dist, n_actions, exploration=0.0):
self.q_dist = q_dist
self.n_actions = n_actions
self.exploration = exploration
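
The reformatted `_compute_target_dist` above shifts the fixed atom support by the observed rewards before projecting the target distribution back onto it, and `_should_train` gates updates on replay warm-up and update frequency. The sketch below reproduces those two pieces with an assumed 51-atom support; the projection step (`q_dist.project`) is not shown here.

```python
# Sketch of the Bellman-shifted support used by _compute_target_dist above.
# The support bounds and rewards are placeholder assumptions; the projection
# back onto the fixed atoms (q_dist.project) is not reproduced.
import torch

discount_factor = 0.99
atoms = torch.linspace(-10.0, 10.0, 51)   # assumed fixed value support
rewards = torch.tensor([1.0, 0.0, -1.0])  # one reward per transition

# r + gamma * z_i, one row of shifted atoms per transition (shape: [3, 51])
shifted_atoms = rewards.view((-1, 1)) + discount_factor * atoms


def should_train(frames_seen, replay_start_size=5000, update_frequency=1):
    # mirrors the reformatted _should_train predicate in the diff
    return (
        frames_seen > replay_start_size
        and frames_seen % update_frequency == 0
    )


print(shifted_atoms.shape)  # torch.Size([3, 51])
print(should_train(5001))   # True
```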