
Adds native vector implementation #599


Merged
Changes from all commits
30 commits
2ade5ad
Vector ALE environment
pseudo-rnd-thoughts Mar 3, 2025
312f40a
Update to build successfully on Windows
pseudo-rnd-thoughts Mar 3, 2025
21de1b3
move externals into the project
pseudo-rnd-thoughts Mar 3, 2025
235d8ad
Update the python binding to return the numpy arrays
pseudo-rnd-thoughts Mar 4, 2025
07f0c7f
Update implementation
pseudo-rnd-thoughts Mar 25, 2025
ec056dc
Fix pre-commit
pseudo-rnd-thoughts Mar 25, 2025
7ae8bf1
Add documentation
pseudo-rnd-thoughts Mar 26, 2025
3356434
Update implementation
pseudo-rnd-thoughts Mar 27, 2025
d2c70dd
Update implementation
pseudo-rnd-thoughts Mar 27, 2025
002465f
Fix bugs
pseudo-rnd-thoughts Mar 30, 2025
4816839
Fix tests and windows min
pseudo-rnd-thoughts Mar 31, 2025
81f1053
Update license
pseudo-rnd-thoughts Mar 31, 2025
b10fd39
Fix testing
pseudo-rnd-thoughts Mar 31, 2025
d2badf9
Fix licenses
pseudo-rnd-thoughts Mar 31, 2025
4e571cf
Fix env-id ordering
pseudo-rnd-thoughts Apr 1, 2025
84471cc
Fix tests
pseudo-rnd-thoughts Apr 1, 2025
ffe55a4
Reduce opencv version and fix weird tabbing in `ale_vector_python_int…
pseudo-rnd-thoughts Apr 1, 2025
98c59ef
Remove opencv dependency overrides
pseudo-rnd-thoughts Apr 1, 2025
4a20355
Remove zlib dependency overrides
pseudo-rnd-thoughts Apr 2, 2025
2883c28
Merge branch 'master' into simple-vector
pseudo-rnd-thoughts Apr 20, 2025
fcb84ef
Update the sdl2 version and add wayland / x11 feature
pseudo-rnd-thoughts Apr 21, 2025
270c56f
Simplify and revert version change
pseudo-rnd-thoughts Apr 21, 2025
9ef7ad5
Add default-features false for sdl2 and opencv
pseudo-rnd-thoughts Apr 21, 2025
59ad428
Updated `default-features` to use a boolean rather than string boolean
pseudo-rnd-thoughts Apr 21, 2025
cb58d67
Add `from __future__ import annotations` for python 3.9 support
pseudo-rnd-thoughts Apr 21, 2025
a8145d0
Add more debugging on the observation difference
pseudo-rnd-thoughts Apr 23, 2025
3b6ea9d
Merge remote-tracking branch 'origin/simple-vector' into simple-vector
pseudo-rnd-thoughts Apr 23, 2025
fc93004
pre-commit
pseudo-rnd-thoughts Apr 23, 2025
b3cad30
change print to warning
pseudo-rnd-thoughts Apr 23, 2025
75aabcc
Add changelogs
pseudo-rnd-thoughts Apr 26, 2025
24 changes: 12 additions & 12 deletions .github/workflows/ci.yml
@@ -184,44 +184,44 @@ jobs:

- runs-on: macos-13
python: '3.9'
wheel-name: 'cp39-cp39-macosx_10_15_x86_64'
wheel-name: 'cp39-cp39-macosx_13_0_x86_64'
arch: x86_64
- runs-on: macos-13
python: '3.10'
wheel-name: 'cp310-cp310-macosx_10_15_x86_64'
wheel-name: 'cp310-cp310-macosx_13_0_x86_64'
arch: x86_64
- runs-on: macos-13
python: '3.11'
wheel-name: 'cp311-cp311-macosx_10_15_x86_64'
wheel-name: 'cp311-cp311-macosx_13_0_x86_64'
arch: x86_64
- runs-on: macos-13
python: '3.12'
wheel-name: 'cp312-cp312-macosx_10_15_x86_64'
wheel-name: 'cp312-cp312-macosx_13_0_x86_64'
arch: x86_64
- runs-on: macos-13
python: '3.13'
wheel-name: 'cp313-cp313-macosx_10_15_x86_64'
wheel-name: 'cp313-cp313-macosx_13_0_x86_64'
arch: x86_64

- runs-on: macos-14
python: '3.9'
wheel-name: 'cp39-cp39-macosx_11_0_arm64'
wheel-name: 'cp39-cp39-macosx_13_0_arm64'
arch: arm64
- runs-on: macos-14
python: '3.10'
wheel-name: 'cp310-cp310-macosx_11_0_arm64'
wheel-name: 'cp310-cp310-macosx_13_0_arm64'
arch: arm64
- runs-on: macos-14
python: '3.11'
wheel-name: 'cp311-cp311-macosx_11_0_arm64'
wheel-name: 'cp311-cp311-macosx_13_0_arm64'
arch: arm64
- runs-on: macos-14
python: '3.12'
wheel-name: 'cp312-cp312-macosx_11_0_arm64'
wheel-name: 'cp312-cp312-macosx_13_0_arm64'
arch: arm64
- runs-on: macos-14
python: '3.13'
wheel-name: 'cp313-cp313-macosx_11_0_arm64'
wheel-name: 'cp313-cp313-macosx_13_0_arm64'
arch: arm64

runs-on: ${{ matrix.runs-on }}
@@ -240,10 +240,10 @@ jobs:

- name: Install ALE wheel
# wildcarding doesn't work for some reason, therefore, update the project version here
run: python -m pip install ale_py-0.10.2-${{ matrix.wheel-name }}.whl
run: python -m pip install ale_py-0.11.0-${{ matrix.wheel-name }}.whl

- name: Install Gymnasium and pytest
run: python -m pip install gymnasium>=1.0.0 pytest
run: python -m pip install gymnasium>=1.0.0 pytest opencv-python

- name: Test
run: python -m pytest
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
hooks:
- id: flake8
args:
- '--per-file-ignores=tests/python/test_atari_env.py:F811 tests/python/test_python_interface.py:F811'
- '--per-file-ignores=tests/python/test_atari_env.py:F811 tests/python/test_python_interface.py:F811 src/ale/python/__init__.py:E402'
- --ignore=E203,W503,E741
- --max-complexity=30
- --max-line-length=456
31 changes: 31 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,37 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.11.0](https://github.com/Farama-Foundation/Arcade-Learning-Environment/compare/v0.10.2...v0.11.0) - 2025-04-26

This release adds an (experimental) built-in vectorised environment, available through `gymnasium.make_vec("ALE/{game_name}-v5", num_envs)` or `ale_py.AtariVectorEnv("{rom_name}", num_envs)`.

```python
import gymnasium as gym
import ale_py

gym.register_envs(ale_py)

envs = gym.make_vec("ALE/Pong-v5")
observations, infos = envs.reset()

for i in range(100):
actions = envs.action_space.sample()
observations, rewards, terminations, truncations, infos = envs.step(actions)

envs.close()
```
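
The vector environment can also be constructed directly; a minimal sketch following the `ale_py.AtariVectorEnv("{rom_name}", num_envs)` form above (ROM names are given in snake_case, and the exact argument handling may differ):

```python
import ale_py

# Direct construction, bypassing the Gymnasium registry.
envs = ale_py.AtariVectorEnv("pong", 4)
observations, infos = envs.reset()
envs.close()
```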

Vectorisation is a crucial feature in RL for increasing the sample rate of environments by stepping multiple sub-environments at the same time.
[Gymnasium](https://gymnasium.farama.org/api/vector/) provides a generalised vectorisation capability; however, it is relatively slow due to its Python implementation.
For a faster alternative, [EnvPool](https://github.com/sail-sg/envpool) provides C++ vectorisation that significantly increases the sample speed, but it is no longer maintained.
Inspired by the `EnvPool` implementation, we've implemented an asynchronous vectorised environment in C++ that includes the [standard Atari preprocessing](https://gymnasium.farama.org/api/wrappers/misc_wrappers/#gymnasium.wrappers.AtariPreprocessing): frame skipping, frame stacking, observation resizing, etc.

For full documentation of the vector environment, see [this page](https://ale.farama.org/v0.11.0/vector-environment).

We will continue building out this vectorised environment to include [XLA](https://github.com/openxla/xla) support, improved preprocessing, and autoresetting options.

As this is an experimental feature, we'd like to hear about any bugs, problems, or feature requests. Raise an issue on GitHub or ask a question on the [Farama Discord server](https://discord.gg/bnJ6kubTg6).

## [0.10.2](https://github.com/Farama-Foundation/Arcade-Learning-Environment/compare/v0.10.1...v0.10.2) - 2025-02-13

Fixed a performance regression for C++ users: a single-argument `act` function was missing, causing the `paddle_strength` parameter introduced in v0.10.0 to default to zero rather than one. As Gymnasium passes this variable to `act`, this was only an issue for users directly interacting with `ale_interface`. For more details, see https://github.com/Farama-Foundation/Arcade-Learning-Environment/pull/595.
6 changes: 6 additions & 0 deletions CMakeLists.txt
@@ -13,6 +13,12 @@ if(SDL_SUPPORT)
list(APPEND VCPKG_MANIFEST_FEATURES "sdl")
endif()

option(BUILD_VECTOR_LIB "Build Vector Interface" OFF)
if (BUILD_VECTOR_LIB)
list(APPEND VCPKG_MANIFEST_FEATURES "vector")
add_definitions(-DBUILD_VECTOR_LIB)
endif()

# Set cmake module path
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})

1 change: 1 addition & 0 deletions docs/index.md
@@ -41,6 +41,7 @@ env.close()
getting-started
env-spec
environments
vector-environment
multi-agent-environments
faq
citing
196 changes: 196 additions & 0 deletions docs/vector-environment.md
@@ -0,0 +1,196 @@
# ALE Vector Environment Guide

## Introduction

The Arcade Learning Environment (ALE) Vector Environment provides a high-performance implementation for running multiple Atari environments in parallel. This implementation utilizes native C++ code with multi-threading to achieve significant performance improvements, especially when running many environments simultaneously.

The vector environment is equivalent to `FrameStackObservation(AtariPreprocessing(gym.make("ALE/{AtariGame}-v5")), stack_size=4)`.
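
For reference, a roughly equivalent single-environment setup built from Gymnasium's wrappers (a sketch; the wrapper defaults assumed here may differ slightly from the vector environment's):

```python
import gymnasium as gym
from gymnasium.wrappers import AtariPreprocessing, FrameStackObservation

import ale_py

gym.register_envs(ale_py)

# Disable the base environment's frame skip so AtariPreprocessing handles it.
env = gym.make("ALE/Breakout-v5", frameskip=1)
env = AtariPreprocessing(env, frame_skip=4, screen_size=84, grayscale_obs=True)
env = FrameStackObservation(env, stack_size=4)

obs, _ = env.reset()
print(obs.shape)  # (4, 84, 84)
```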

## Key Features

- **Parallel Execution**: Run multiple Atari environments simultaneously with minimal overhead
- **Standard Preprocessing**: Includes standard preprocessing steps from the Atari Deep RL literature:
- Frame skipping
- Observation resizing
- Grayscale conversion
- Frame stacking
- NoOp initialization at reset
- Fire reset (for games requiring the fire button to start)
- Episodic life modes
- **Performance Optimizations**:
- Native C++ implementation
- Next-step autoreset (see [blog](https://farama.org/Vector-Autoreset-Mode) for more detail)
- Multi-threading for parallel execution
- Thread affinity options for better performance on multi-core systems
- Batch processing capabilities
- **Asynchronous Operation**: Split step operation into `send` and `recv` for more flexible control flow
- **Gymnasium Compatible**: Implements the Gymnasium `VectorEnv` [interface](https://gymnasium.farama.org/api/vector/)

## Installation

The vector implementation is packaged with `ale-py`, which can be installed from PyPI: `pip install ale-py`.

Optionally, users can build the project locally; this requires vcpkg, which installs OpenCV to support observation preprocessing.

## Basic Usage

### Creating a Vector Environment

```python
from ale_py.vector_env import VectorAtariEnv

# Create a vector environment with 4 parallel instances of Breakout
envs = VectorAtariEnv(
game="Breakout",
num_envs=4,
)

# Reset all environments
observations, info = envs.reset()

# Take random actions in all environments
actions = envs.action_space.sample()
observations, rewards, terminations, truncations, infos = envs.step(actions)

# Close the environment when done
envs.close()
```

## Advanced Configuration

The vector environment provides numerous configuration options:

```python
envs = VectorAtariEnv(
# Required parameters
game="Breakout", # ROM name in snake_case
num_envs=8, # Number of parallel environments

# Preprocessing parameters
frame_skip=4, # Number of frames to skip (action repeat)
grayscale=True, # Use grayscale observations
stack_num=4, # Number of frames to stack
img_height=84, # Height to resize frames to
img_width=84, # Width to resize frames to

# Environment behavior
noop_max=30, # Maximum number of no-ops at reset
fire_reset=True, # Press FIRE on reset for games that require it
episodic_life=False, # End episodes on life loss
max_episode_steps=108000, # Max frames per episode (27000 steps * 4 frame skip)
repeat_action_probability=0.0, # Sticky actions probability
full_action_space=False, # Use full action space (not minimal)

# Performance options
batch_size=0, # Number of environments to process at once (0 = use `num_envs`)
num_threads=0, # Number of worker threads (0 = auto)
thread_affinity_offset=-1, # CPU core offset for thread affinity (-1 = no affinity)
seed=0, # Random seed
)
```

## Observation Format

The observation format from the vector environment is:

```
observations.shape = (num_envs, stack_size, height, width)
```

Where:
- `num_envs`: Number of parallel environments
- `stack_size`: Number of stacked frames (typically 4)
- `height`, `width`: Image dimensions (typically 84x84)

This differs from the standard Gymnasium Atari environment format, which (with the equivalent preprocessing and frame stacking) uses:
```
observations.shape = (stack_size, height, width)  # no num_envs dimension
```
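
A quick way to confirm the vector format (a sketch assuming the default preprocessing options; the `uint8` dtype is an assumption):

```python
import numpy as np
from ale_py.vector_env import VectorAtariEnv

envs = VectorAtariEnv(game="Breakout", num_envs=4)
obs, info = envs.reset()

assert obs.shape == (4, 4, 84, 84)  # (num_envs, stack_size, height, width)
assert obs.dtype == np.uint8        # assumed dtype for raw Atari pixels
envs.close()
```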

## Performance Considerations

### Number of Environments

Increasing the number of environments typically improves throughput until you hit CPU core limits.
For optimal performance, set `num_envs` close to the number of physical CPU cores.
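
For example, one way to choose `num_envs` from the machine's core count (a sketch; `os.cpu_count()` reports logical cores, so it is halved here as a rough proxy for physical cores):

```python
import os

from ale_py.vector_env import VectorAtariEnv

# Rough heuristic: one environment per physical core.
num_envs = max(1, (os.cpu_count() or 1) // 2)
envs = VectorAtariEnv(game="Breakout", num_envs=num_envs)
```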

### Send/Recv vs Step

Using the `send`/`recv` API can allow for better overlapping of computation and environment stepping:

```python
# Send actions to environments
envs.send(actions)

# Do other computation here while environments are stepping

# Receive results when ready
observations, rewards, terminations, truncations, infos = envs.recv()
```

### Batch Size

The `batch_size` parameter controls how many environments are processed simultaneously by the worker threads:

```python
# Process environments in batches of 4
envs = VectorAtariEnv(game="Breakout", num_envs=16, batch_size=4)
```

A smaller batch size can improve latency, while a larger batch size can improve throughput.
When the batch size is smaller than the number of environments, the returned info includes the environment id of each observation; this is critical because `reset` and `recv` only return the first `batch_size` observations that complete.

### Thread Affinity

On systems with multiple CPU cores, setting thread affinity can improve performance:

```python
# Set thread affinity starting from core 0
envs = VectorAtariEnv(game="Breakout", num_envs=8, thread_affinity_offset=0)
```


## Examples

### Training Example with PyTorch

```python
import torch
import numpy as np
from ale_py.vector_env import VectorAtariEnv

# Create environment
envs = VectorAtariEnv(game="Breakout", num_envs=8)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize model (simplified example)
model = torch.nn.Sequential(
torch.nn.Conv2d(4, 32, kernel_size=8, stride=4),
torch.nn.ReLU(),
torch.nn.Conv2d(32, 64, kernel_size=4, stride=2),
torch.nn.ReLU(),
torch.nn.Conv2d(64, 64, kernel_size=3, stride=1),
torch.nn.ReLU(),
torch.nn.Flatten(),
torch.nn.Linear(3136, 512),
torch.nn.ReLU(),
torch.nn.Linear(512, envs.single_action_space.n)
).to(device)

# Reset environment
observations, _ = envs.reset()

# Rollout loop (inference only; the learning update is omitted for brevity)
for step in range(1000):
# Convert observations to PyTorch tensors
obs_tensor = torch.tensor(observations, dtype=torch.float32, device=device) / 255.0

# Get actions from model
with torch.no_grad():
q_values = model(obs_tensor)
actions = q_values.max(dim=1)[1].cpu().numpy()

# Step the environment
observations, rewards, terminations, truncations, infos = envs.step(actions)
```
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -46,11 +46,12 @@ dynamic = ["version"]
test = [
"pytest>=7.0",
"gymnasium>=1.0.0",
"opencv-python>=3.0",
]

[project.urls]
homepage = "https://github.com/Farama-Foundation/Arcade-Learning-Environment"
documentation = "https://github.com/Farama-Foundation/Arcade-Learning-Environment/tree/master/docs"
documentation = "https://ale.farama.org"
changelog = "https://github.com/Farama-Foundation/Arcade-Learning-Environment/blob/master/CHANGELOG.md"

[tool.setuptools]
1 change: 1 addition & 0 deletions setup.py
@@ -47,6 +47,7 @@ def build_extension(self, ext):
"-DSDL_DYNLOAD=ON",
"-DBUILD_CPP_LIB=OFF",
"-DBUILD_PYTHON_LIB=ON",
"-DBUILD_VECTOR_LIB=ON",
]
build_args = []

11 changes: 11 additions & 0 deletions src/ale/CMakeLists.txt
@@ -58,6 +58,17 @@ if (BUILD_CPP_LIB OR BUILD_PYTHON_LIB)
add_library(ale-lib ale_interface.cpp)
set_target_properties(ale-lib PROPERTIES OUTPUT_NAME ale)
target_link_libraries(ale-lib PUBLIC ale)

if (BUILD_VECTOR_LIB)
find_package(OpenCV CONFIG REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

target_include_directories(ale PUBLIC external)

add_subdirectory(vector)

target_link_libraries(ale PRIVATE ${OpenCV_LIBS})
endif()
endif()

# Python Library