Update .toml extras; fixed tests; added changelog; removed old setup.py;

RaghuSpaceRajan · RaghuSpaceRajan · commit 2ca8a019c05f · 2026-04-02T19:14:10.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,47 @@
+## [1.0.0] - 2026-04-02
+
+Release Highlights: Version 1.0.0 (Generated using an LLM: Google Gemini)
+🚀 Major Breaking Changes & Core Updates
+Project Management: The project is now managed with uv, and the minimum supported Python version is now 3.11.
+
+Improved README.
+
+Gymnasium Migration: Full migration from gym to gymnasium (v1.0.0 compatibility). This includes updated return values for step() and reset() (support for terminated/truncated flags).
+
+API Refactor: Significant renaming of internal functions for clarity, specifically around Markov state management (get_augmented_state, etc.) and image representation.
+
+Dependency Modernization: Upgraded numpy and random number generation to align with modern Gymnasium standards (_np_random).
+
+🛠 Environment Enhancements (RLToyEnv)
+Advanced Rendering: Added a more flexible render() function that allows for custom trajectory rollouts and "imaginary" rollouts from specific starting states.
+
+Observation Capabilities: Improved get_image_representation to support uncertainty visualization (epistemic and aleatoric) via bar plots.
+
+Added support for setting custom dtype_s and dtype_o for state and observation spaces.
+
+Dynamics & Noise: Transition and reward noises can now be state-and-action dependent. Improved default noise profiles for continuous environments.
+
+Reward Logic: Improved reward_every_n_steps logic to work across discrete, continuous, and grid environments.
+
+🧪 Wrappers & Compatibility
+Gymnasium Wrapper: Updated wrapper to support irrelevant dimensions and image transformations.
+
+External Integration: Improved support and examples for MiniGrid, ProcGen, and Mujoco (v4) environments.
+
+Resource Management: Added close() functionality to properly release Pygame resources.
+
+📈 Tooling & Documentation
+Example Suite: Overhauled example.py with a better CLI, individual function calls, and logging toggles for image observations.
+
+Experimentation: Updated experiment configuration scripts and cleaned up Jupyter notebooks for plotting results.
+
+CI/CD: Updated GitHub workflows to support newer Python versions and fixed code coverage reporting.
+
+🐛 Bug Fixes
+Fixed issues with copy.deepcopy() by removing redundant state variables (self.P, self.R).
+
+Resolved reward bugs related to delays exceeding sequence lengths.
+
+Fixed terminal state logic for grid environments.
+
+Rectified various test failures in TestGymEnvWrapper and TestRLToyEnv.
diff --git a/README.md b/README.md
@@ -38,6 +38,9 @@ ae = gym.make("QbertNoFrameskip-v4")
 env = GymEnvWrapper(ae, **config)
 ```
 
+## Important Note
+We are moving to package management with `uv` and away from using Ray RLlib, so some experiment / agent running functionality might break. The wrappers and the toy environment should still work fine, though.
+
 ## Getting started
 There are 4 parts to the package:
 1) **Toy Environments**: The base toy Environment in [`mdp_playground/envs/rl_toy_env.py`](mdp_playground/envs/rl_toy_env.py) implements the toy environment functionality, including discrete and continuous environments, and is parameterised by a `config` dict which contains all the information needed to instantiate the required toy MDP. Please see [`example.py`](example.py) for some simple examples of how to use these. For further details, please refer to the documentation in [`mdp_playground/envs/rl_toy_env.py`](mdp_playground/envs/rl_toy_env.py).
diff --git a/mdp_playground/envs/rl_toy_env.py b/mdp_playground/envs/rl_toy_env.py
@@ -2490,13 +2490,21 @@ def render(self,):
     def imagine_and_render(self, actions, state=None, render=True):
         """
         Performs steps in a deep copy of the environment with an action
-        sequence and then optionally renders the resulting trajectory and returns the rendered RGB images.
+        sequence and then optionally renders the resulting trajectory and 
+        returns the rendered RGB images. It's called "imagine" and not "rollout"
+        because performing steps in a copy of the environment means that the
+        original environment and its state are not affected by the actions rolled
+        out here.
         If render is False, returns the observations created by stepping in the env 
         using actions instead of rendered images.
 
-        Currently, render_mode is hardcoded to "rgb_array" for the copied environment. 
+        Notes:
+        1) Currently, render_mode is hardcoded to "rgb_array" for the copied environment. 
         Would need to look deeper into pygame, e.g. for how to instantiate mutliple windows
-        to support "human" render_mode as well.
+        to support "human" render_mode as well. 
+        2) Ideally, the rollout and the render would be separated, but render() currently
+        depends on the current state of the environment, so separating them is harder.
+
 
         Parameters
         ----------
diff --git a/mdp_playground/spaces/test_image_continuous.py b/mdp_playground/spaces/test_image_continuous.py
@@ -52,7 +52,7 @@ def test_image_continuous(self):
         img1 = Image.fromarray(np.squeeze(imc.generate_image(pos)), "RGB")
         if render:
             img1.show()
-        img1.save("cont_state_target.pdf")
+        img1.save("cont_state_target.pdf", format="PDF")
 
         # Terminal sub-spaces
         lows = np.array([2.0, 4.0])
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,87 @@
+[project]
+name = "mdp-playground"
+version = "1.0.0"
+description = "A python package to design and debug RL agents"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "ale-py>=0.11.2",
+    "dill>=0.4.1",
+    "gymnasium>=1.2.2",
+    "numpy>=2.4.2",
+    "scipy>=1.17.0",
+    "pillow>=12.2.0",                  # Image processing
+]
+license = { text = "Apache License, Version 2.0" }
+
+authors = [
+  { name = "Raghu Rajan" },
+  { name = "Jessica Borja" },
+  { name = "Suresh Guttikonda" },
+  { name = "Fabio Ferreira" },
+  { name = "Jan Ole von Hartz" },
+  { name = "André Biedenkapp" },
+  { name = "Frank Hutter" }
+]
+
+maintainers = [
+  { name = "Raghu Rajan", email = "rajanr@cs.uni-freiburg.de" }
+]
+
+classifiers = [
+  "Programming Language :: Python :: 3",
+  "License :: OSI Approved :: Apache Software License",
+  "Operating System :: OS Independent",
+  "Natural Language :: English",
+  "Intended Audience :: Developers",
+  "Intended Audience :: Education",
+  "Intended Audience :: Science/Research",
+  "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+
+[project.urls]
+Homepage = "https://github.com/automl/mdp-playground"
+"Bug Tracker" = "https://github.com/automl/mdp-playground/issues"
+
+
+[project.optional-dependencies]
+# A single consolidated extra containing all environment and analysis tools. Many of these haven't been tested yet.
+extras = [
+    # "ray[default,rllib]>=2.54.1",
+    # "tensorflow>=2.21.0",
+    # "tensorflow-probability>=0.23.0",
+    "gymnasium[atari,other]>=1.2.2",
+    "mujoco>=3.1.0",
+    "configspace>=1.2.2",
+    "pandas>=3.0.0",
+    "scipy>=1.17.1",
+    "matplotlib>=3.10.8",              # Plotting
+    "opencv-python>=4.13.0.92",        # CV2
+    # "opencv-python-headless>=4.13.0.92",
+    "requests>=2.31.0",
+]
+
+# hpo_analysis = [
+#   "cave>=1.4.0"
+# ]
+
+[project.scripts]
+run-mdpp-experiments = "mdp_playground.scripts.run_experiments:cli"
+
+[tool.setuptools.packages.find]
+where = ["."]
+
+[tool.setuptools.package-data]
+"*" = ["*"]
+
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[dependency-groups]
+dev = [
+    "pytest>=9.0.2",
+]
+
+[tool.uv]
+package = true
diff --git a/setup.py b/setup.py
diff --git a/uv.lock b/uv.lock