Skip to content

Commit be3f458

Browse files
committed
Release npfl139 version 2526.1.0.
1 parent 46a05bf commit be3f458

File tree

9 files changed

+856
-0
lines changed

9 files changed

+856
-0
lines changed

labs/npfl139/LICENSE

Lines changed: 373 additions & 0 deletions
Large diffs are not rendered by default.

labs/npfl139/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
# The `npfl139` Package: Modules Used in the Deep Reinforcement Learning Course (NPFL139)

This package contains the modules used in the
[Deep Reinforcement Learning course (NPFL139)](http://ufal.mff.cuni.cz/courses/npfl139),
available under the Mozilla Public License 2.0.

labs/npfl139/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
# EvaluationEnv
8+
from .evaluation_env import EvaluationEnv
9+
10+
# Environment wrappers
11+
from .env_wrappers import DiscreteCartPoleWrapper
12+
13+
# Utils
14+
from .initializers_override import global_keras_initializers
15+
from .startup_impl import startup
16+
from .version import __version__, require_version

labs/npfl139/env_wrappers.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
import gymnasium as gym
7+
import numpy as np
8+
9+
10+
# Discrete environments.
class DiscretizationWrapper(gym.ObservationWrapper):
    """Observation wrapper discretizing continuous observations.

    Each observation component is digitized according to the corresponding
    entry of `separators` (a sorted sequence of bin boundaries per component).
    Without tiles, a single integer state is returned; with `tiles`, tile
    coding is used and an array of `tiles` state indices is returned, every
    tile indexing into its own disjoint range of states.
    """
    def __init__(self, env, separators, tiles=None):
        super().__init__(env)
        self._separators = separators
        self._tiles = tiles

        if tiles is None:
            # Single-tile case: the number of states is the product of the
            # per-component bin counts (len(separator) boundaries -> +1 bins).
            states = 1
            for separator in separators:
                states *= 1 + len(separator)
            self.observation_space = gym.spaces.Discrete(states)
        else:
            # Tile coding: the first tile uses the original separators, while
            # every shifted tile has one extra (overflow) bin per component.
            self._first_tile_states, self._rest_tiles_states = 1, 1
            for separator in separators:
                self._first_tile_states *= 1 + len(separator)
                self._rest_tiles_states *= 2 + len(separator)
            self.observation_space = gym.spaces.MultiDiscrete([
                self._first_tile_states + i * self._rest_tiles_states for i in range(tiles)])

            # Per-component shift between consecutive tiles, and the value above
            # which a shifted observation falls into the extra overflow bin.
            self._separator_offsets = [0 if len(s) <= 1 else (s[1] - s[0]) / tiles for s in separators]
            self._separator_tops = [np.inf if len(s) <= 1 else s[-1] + (s[1] - s[0]) for s in separators]

    def observation(self, observations):
        # Compute the state index of the first (unshifted) tile by mixed-radix
        # encoding of the digitized observation components.
        state = 0
        for observation, separator in zip(observations, self._separators):
            state *= 1 + len(separator)
            state += np.digitize(observation, separator)
        if self._tiles is None:
            return state
        else:
            states = np.empty(self._tiles, dtype=np.int64)
            states[0] = state
            for t in range(1, self._tiles):
                state = 0
                for i in range(len(self._separators)):
                    state *= 2 + len(self._separators[i])
                    # Shift the observation by a tile- and component-specific
                    # offset; the (2 * i + 1) pattern decorrelates the tiles.
                    value = observations[i] + ((t * (2 * i + 1)) % self._tiles) * self._separator_offsets[i]
                    if value > self._separator_tops[i]:
                        # Shifted value exceeds the top boundary -> overflow bin.
                        state += 1 + len(self._separators[i])
                    else:
                        state += np.digitize(value, self._separators[i])
                # Offset the state into this tile's disjoint index range.
                states[t] = self._first_tile_states + (t - 1) * self._rest_tiles_states + state
            return states
54+
55+
56+
class DiscreteCartPoleWrapper(DiscretizationWrapper):
    """Discretization of the CartPole environment observations.

    Every observation component is split into `bins` equally-sized bins over
    a fixed, component-specific range.
    """
    def __init__(self, env, bins=8):
        # (low, high) bounds for cart position, cart velocity,
        # pole angle, and pole angle velocity, respectively.
        bounds = [(-2.4, 2.4), (-3, 3), (-0.2, 0.2), (-2, 2)]
        # Interior boundaries only: drop the outermost linspace endpoints.
        separators = [np.linspace(low, high, num=bins + 1)[1:-1] for low, high in bounds]
        super().__init__(env, separators)

labs/npfl139/evaluation_env.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
import os
7+
import sys
8+
9+
import gymnasium as gym
10+
import numpy as np
11+
12+
13+
class EvaluationEnv(gym.Wrapper):
    """Wrapper tracking episode returns and handling final evaluation.

    The wrapper seeds the environment once during construction (reseeding via
    `reset` is forbidden), accumulates per-episode returns, periodically
    reports mean returns to stderr, and once `start_evaluation=True` is
    passed to `reset`, terminates the process after `evaluate_for` episodes
    with a final mean-return report.
    """
    def __init__(self, env, seed=None, render_each=0, evaluate_for=100, report_each=10):
        super().__init__(env)
        self._render_each = render_each
        self._evaluate_for = evaluate_for
        self._report_each = report_each
        # Verbose per-episode returns in reports unless VERBOSE is "" or "0".
        self._report_verbose = os.getenv("VERBOSE", "1") not in ["", "0"]

        if all(hasattr(env.unwrapped, member) for member in ["ale", "seed_game", "load_game"]):  # Seed ALE.
            self.unwrapped.seed_game(seed)
            self.unwrapped.load_game()
        # Seed the environment and both spaces exactly once, here.
        gym.Env.reset(self.unwrapped, seed=seed)
        self.action_space.seed(seed)
        self.observation_space.seed(seed)
        # Expose expert-data accessors of the wrapped env, when present.
        for passthrough in ["expert_trajectory", "expert_episode"]:
            if hasattr(env, passthrough):
                setattr(self, passthrough, getattr(env, passthrough))
            elif hasattr(env.unwrapped, passthrough):
                setattr(self, passthrough, getattr(env.unwrapped, passthrough))

        self._episode_running = False
        self._episode_returns = []
        # Episode index at which evaluation started, or None if not evaluating.
        self._evaluating_from = None
        self._original_render_mode = env.render_mode
        # pygame is only needed to process the QUIT event during rendering.
        self._pygame = __import__("pygame") if self._render_each else None

    @property
    def episode(self):
        # Number of finished episodes so far.
        return len(self._episode_returns)

    def reset(self, *, start_evaluation=False, logging=True, seed=None, options=None):
        """Reset the environment; `start_evaluation=True` starts final evaluation.

        Raises:
            RuntimeError: When `seed` is given, or when resetting a running
                episode after evaluation has started.
        """
        # Both flags may alternatively be passed through `options`.
        start_evaluation = start_evaluation or (options or {}).get("start_evaluation", False)
        logging = logging and (options or {}).get("logging", True)

        if seed is not None:
            raise RuntimeError("The EvaluationEnv cannot be reseeded")
        if self._evaluating_from is not None and self._episode_running:
            raise RuntimeError("Cannot reset a running episode after `start_evaluation=True`")
        if start_evaluation and self._evaluating_from is None:
            self._evaluating_from = self.episode

        # Render every `render_each`-th episode by switching to "human" mode.
        if logging and self._render_each and (self.episode + 1) % self._render_each == 0:
            self.unwrapped.render_mode = "human"
        elif self._render_each:
            self.unwrapped.render_mode = self._original_render_mode

        self._episode_running = True
        # Track the return only when logging or evaluating.
        self._episode_return = 0 if logging or self._evaluating_from is not None else None
        return super().reset(options=options)

    def step(self, action):
        """Perform a step, accumulating the return and reporting when due."""
        if not self._episode_running:
            raise RuntimeError("Cannot run `step` on environments without an active episode, run `reset` first")

        observation, reward, terminated, truncated, info = super().step(action)
        done = terminated or truncated

        self._episode_running = not done
        if self._episode_return is not None:
            self._episode_return += reward
        if self._episode_return is not None and done:
            self._episode_returns.append(self._episode_return)

            # Periodic progress report on stderr.
            if self._report_each and self.episode % self._report_each == 0:
                print("Episode {}, mean {}-episode return {:.2f} +-{:.2f}{}".format(
                    self.episode, self._evaluate_for, np.mean(self._episode_returns[-self._evaluate_for:]),
                    np.std(self._episode_returns[-self._evaluate_for:]), "" if not self._report_verbose else
                    ", returns " + " ".join(map("{:g}".format, self._episode_returns[-self._report_each:]))),
                    file=sys.stderr, flush=True)
            # After `evaluate_for` evaluation episodes, report and exit.
            if self._evaluating_from is not None and self.episode >= self._evaluating_from + self._evaluate_for:
                print("The mean {}-episode return after evaluation {:.2f} +-{:.2f}".format(
                    self._evaluate_for, np.mean(self._episode_returns[-self._evaluate_for:]),
                    np.std(self._episode_returns[-self._evaluate_for:])), flush=True)
                self.close()
                sys.exit(0)

        # Allow stopping the "human" rendering via the pygame window close button.
        if self._pygame and self.unwrapped.render_mode == "human" and self._pygame.get_init():
            if self._pygame.event.get(self._pygame.QUIT):
                self.unwrapped.render_mode = self._original_render_mode

        return observation, reward, terminated, truncated, info
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
from collections.abc import Callable
7+
import math
8+
from typing import Any
9+
10+
import torch
11+
12+
13+
class KerasParameterInitialization:
    """Replacement `reset_parameters` methods mimicking Keras initializers.

    Each method is meant to be assigned as the `reset_parameters` of the
    corresponding `torch.nn` module class, per the `overrides` mapping.
    """
    def reset_parameters_linear(self) -> None:
        # Keras default for dense/conv layers: Xavier uniform weights, zero biases.
        torch.nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            torch.nn.init.zeros_(self.bias)

    def reset_parameters_bilinear(self) -> None:
        # Keras does not have a Bilinear layer. But we analogously use
        # the Xavier uniform initialization, where
        # - the fan_out for each out_feature is in_feature1 * in_feature2
        # - the fan_in for each in_feature1 is out_feature * in_feature2
        # - the fan_in for each in_feature2 is out_feature * in_feature1
        # - the overall fan_in is computed as a weighted average of the above two as
        #   (2 * out_feature * in_feature1 * in_feature2) / (in_feature1 + in_feature2)
        out, in1, in2 = self.weight.shape
        fan_in = (2 * out * in1 * in2) / (in1 + in2)
        fan_out = in1 * in2
        bound = math.sqrt(6 / (fan_in + fan_out))
        torch.nn.init.uniform_(self.weight, -bound, bound)
        if self.bias is not None:
            torch.nn.init.zeros_(self.bias)

    def reset_parameters_rnn(self) -> None:
        # Keras RNN defaults: Xavier uniform input weights, orthogonal
        # recurrent weights, zero biases.
        for name, parameter in self.named_parameters():
            if "weight_ih" in name:
                torch.nn.init.xavier_uniform_(parameter)
            elif "weight_hh" in name:
                torch.nn.init.orthogonal_(parameter)
            elif "bias" in name:
                torch.nn.init.zeros_(parameter)
                if isinstance(self, (torch.nn.LSTM, torch.nn.LSTMCell)):  # Set LSTM forget gate bias to 1
                    parameter.data[self.hidden_size:self.hidden_size * 2] = 1

    def reset_parameters_embedding(self) -> None:
        # Keras default for embeddings: uniform [-0.05, 0.05], with the
        # padding_idx row (if any) kept zero.
        torch.nn.init.uniform_(self.weight, -0.05, 0.05)
        self._fill_padding_idx_with_zero()

    # Mapping from module classes to the replacement `reset_parameters` method.
    overrides: dict[type[torch.nn.Module], Callable] = {
        torch.nn.Linear: reset_parameters_linear,
        torch.nn.Conv1d: reset_parameters_linear,
        torch.nn.Conv2d: reset_parameters_linear,
        torch.nn.Conv3d: reset_parameters_linear,
        torch.nn.ConvTranspose1d: reset_parameters_linear,
        torch.nn.ConvTranspose2d: reset_parameters_linear,
        torch.nn.ConvTranspose3d: reset_parameters_linear,
        torch.nn.Bilinear: reset_parameters_bilinear,
        torch.nn.RNN: reset_parameters_rnn,
        torch.nn.RNNCell: reset_parameters_rnn,
        torch.nn.LSTM: reset_parameters_rnn,
        torch.nn.LSTMCell: reset_parameters_rnn,
        torch.nn.GRU: reset_parameters_rnn,
        torch.nn.GRUCell: reset_parameters_rnn,
        torch.nn.Embedding: reset_parameters_embedding,
        torch.nn.EmbeddingBag: reset_parameters_embedding,
    }
68+
69+
70+
class KerasNormalizationLayers:
    """Helpers for adjusting default arguments of normalization layers."""

    @staticmethod
    def override_default_argument_value(func: Callable, name: str, default: Any) -> None:
        """Replace the default value of the argument `name` of `func` by `default`."""
        code = func.__code__
        # The trailing positional arguments are the ones carrying defaults.
        default_names = code.co_varnames[:code.co_argcount][-len(func.__defaults__):]
        assert name in default_names, f"Argument {name} not found in {func.__name__} arguments"
        new_defaults = []
        for arg_name, arg_value in zip(default_names, func.__defaults__):
            new_defaults.append(default if arg_name == name else arg_value)
        func.__defaults__ = tuple(new_defaults)

    # All batch normalization layer classes.
    batch_norms = [
        torch.nn.BatchNorm1d,
        torch.nn.BatchNorm2d,
        torch.nn.BatchNorm3d,
        torch.nn.LazyBatchNorm1d,
        torch.nn.LazyBatchNorm2d,
        torch.nn.LazyBatchNorm3d,
        torch.nn.SyncBatchNorm,
    ]

    # Batch normalization plus the remaining normalization layer classes.
    all_norms = batch_norms + [
        torch.nn.LayerNorm,
        torch.nn.GroupNorm,
    ]
94+
95+
96+
def global_keras_initializers(
    parameter_initialization: bool = True,
    batchnorm_momentum_override: float | None = 0.01,
    norm_layer_epsilon_override: float | None = 0.001,
) -> None:
    """Change default PyTorch initializers to Keras defaults.

    The following initializers are used:

    - `Linear`, `Conv1d`, `Conv2d`, `Conv3d`, `ConvTranspose1d`, `ConvTranspose2d`, `ConvTranspose3d`, `Bilinear`:
      Xavier uniform for weights, zeros for biases.
    - `Embedding`, `EmbeddingBag`: Uniform [-0.05, 0.05] for weights.
    - `RNN`, `RNNCell`, `LSTM`, `LSTMCell`, `GRU`, `GRUCell`: Xavier uniform for input weights,
      orthogonal for recurrent weights, zeros for biases (with LSTM forget gate bias set to 1).

    Furthermore, for batch normalization layers, the default momentum value is changed
    from 0.1 to the Keras default of 0.01 (or any other value specified).

    Finally, for batch normalization, layer normalization, and group normalization layers,
    the default epsilon value is changed from 1e-5 to the Keras default of 1e-3
    (or any other value specified).

    Parameters:
      parameter_initialization: If True, override the default PyTorch initializers with Keras defaults.
      batchnorm_momentum_override: If not None, override the default value of batch normalization
        momentum from 0.1 to this value.
      norm_layer_epsilon_override: If not None, override the default value of epsilon
        for batch normalization, layer normalization, and group normalization layers from
        1e-5 to this value.
    """
    if parameter_initialization:
        # Monkey-patch `reset_parameters` of every listed module class.
        for module_class, replacement in KerasParameterInitialization.overrides.items():
            module_class.reset_parameters = replacement

    if batchnorm_momentum_override is not None:
        # Patch the batch normalization classes and their known subclasses.
        for base in KerasNormalizationLayers.batch_norms:
            for batch_norm in [base, *base.__subclasses__()]:
                KerasNormalizationLayers.override_default_argument_value(
                    batch_norm.__init__, "momentum", batchnorm_momentum_override
                )

    if norm_layer_epsilon_override is not None:
        # Patch all normalization classes and their known subclasses.
        for base in KerasNormalizationLayers.all_norms:
            for norm_layer in [base, *base.__subclasses__()]:
                KerasNormalizationLayers.override_default_argument_value(
                    norm_layer.__init__, "eps", norm_layer_epsilon_override
                )

0 commit comments

Comments
 (0)