Skip to content

Commit be3f458

Browse files
committed
Release npfl139 version 2526.1.0.
1 parent 46a05bf commit be3f458

File tree

9 files changed

+856
-0
lines changed

9 files changed

+856
-0
lines changed

labs/npfl139/LICENSE

Lines changed: 373 additions & 0 deletions
Large diffs are not rendered by default.

labs/npfl139/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
# The `npfl139` Package: Modules Used in the Deep Reinforcement Learning Course (NPFL139)

This package contains the modules used in the
[Deep Reinforcement Learning course (NPFL139)](http://ufal.mff.cuni.cz/courses/npfl139),
available under the Mozilla Public License 2.0.

labs/npfl139/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
7+
# EvaluationEnv
8+
from .evaluation_env import EvaluationEnv
9+
10+
# Environment wrappers
11+
from .env_wrappers import DiscreteCartPoleWrapper
12+
13+
# Utils
14+
from .initializers_override import global_keras_initializers
15+
from .startup_impl import startup
16+
from .version import __version__, require_version

labs/npfl139/env_wrappers.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
import gymnasium as gym
7+
import numpy as np
8+
9+
10+
# Discrete environments.
class DiscretizationWrapper(gym.ObservationWrapper):
    """Observation wrapper discretizing continuous observations.

    Each observation component is digitized according to the corresponding
    entry of `separators` (a sorted sequence of bin boundaries per component).
    Without tiles, a single integer state is returned; with `tiles`, tile
    coding is used and an array of `tiles` state indices is returned, every
    tile indexing into its own disjoint range of states.
    """
    def __init__(self, env, separators, tiles=None):
        super().__init__(env)
        self._separators = separators
        self._tiles = tiles

        if tiles is None:
            # Single-tile case: the number of states is the product of the
            # per-component bin counts (len(separator) boundaries -> +1 bins).
            states = 1
            for separator in separators:
                states *= 1 + len(separator)
            self.observation_space = gym.spaces.Discrete(states)
        else:
            # Tile coding: the first tile uses the original separators, while
            # every shifted tile has one extra (overflow) bin per component.
            self._first_tile_states, self._rest_tiles_states = 1, 1
            for separator in separators:
                self._first_tile_states *= 1 + len(separator)
                self._rest_tiles_states *= 2 + len(separator)
            self.observation_space = gym.spaces.MultiDiscrete([
                self._first_tile_states + i * self._rest_tiles_states for i in range(tiles)])

            # Per-component shift between consecutive tiles, and the value above
            # which a shifted observation falls into the extra overflow bin.
            self._separator_offsets = [0 if len(s) <= 1 else (s[1] - s[0]) / tiles for s in separators]
            self._separator_tops = [np.inf if len(s) <= 1 else s[-1] + (s[1] - s[0]) for s in separators]

    def observation(self, observations):
        # Compute the state index of the first (unshifted) tile by mixed-radix
        # encoding of the digitized observation components.
        state = 0
        for observation, separator in zip(observations, self._separators):
            state *= 1 + len(separator)
            state += np.digitize(observation, separator)
        if self._tiles is None:
            return state
        else:
            states = np.empty(self._tiles, dtype=np.int64)
            states[0] = state
            for t in range(1, self._tiles):
                state = 0
                for i in range(len(self._separators)):
                    state *= 2 + len(self._separators[i])
                    # Shift the observation by a tile- and component-specific
                    # offset; the (2 * i + 1) pattern decorrelates the tiles.
                    value = observations[i] + ((t * (2 * i + 1)) % self._tiles) * self._separator_offsets[i]
                    if value > self._separator_tops[i]:
                        # Shifted value exceeds the top boundary -> overflow bin.
                        state += 1 + len(self._separators[i])
                    else:
                        state += np.digitize(value, self._separators[i])
                # Offset the state into this tile's disjoint index range.
                states[t] = self._first_tile_states + (t - 1) * self._rest_tiles_states + state
            return states
54+
55+
56+
class DiscreteCartPoleWrapper(DiscretizationWrapper):
    """Discretization of the CartPole environment observations.

    Every observation component is split into `bins` equally-sized bins over
    a fixed, component-specific range.
    """
    def __init__(self, env, bins=8):
        # (low, high) bounds for cart position, cart velocity,
        # pole angle, and pole angle velocity, respectively.
        bounds = [(-2.4, 2.4), (-3, 3), (-0.2, 0.2), (-2, 2)]
        # Interior boundaries only: drop the outermost linspace endpoints.
        separators = [np.linspace(low, high, num=bins + 1)[1:-1] for low, high in bounds]
        super().__init__(env, separators)

labs/npfl139/evaluation_env.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
import os
7+
import sys
8+
9+
import gymnasium as gym
10+
import numpy as np
11+
12+
13+
class EvaluationEnv(gym.Wrapper):
    """Wrapper tracking episode returns and handling final evaluation.

    The wrapper seeds the environment once during construction (reseeding via
    `reset` is forbidden), accumulates per-episode returns, periodically
    reports mean returns to stderr, and once `start_evaluation=True` is
    passed to `reset`, terminates the process after `evaluate_for` episodes
    with a final mean-return report.
    """
    def __init__(self, env, seed=None, render_each=0, evaluate_for=100, report_each=10):
        super().__init__(env)
        self._render_each = render_each
        self._evaluate_for = evaluate_for
        self._report_each = report_each
        # Verbose per-episode returns in reports unless VERBOSE is "" or "0".
        self._report_verbose = os.getenv("VERBOSE", "1") not in ["", "0"]

        if all(hasattr(env.unwrapped, member) for member in ["ale", "seed_game", "load_game"]):  # Seed ALE.
            self.unwrapped.seed_game(seed)
            self.unwrapped.load_game()
        # Seed the environment and both spaces exactly once, here.
        gym.Env.reset(self.unwrapped, seed=seed)
        self.action_space.seed(seed)
        self.observation_space.seed(seed)
        # Expose expert-data accessors of the wrapped env, when present.
        for passthrough in ["expert_trajectory", "expert_episode"]:
            if hasattr(env, passthrough):
                setattr(self, passthrough, getattr(env, passthrough))
            elif hasattr(env.unwrapped, passthrough):
                setattr(self, passthrough, getattr(env.unwrapped, passthrough))

        self._episode_running = False
        self._episode_returns = []
        # Episode index at which evaluation started, or None if not evaluating.
        self._evaluating_from = None
        self._original_render_mode = env.render_mode
        # pygame is only needed to process the QUIT event during rendering.
        self._pygame = __import__("pygame") if self._render_each else None

    @property
    def episode(self):
        # Number of finished episodes so far.
        return len(self._episode_returns)

    def reset(self, *, start_evaluation=False, logging=True, seed=None, options=None):
        """Reset the environment; `start_evaluation=True` starts final evaluation.

        Raises:
            RuntimeError: When `seed` is given, or when resetting a running
                episode after evaluation has started.
        """
        # Both flags may alternatively be passed through `options`.
        start_evaluation = start_evaluation or (options or {}).get("start_evaluation", False)
        logging = logging and (options or {}).get("logging", True)

        if seed is not None:
            raise RuntimeError("The EvaluationEnv cannot be reseeded")
        if self._evaluating_from is not None and self._episode_running:
            raise RuntimeError("Cannot reset a running episode after `start_evaluation=True`")
        if start_evaluation and self._evaluating_from is None:
            self._evaluating_from = self.episode

        # Render every `render_each`-th episode by switching to "human" mode.
        if logging and self._render_each and (self.episode + 1) % self._render_each == 0:
            self.unwrapped.render_mode = "human"
        elif self._render_each:
            self.unwrapped.render_mode = self._original_render_mode

        self._episode_running = True
        # Track the return only when logging or evaluating.
        self._episode_return = 0 if logging or self._evaluating_from is not None else None
        return super().reset(options=options)

    def step(self, action):
        """Perform a step, accumulating the return and reporting when due."""
        if not self._episode_running:
            raise RuntimeError("Cannot run `step` on environments without an active episode, run `reset` first")

        observation, reward, terminated, truncated, info = super().step(action)
        done = terminated or truncated

        self._episode_running = not done
        if self._episode_return is not None:
            self._episode_return += reward
        if self._episode_return is not None and done:
            self._episode_returns.append(self._episode_return)

            # Periodic progress report on stderr.
            if self._report_each and self.episode % self._report_each == 0:
                print("Episode {}, mean {}-episode return {:.2f} +-{:.2f}{}".format(
                    self.episode, self._evaluate_for, np.mean(self._episode_returns[-self._evaluate_for:]),
                    np.std(self._episode_returns[-self._evaluate_for:]), "" if not self._report_verbose else
                    ", returns " + " ".join(map("{:g}".format, self._episode_returns[-self._report_each:]))),
                    file=sys.stderr, flush=True)
            # After `evaluate_for` evaluation episodes, report and exit.
            if self._evaluating_from is not None and self.episode >= self._evaluating_from + self._evaluate_for:
                print("The mean {}-episode return after evaluation {:.2f} +-{:.2f}".format(
                    self._evaluate_for, np.mean(self._episode_returns[-self._evaluate_for:]),
                    np.std(self._episode_returns[-self._evaluate_for:])), flush=True)
                self.close()
                sys.exit(0)

        # Allow stopping the "human" rendering via the pygame window close button.
        if self._pygame and self.unwrapped.render_mode == "human" and self._pygame.get_init():
            if self._pygame.event.get(self._pygame.QUIT):
                self.unwrapped.render_mode = self._original_render_mode

        return observation, reward, terminated, truncated, info
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# This file is part of NPFL139 <http://github.com/ufal/npfl139/>.
2+
#
3+
# This Source Code Form is subject to the terms of the Mozilla Public
4+
# License, v. 2.0. If a copy of the MPL was not distributed with this
5+
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6+
from collections.abc import Callable
7+
import math
8+
from typing import Any
9+
10+
import torch
11+
12+
13+
class KerasParameterInitialization:
    """Replacement `reset_parameters` methods mimicking Keras initializers.

    Each method is meant to be assigned as the `reset_parameters` of the
    corresponding `torch.nn` module class, per the `overrides` mapping.
    """
    def reset_parameters_linear(self) -> None:
        # Keras default for dense/conv layers: Xavier uniform weights, zero biases.
        torch.nn.init.xavier_uniform_(self.weight)
        if self.bias is not None:
            torch.nn.init.zeros_(self.bias)

    def reset_parameters_bilinear(self) -> None:
        # Keras does not have a Bilinear layer. But we analogously use
        # the Xavier uniform initialization, where
        # - the fan_out for each out_feature is in_feature1 * in_feature2
        # - the fan_in for each in_feature1 is out_feature * in_feature2
        # - the fan_in for each in_feature2 is out_feature * in_feature1
        # - the overall fan_in is computed as a weighted average of the above two as
        #   (2 * out_feature * in_feature1 * in_feature2) / (in_feature1 + in_feature2)
        out, in1, in2 = self.weight.shape
        fan_in = (2 * out * in1 * in2) / (in1 + in2)
        fan_out = in1 * in2
        bound = math.sqrt(6 / (fan_in + fan_out))
        torch.nn.init.uniform_(self.weight, -bound, bound)
        if self.bias is not None:
            torch.nn.init.zeros_(self.bias)

    def reset_parameters_rnn(self) -> None:
        # Keras RNN defaults: Xavier uniform input weights, orthogonal
        # recurrent weights, zero biases.
        for name, parameter in self.named_parameters():
            if "weight_ih" in name:
                torch.nn.init.xavier_uniform_(parameter)
            elif "weight_hh" in name:
                torch.nn.init.orthogonal_(parameter)
            elif "bias" in name:
                torch.nn.init.zeros_(parameter)
                if isinstance(self, (torch.nn.LSTM, torch.nn.LSTMCell)):  # Set LSTM forget gate bias to 1
                    parameter.data[self.hidden_size:self.hidden_size * 2] = 1

    def reset_parameters_embedding(self) -> None:
        # Keras default for embeddings: uniform [-0.05, 0.05], with the
        # padding_idx row (if any) kept zero.
        torch.nn.init.uniform_(self.weight, -0.05, 0.05)
        self._fill_padding_idx_with_zero()

    # Mapping from module classes to the replacement `reset_parameters` method.
    overrides: dict[type[torch.nn.Module], Callable] = {
        torch.nn.Linear: reset_parameters_linear,
        torch.nn.Conv1d: reset_parameters_linear,
        torch.nn.Conv2d: reset_parameters_linear,
        torch.nn.Conv3d: reset_parameters_linear,
        torch.nn.ConvTranspose1d: reset_parameters_linear,
        torch.nn.ConvTranspose2d: reset_parameters_linear,
        torch.nn.ConvTranspose3d: reset_parameters_linear,
        torch.nn.Bilinear: reset_parameters_bilinear,
        torch.nn.RNN: reset_parameters_rnn,
        torch.nn.RNNCell: reset_parameters_rnn,
        torch.nn.LSTM: reset_parameters_rnn,
        torch.nn.LSTMCell: reset_parameters_rnn,
        torch.nn.GRU: reset_parameters_rnn,
        torch.nn.GRUCell: reset_parameters_rnn,
        torch.nn.Embedding: reset_parameters_embedding,
        torch.nn.EmbeddingBag: reset_parameters_embedding,
    }
68+
69+
70+
class KerasNormalizationLayers:
    """Helpers for adjusting default arguments of normalization layers."""

    @staticmethod
    def override_default_argument_value(func: Callable, name: str, default: Any) -> None:
        """Replace the default value of the argument `name` of `func` by `default`."""
        code = func.__code__
        # The trailing positional arguments are the ones carrying defaults.
        default_names = code.co_varnames[:code.co_argcount][-len(func.__defaults__):]
        assert name in default_names, f"Argument {name} not found in {func.__name__} arguments"
        new_defaults = []
        for arg_name, arg_value in zip(default_names, func.__defaults__):
            new_defaults.append(default if arg_name == name else arg_value)
        func.__defaults__ = tuple(new_defaults)

    # All batch normalization layer classes.
    batch_norms = [
        torch.nn.BatchNorm1d,
        torch.nn.BatchNorm2d,
        torch.nn.BatchNorm3d,
        torch.nn.LazyBatchNorm1d,
        torch.nn.LazyBatchNorm2d,
        torch.nn.LazyBatchNorm3d,
        torch.nn.SyncBatchNorm,
    ]

    # Batch normalization plus the remaining normalization layer classes.
    all_norms = batch_norms + [
        torch.nn.LayerNorm,
        torch.nn.GroupNorm,
    ]
94+
95+
96+
def global_keras_initializers(
    parameter_initialization: bool = True,
    batchnorm_momentum_override: float | None = 0.01,
    norm_layer_epsilon_override: float | None = 0.001,
) -> None:
    """Change default PyTorch initializers to Keras defaults.

    The following initializers are used:

    - `Linear`, `Conv1d`, `Conv2d`, `Conv3d`, `ConvTranspose1d`, `ConvTranspose2d`, `ConvTranspose3d`, `Bilinear`:
      Xavier uniform for weights, zeros for biases.
    - `Embedding`, `EmbeddingBag`: Uniform [-0.05, 0.05] for weights.
    - `RNN`, `RNNCell`, `LSTM`, `LSTMCell`, `GRU`, `GRUCell`: Xavier uniform for input weights,
      orthogonal for recurrent weights, zeros for biases (with LSTM forget gate bias set to 1).

    Furthermore, for batch normalization layers, the default momentum value is changed
    from 0.1 to the Keras default of 0.01 (or any other value specified).

    Finally, for batch normalization, layer normalization, and group normalization layers,
    the default epsilon value is changed from 1e-5 to the Keras default of 1e-3
    (or any other value specified).

    Parameters:
      parameter_initialization: If True, override the default PyTorch initializers with Keras defaults.
      batchnorm_momentum_override: If not None, override the default value of batch normalization
        momentum from 0.1 to this value.
      norm_layer_epsilon_override: If not None, override the default value of epsilon
        for batch normalization, layer normalization, and group normalization layers from
        1e-5 to this value.
    """
    if parameter_initialization:
        # Monkey-patch `reset_parameters` of every listed module class.
        for module_class, replacement in KerasParameterInitialization.overrides.items():
            module_class.reset_parameters = replacement

    if batchnorm_momentum_override is not None:
        # Patch the batch normalization classes and their known subclasses.
        for base in KerasNormalizationLayers.batch_norms:
            for batch_norm in [base, *base.__subclasses__()]:
                KerasNormalizationLayers.override_default_argument_value(
                    batch_norm.__init__, "momentum", batchnorm_momentum_override
                )

    if norm_layer_epsilon_override is not None:
        # Patch all normalization classes and their known subclasses.
        for base in KerasNormalizationLayers.all_norms:
            for norm_layer in [base, *base.__subclasses__()]:
                KerasNormalizationLayers.override_default_argument_value(
                    norm_layer.__init__, "eps", norm_layer_epsilon_override
                )

0 commit comments

Comments
 (0)