Skip to content

Commit dcff263

Browse files
ericl authored and pcmoritz committed
[rllib] Revert [rllib] Port DDPG to the build_tf_policy pattern (#5626)
1 parent 1823ea7 commit dcff263

File tree

6 files changed

+665
-506
lines changed

6 files changed

+665
-506
lines changed

python/ray/ray_constants.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def env_integer(key, default):
5454
# The maximum resource quantity that is allowed. TODO(rkn): This could be
5555
# relaxed, but the current implementation of the node manager will be slower
5656
# for large resource quantities due to bookkeeping of specific resource IDs.
57-
MAX_RESOURCE_QUANTITY = 10000
57+
MAX_RESOURCE_QUANTITY = 20000
5858

5959
# Each memory "resource" counts as this many bytes of memory.
6060
MEMORY_RESOURCE_UNIT_BYTES = 50 * 1024 * 1024

rllib/agents/ddpg/ddpg.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
# === Model ===
4242
# Apply a state preprocessor with spec given by the "model" config option
4343
# (like other RL algorithms). This is mostly useful if you have a weird
44-
# observation shape, like an image. Auto-enabled if a custom model is set.
44+
# observation shape, like an image. Disabled by default.
4545
"use_state_preprocessor": False,
4646
# Postprocess the policy network model output with these hidden layers. If
4747
# use_state_preprocessor is False, then these will be the *only* hidden
@@ -173,7 +173,7 @@ def make_exploration_schedule(config, worker_index):
173173
if config["per_worker_exploration"]:
174174
assert config["num_workers"] > 1, "This requires multiple workers"
175175
if worker_index >= 0:
176-
# Exploration constants from the Ape-X paper
176+
# FIXME: what do magic constants mean? (0.4, 7)
177177
max_index = float(config["num_workers"] - 1)
178178
exponent = 1 + worker_index / max_index * 7
179179
return ConstantSchedule(0.4**exponent)

0 commit comments

Comments
 (0)