[rllib] Use nested scope in custom loss example

ericl · web-flow · commit 30bf8e46c78c · 2019-03-04T18:29:22.000-08:00
diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh
@@ -353,6 +353,9 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/python/ray/rllib/tests/run_silent.sh examples/cartpole_lstm.py --stop=200 --use-prev-action-reward
 
+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    /ray/python/ray/rllib/tests/run_silent.sh examples/custom_loss.py --iters=2
+
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/python/ray/rllib/tests/run_silent.sh examples/custom_metrics_and_callbacks.py --num-iters=2
 
diff --git a/python/ray/rllib/examples/custom_loss.py b/python/ray/rllib/examples/custom_loss.py
@@ -31,16 +31,17 @@
     type=str,
     default=os.path.join(
         os.path.dirname(os.path.abspath(__file__)),
-        "../test/data/cartpole_small"))
+        "../tests/data/cartpole_small"))
 
 
 class CustomLossModel(Model):
     """Custom model that adds an imitation loss on top of the policy loss."""
 
     def _build_layers_v2(self, input_dict, num_outputs, options):
         self.obs_in = input_dict["obs"]
-        self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
-                                           num_outputs, options)
+        with tf.variable_scope("shared", reuse=tf.AUTO_REUSE):
+            self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
+                                               num_outputs, options)
         return self.fcnet.outputs, self.fcnet.last_layer
 
     def custom_loss(self, policy_loss, loss_inputs):
@@ -49,12 +50,10 @@ def custom_loss(self, policy_loss, loss_inputs):
         input_ops = reader.tf_input_ops()
 
         # define a secondary loss by building a graph copy with weight sharing
-        with tf.variable_scope(
-                self.scope, reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
-            logits, _ = self._build_layers_v2({
-                "obs": restore_original_dimensions(input_ops["obs"],
-                                                   self.obs_space)
-            }, self.num_outputs, self.options)
+        logits, _ = self._build_layers_v2({
+            "obs": restore_original_dimensions(input_ops["obs"],
+                                               self.obs_space)
+        }, self.num_outputs, self.options)
 
         # You can also add self-supervised losses easily by referencing tensors
         # created during _build_layers_v2(). For example, an autoencoder-style
diff --git a/python/ray/rllib/offline/input_reader.py b/python/ray/rllib/offline/input_reader.py
@@ -45,12 +45,9 @@ def tf_input_ops(self, queue_size=1):
             ...     def custom_loss(self, policy_loss, loss_inputs):
             ...         reader = JsonReader(...)
             ...         input_ops = reader.tf_input_ops()
-            ...         with tf.variable_scope(
-            ...                 self.scope, reuse=tf.AUTO_REUSE,
-            ...                 auxiliary_name_scope=False):
-            ...             logits, _ = self._build_layers_v2(
-            ...                 {"obs": input_ops["obs"]},
-            ...                 self.num_outputs, self.options)
+            ...         logits, _ = self._build_layers_v2(
+            ...             {"obs": input_ops["obs"]},
+            ...             self.num_outputs, self.options)
             ...         il_loss = imitation_loss(logits, input_ops["action"])
             ...         return policy_loss + il_loss