Add shared field to adanet.Subnetwork.

cweill · cweill · commit e773bad4aa22 · 2018-11-29T15:03:12.000-05:00
This deprecates, replaces, and is more flexible than `persisted_tensors`.

TODO: Replace `persisted_tensors` with `shared` in examples and tutorials.
PiperOrigin-RevId: 223382387
diff --git a/RELEASE.md b/RELEASE.md
@@ -15,6 +15,7 @@ limitations under the License.
 
 # Current version (0.4.0-dev)
  * Under development.
+ * Add `shared` field to `adanet.Subnetwork` to deprecate, replace, and be more flexible than `persisted_tensors`.
  * Officially support multi-head learning with or without dict labels.
  * Rebuild the ensemble across iterations in Python without a frozen graph. This allows users to share more than `Tensors` between iterations including Python primitives, objects, and lambdas for greater flexibility. Eliminating reliance on a `MetaGraphDef` proto also eliminates I/O allowing for faster training, and better future-proofing.
  * Allow users to pass custom eval metrics when constructing an `adanet.Estimator`.
diff --git a/adanet/core/estimator_test.py b/adanet/core/estimator_test.py
@@ -120,7 +120,8 @@ def build_subnetwork(self,
         last_layer=last_layer if self._return_penultimate_layer else logits,
         logits=logits,
         complexity=3,
-        persisted_tensors=persisted_tensors)
+        persisted_tensors=persisted_tensors,
+        shared=persisted_tensors)
 
   def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                                 iteration_step, summary, previous_ensemble):
diff --git a/adanet/core/subnetwork/BUILD b/adanet/core/subnetwork/BUILD
@@ -18,6 +18,8 @@ py_library(
 py_library(
     name = "generator",
     srcs = ["generator.py"],
+    deps = [
+    ],
 )
 
 py_test(
diff --git a/adanet/core/subnetwork/generator.py b/adanet/core/subnetwork/generator.py
@@ -22,6 +22,8 @@
 import abc
 import collections
 
+from tensorflow.python.util import deprecation
+
 
 def _validate_nested_persisted_tensors(persisted_tensors):
   """Raises a ValueError when a nested dict is empty in persisted_tensors."""
@@ -37,15 +39,23 @@ def _validate_nested_persisted_tensors(persisted_tensors):
 class Subnetwork(
     collections.namedtuple(
         "Subnetwork",
-        ["last_layer", "logits", "complexity", "persisted_tensors"])):
+        ["last_layer", "logits", "complexity", "persisted_tensors", "shared"])):
   """An AdaNet subnetwork.
 
   In the AdaNet paper, an `adanet.Subnetwork` is called a 'subnetwork',
   and indicated by 'h'. A collection of weighted subnetworks form an AdaNet
   ensemble.
   """
 
-  def __new__(cls, last_layer, logits, complexity, persisted_tensors):
+  @deprecation.deprecated_args(
+      None, "`persisted_tensors` is deprecated, please use `shared` instead.",
+      "persisted_tensors")
+  def __new__(cls,
+              last_layer,
+              logits,
+              complexity,
+              persisted_tensors=None,
+              shared=None):
     """Creates a validated `Subnetwork` instance.
 
     Args:
@@ -58,23 +68,25 @@ def __new__(cls, last_layer, logits, complexity, persisted_tensors):
         This field is represented by 'h' in the AdaNet paper.
       logits: `Tensor` logits or dict of string to `Tensor` logits (for
         multi-head) for training the subnetwork. NOTE: These logits are not used
-        in the ensemble's outputs if the mixture weight type is `MATRIX`,
-        instead AdaNet learns its own logits (mixture weights) from the
-        subnetwork's `last_layers` with complexity regularization. The logits
-        are used in the ensemble only when the mixture weights type is `SCALAR`
-        or `VECTOR`. Even though the logits are not used in the ensemble in some
-        cases, they should always be supplied as adanet uses the logits to train
-        the subnetworks.
+          in the ensemble's outputs if the mixture weight type is `MATRIX`,
+          instead AdaNet learns its own logits (mixture weights) from the
+          subnetwork's `last_layers` with complexity regularization. The logits
+          are used in the ensemble only when the mixture weights type is
+          `SCALAR` or `VECTOR`. Even though the logits are not used in the
+          ensemble in some cases, they should always be supplied as adanet uses
+          the logits to train the subnetworks.
       complexity: A scalar `Tensor` representing the complexity of the
         subnetwork's architecture. It is used for choosing the best subnetwork
         at each iteration, and for regularizing the weighted outputs of more
         complex subnetworks.
-      persisted_tensors: Nested dictionary of string to `Tensor` to persist
-        across iterations. At the end of an iteration, the `Tensors` will be
-        available to subnetworks in the next iterations, whereas others that are
-        not part of the `Subnetwork` will be pruned. This allows later
-        `Subnetworks` to dynamically build upon arbitrary `Tensors` from
-        previous `Subnetworks`.
+      persisted_tensors: DEPRECATED: see `shared`. Optional nested dictionary of
+        string to `Tensor` to persist across iterations. At the end of an
+        iteration, the `Tensors` will be available to subnetworks in the next
+        iterations, whereas others that are not part of the `Subnetwork` will be
+        pruned. This allows later `Subnetworks` to dynamically build upon
+        arbitrary `Tensors` from previous `Subnetworks`.
+      shared: Optional Python object, primitive, or function to share with
+        subnetworks within the same iteration or in future iterations.
 
     Returns:
       A validated `Subnetwork` object.
@@ -85,7 +97,7 @@ def __new__(cls, last_layer, logits, complexity, persisted_tensors):
       ValueError: If logits is a dict but last_layer is not.
       ValueError: If last_layer is a dict but logits is not.
       ValueError: If complexity is None.
-      ValueError: If persisted_tensors is not a dictionary.
+      ValueError: If persisted_tensors is present but not a dictionary.
       ValueError: If persisted_tensors contains an empty nested dictionary.
     """
 
@@ -99,15 +111,17 @@ def __new__(cls, last_layer, logits, complexity, persisted_tensors):
       raise ValueError("if last_layer is a dict logits must also be a dict")
     if complexity is None:
       raise ValueError("complexity not provided")
-    if not isinstance(persisted_tensors, dict):
-      raise ValueError("persisted_tensors must be a dict")
-    _validate_nested_persisted_tensors(persisted_tensors)
+    if persisted_tensors is not None:
+      if not isinstance(persisted_tensors, dict):
+        raise ValueError("persisted_tensors must be a dict")
+      _validate_nested_persisted_tensors(persisted_tensors)
     return super(Subnetwork, cls).__new__(
         cls,
         last_layer=last_layer,
         logits=logits,
         complexity=complexity,
-        persisted_tensors=persisted_tensors)
+        persisted_tensors=persisted_tensors,
+        shared=shared)
 
 
 class Builder(object):
diff --git a/adanet/core/subnetwork/generator_test.py b/adanet/core/subnetwork/generator_test.py
@@ -63,15 +63,24 @@ def build_mixture_weights_train_op(self, loss, var_list, logits, labels,
 class SubnetworkTest(parameterized.TestCase, tf.test.TestCase):
 
   @parameterized.named_parameters({
+      "testcase_name": "no_persisted_tensors_nor_shared",
+      "last_layer": dummy_tensor(),
+      "logits": dummy_tensor(),
+      "complexity": dummy_tensor(),
+  }, {
       "testcase_name": "empty_persisted_tensors",
       "last_layer": dummy_tensor(),
       "logits": dummy_tensor(),
       "complexity": dummy_tensor(),
       "persisted_tensors": {},
   }, {
       "testcase_name": "dict_logits_and_last_layer",
-      "last_layer": {"head1": dummy_tensor()},
-      "logits": {"head1": dummy_tensor()},
+      "last_layer": {
+          "head1": dummy_tensor()
+      },
+      "logits": {
+          "head1": dummy_tensor()
+      },
       "complexity": dummy_tensor(),
       "persisted_tensors": {},
   }, {
@@ -96,14 +105,45 @@ class SubnetworkTest(parameterized.TestCase, tf.test.TestCase):
               },
           },
       },
+  }, {
+      "testcase_name": "shared_primitive",
+      "last_layer": dummy_tensor(),
+      "logits": dummy_tensor(),
+      "complexity": dummy_tensor(),
+      "shared": 1,
+  }, {
+      "testcase_name": "shared_dict",
+      "last_layer": dummy_tensor(),
+      "logits": dummy_tensor(),
+      "complexity": dummy_tensor(),
+      "shared": {},
+  }, {
+      "testcase_name": "shared_lambda",
+      "last_layer": dummy_tensor(),
+      "logits": dummy_tensor(),
+      "complexity": dummy_tensor(),
+      "shared": lambda x: x,
+  }, {
+      "testcase_name": "shared_object",
+      "last_layer": dummy_tensor(),
+      "logits": dummy_tensor(),
+      "complexity": dummy_tensor(),
+      "shared": dummy_tensor(),
   })
-  def test_new(self, last_layer, logits, complexity, persisted_tensors):
+  def test_new(self,
+               last_layer,
+               logits,
+               complexity,
+               persisted_tensors=None,
+               shared=None):
     with self.test_session():
-      got = Subnetwork(last_layer, logits, complexity, persisted_tensors)
+      got = Subnetwork(last_layer, logits, complexity, persisted_tensors,
+                       shared)
       self.assertEqual(got.last_layer, last_layer)
       self.assertEqual(got.logits, logits)
       self.assertEqual(got.complexity, complexity)
       self.assertEqual(got.persisted_tensors, persisted_tensors)
+      self.assertEqual(got.shared, shared)
 
   @parameterized.named_parameters({
       "testcase_name": "none_last_layer",
@@ -123,12 +163,6 @@ def test_new(self, last_layer, logits, complexity, persisted_tensors):
       "logits": dummy_tensor(),
       "complexity": None,
       "persisted_tensors": {},
-  }, {
-      "testcase_name": "none_persisted_tensors",
-      "last_layer": dummy_tensor(),
-      "logits": dummy_tensor(),
-      "complexity": dummy_tensor(),
-      "persisted_tensors": None,
   }, {
       "testcase_name": "empty_list_persisted_tensors",
       "last_layer": dummy_tensor(),
@@ -168,12 +202,16 @@ def test_new(self, last_layer, logits, complexity, persisted_tensors):
   }, {
       "testcase_name": "only_dict_logits",
       "last_layer": dummy_tensor(),
-      "logits": {"head": dummy_tensor()},
+      "logits": {
+          "head": dummy_tensor()
+      },
       "complexity": dummy_tensor(),
       "persisted_tensors": {},
   }, {
       "testcase_name": "only_dict_last_layer",
-      "last_layer": {"head": dummy_tensor()},
+      "last_layer": {
+          "head": dummy_tensor()
+      },
       "logits": dummy_tensor(),
       "complexity": dummy_tensor(),
       "persisted_tensors": {},

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -18,6 +18,8 @@ py_library(` |
| `18` | `18` | `py_library(` |
| `19` | `19` | `name = "generator",` |
| `20` | `20` | `srcs = ["generator.py"],` |
|  | `21` | `+ deps = [` |
|  | `22` | `+ ],` |
| `21` | `23` | `)` |
| `22` | `24` |  |
| `23` | `25` | `py_test(` |