update influence docstring (#881)

aobo-y · facebook-github-bot · commit b795e83e504c · 2022-03-02T12:59:47.000-08:00
Summary: as title Pull Request resolved: #881 Reviewed By: 99warriors Differential Revision: D34537725 Pulled By: aobo-y fbshipit-source-id: fd4671032ac7824be66ea42194d1468718ba04d3
diff --git a/captum/influence/_core/similarity_influence.py b/captum/influence/_core/similarity_influence.py
@@ -109,16 +109,15 @@ def __init__(
                     we pass in a Tensor with 3 examples, i.e. batch_size_2 = 3. Also,
                     suppose that our inputs and intermediate activations throughout the
                     model will have dimension (N, C, H, W). Then, the feature dimensions
-                    should be flattened within this function. For example, let
+                    should be flattened within this function. For example::
 
-                    ```
-                    av_test.shape = torch.Size([3, N, C, H, W])
-                    av_src.shape = torch.Size([16, N, C, H, W])
-                    ```
-
-                    Then, using `torch.view(av_test.shape[0], -1)` we have
-
-                    `av_test.shape = torch.Size([3, N x C x H x W])
+                        >>> av_test.shape
+                        torch.Size([3, N, C, H, W])
+                        >>> av_src.shape
+                        torch.Size([16, N, C, H, W])
+                        >>> av_test = av_test.view(av_test.shape[0], -1)
+                        >>> av_test.shape
+                        torch.Size([3, N x C x H x W])
 
                     and similarly for av_src. The similarity_metric should then use
                     these flattened tensors to return the pairwise similarity matrix.
@@ -172,7 +171,7 @@ def influence(  # type: ignore[override]
                     first dimension in `inputs` tensor or tuple of tensors corresponds
                     to the batch size. A tuple of tensors is only passed in if this
                     is the input form that `module` accepts.
-            top_k (int): The number of top-matchinig activations to return
+            top_k (int): The number of top-matching activations to return
             additional_forward_args (optional):  Additional arguments that will be
                     passed to forward_func after inputs.
             load_src_from_disk (bool): Loads activations for `influence_src_dataset`
@@ -190,18 +189,19 @@ def influence(  # type: ignore[override]
                     implementation of `DataInfluence` abstract class.
 
         Returns:
-            influences (dictionary): Returns the influential instances retrieved from
-                    `influence_src_dataset` for each test example represented through a
-                    tensor or a tuple of tensor in `inputs`. Returned influential
-                    examples are represented as dict, with keys corresponding to
-                    the layer names passed in `layers`. Each value in the dict is a
-                    tuple containing the indices and values for the top k similarities
-                    from `influence_src_dataset` by the chosen metric. The first value
-                    in the tuple corresponds to the indices corresponding to the top k
-                    most similar examples, and the second value is the similarity score.
-                    The batch dimension corresponds to the batch dimension of `inputs`.
-                    If inputs.shape[0] == 5, then dict[`layer_name`][0].shape[0] == 5.
-                    These tensors will be of shape (inputs.shape[0], top_k).
+
+            influences (dict): Returns the influential instances retrieved from
+            `influence_src_dataset` for each test example represented through a
+            tensor or a tuple of tensors in `inputs`. Returned influential
+            examples are represented as dict, with keys corresponding to
+            the layer names passed in `layers`. Each value in the dict is a
+            tuple containing the indices and values for the top k similarities
+            from `influence_src_dataset` by the chosen metric. The first value
+            in the tuple corresponds to the indices corresponding to the top k
+            most similar examples, and the second value is the similarity score.
+            The batch dimension corresponds to the batch dimension of `inputs`.
+            If inputs.shape[0] == 5, then dict[`layer_name`][0].shape[0] == 5.
+            These tensors will be of shape (inputs.shape[0], top_k).
         """
         inputs_batch_size = (
             inputs[0].shape[0] if isinstance(inputs, tuple) else inputs.shape[0]
diff --git a/captum/influence/_core/tracincp.py b/captum/influence/_core/tracincp.py
@@ -249,25 +249,26 @@ def influence(  # type: ignore[override]
     ) -> Union[Tensor, KMostInfluentialResults]:
         r"""
         This is the key method of this class, and can be run in 3 different modes,
-        where the mode that is run depends on the arguments passed to this method.
+        where the mode that is run depends on the arguments passed to this method:
+
         - self influence mode: This mode is used if `inputs` is None. This mode
-                computes the self influence scores for every example in
-                the training dataset `influence_src_dataset`.
+          computes the self influence scores for every example in
+          the training dataset `influence_src_dataset`.
         - influence score mode: This mode is used if `inputs` is not None, and `k` is
-                None. This mode computes the influence score of every example in
-                training dataset `influence_src_dataset` on every example in the test
-                batch represented by `inputs` and `targets`.
+          None. This mode computes the influence score of every example in
+          training dataset `influence_src_dataset` on every example in the test
+          batch represented by `inputs` and `targets`.
         - k-most influential mode: This mode is used if `inputs` is not None, and
-                `k` is not None, and an int. This mode computes the proponents or
-                opponents of every example in the test batch represented by `inputs`
-                and `targets`. In particular, for each test example in the test batch,
-                this mode computes its proponents (resp. opponents), which are the
-                indices in the training dataset `influence_src_dataset` of the training
-                examples with the `k` highest (resp. lowest) influence scores on the
-                test example. Proponents are computed if `proponents` is True.
-                Otherwise, opponents are computed. For each test example, this method
-                also returns the actual influence score of each proponent (resp.
-                opponent) on the test example.
+          `k` is not None, and an int. This mode computes the proponents or
+          opponents of every example in the test batch represented by `inputs`
+          and `targets`. In particular, for each test example in the test batch,
+          this mode computes its proponents (resp. opponents), which are the
+          indices in the training dataset `influence_src_dataset` of the training
+          examples with the `k` highest (resp. lowest) influence scores on the
+          test example. Proponents are computed if `proponents` is True.
+          Otherwise, opponents are computed. For each test example, this method
+          also returns the actual influence score of each proponent (resp.
+          opponent) on the test example.
 
         Args:
             inputs (Any, optional): If not provided or `None`, the self influence mode
@@ -300,33 +301,34 @@ def influence(  # type: ignore[override]
 
         Returns:
             The return value of this method depends on which mode is run.
+
             - self influence mode: if this mode is run (`inputs` is None), returns a 1D
-                    tensor of self influence scores over training dataset
-                    `influence_src_dataset`. The length of this tensor is the number of
-                    examples in `influence_src_dataset`, regardless of whether it is a
-                    Dataset or DataLoader.
+              tensor of self influence scores over training dataset
+              `influence_src_dataset`. The length of this tensor is the number of
+              examples in `influence_src_dataset`, regardless of whether it is a
+              Dataset or DataLoader.
             - influence score mode: if this mode is run (`inputs is not None, `k` is
-                    None), returns a 2D tensor `influence_scores` of shape
-                    `(input_size, influence_src_dataset_size)`, where `input_size` is
-                    the number of examples in the test batch, and
-                    `influence_src_dataset_size` is the number of examples in
-                    training dataset `influence_src_dataset`. In other words,
-                    `influence_scores[i][j]` is the influence score of the `j`-th
-                    example in `influence_src_dataset` on the `i`-th example in the
-                    test batch.
-           - k-most influential mode: if this mode is run (`inputs` is not None,
-                    `k` is an int), returns a namedtuple `(indices, influence_scores)`.
-                    `indices` is a 2D tensor of shape `(input_size, k)`, where
-                    `input_size` is the number of examples in the test batch. If
-                    computing proponents (resp. opponents), `indices[i][j]` is the
-                    index in training dataset `influence_src_dataset` of the example
-                    with the `j`-th highest (resp. lowest) influence score (out of the
-                    examples in `influence_src_dataset`) on the `i`-th example in the
-                    test batch. `influence_scores` contains the corresponding influence
-                    scores. In particular, `influence_scores[i][j]` is the influence
-                    score of example `indices[i][j]` in `influence_src_dataset` on
-                    example `i` in the test batch represented by `inputs` and
-                    `targets`.
+              None), returns a 2D tensor `influence_scores` of shape
+              `(input_size, influence_src_dataset_size)`, where `input_size` is
+              the number of examples in the test batch, and
+              `influence_src_dataset_size` is the number of examples in
+              training dataset `influence_src_dataset`. In other words,
+              `influence_scores[i][j]` is the influence score of the `j`-th
+              example in `influence_src_dataset` on the `i`-th example in the
+              test batch.
+            - k-most influential mode: if this mode is run (`inputs` is not None,
+              `k` is an int), returns a namedtuple `(indices, influence_scores)`.
+              `indices` is a 2D tensor of shape `(input_size, k)`, where
+              `input_size` is the number of examples in the test batch. If
+              computing proponents (resp. opponents), `indices[i][j]` is the
+              index in training dataset `influence_src_dataset` of the example
+              with the `j`-th highest (resp. lowest) influence score (out of the
+              examples in `influence_src_dataset`) on the `i`-th example in the
+              test batch. `influence_scores` contains the corresponding influence
+              scores. In particular, `influence_scores[i][j]` is the influence
+              score of example `indices[i][j]` in `influence_src_dataset` on
+              example `i` in the test batch represented by `inputs` and
+              `targets`.
         """
         _inputs = _format_inputs(inputs, unpack_inputs)
 
diff --git a/captum/influence/_core/tracincp_fast_rand_proj.py b/captum/influence/_core/tracincp_fast_rand_proj.py
@@ -146,7 +146,7 @@ def __init__(
 
         self.vectorize = vectorize
 
-        "TODO: restore prior state"
+        # TODO: restore prior state
         self.final_fc_layer = final_fc_layer
         if isinstance(self.final_fc_layer, str):
             self.final_fc_layer = _get_module_from_name(model, self.final_fc_layer)
@@ -673,22 +673,25 @@ def influence(  # type: ignore[override]
     ) -> Union[Tensor, KMostInfluentialResults]:
         r"""
         This is the key method of this class, and can be run in 2 different modes,
-        where the mode that is run depends on the arguments passed to this method.
+        where the mode that is run depends on the arguments passed to this method:
+
         - influence score mode: This mode is used if `inputs` is not None, and `k` is
-                None. This mode computes the influence score of every example in
-                training dataset `influence_src_dataset` on every example in the test
-                batch represented by `inputs` and `targets`.
+          None. This mode computes the influence score of every example in
+          training dataset `influence_src_dataset` on every example in the test
+          batch represented by `inputs` and `targets`.
+
         - k-most influential mode: This mode is used if `inputs` is not None, and
-                `k` is not None, and an int. This mode computes the proponents or
-                opponents of every example in the test batch represented by `inputs`
-                and `targets`. In particular, for each test example in the test batch,
-                this mode computes its proponents (resp. opponents), which are the
-                indices in the training dataset `influence_src_dataset` of the training
-                examples with the `k` highest (resp. lowest) influence scores on the
-                test example. Proponents are computed if `proponents` is True.
-                Otherwise, opponents are computed. For each test example, this method
-                also returns the actual influence score of each proponent (resp.
-                opponent) on the test example.
+          `k` is not None, and an int. This mode computes the proponents or
+          opponents of every example in the test batch represented by `inputs`
+          and `targets`. In particular, for each test example in the test batch,
+          this mode computes its proponents (resp. opponents), which are the
+          indices in the training dataset `influence_src_dataset` of the training
+          examples with the `k` highest (resp. lowest) influence scores on the
+          test example. Proponents are computed if `proponents` is True.
+          Otherwise, opponents are computed. For each test example, this method
+          also returns the actual influence score of each proponent (resp.
+          opponent) on the test example.
+
         Note that unlike `TracInCPFast`, this class should *not* be run in self
         influence mode.  To compute self influence scores when only considering
         gradients in the last fully-connected layer, please use `TracInCPFast` instead.
@@ -723,24 +726,26 @@ def influence(  # type: ignore[override]
                     Default: True
 
         Returns:
-            The return value of this method depends on which mode is run.
+
+            The return value of this method depends on which mode is run:
+
             - influence score mode: if this mode is run (`inputs is not None, `k` is
-                    None), returns a 2D tensor `influence_scores` of shape
-                    `(input_size, influence_src_dataset_size)`, where `input_size` is
-                    the number of examples in the test batch, and
-                    `influence_src_dataset_size` is the number of examples in
-                    training dataset `influence_src_dataset`. In other words,
-                    `influence_scores[i][j]` is the influence score of the `j`-th
-                    example in `influence_src_dataset` on the `i`-th example in the
-                    test batch.
-            - k-most influential mode: if this mode is run (`inputs` is not None,
-                    `k` is an int), returns `indices`, which is a 2D tensor of shape
-                    `(input_size, k)`, where `input_size` is the number of examples
-                    in the test batch. If computing proponents (resp. opponents),
-                    `indices[i][j]` is the index in training dataset
-                    `influence_src_dataset` of the example with the `j`-th highest
-                    (resp. lowest) influence score (out of the examples in
-                    `influence_src_dataset`) on the `i`-th example in the test batch.
+              None), returns a 2D tensor `influence_scores` of shape
+              `(input_size, influence_src_dataset_size)`, where `input_size` is
+              the number of examples in the test batch, and
+              `influence_src_dataset_size` is the number of examples in
+              training dataset `influence_src_dataset`. In other words,
+              `influence_scores[i][j]` is the influence score of the `j`-th
+              example in `influence_src_dataset` on the `i`-th example in the
+              test batch.
+            - k-most influential mode: if this mode is run (`inputs` is not None,
+              `k` is an int), returns `indices`, which is a 2D tensor of shape
+              `(input_size, k)`, where `input_size` is the number of examples
+              in the test batch. If computing proponents (resp. opponents),
+              `indices[i][j]` is the index in training dataset
+              `influence_src_dataset` of the example with the `j`-th highest
+              (resp. lowest) influence score (out of the examples in
+              `influence_src_dataset`) on the `i`-th example in the test batch.
         """
         msg = (
             "Since `inputs` is None, this suggests `TracInCPFastRandProj` is being "