
Commit 382ea27

Merge branch 'master' into ernie
2 parents 0dd0271 + 4ca5c2c

5 files changed: +109 -7 lines

captum/_utils/typing.py (+1)

@@ -24,6 +24,7 @@
 TupleOrTensorOrBoolGeneric = TypeVar(
     "TupleOrTensorOrBoolGeneric", Tuple[Tensor, ...], Tensor, bool
 )
+PassThroughOutputType = TypeVar("PassThroughOutputType")
 ModuleOrModuleList = TypeVar("ModuleOrModuleList", Module, List[Module])
 TargetType = Union[None, int, Tuple[int, ...], Tensor, List[Tuple[int, ...]], List[int]]
 BaselineTupleType = Union[None, Tuple[Union[Tensor, int, float], ...]]
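Note: PassThroughOutputType is an unconstrained TypeVar, so a function annotated with it for both input and output is declared to return exactly the type it was given. A standalone sketch (the passthrough helper is illustrative, not part of the commit):

from typing import TypeVar

PassThroughOutputType = TypeVar("PassThroughOutputType")

def passthrough(value: PassThroughOutputType) -> PassThroughOutputType:
    # Identity: a type checker infers passthrough(Tensor) -> Tensor,
    # passthrough(dict) -> dict, and so on.
    return value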

captum/attr/_utils/common.py (+1 -1)

@@ -364,7 +364,7 @@ def _find_output_mode_and_verify(
             "returns a scalar."
         )
     else:
-        agg_output_mode = False
+        agg_output_mode = perturbations_per_eval == 1
         if not allow_multi_outputs:
             assert (
                 isinstance(initial_eval, torch.Tensor) and initial_eval[0].numel() == 1
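Note: this branch previously pinned agg_output_mode to False; it now keys off perturbations_per_eval. A minimal sketch of just the new predicate (the function name infer_agg_output_mode is invented for illustration; the real logic lives inside _find_output_mode_and_verify, and the reading of its intent is inferred from the diff, not the commit message):

def infer_agg_output_mode(perturbations_per_eval: int) -> bool:
    # Sketch of the changed branch only: a non-scalar output can still be
    # treated as aggregated over the whole batch, but only when each model
    # call evaluates a single perturbation; batched perturbations rule it out.
    return perturbations_per_eval == 1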

captum/testing/helpers/basic_models.py (+77 -1)

@@ -7,6 +7,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from captum._utils.typing import PassThroughOutputType
 from torch import Tensor
 from torch.futures import Future

@@ -417,6 +418,76 @@ def forward(self, input1, input2, input3=None):
         return self.linear2(self.relu(self.linear1(embeddings))).sum(1)


+class GradientUnsupportedLayerOutput(nn.Module):
+    """
+    This layer is used to test the case where the model returns a layer that
+    is not supported by the gradient computation.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    @no_type_check
+    def forward(
+        self, unsupported_layer_output: PassThroughOutputType
+    ) -> PassThroughOutputType:
+        return unsupported_layer_output
+
+
+class BasicModel_GradientLayerAttribution(nn.Module):
+    def __init__(
+        self,
+        inplace: bool = False,
+        unsupported_layer_output: PassThroughOutputType = None,
+    ) -> None:
+        super().__init__()
+        # Linear 0 is simply identity transform
+        self.unsupported_layer_output = unsupported_layer_output
+        self.linear0 = nn.Linear(3, 3)
+        self.linear0.weight = nn.Parameter(torch.eye(3))
+        self.linear0.bias = nn.Parameter(torch.zeros(3))
+        self.linear1 = nn.Linear(3, 4)
+        self.linear1.weight = nn.Parameter(torch.ones(4, 3))
+        self.linear1.bias = nn.Parameter(torch.tensor([-10.0, 1.0, 1.0, 1.0]))
+
+        self.linear1_alt = nn.Linear(3, 4)
+        self.linear1_alt.weight = nn.Parameter(torch.ones(4, 3))
+        self.linear1_alt.bias = nn.Parameter(torch.tensor([-10.0, 1.0, 1.0, 1.0]))
+
+        self.relu = nn.ReLU(inplace=inplace)
+        self.relu_alt = nn.ReLU(inplace=False)
+        self.unsupportedLayer = GradientUnsupportedLayerOutput()
+
+        self.linear2 = nn.Linear(4, 2)
+        self.linear2.weight = nn.Parameter(torch.ones(2, 4))
+        self.linear2.bias = nn.Parameter(torch.tensor([-1.0, 1.0]))
+
+        self.linear3 = nn.Linear(4, 2)
+        self.linear3.weight = nn.Parameter(torch.ones(2, 4))
+        self.linear3.bias = nn.Parameter(torch.tensor([-1.0, 1.0]))
+
+    @no_type_check
+    def forward(self, x: Tensor, add_input: Optional[Tensor] = None) -> Tensor:
+        input = x if add_input is None else x + add_input
+        lin0_out = self.linear0(input)
+        lin1_out = self.linear1(lin0_out)
+        lin1_out_alt = self.linear1_alt(lin0_out)
+
+        if self.unsupported_layer_output is not None:
+            self.unsupportedLayer(self.unsupported_layer_output)
+            # unsupportedLayer is unused in the forward func.
+        self.relu_alt(
+            lin1_out_alt
+        )  # relu_alt's output is supported but it's unused in the forward func.
+
+        relu_out = self.relu(lin1_out)
+        lin2_out = self.linear2(relu_out)
+
+        lin3_out = self.linear3(lin1_out_alt).to(torch.int64)
+
+        return torch.cat((lin2_out, lin3_out), dim=1)
+
+
 class MultiRelu(nn.Module):
     def __init__(self, inplace: bool = False) -> None:
         super().__init__()

@@ -429,7 +500,11 @@ def forward(self, arg1: Tensor, arg2: Tensor) -> Tuple[Tensor, Tensor]:


 class BasicModel_MultiLayer(nn.Module):
-    def __init__(self, inplace: bool = False, multi_input_module: bool = False) -> None:
+    def __init__(
+        self,
+        inplace: bool = False,
+        multi_input_module: bool = False,
+    ) -> None:
         super().__init__()
         # Linear 0 is simply identity transform
         self.multi_input_module = multi_input_module

@@ -461,6 +536,7 @@ def forward(
         input = x if add_input is None else x + add_input
         lin0_out = self.linear0(input)
         lin1_out = self.linear1(lin0_out)
+
         if self.multi_input_module:
             relu_out1, relu_out2 = self.multi_relu(lin1_out, self.linear1_alt(input))
             relu_out = relu_out1 + relu_out2
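Note: BasicModel_GradientLayerAttribution routes an arbitrary, possibly non-Tensor payload through GradientUnsupportedLayerOutput, giving layer-attribution tests a module whose output gradients cannot flow through. A hypothetical usage sketch (the dict payload and input shapes are illustrative only, not from the commit):

import torch

# Default path: unsupportedLayer is constructed but never exercised.
model = BasicModel_GradientLayerAttribution()
out = model(torch.randn(2, 3))  # concatenated linear2/linear3 heads, shape (2, 4)

# Unsupported path: any non-Tensor payload makes hooks on model.unsupportedLayer
# observe an output the gradient machinery cannot handle.
model_bad = BasicModel_GradientLayerAttribution(
    unsupported_layer_output={"scores": torch.ones(2, 4)}
)
out_bad = model_bad(torch.randn(2, 3))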

tests/attr/test_data_parallel.py (+1 -1)

@@ -41,7 +41,7 @@
 """

 # Distributed Data Parallel env setup
-os.environ["MASTER_ADDR"] = "127.0.0.1"
+os.environ["MASTER_ADDR"] = "localhost"
 os.environ["MASTER_PORT"] = "29500"
 dist.init_process_group(backend="gloo", rank=0, world_size=1)
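Note: this hunk only swaps the rendezvous address. For context, a minimal sketch of the same single-process "gloo" setup with its matching teardown (destroy_process_group is the standard torch.distributed API; the teardown call is not part of this hunk):

import os
import torch.distributed as dist

os.environ["MASTER_ADDR"] = "localhost"  # rendezvous host for the "gloo" backend
os.environ["MASTER_PORT"] = "29500"      # any free port works
dist.init_process_group(backend="gloo", rank=0, world_size=1)
# ... run tests ...
dist.destroy_process_group()  # release the group when done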

tests/attr/test_shapley.py (+29 -4)

@@ -806,6 +806,30 @@ def func_future(*inp):
             lambda *inp: func_to_use(*inp), use_future=use_future
         )

+    @parameterized.expand([True, False])
+    def test_mutli_inp_shapley_batch_scalar_tensor_expanded(self, use_future) -> None:
+        def func(*inp):
+            sum_val = torch.sum(net(*inp)).item()
+            return torch.tensor([sum_val, sum_val + 2.0, sum_val + 3.0])
+
+        def func_future(*inp):
+            temp = net_fut(*inp)
+            temp.wait()
+            sum_val = torch.sum(temp.value()).item()
+            fut = Future()
+            fut.set_result(torch.tensor([sum_val, sum_val + 2.0, sum_val + 3.0]))
+            return fut
+
+        if use_future:
+            net_fut = BasicModel_MultiLayer_MultiInput_with_Future()
+            func_to_use = func_future
+        else:
+            net = BasicModel_MultiLayer_MultiInput()
+            func_to_use = func
+        self._multi_input_batch_scalar_shapley_assert(
+            lambda *inp: func_to_use(*inp), use_future=use_future, expanded_output=True
+        )
+
     @unittest.mock.patch("sys.stderr", new_callable=io.StringIO)
     def test_shapley_sampling_with_show_progress(self, mock_stderr) -> None:
         net = BasicModel_MultiLayer()

@@ -947,18 +971,19 @@ def _single_int_input_multi_sample_batch_scalar_shapley_assert(
         )

     def _multi_input_batch_scalar_shapley_assert(
-        self, func: Callable, use_future: bool = False
+        self, func: Callable, use_future: bool = False, expanded_output: bool = False
     ) -> None:
         inp1 = torch.tensor([[23.0, 100.0, 0.0], [20.0, 50.0, 30.0]])
         inp2 = torch.tensor([[20.0, 50.0, 30.0], [0.0, 100.0, 0.0]])
         inp3 = torch.tensor([[0.0, 100.0, 10.0], [20.0, 10.0, 13.0]])
         mask1 = torch.tensor([[1, 1, 1]])
         mask2 = torch.tensor([[0, 1, 2]])
         mask3 = torch.tensor([[0, 1, 2]])
+        out_mult = 3 if expanded_output else 1
         expected = (
-            [[3850.6666, 3850.6666, 3850.6666]],
-            [[306.6666, 3850.6666, 410.6666]],
-            [[306.6666, 3850.6666, 410.6666]],
+            [[3850.6666, 3850.6666, 3850.6666]] * out_mult,
+            [[306.6666, 3850.6666, 410.6666]] * out_mult,
+            [[306.6666, 3850.6666, 410.6666]] * out_mult,
         )
         if use_future:
             self._shapley_test_assert_future(
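Note: with expanded_output=True the wrapped forward returns three values per call, so each expected attribution row is tiled once per output element via plain Python list multiplication, e.g.:

row = [[306.6666, 3850.6666, 410.6666]]
assert row * 3 == [
    [306.6666, 3850.6666, 410.6666],
    [306.6666, 3850.6666, 410.6666],
    [306.6666, 3850.6666, 410.6666],
]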
