Updated RQ/CPU extension signatures #3441

Open · wants to merge 11 commits into develop
6 changes: 2 additions & 4 deletions nncf/torch/quantization/quantize_functions.py
@@ -155,9 +155,8 @@ def backward(ctx, grad_output):
         orig_shape = grad_output.shape
         grad_output = grad_output.reshape(input_shape)
 
-        output = RQ.Quantize_forward(input_, input_low, input_range, levels)
         grad_input, _, grad_scale = RQ.Quantize_backward(
-            grad_output, input_, input_low, input_range, output, level_low, level_high
+            grad_output, input_, input_low, input_range, levels, level_low, level_high
         )
 
         grad_input = grad_input.reshape(orig_shape)
@@ -197,9 +196,8 @@ def backward(ctx, grad_output):
         orig_shape = grad_output.shape
         grad_output = grad_output.reshape(input_shape)
 
-        output = RQ.Quantize_forward(input_, input_low, input_range, levels)
         grad_input, grad_low, grad_range = RQ.Quantize_backward(
-            grad_output, input_, input_low, input_range, output, level_low, level_high
+            grad_output, input_, input_low, input_range, levels, level_low, level_high
         )
 
         grad_input = grad_input.reshape(orig_shape)
3 changes: 2 additions & 1 deletion nncf/torch/quantization/reference.py
@@ -70,7 +70,7 @@ def backward(
         input_: GeneralizedTensor,
         input_low: GeneralizedTensor,
         input_range: GeneralizedTensor,
-        output: GeneralizedTensor,
+        levels: int,
         level_low: int,
         level_high: int,
         is_asymmetric: bool = False,
@@ -83,6 +83,7 @@

         mask_in = 1 - mask_hi - mask_lo
         range_sign = self._sign(input_range)
+        output = self.forward(input_, input_low, input_range, levels)
         err = (output - input_) * self._reciprocal(input_range * range_sign)
         grad_range = grad_output * (err * mask_in + range_sign * (level_low / level_high) * mask_lo + mask_hi)
         grad_range = sum_like(grad_range, input_range)
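
The hunk above moves the quantized-output computation inside backward: instead of receiving a precomputed output tensor, the reference backward now takes levels and re-runs the forward pass itself. The following self-contained sketch illustrates that pattern; the mask and forward formulas are simplified assumptions for illustration, and only the err/grad_range lines mirror the reference code shown above.

import torch


def fake_quantize(input_, input_low, input_range, levels):
    # Simplified stand-in for the reference forward (assumed formula): clamp to
    # the quantization range and round onto a uniform grid with `levels` steps.
    scale = (levels - 1) / input_range
    clamped = torch.maximum(torch.minimum(input_, input_low + input_range), input_low)
    return torch.round((clamped - input_low) * scale) / scale + input_low


def backward_sketch(grad_output, input_, input_low, input_range, levels, level_low, level_high):
    # Assumed masks: inputs below / above the quantization range.
    mask_lo = (input_ < input_low).to(input_.dtype)
    mask_hi = (input_ > input_low + input_range).to(input_.dtype)
    mask_in = 1 - mask_hi - mask_lo
    range_sign = torch.sign(input_range)
    # The quantized output is recomputed here from `levels` instead of being
    # passed in by the caller, which is the signature change in this PR.
    output = fake_quantize(input_, input_low, input_range, levels)
    err = (output - input_) * (1.0 / (input_range * range_sign))
    grad_input = grad_output * mask_in  # straight-through inside the range (assumed)
    grad_range = grad_output * (err * mask_in + range_sign * (level_low / level_high) * mask_lo + mask_hi)
    # In the real code, grad_range is further reduced to input_range's shape via sum_like.
    return grad_input, grad_range
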
132 changes: 130 additions & 2 deletions tests/torch/quantization/test_functions.py
@@ -17,11 +17,14 @@
 from torch.distributions.uniform import Uniform
 
 from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
+from nncf.torch.quantization.extensions import QuantizedFunctionsCPU
+from nncf.torch.quantization.extensions import QuantizedFunctionsCUDA
 from nncf.torch.quantization.quantize_functions import asymmetric_quantize
 from nncf.torch.quantization.quantize_functions import get_scale_zp_from_input_low_input_high
 from nncf.torch.quantization.quantize_functions import symmetric_quantize
 from nncf.torch.quantization.reference import ReferenceBackendType
 from nncf.torch.quantization.reference import ReferenceQuantize
+from nncf.torch.quantization.reference import ReferenceQuantizedFunctions
 from tests.torch.helpers import PTTensorListComparator
 from tests.torch.helpers import get_grads
 
@@ -381,7 +384,7 @@ def test_quantize_symmetric_backward(

         mock_prev_output_grads = np.ones(input_size, dtype=np.float16 if is_fp16 else np.float32)
         ref_grads = RQ.backward(
-            mock_prev_output_grads, ref_input, ref_input_low, ref_input_range, ref_output, level_low, level_high
+            mock_prev_output_grads, ref_input, ref_input_low, ref_input_range, levels, level_low, level_high
         )
         del ref_grads[1]
         test_value = symmetric_quantize(test_input, levels, level_low, level_high, test_scale, EPS)
@@ -585,7 +588,7 @@ def test_quantize_asymmetric_backward(self, _seed, input_size, bits, use_cuda, i

         mock_prev_output_grads = np.ones(input_size, dtype=np.float16 if is_fp16 else np.float32)
         ref_grads = RQ.backward(
-            mock_prev_output_grads, ref_input, ref_input_low, ref_input_range, ref_output, level_low, level_high
+            mock_prev_output_grads, ref_input, ref_input_low, ref_input_range, levels, level_low, level_high
         )
 
         test_value = asymmetric_quantize(
@@ -669,3 +672,128 @@ def test_get_scale_zp_from_input_low_input_high(
)
assert zero_point == ref_zero_point, f"{zero_point} != {ref_zero_point}"
assert np.isclose(scale, ref_scale), f"{scale:.10f} != {ref_scale}"


class CompatibilityTestDesc:
    def __init__(self, levels, level_low, level_high, is_asymmetric=False):
        self.input_ = torch.tensor([[-0.5, 0.5]])
        self.input_low = torch.tensor([-0.5])
        self.input_range = torch.tensor([1.0])
        self.grad_output = torch.tensor([[-0.5, 0.5]])
        self.levels = levels
        self.level_low = level_low
        self.level_high = level_high
        self.is_asymmetric = is_asymmetric


@pytest.mark.parametrize(
    "desc",
    [
        CompatibilityTestDesc(
            levels=256,
            level_low=-128,
            level_high=127,
            is_asymmetric=True,
        ),
        CompatibilityTestDesc(
            levels=256,
            level_low=-128,
            level_high=127,
            is_asymmetric=False,
        ),
        CompatibilityTestDesc(
            levels=16,
            level_low=0,
            level_high=15,
            is_asymmetric=True,
        ),
        CompatibilityTestDesc(
            levels=16,
            level_low=0,
            level_high=15,
            is_asymmetric=False,
        ),
    ],
)
def test_cpu_extension_reference_compatibility(desc):
    fwd_args = [desc.input_, desc.input_low, desc.input_range, desc.levels]
    bwd_args = [
        desc.grad_output,
        desc.input_,
        desc.input_low,
        desc.input_range,
        desc.levels,
        desc.level_low,
        desc.level_high,
        desc.is_asymmetric,
    ]

    ext_fwd_output = QuantizedFunctionsCPU.get("Quantize_forward")(*fwd_args)
    ref_fwd_output = ReferenceQuantizedFunctions.Quantize_forward(*fwd_args)

    assert torch.allclose(ext_fwd_output, ref_fwd_output)

    bwd_grad_input, bwd_grad_low, bwd_grad_range = QuantizedFunctionsCPU.get("Quantize_backward")(*bwd_args)
    ref_grad_input, ref_grad_low, ref_grad_range = ReferenceQuantizedFunctions.Quantize_backward(*bwd_args)
Inline review thread:

Contributor: You can ignore the gradient for input low in the test.

Collaborator Author: In this case, differences between the extensions and the references wouldn't be noticed. You've worried for years about the absence of tests for this case, and now you suggest testing it only partially.

Contributor: Again, the gradient for input low is not used anywhere, and that is intended. IMHO, nothing is wrong here and it's not an error. It comes from torch.autograd's requirement that backward return the same number of outputs as forward has inputs. The symmetric case has only one learnable parameter, the scale (represented by input_range), which is why only one gradient is used.

My ask was about testing the functionality of the extensions against the reference implementation. The gradient for input low doesn't influence that functionality. Hope that makes things clear.
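
The torch.autograd contract described above can be shown with a minimal, self-contained sketch; the class below and its toy quantizer are purely illustrative and are not NNCF's actual implementation.

import torch


class FakeQuantizeSymmetric(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input_, scale, levels):
        ctx.save_for_backward(input_, scale)
        step = 2 * scale / (levels - 1)
        quantized = torch.round(input_ / step) * step
        return torch.maximum(torch.minimum(quantized, scale), -scale)

    @staticmethod
    def backward(ctx, grad_output):
        input_, scale = ctx.saved_tensors
        inside = (input_.abs() <= scale).to(grad_output.dtype)
        grad_input = grad_output * inside  # straight-through estimate inside the range
        grad_scale = (grad_output * (1 - inside)).sum().reshape_as(scale)  # toy gradient for the scale
        # backward must return one value per forward input (input_, scale, levels);
        # `levels` is not learnable, so its gradient slot is simply None.
        return grad_input, grad_scale, None


# Usage: only the scale gradient is meaningful, mirroring the symmetric quantizer.
# x = torch.randn(4, requires_grad=True); s = torch.tensor([1.0], requires_grad=True)
# FakeQuantizeSymmetric.apply(x, s, 256).sum().backward()
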

Collaborator Author: As you wish.

Collaborator Author (nikita-malininn, Apr 17, 2025): However, the method signatures currently differ between the CUDA and CPU extensions.

Furthermore, grad_range does not match between CUDA and the reference execution (see the failed CUDA tests). We can continue to sweep these issues under the rug and only check grad_input, if you want?

Contributor: Please stop misinterpreting my words. I didn't say that we should check only grad_input and say nothing about the different signatures.

My point was only about the gradient for input_low. Feel free to align the signatures and to fix and check the gradient for input_range; no objection to that.

    assert torch.allclose(bwd_grad_input, ref_grad_input)
    if desc.is_asymmetric:
        assert torch.allclose(bwd_grad_low, ref_grad_low)
    assert torch.allclose(bwd_grad_range, ref_grad_range)


@pytest.mark.cuda
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Skipping for CPU-only setups")
@pytest.mark.parametrize(
    "desc",
    [
        CompatibilityTestDesc(
            levels=256,
            level_low=-128,
            level_high=127,
        ),
        CompatibilityTestDesc(
            levels=256,
            level_low=-128,
            level_high=127,
        ),
        CompatibilityTestDesc(
            levels=16,
            level_low=0,
            level_high=15,
        ),
        CompatibilityTestDesc(
            levels=16,
            level_low=0,
            level_high=15,
        ),
    ],
)
def test_cuda_extension_reference_compatibility(desc):
    device = torch.device("cuda")
    input_low = desc.input_low.to(device)
    input_range = desc.input_range.to(device)
    input_ = desc.input_.to(device)

    fwd_args = [input_, input_low, input_range, desc.levels]
    bwd_args = [
        desc.grad_output.to(device),
        input_,
        input_low,
        input_range,
        desc.levels,
        desc.level_low,
        desc.level_high,
    ]

    ext_fwd_output = QuantizedFunctionsCUDA.get("Quantize_forward")(*fwd_args)
    ref_fwd_output = ReferenceQuantizedFunctions.Quantize_forward(*fwd_args)

    assert torch.allclose(ext_fwd_output, ref_fwd_output)

    bwd_grad_input, bwd_grad_low, bwd_grad_range = QuantizedFunctionsCUDA.get("Quantize_backward")(*bwd_args)
    ref_grad_input, ref_grad_low, ref_grad_range = ReferenceQuantizedFunctions.Quantize_backward(*bwd_args)

    assert torch.allclose(bwd_grad_input, ref_grad_input)
    assert torch.allclose(bwd_grad_low, ref_grad_low)
    assert torch.allclose(bwd_grad_range, ref_grad_range)
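
For a quick manual check of the updated signature outside pytest, something along these lines should work on a CPU-only setup with this branch installed and the CPU extensions built (the values mirror CompatibilityTestDesc above; this is a sketch, not part of the PR):

import torch

from nncf.torch.quantization.extensions import QuantizedFunctionsCPU
from nncf.torch.quantization.reference import ReferenceQuantizedFunctions

# Fixture values copied from CompatibilityTestDesc.
input_ = torch.tensor([[-0.5, 0.5]])
input_low = torch.tensor([-0.5])
input_range = torch.tensor([1.0])
grad_output = torch.tensor([[-0.5, 0.5]])
levels, level_low, level_high = 256, -128, 127

# Forward: extension vs. reference.
ext_out = QuantizedFunctionsCPU.get("Quantize_forward")(input_, input_low, input_range, levels)
ref_out = ReferenceQuantizedFunctions.Quantize_forward(input_, input_low, input_range, levels)
assert torch.allclose(ext_out, ref_out)

# Backward with the new signature: `levels` replaces the precomputed `output`.
ext_grads = QuantizedFunctionsCPU.get("Quantize_backward")(
    grad_output, input_, input_low, input_range, levels, level_low, level_high, True
)
ref_grads = ReferenceQuantizedFunctions.Quantize_backward(
    grad_output, input_, input_low, input_range, levels, level_low, level_high, True
)
for ext_grad, ref_grad in zip(ext_grads, ref_grads):
    assert torch.allclose(ext_grad, ref_grad)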