#2902: Fix rsqrt for inputs in [0, 1) (seed Newton iteration with 1.0; return +inf for 0)

VirdhatchaniKN · muthutt · commit b1f6f1a84274 · 2023-10-25T12:58:00.000-07:00
diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_unary.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_unary.py
@@ -197,7 +197,7 @@ def test_run_eltwise_rsqrt_op(
         output_mem_config,
     ):
         datagen_func = [
-            generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=1, high=1e8), torch.bfloat16)
+            generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=0, high=1e8), torch.bfloat16)
         ]
         test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]
         test_args["fast_and_approx"] = fast_and_approx
diff --git a/tests/tt_eager/python_api_testing/sweep_tests/reference_eltwise/eltwise_unary_rsqrt.py b/tests/tt_eager/python_api_testing/sweep_tests/reference_eltwise/eltwise_unary_rsqrt.py
@@ -8,31 +8,19 @@
 
 torch.manual_seed(2)
 
-
-def rsqrt_approx(x, iterations):
-    # Initial approximation
+def rsqrt(x, iterations):
     y = 1.0 / x
-
+    condition = (x > 0) & (x < 1)
+    y = torch.where(condition, torch.tensor(1.0), y)
     for _ in range(iterations):
         y = y * (1.5 - 0.5 * x * y * y)  # Newton-Raphson iteration
     return y
 
-
-def rsqrt_accurate(x, iterations):
-    # Initial approximation
-    y = 1.0 / x
-
-    for _ in range(iterations):
-        y = y * (1.5 - 0.5 * x * y * y)  # Newton-Raphson iteration
-    return y
-
-
-n = np.linspace(1, 10, 100)
+n = np.linspace(0, 10, 100)
 n = torch.from_numpy(n)
 lhs = torch.rsqrt(n)
-rhs_approx = rsqrt_approx(n, 10)
-rhs_accurate = rsqrt_accurate(n, 25)
-
+rhs_approx = rsqrt(n, 10)
+rhs_accurate = rsqrt(n, 25)
 
 plt.plot(n, lhs, "-r", label="rsqrt")
 plt.plot(n, rhs_accurate, "--g", label="custom rsqrt accurate")
diff --git a/tt_metal/src/ckernels/grayskull/common/inc/ckernel_sfpu.h b/tt_metal/src/ckernels/grayskull/common/inc/ckernel_sfpu.h
@@ -268,14 +268,21 @@ inline void calculate_rsqrt()
 
         vFloat in = dst_reg[0];
         vFloat result = sfpu_reciprocal<false>(in);
+        v_if(dst_reg[0] < 1.0f){
+            result = 1.0f;
+        }v_endif;
 
         for (int r = 0; r < RECIPROCAL_ITERATIONS; r++)
         {
             // y = y * (1.5 - 0.5 * x * y * y) Newton's method iteration.
             result = result * (1.5F - 0.5F  * dst_reg[0] * result * result);
         }
 
-        dst_reg[0] = result;
+        v_if(dst_reg[0] == 0.0f){
+            dst_reg[0] = std::numeric_limits<float>::infinity();
+        }v_else{
+            dst_reg[0] = result;
+        }v_endif;
 
         dst_reg++;
 
diff --git a/tt_metal/src/ckernels/wormhole_b0/common/inc/ckernel_sfpu.h b/tt_metal/src/ckernels/wormhole_b0/common/inc/ckernel_sfpu.h
@@ -190,14 +190,21 @@ inline void calculate_rsqrt()
 
         vFloat in = dst_reg[0];
         vFloat result = sfpu_reciprocal(in);
+        v_if(dst_reg[0] < 1.0f){
+            result = 1.0f;
+        }v_endif;
 
         for (int r = 0; r < RECIPROCAL_ITERATIONS; r++)
         {
             // y = y * (1.5 - 0.5 * x * y * y) Newton's method iteration.
             result = result * (1.5F - 0.5F  * dst_reg[0] * result * result);
         }
 
-        dst_reg[0] = result;
+        v_if(dst_reg[0] == 0.0f){
+            dst_reg[0] = std::numeric_limits<float>::infinity();
+        }v_else{
+            dst_reg[0] = result;
+        }v_endif;
 
         dst_reg++;
 

Original file line number	Diff line number	Diff line change
`@@ -197,7 +197,7 @@ def test_run_eltwise_rsqrt_op(`
`197`	`197`	`output_mem_config,`
`198`	`198`	`):`
`199`	`199`	`datagen_func = [`
`200`		`- generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=1, high=1e8), torch.bfloat16)`
	`200`	`+ generation_funcs.gen_func_with_cast(partial(generation_funcs.gen_rand, low=0, high=1e8), torch.bfloat16)`
`201`	`201`	`]`
`202`	`202`	`test_args = generation_funcs.gen_default_dtype_layout_device(input_shapes)[0]`
`203`	`203`	`test_args["fast_and_approx"] = fast_and_approx`