Skip to content

Commit cb82e5c

Browse files
committed
Fixed accuracy issue with hpu graph and dynamicity
1 parent 2f09e05 commit cb82e5c

File tree

4 files changed

+13
-1
lines changed

4 files changed

+13
-1
lines changed

optimum/habana/transformers/models/gpt_oss/modeling_gpt_oss.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ def eager_attention_forward(
135135
probs = F.softmax(combined_logits, dim=-1, dtype=combined_logits.dtype)
136136

137137
if token_idx is not None:
138-
probs[..., token_idx] = 0
138+
# index_copy_() was used to avoid dynamicity in probs[..., token_idx]
139+
zeros = torch.zeros(probs.shape[:-1] + (1,), dtype=probs.dtype, device=probs.device)
140+
probs.index_copy_(-1, token_idx, zeros)
139141
scores = probs
140142
else:
141143
scores = probs[..., :-1]

optimum/habana/transformers/models/mistral/configuration_mistral.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def __init__(
5555
attention_dropout,
5656
**kwargs,
5757
)
58+
5859
self.rope_scaling = rope_scaling
5960

6061
# Validate the correctness of rotary position embeddings parameters

tests/baselines/fixture/tests/test_text_generation_example.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,14 @@
423423
"throughput": 45.90538768350833
424424
}
425425
},
426+
"tests/test_text_generation_example.py::test_text_generation_bf16_1x[unsloth/gpt-oss-20b-BF16-1-False-False]": {
427+
"gaudi2": {
428+
"throughput": 49.2845966607741
429+
},
430+
"gaudi3": {
431+
"throughput": 59.51780208740626
432+
}
433+
},
426434
"tests/test_text_generation_example.py::test_text_generation_contrastive_search[gpt2-xl-1-False]": {
427435
"gaudi1": {
428436
"throughput": 34.48141280163397

tests/test_text_generation_example.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
("moonshotai/Moonlight-16B-A3B", 1, False, False),
6565
("Qwen/Qwen3-8B", 1, False, False),
6666
("Qwen/Qwen3-30B-A3B", 1, False, False),
67+
("unsloth/gpt-oss-20b-BF16", 1, False, False),
6768
],
6869
"fp8": [
6970
pytest.param("tiiuae/falcon-180B", 4, 950, True, 128, 128, marks=pytest.mark.x4),

0 commit comments

Comments (0)