diff --git a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py index e8cb0801d..390fdeb15 100644 --- a/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +++ b/src/optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py @@ -234,8 +234,6 @@ def _compile_model( wrapped_model.phase = phase if quantization: quantization.maybe_set_quantization_env() - original_linear = torch.nn.functional.linear - torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear compiled_model = cls.compile( wrapped_model, compile_config, @@ -246,7 +244,6 @@ def _compile_model( ) return compiled_model finally: - torch.nn.functional.linear = original_linear if quantization: quantization.maybe_reset_quantization_env() diff --git a/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py b/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py index 4197fce07..9273d6dc6 100644 --- a/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +++ b/src/optimum/rbln/transformers/models/gemma3/modeling_gemma3.py @@ -504,8 +504,6 @@ def compile_model(wrapped_model, compile_config, example_inputs, compile_context try: if quantization: quantization.maybe_set_quantization_env() - original_linear = torch.nn.functional.linear - torch.nn.functional.linear = torch.ops.rbln_custom_ops.linear compiled_model = cls.compile( wrapped_model, compile_config, @@ -516,7 +514,6 @@ def compile_model(wrapped_model, compile_config, example_inputs, compile_context ) return compiled_model finally: - torch.nn.functional.linear = original_linear if quantization: quantization.maybe_reset_quantization_env()