Fix GatherBlockQuantized node to support symmetric quantized LM_HEAD (#1951)

sushraja-msft · web-flow · commit 6cf92ae4b3ab · 2026-01-21T18:43:03.000-08:00
Today, models created with

python -m onnxruntime_genai.models.builder -p int4 -e webgpu
--extra_options shared_embeddings=true int4_algo_config=rtn_last
int4_is_symmetric=true

have invalid GatherBlockQuantized nodes because the zero-point
input of the node points to a non-existent tensor,
lm_head.MatMul.weight_zp.

This change fixes the model builder (builders/base.py) so that the
zero-point input is added to the GatherBlockQuantized node only when
int4 quantization is not symmetric.
diff --git a/src/python/py/models/builders/base.py b/src/python/py/models/builders/base.py
@@ -1324,9 +1324,12 @@ def make_embedding(self, embedding):
             self.make_reshape(
                 weight_reshape_name, weight_reshape_inputs, dtype=ir.DataType.UINT8, shape=[self.vocab_size, flat_dim]
             )
+            input_names = [weight_reshape_output, "input_ids", "lm_head.MatMul.weight_scale"];
+            if not self.quant_attrs["int4"]["is_symmetric"]:
+                input_names.append("lm_head.MatMul.weight_zp")
             self.make_node(
                 "GatherBlockQuantized",
-                inputs=[weight_reshape_output, "input_ids", "lm_head.MatMul.weight_scale", "lm_head.MatMul.weight_zp"],
+                inputs=input_names,
                 outputs=[gather_output],
                 name=gather_name,
                 domain="com.microsoft",