Skip to content

Commit 13d7348

Browse files
jtuyls authored and claude committed
Emit torch.vtensor.literal for small initializers
Small initializers (<=256 bytes) now emit torch.vtensor.literal instead of flow.tensor.constant + torch_c.from_builtin_tensor. This allows torch-mlir conversion patterns to directly match constant values.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Jorn <jorn.tuyls@gmail.com>
1 parent 4f595c0 commit 13d7348

File tree

2 files changed

+48
-27
lines changed

2 files changed

+48
-27
lines changed

src/mlir_gen.cc

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ ErrorOr<std::string> FormatTensorType(const Ort::ConstTypeInfo& type_info) {
156156
// Formats a tensor type as tensor<dimsxdtype> (standard MLIR format).
157157
// Uses signless integer types as required by MLIR tensor dialect.
158158
// Dynamic dims are always emitted as "?".
159-
ErrorOr<std::string> FormatMlirTensorType(const Ort::ConstTypeInfo& type_info) {
159+
ErrorOr<std::string> FormatMlirTensorType(const Ort::ConstTypeInfo& type_info,
160+
bool signless = true) {
160161
if (type_info.GetONNXType() != ONNX_TYPE_TENSOR) {
161162
return errorWithCode(ErrorCode::kNotImplemented,
162163
"Non-tensor type {} not supported",
@@ -178,7 +179,7 @@ ErrorOr<std::string> FormatMlirTensorType(const Ort::ConstTypeInfo& type_info) {
178179
ss << "x";
179180
}
180181
IREE_EP_ASSIGN_OR_RETURN(std::string elem_type,
181-
GetElementType(dtype, /*signless=*/true));
182+
GetElementType(dtype, signless));
182183
ss << elem_type << ">";
183184
return ss.str();
184185
}
@@ -414,7 +415,7 @@ class MlirGenerator {
414415
// Emit dim constraints (util.assume.int + flow.tensor.tie_shape).
415416
IREE_EP_RETURN_IF_ERROR(EmitDimConstraints());
416417

417-
// Emit initializers as flow.tensor.constant ops.
418+
// Emit initializers (small as vtensor.literal, large as flow.parameter).
418419
for (const auto& init : initializers_) {
419420
IREE_EP_RETURN_IF_ERROR(EmitInitializer(init));
420421
}
@@ -429,15 +430,14 @@ class MlirGenerator {
429430
return EmitReturn();
430431
}
431432

432-
// Emits an initializer as a flow.tensor.constant with a
433-
// torch_c.from_builtin_tensor cast. Small initializers use dense<> with
434-
// inline hex-encoded data. Large initializers use #flow.parameter.named
435-
// (data stored in IRPA archive).
433+
// Emits an initializer. Small initializers (<=256 bytes) use
434+
// torch.vtensor.literal so torch-mlir conversion patterns can directly
435+
// match constant values. Large initializers use flow.tensor.constant
436+
// with #flow.parameter.named (data stored in IRPA archive).
436437
//
437438
// Output format (small):
438-
// %__raw_name = flow.tensor.constant dense<"0x..."> : tensor<...>
439-
// %name = torch_c.from_builtin_tensor %__raw_name : tensor<...>
440-
// -> !torch.vtensor<[...],dtype>
439+
// %name = torch.vtensor.literal(dense<"0x..."> : tensor<...>)
440+
// : !torch.vtensor<[...],dtype>
441441
//
442442
// Output format (large):
443443
// %__raw_name = flow.tensor.constant
@@ -456,18 +456,24 @@ class MlirGenerator {
456456
OnnxElementTypeSize(tensor_info.GetElementType());
457457

458458
if (byte_size <= kMaxInlineInitializerSize) {
459-
// Small: inline with dense<> DenseElementsAttr.
459+
// Small: emit as torch.vtensor.literal so torch-mlir conversion
460+
// patterns can directly match constant values.
460461
Ort::ConstValue tensor_value{nullptr};
461462
IREE_EP_RETURN_IF_ORT_STATUS(init.GetInitializer(tensor_value).release());
462463
const auto* data =
463464
static_cast<const uint8_t*>(tensor_value.GetTensorRawData());
464465
std::string hex = HexEncode(data, tensor_value.GetTensorSizeInBytes());
465466

467+
// vtensor.literal requires signed integer types (si64, si32, etc.)
468+
// in the inner dense attribute, not signless (i64, i32).
469+
IREE_EP_ASSIGN_OR_RETURN(
470+
std::string signed_tensor_type,
471+
FormatMlirTensorType(init.TypeInfo(), /*signless=*/false));
472+
466473
constexpr std::string_view schema =
467-
R"( %__raw_{0} = flow.tensor.constant dense<"{3}"> : {1}
468-
%{0} = torch_c.from_builtin_tensor %__raw_{0} : {1} -> {2}
474+
R"( %{0} = torch.vtensor.literal(dense<"{2}"> : {1}) : {3}
469475
)";
470-
out_ << std::format(schema, name, tensor_type, vtensor_type, hex);
476+
out_ << std::format(schema, name, signed_tensor_type, hex, vtensor_type);
471477
} else {
472478
// Large: parameter reference. Data stored in IRPA archive.
473479
constexpr std::string_view schema =

test/test_initializers.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,21 @@
1414
# Fixed seed for reproducibility.
1515
np.random.seed(42)
1616

17-
# Test data. Four initializers, each handled differently:
18-
# D_small: [1, 64] float32 = 256 bytes -> inline dense<>
17+
# Test data. Five initializers, each handled differently:
18+
# D_small: [1, 64] float32 = 256 bytes -> vtensor.literal (inline)
1919
# D_large: [64, 64] float32 = 16384 bytes -> IRPA parameter
2020
# D_ext: [64, 64] float32 = 16384 bytes -> external file (parameter, not in IRPA)
21-
# D_ext_small: [1, 64] float32 = 256 bytes -> external file (inlined as dense<>)
22-
# Graph: C = (((A + D_small) + D_large) + D_ext) + D_ext_small
21+
# D_ext_small: [1, 64] float32 = 256 bytes -> external file (vtensor.literal)
22+
# axes: [1] int64 = 8 bytes -> vtensor.literal (int, tests si64 type)
23+
# Graph: C = ReduceMean((((A + D_small) + D_large) + D_ext) + D_ext_small, axes=[1])
2324
SHAPE = [64, 64]
2425
A_DATA = np.random.rand(*SHAPE).astype(np.float32)
2526
B_SMALL = np.random.rand(1, 64).astype(np.float32)
2627
B_LARGE = np.random.rand(*SHAPE).astype(np.float32)
2728
B_EXT = np.random.rand(*SHAPE).astype(np.float32)
2829
B_EXT_SMALL = np.random.rand(1, 64).astype(np.float32)
29-
EXPECTED = (((A_DATA + B_SMALL) + B_LARGE) + B_EXT) + B_EXT_SMALL
30+
SUM_ALL = (((A_DATA + B_SMALL) + B_LARGE) + B_EXT) + B_EXT_SMALL
31+
EXPECTED = np.mean(SUM_ALL, axis=1, keepdims=True)
3032

3133

3234
def _create_model():
@@ -71,7 +73,7 @@ def _create_model():
7173
ext_tensor.ClearField("raw_data")
7274
ext_tensor.data_location = TensorProto.EXTERNAL
7375

74-
# Small external initializer (should be inlined as dense<>).
76+
# Small external initializer (should be inlined as vtensor.literal).
7577
ext_small_filename = "ext_small_weights.bin"
7678
ext_small_path = os.path.join(model_dir, ext_small_filename)
7779
ext_small_tensor = from_array(B_EXT_SMALL, name="D_ext_small")
@@ -84,27 +86,33 @@ def _create_model():
8486
ext_small_tensor.ClearField("raw_data")
8587
ext_small_tensor.data_location = TensorProto.EXTERNAL
8688

89+
# Int64 axes initializer for ReduceMean (8 bytes — tests si64 signedness).
90+
axes_tensor = from_array(np.array([1], dtype=np.int64), name="axes")
91+
8792
input_a = helper.make_tensor_value_info("A", TensorProto.FLOAT, SHAPE)
88-
output = helper.make_tensor_value_info("C", TensorProto.FLOAT, SHAPE)
93+
output = helper.make_tensor_value_info("C", TensorProto.FLOAT, [64, 1])
8994

9095
add1 = helper.make_node("Add", inputs=["A", "D_small"], outputs=["T1"])
9196
add2 = helper.make_node("Add", inputs=["T1", "D_large"], outputs=["T2"])
9297
add3 = helper.make_node("Add", inputs=["T2", "D_ext"], outputs=["T3"])
93-
add4 = helper.make_node("Add", inputs=["T3", "D_ext_small"], outputs=["C"])
98+
add4 = helper.make_node("Add", inputs=["T3", "D_ext_small"], outputs=["T4"])
99+
reduce_mean = helper.make_node(
100+
"ReduceMean", inputs=["T4", "axes"], outputs=["C"], keepdims=1
101+
)
94102

95103
graph = helper.make_graph(
96-
[add1, add2, add3, add4, const_small, const_large],
104+
[add1, add2, add3, add4, reduce_mean, const_small, const_large],
97105
"test_graph",
98106
[input_a],
99107
[output],
100-
initializer=[ext_tensor, ext_small_tensor],
108+
initializer=[ext_tensor, ext_small_tensor, axes_tensor],
101109
)
102110
model = helper.make_model(
103111
graph,
104112
producer_name="iree_test",
105-
opset_imports=[helper.make_opsetid("", 17)],
113+
opset_imports=[helper.make_opsetid("", 18)],
106114
)
107-
model.ir_version = 8
115+
model.ir_version = 9
108116

109117
model_path = os.path.join(model_dir, "model.onnx")
110118
onnx.save(model, model_path)
@@ -164,10 +172,17 @@ def test_with_save_intermediates(iree_device):
164172

165173
mlir_content = open(list(new_mlir)[0]).read()
166174

167-
# D_small and D_ext_small should be inlined via dense<>.
175+
# D_small, D_ext_small, and axes should be vtensor.literal.
176+
assert (
177+
"torch.vtensor.literal" in mlir_content
178+
), "MLIR should contain torch.vtensor.literal for small constants"
168179
assert (
169180
'dense<"0x' in mlir_content
170181
), "MLIR should contain inline dense<> attributes"
182+
# Int64 axes initializer should use signed type (si64).
183+
assert (
184+
"si64" in mlir_content
185+
), "int64 initializer should use signed type (si64) in vtensor.literal"
171186
assert (
172187
"dense_resource" not in mlir_content
173188
), "MLIR should not contain dense_resource (replaced by dense<>)"

0 commit comments

Comments (0)