Xilinx · ttjost · May 27, 2026 · May 26, 2026
diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainAtoF.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainAtoF.cpp
@@ -2249,6 +2249,17 @@ void mlir::torch::onnx_c::populateDefaultDomainAtoF(
         Value scale = operands[1];
         Value zeropoint = operands[2];
 
+        int64_t blockSize, outputDtype;
+        if (binder.s64IntegerAttr(blockSize, "block_size", 0) ||
+            binder.s64IntegerAttr(outputDtype, "output_dtype", 0))
+          return failure();
+        if (blockSize != 0)
+          return rewriter.notifyMatchFailure(
+              binder.op, "unimplemented: DequantizeLinear block_size != 0");
+        if (outputDtype != 0)
+          return rewriter.notifyMatchFailure(
+              binder.op, "unimplemented: DequantizeLinear output_dtype != 0");
+
         auto operandTy = cast<Torch::ValueTensorType>(operand.getType());
 
         auto operandETy = operandTy.getDtype();

diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainQtoZ.cpp
@@ -256,6 +256,26 @@ void mlir::torch::onnx_c::populateDefaultDomainQtoZ(
           return failure();
 
         auto loc = binder.getLoc();
+
+        int64_t blockSize, outputDtype, precision, saturate;
+        if (binder.s64IntegerAttr(blockSize, "block_size", 0) ||
+            binder.s64IntegerAttr(outputDtype, "output_dtype", 0) ||
+            binder.s64IntegerAttr(precision, "precision", 0) ||
+            binder.s64IntegerAttr(saturate, "saturate", 1))
+          return failure();
+        if (blockSize != 0)
+          return rewriter.notifyMatchFailure(
+              binder.op, "unimplemented: QuantizeLinear block_size != 0");
+        if (outputDtype != 0)
+          return rewriter.notifyMatchFailure(
+              binder.op, "unimplemented: QuantizeLinear output_dtype != 0");
+        if (precision != 0)
+          return rewriter.notifyMatchFailure(
+              binder.op, "unimplemented: QuantizeLinear precision != 0");
+        if (saturate != 1)
+          return rewriter.notifyMatchFailure(
+              binder.op, "unimplemented: QuantizeLinear saturate != 1");
+
         Value operand = operands[0];
         Value scale = operands[1];
         Value zeropoint = operands[2];

diff --git a/test-requirements.txt b/test-requirements.txt
@@ -1,5 +1,5 @@
 pillow
 dill
 multiprocess
-onnx==1.16.1
+onnx==1.19.1
 mpmath==1.3.0
diff --git a/test/Conversion/TorchOnnxToTorch/qdq_unsupported_rejection.mlir b/test/Conversion/TorchOnnxToTorch/qdq_unsupported_rejection.mlir
@@ -0,0 +1,96 @@
+// RUN: torch-mlir-opt <%s --split-input-file -verify-diagnostics -convert-torch-onnx-to-torch
+
+// QuantizeLinear with a non-default block_size (32) must fail to legalize.
+func.func @test_quantizelinear_reject_block_size(
+    %arg0: !torch.vtensor<[6],f32>,
+    %arg1: !torch.vtensor<[],f32>,
+    %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],si8>
+    attributes {torch.onnx_meta.ir_version = 10 : si64,
+                torch.onnx_meta.opset_version = 23 : si64} {
+  // expected-error @below {{failed to legalize operation 'torch.operator'}}
+  %0 = torch.operator "onnx.QuantizeLinear"(%arg0, %arg1, %arg2)
+      {torch.onnx.block_size = 32 : si64}
+      : (!torch.vtensor<[6],f32>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>)
+      -> !torch.vtensor<[6],si8>
+  return %0 : !torch.vtensor<[6],si8>
+}
+
+// -----
+// QuantizeLinear with a non-zero output_dtype (3) must fail to legalize.
+func.func @test_quantizelinear_reject_output_dtype(
+    %arg0: !torch.vtensor<[6],f32>,
+    %arg1: !torch.vtensor<[],f32>,
+    %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],si8>
+    attributes {torch.onnx_meta.ir_version = 10 : si64,
+                torch.onnx_meta.opset_version = 23 : si64} {
+  // expected-error @below {{failed to legalize operation 'torch.operator'}}
+  %0 = torch.operator "onnx.QuantizeLinear"(%arg0, %arg1, %arg2)
+      {torch.onnx.output_dtype = 3 : si64}
+      : (!torch.vtensor<[6],f32>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>)
+      -> !torch.vtensor<[6],si8>
+  return %0 : !torch.vtensor<[6],si8>
+}
+
+// -----
+// QuantizeLinear with a non-zero precision (1) must fail to legalize.
+func.func @test_quantizelinear_reject_precision(
+    %arg0: !torch.vtensor<[6],f32>,
+    %arg1: !torch.vtensor<[],f32>,
+    %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],si8>
+    attributes {torch.onnx_meta.ir_version = 10 : si64,
+                torch.onnx_meta.opset_version = 23 : si64} {
+  // expected-error @below {{failed to legalize operation 'torch.operator'}}
+  %0 = torch.operator "onnx.QuantizeLinear"(%arg0, %arg1, %arg2)
+      {torch.onnx.precision = 1 : si64}
+      : (!torch.vtensor<[6],f32>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>)
+      -> !torch.vtensor<[6],si8>
+  return %0 : !torch.vtensor<[6],si8>
+}
+
+// -----
+// QuantizeLinear with saturate = 0 (the default is 1) must fail to legalize.
+func.func @test_quantizelinear_reject_saturate(
+    %arg0: !torch.vtensor<[6],f32>,
+    %arg1: !torch.vtensor<[],f32>,
+    %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],si8>
+    attributes {torch.onnx_meta.ir_version = 10 : si64,
+                torch.onnx_meta.opset_version = 23 : si64} {
+  // expected-error @below {{failed to legalize operation 'torch.operator'}}
+  %0 = torch.operator "onnx.QuantizeLinear"(%arg0, %arg1, %arg2)
+      {torch.onnx.saturate = 0 : si64}
+      : (!torch.vtensor<[6],f32>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>)
+      -> !torch.vtensor<[6],si8>
+  return %0 : !torch.vtensor<[6],si8>
+}
+
+// -----
+// DequantizeLinear with a non-default block_size (32) must fail to legalize.
+func.func @test_dequantizelinear_reject_block_size(
+    %arg0: !torch.vtensor<[6],si8>,
+    %arg1: !torch.vtensor<[],f32>,
+    %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32>
+    attributes {torch.onnx_meta.ir_version = 10 : si64,
+                torch.onnx_meta.opset_version = 23 : si64} {
+  // expected-error @below {{failed to legalize operation 'torch.operator'}}
+  %0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2)
+      {torch.onnx.block_size = 32 : si64}
+      : (!torch.vtensor<[6],si8>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>)
+      -> !torch.vtensor<[6],f32>
+  return %0 : !torch.vtensor<[6],f32>
+}
+
+// -----
+// DequantizeLinear with a non-zero output_dtype (1) must fail to legalize.
+func.func @test_dequantizelinear_reject_output_dtype(
+    %arg0: !torch.vtensor<[6],si8>,
+    %arg1: !torch.vtensor<[],f32>,
+    %arg2: !torch.vtensor<[],si8>) -> !torch.vtensor<[6],f32>
+    attributes {torch.onnx_meta.ir_version = 10 : si64,
+                torch.onnx_meta.opset_version = 23 : si64} {
+  // expected-error @below {{failed to legalize operation 'torch.operator'}}
+  %0 = torch.operator "onnx.DequantizeLinear"(%arg0, %arg1, %arg2)
+      {torch.onnx.output_dtype = 1 : si64}
+      : (!torch.vtensor<[6],si8>, !torch.vtensor<[],f32>, !torch.vtensor<[],si8>)
+      -> !torch.vtensor<[6],f32>
+  return %0 : !torch.vtensor<[6],f32>
+}
diff --git a/test/python/onnx_importer/function_expansion/ReduceSumSquare_keepdims=0.runlit b/test/python/onnx_importer/function_expansion/ReduceSumSquare_keepdims=0.runlit
@@ -19,4 +19,4 @@
 #
 # CHECK-LABEL: func.func private @"{{.*}}ReduceSumSquare{{.*}}"
 # CHECK:         %0 = torch.operator "onnx.Mul"
-# CHECK:         %1 = torch.operator "onnx.ReduceSum"{{.*}}{torch.onnx.keepdims = 0 : si64}
+# CHECK:         %1 = torch.operator "onnx.ReduceSum"{{.*}}{torch.onnx.keepdims = 0 : si64, torch.onnx.noop_with_empty_axes = 0 : si64}
diff --git a/test/python/onnx_importer/function_expansion/ReduceSumSquare_no_attrs.runlit b/test/python/onnx_importer/function_expansion/ReduceSumSquare_no_attrs.runlit
@@ -20,4 +20,4 @@
 #
 # CHECK-LABEL: func.func private @"{{.*}}ReduceSumSquare{{.*}}"
 # CHECK:         %0 = torch.operator "onnx.Mul"
-# CHECK:         %1 = torch.operator "onnx.ReduceSum"{{.*}}{torch.onnx.keepdims = 1 : si64}
+# CHECK:         %1 = torch.operator "onnx.ReduceSum"{{.*}}{torch.onnx.keepdims = 1 : si64, torch.onnx.noop_with_empty_axes = 0 : si64}
diff --git a/test/python/onnx_importer/import_smoke_test.py b/test/python/onnx_importer/import_smoke_test.py
@@ -46,6 +46,26 @@
 TEST_CAST_XFAILS = [
     "node_test_ai_onnx_ml_label_encoder_tensor_mapping_model",
     "node_test_if_opt_model",
+    # Element types added in opsets 23/24 (TensorProto enum values: FLOAT4E2M1 = 23,
+    # FLOAT8E8M0 = 24) are not yet mapped in ELEM_TYPE_TO_IR_TYPE_CB; xfail for now.
+    "node_test_cast_FLOAT16_to_FLOAT4E2M1_model",
+    "node_test_cast_FLOAT4E2M1_to_FLOAT_model",
+    "node_test_cast_FLOAT4E2M1_to_FLOAT16_model",
+    "node_test_cast_FLOAT_to_FLOAT4E2M1_model",
+    "node_test_cast_e8m0_FLOAT16_to_FLOAT8E8M0_model",
+    "node_test_cast_e8m0_FLOAT8E8M0_to_FLOAT_model",
+    "node_test_cast_e8m0_FLOAT8E8M0_to_FLOAT16_model",
+    "node_test_cast_e8m0_FLOAT_to_FLOAT8E8M0_model",
+    "node_test_castlike_FLOAT16_to_FLOAT4E2M1_expanded_model",
+    "node_test_castlike_FLOAT16_to_FLOAT4E2M1_model",
+    "node_test_castlike_FLOAT4E2M1_to_FLOAT_model",
+    "node_test_castlike_FLOAT4E2M1_to_FLOAT16_model",
+    "node_test_castlike_FLOAT4E2M1_to_FLOAT16_expanded_model",
+    "node_test_castlike_FLOAT4E2M1_to_FLOAT_expanded_model",
+    "node_test_castlike_FLOAT_to_FLOAT4E2M1_expanded_model",
+    "node_test_castlike_FLOAT_to_FLOAT4E2M1_model",
+    "node_test_dequantizelinear_float4e2m1_model",
+    "node_test_quantizelinear_float4e2m1_model",
 ]