Added a new type interface to let UniformQuantizedType accept storage types other than the built-in ones. Updated the parser and printer in the Quant dialect.

Roman-Pevnyi · Roman-Pevnyi · commit d6153a36e3ad · 2025-07-30T11:00:37.000+02:00
diff --git a/mlir/cmake/modules/AddMLIR.cmake b/mlir/cmake/modules/AddMLIR.cmake
@@ -196,6 +196,14 @@ function(add_mlir_interface interface)
   add_dependencies(mlir-generic-headers MLIR${interface}IncGen)
 endfunction()
 
+# Declare a type interface in the include directory
+function(add_mlir_type_interface interface)
+  set(LLVM_TARGET_DEFINITIONS ${interface}.td)
+  mlir_tablegen(${interface}.h.inc -gen-type-interface-decls)
+  mlir_tablegen(${interface}.cpp.inc -gen-type-interface-defs)
+  add_public_tablegen_target(MLIR${interface}IncGen)
+  add_dependencies(mlir-generic-headers MLIR${interface}IncGen)
+endfunction()
 
 # Generate Documentation
 function(add_mlir_doc doc_filename output_file output_directory command)
diff --git a/mlir/include/mlir/IR/BuiltinTypes.h b/mlir/include/mlir/IR/BuiltinTypes.h
@@ -91,6 +91,9 @@ class FloatType : public Type {
 // Tablegen Type Declarations
 //===----------------------------------------------------------------------===//
 
+// Include QuantizationInterface before BuiltinTypes to resolve dependencies
+#include "mlir/IR/QuantizationInterface.h"
+
 #define GET_TYPEDEF_CLASSES
 #include "mlir/IR/BuiltinTypes.h.inc"
 
diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td
@@ -17,6 +17,7 @@
 include "mlir/IR/AttrTypeBase.td"
 include "mlir/IR/BuiltinDialect.td"
 include "mlir/IR/BuiltinTypeInterfaces.td"
+include "mlir/IR/QuantizationInterface.td"
 
 // TODO: Currently the types defined in this file are prefixed with `Builtin_`.
 // This is to differentiate the types here with the ones in OpBase.td. We should
@@ -78,8 +79,8 @@ def Builtin_Complex : Builtin_Type<"Complex", "complex"> {
 //===----------------------------------------------------------------------===//
 
 // Base class for Builtin dialect float types.
-class Builtin_FloatType<string name, string mnemonic>
-    : Builtin_Type<name, mnemonic, /*traits=*/[], "::mlir::FloatType"> {
+class Builtin_FloatType<string name, string mnemonic, list<Trait> traits = []>
+    : Builtin_Type<name, mnemonic, traits, "::mlir::FloatType"> {
   let extraClassDeclaration = [{
     static }] # name # [{Type get(MLIRContext *context);
   }];
@@ -88,7 +89,8 @@ class Builtin_FloatType<string name, string mnemonic>
 //===----------------------------------------------------------------------===//
 // Float8E5M2Type
 
-def Builtin_Float8E5M2 : Builtin_FloatType<"Float8E5M2", "f8E5M2"> {
+def Builtin_Float8E5M2 : Builtin_FloatType<"Float8E5M2", "f8E5M2", 
+                                   [QuantizationInterface]> {
   let summary = "8-bit floating point with 2 bit mantissa";
   let description = [{
     An 8-bit floating point type with 1 sign bit, 5 bits exponent and 2 bits
@@ -104,6 +106,23 @@ def Builtin_Float8E5M2 : Builtin_FloatType<"Float8E5M2", "f8E5M2"> {
 
     Described in: https://arxiv.org/abs/2209.05433
   }];
+
+  let extraClassDeclaration = [{
+    static Float8E5M2Type get(MLIRContext *context);
+
+    /// QuantizationInterface hooks: f8E5M2 is a signed, 8-bit storage type.
+    bool isStorageSigned() const { return true; }
+    unsigned getStorageWidth() const { return 8; }
+    int64_t getDefaultMaximum([[maybe_unused]] bool isSigned, [[maybe_unused]] unsigned integralWidth) const {
+      return 57344; // Largest finite f8E5M2 value (1.75 * 2^15).
+    }
+    int64_t getDefaultMinimum(bool isSigned, unsigned integralWidth) const {
+      return -getDefaultMaximum(isSigned, integralWidth);
+    }
+    std::string printStorageType([[maybe_unused]] bool isSigned, [[maybe_unused]] unsigned storageWidth) const {
+      return "f8E5M2";
+    }
+  }];
 }
 
 //===----------------------------------------------------------------------===//
@@ -128,7 +147,8 @@ def Builtin_Float8E4M3 : Builtin_FloatType<"Float8E4M3", "f8E4M3"> {
 //===----------------------------------------------------------------------===//
 // Float8E4M3FNType
 
-def Builtin_Float8E4M3FN : Builtin_FloatType<"Float8E4M3FN", "f8E4M3FN"> {
+def Builtin_Float8E4M3FN : Builtin_FloatType<"Float8E4M3FN", "f8E4M3FN", 
+                                   [QuantizationInterface]> {
   let summary = "8-bit floating point with 3 bit mantissa";
   let description = [{
     An 8-bit floating point type with 1 sign bit, 4 bits exponent and 3 bits
@@ -145,6 +165,23 @@ def Builtin_Float8E4M3FN : Builtin_FloatType<"Float8E4M3FN", "f8E4M3FN"> {
 
     Described in: https://arxiv.org/abs/2209.05433
   }];
+
+  let extraClassDeclaration = [{
+    static Float8E4M3FNType get(MLIRContext *context);
+
+    /// QuantizationInterface hooks: f8E4M3FN is a signed, 8-bit storage type.
+    bool isStorageSigned() const { return true; }
+    unsigned getStorageWidth() const { return 8; }
+    int64_t getDefaultMaximum([[maybe_unused]] bool isSigned, [[maybe_unused]] unsigned integralWidth) const {
+      return 448; // Largest finite f8E4M3FN value (1.75 * 2^8).
+    }
+    int64_t getDefaultMinimum(bool isSigned, unsigned integralWidth) const {
+      return -getDefaultMaximum(isSigned, integralWidth);
+    }
+    std::string printStorageType([[maybe_unused]] bool isSigned, [[maybe_unused]] unsigned storageWidth) const {
+      return "f8E4M3FN";
+    }
+  }];
 }
 
 //===----------------------------------------------------------------------===//
@@ -358,7 +395,8 @@ def Builtin_Index : Builtin_Type<"Index", "index"> {
 // IntegerType
 //===----------------------------------------------------------------------===//
 
-def Builtin_Integer : Builtin_Type<"Integer", "integer"> {
+def Builtin_Integer : Builtin_Type<"Integer", "integer", 
+                                   [QuantizationInterface]> {
   let summary = "Integer type with arbitrary precision up to a fixed limit";
   let description = [{
     Syntax:
@@ -415,6 +453,25 @@ def Builtin_Integer : Builtin_Type<"Integer", "integer"> {
     /// Integer representation maximal bitwidth.
     /// Note: This is aligned with the maximum width of llvm::IntegerType.
     static constexpr unsigned kMaxWidth = (1 << 24) - 1;
+
+    /// QuantizationInterface method implementations
+    bool isStorageSigned() const { return !isUnsigned(); }
+    unsigned getStorageWidth() const { return getWidth(); }
+    int64_t getDefaultMinimum(bool isSigned, unsigned integralWidth) const {
+      if (isSigned) {
+        return llvm::minIntN(integralWidth);
+      }
+      return 0;
+    }
+    int64_t getDefaultMaximum(bool isSigned, unsigned integralWidth) const {
+      if (isSigned) {
+        return llvm::maxIntN(integralWidth);
+      }
+      return llvm::maxUIntN(integralWidth);
+    }
+    std::string printStorageType(bool isSigned, unsigned storageWidth) const {
+      return (isSigned ? "i" : "u") + std::to_string(storageWidth);
+    }
   }];
 }
 
diff --git a/mlir/include/mlir/IR/CMakeLists.txt b/mlir/include/mlir/IR/CMakeLists.txt
@@ -2,6 +2,8 @@ add_mlir_interface(OpAsmInterface)
 add_mlir_interface(SymbolInterfaces)
 add_mlir_interface(RegionKindInterface)
 
+add_mlir_type_interface(QuantizationInterface)
+
 set(LLVM_TARGET_DEFINITIONS BuiltinAttributes.td)
 mlir_tablegen(BuiltinAttributes.h.inc -gen-attrdef-decls)
 mlir_tablegen(BuiltinAttributes.cpp.inc -gen-attrdef-defs)
diff --git a/mlir/include/mlir/IR/QuantizationInterface.h b/mlir/include/mlir/IR/QuantizationInterface.h
@@ -0,0 +1,22 @@
+//===- QuantizationInterface.h - Quantization type interface ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_IR_QUANTIZATIONINTERFACE_H
+#define MLIR_IR_QUANTIZATIONINTERFACE_H
+
+#include "mlir/IR/Types.h"
+
+// Forward declarations for the types we need in the implementation
+namespace mlir {
+class IntegerType;
+class FloatType;
+} // namespace mlir
+
+#include "mlir/IR/QuantizationInterface.h.inc"
+
+#endif // MLIR_IR_QUANTIZATIONINTERFACE_H
diff --git a/mlir/include/mlir/IR/QuantizationInterface.td b/mlir/include/mlir/IR/QuantizationInterface.td
@@ -0,0 +1,45 @@
+#ifndef MLIR_IR_QUANTIZATIONINTERFACE
+#define MLIR_IR_QUANTIZATIONINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def QuantizationInterface : TypeInterface<"QuantizationInterface"> {
+  let description = [{
+    Interface for types that can be used as storage types of quantized types.
+    This interface provides methods to determine storage characteristics
+    like width and signedness for quantization purposes.
+  }];
+  let cppNamespace = "::mlir";
+
+  let methods = [
+    InterfaceMethod<[{
+      Get the storage type width in bits.
+      Returns the number of bits used to store values of this type.
+    }],
+    "unsigned", "getStorageWidth", (ins)>,
+
+    InterfaceMethod<[{
+      Check if the storage type is signed.
+      Returns true if the type represents signed values, false for unsigned.
+    }],
+    "bool", "isStorageSigned", (ins)>,
+
+    InterfaceMethod<[{
+      Get the default minimum storage value for the given signedness and width.
+    }],
+    "int64_t", "getDefaultMinimum", (ins "bool":$isSigned, "unsigned":$integralWidth)>,
+
+    InterfaceMethod<[{
+      Get the default maximum storage value for the given signedness and width.
+    }],
+    "int64_t", "getDefaultMaximum", (ins "bool":$isSigned, "unsigned":$integralWidth)>,
+
+    InterfaceMethod<[{
+      Get the textual name of the storage type (e.g. `i8` or `f8E5M2`).
+    }],
+    "std::string", "printStorageType", (ins "bool":$isSigned, "unsigned":$storageWidth)>
+  ];
+
+}
+
+#endif // MLIR_IR_QUANTIZATIONINTERFACE
diff --git a/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp b/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp
@@ -12,9 +12,10 @@
 
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/QuantizationInterface.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/MathExtras.h"
 
 using namespace mlir;
 using namespace mlir::quant;
@@ -45,17 +46,14 @@ QuantizedType::verify(function_ref<InFlightDiagnostic()> emitError,
       return emitError() << "illegal storage type size: " << integralWidth;
   }
 
   int64_t defaultMin, defaultMax;
-  if (storageType.isa<IntegerType>()) {
-    const auto width = llvm::dyn_cast<IntegerType>(storageType).getWidth();
-    defaultMin = QuantizedType::getDefaultMinimumForInteger(isSigned, width);
-    defaultMax = QuantizedType::getDefaultMaximumForInteger(isSigned, width);
-  } else if (storageType.isa<Float8E5M2Type>()) {
-    defaultMin = QuantizedType::getDefaultMinimumForF8E5M2();
-    defaultMax = QuantizedType::getDefaultMaximumForF8E5M2();
-  } else if (storageType.isa<Float8E4M3FNType>()) {
-    defaultMin = QuantizedType::getDefaultMinimumForF8E4M3FN();
-    defaultMax = QuantizedType::getDefaultMaximumForF8E4M3FN();
+  // Any storage type implementing QuantizationInterface supplies its own
+  // default range.
+  if (auto quantizationInterface =
+          llvm::dyn_cast<QuantizationInterface>(storageType)) {
+    const auto width = quantizationInterface.getStorageWidth();
+    defaultMin = quantizationInterface.getDefaultMinimum(isSigned, width);
+    defaultMax = quantizationInterface.getDefaultMaximum(isSigned, width);
   } else {
     return emitError() << "illegal storage type, supported types are: integral "
                           "types, Float8E4M3FNType and Float8E5M2Type ";
@@ -75,17 +73,25 @@ Type QuantizedType::getStorageType() const {
 }
 
 int64_t QuantizedType::getStorageTypeMin() const {
   return static_cast<ImplType *>(impl)->storageTypeMin;
 }
 
 int64_t QuantizedType::getStorageTypeMax() const {
   return static_cast<ImplType *>(impl)->storageTypeMax;
 }
 
 unsigned QuantizedType::getStorageTypeIntegralWidth() const {
   // NOTE: If ever supporting non-integral storage types, some other scheme
   // for determining the width will be needed.
-  return static_cast<ImplType *>(impl)->storageType.getIntOrFloatBitWidth();
+  Type storageType = static_cast<ImplType *>(impl)->storageType;
+
+  // Storage types implementing QuantizationInterface report their own width.
+  if (auto quantizationInterface =
+          llvm::dyn_cast<QuantizationInterface>(storageType)) {
+    return quantizationInterface.getStorageWidth();
+  }
+
+  return storageType.getIntOrFloatBitWidth();
 }
 
 Type QuantizedType::getExpressedType() const {
diff --git a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp
diff --git a/mlir/lib/IR/CMakeLists.txt b/mlir/lib/IR/CMakeLists.txt
diff --git a/mlir/lib/IR/QuantizationInterface.cpp b/mlir/lib/IR/QuantizationInterface.cpp