Xilinx
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 11 additions & 5 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 11 additions & 5 deletions
diff --git a/‎externals/llvm-project‎ b/‎externals/llvm-project‎
diff --git a/‎externals/stablehlo‎ b/‎externals/stablehlo‎
diff --git a/‎include/torch-mlir/Conversion/TorchToTosa/TosaLegalizeUtils.h‎
Lines changed: 0 additions & 1 deletion b/‎include/torch-mlir/Conversion/TorchToTosa/TosaLegalizeUtils.h‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎lib/Conversion/TorchToLinalg/Uncategorized.cpp‎
Lines changed: 3 additions & 3 deletions b/‎lib/Conversion/TorchToLinalg/Uncategorized.cpp‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎lib/Conversion/TorchToTosa/TorchToTosa.cpp‎
Lines changed: 54 additions & 30 deletions b/‎lib/Conversion/TorchToTosa/TorchToTosa.cpp‎
Lines changed: 54 additions & 30 deletions
diff --git a/‎lib/Conversion/TorchToTosa/TosaLegalizeCommon.cpp‎
Lines changed: 4 additions & 2 deletions b/‎lib/Conversion/TorchToTosa/TosaLegalizeCommon.cpp‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎lib/Dialect/TMTensor/Transforms/Bufferize.cpp‎
Lines changed: 49 additions & 1 deletion b/‎lib/Dialect/TMTensor/Transforms/Bufferize.cpp‎
Lines changed: 49 additions & 1 deletion
diff --git a/‎lib/Dialect/TMTensor/Transforms/ConvertToLoops.cpp‎
Lines changed: 1 addition & 2 deletions b/‎lib/Dialect/TMTensor/Transforms/ConvertToLoops.cpp‎
Lines changed: 1 addition & 2 deletions
@@ -45,13 +45,11 @@ jobs:
           restore-keys: |
             build-test-cpp-asserts-manylinux-${{ matrix.torch-version }}-v2-
 
-      - name: "Setting up Python"
+      - name: "Setting up Python" # AMD: python 3.10 and not 3.11
         run: |
           sudo apt update
-          sudo apt install software-properties-common -y
-          sudo add-apt-repository ppa:deadsnakes/ppa -y
-          sudo apt install python3.11 python3-pip -y
-          sudo apt-get install python3.11-dev python3.11-venv build-essential -y
+          sudo apt install python3.10 python3-pip -y
+          sudo apt-get install python3.10-dev python3.10-venv build-essential -y
 
       - name: Install python deps (torch-${{ matrix.torch-version }})
         run: |
@@ -77,10 +75,18 @@ jobs:
           key: build-test-cpp-asserts-manylinux-${{ matrix.torch-version }}-v2-${{ github.sha }}
 
       - name: Integration tests (torch-${{ matrix.torch-version }})
+        if: ${{ matrix.torch-version == 'nightly' }}
+        continue-on-error: true
+        run: |
+          bash build_tools/ci/test_posix.sh ${{ matrix.torch-version }}
+
+      - name: Integration tests (torch-${{ matrix.torch-version }})
+        if: ${{ matrix.torch-version != 'nightly' }}
         run: |
           bash build_tools/ci/test_posix.sh ${{ matrix.torch-version }}
 
       - name: Check generated sources (torch-nightly only)
         if: ${{ matrix.torch-version == 'nightly' }}
+        continue-on-error: true
         run: |
           bash build_tools/ci/check_generated_sources.sh
@@ -131,7 +131,6 @@ TypedValue<RankedTensorType> transposeBy(Location loc,
 // Get accumulator type for AvgPool2dOp.
 LogicalResult getAvgPool2dAccType(PatternRewriter &rewriter, Value input,
                                   TypeAttr &accType);
-
 } // namespace tosa
 } // namespace mlir
 
 
@@ -549,7 +549,7 @@ static Value createLinalgPayloadCalculationForElementwiseOp(
   }
   if (isa<AtenLogicalOrOp, AtenLogicalAndOp, AtenLogicalXorOp>(op)) {
     MLIRContext *context = op->getContext();
-    Type floatDtype = mlir::FloatType::getF64(context);
+    Type floatDtype = mlir::Float64Type::get(context);
     Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], floatDtype);
     Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], floatDtype);
     Value zero =
@@ -569,7 +569,7 @@ static Value createLinalgPayloadCalculationForElementwiseOp(
   }
   if (isa<AtenLogicalNotOp>(op)) {
     MLIRContext *context = op->getContext();
-    Type floatDtype = mlir::FloatType::getF64(context);
+    Type floatDtype = mlir::Float64Type::get(context);
     Value self = convertScalarToDtype(b, loc, payloadArgs[0], floatDtype);
     Value zero =
         b.create<arith::ConstantOp>(loc, b.getFloatAttr(floatDtype, 0));
@@ -1028,7 +1028,7 @@ static Value createLinalgPayloadCalculationForElementwiseOp(
     Type powType = dtype;
     if (payloadArgs[0].getType().isInteger() ||
         payloadArgs[1].getType().isInteger())
-      powType = mlir::FloatType::getF64(op->getContext());
+      powType = mlir::Float64Type::get(op->getContext());
     Value lhs = convertScalarToDtype(b, loc, payloadArgs[0], powType);
     Value rhs = convertScalarToDtype(b, loc, payloadArgs[1], powType);
     auto powOp = b.create<math::PowFOp>(loc, lhs, rhs);
 
@@ -12,6 +12,7 @@
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
+#include "mlir/Dialect/Tosa/Utils/ConversionUtils.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "torch-mlir/Conversion/TorchToTosa/TosaLegalizeCommon.h"
@@ -2280,9 +2281,9 @@ Value createConvInGroups(PatternRewriter &rewriter, Operation *op,
                          Type &resultType,
                          const llvm::ArrayRef<int64_t> weightShape,
                          Value &input, Value &weights, Value &bias,
-                         const int64_t groups, DenseI64ArrayAttr &pads,
-                         DenseI64ArrayAttr &strides,
-                         DenseI64ArrayAttr &dilations) {
+                         const int64_t groups, DenseI64ArrayAttr pads,
+                         DenseI64ArrayAttr strides, DenseI64ArrayAttr dilations,
+                         TypeAttr accType) {
   // Set up constants outside of loop
   const int64_t sizeOfSliceInput = weightShape[1];
   const int64_t sizeOfSliceKernel = weightShape[0] / groups;
@@ -2312,7 +2313,7 @@ Value createConvInGroups(PatternRewriter &rewriter, Operation *op,
     // Create conv
     Value tempConv2D = tosa::CreateOpAndInfer<mlir::tosa::Conv2DOp>(
         rewriter, input.getLoc(), outputType, sliceInput, sliceWeight,
-        sliceBias, pads, strides, dilations);
+        sliceBias, pads, strides, dilations, accType);
     // Add value to vector
     sliceValues.push_back(tempConv2D);
   }
@@ -2420,6 +2421,12 @@ LogicalResult ConvertAtenOp<AtenConvolutionOp>::matchAndRewrite(
     return rewriter.notifyMatchFailure(op,
                                        "non-const dilation list unsupported");
 
+  TypeAttr accType;
+  if (failed(tosa::getConvOpsAccType(rewriter, inputTy, weightTy, outputTy,
+                                     accType)))
+    return rewriter.notifyMatchFailure(
+        op, "failed to get accumulator type for convolution ops");
+
   // TOSA works in NHWC and takes OHWI (conv) / HWIM (depthwise conv) weights.
   // Perform the necessary transformations.
   std::optional<Value> nchwToNhwcTransposeConst =
@@ -2523,22 +2530,19 @@ LogicalResult ConvertAtenOp<AtenConvolutionOp>::matchAndRewrite(
   // quantized input is i32, which gets rescaled down to quantized output range.
   SmallVector<int64_t> outputShape = {transposedInputShape[0], outputHDim,
                                       outputWDim, outputCDim};
-
-  DenseI64ArrayAttr paddingAttr = rewriter.getDenseI64ArrayAttr(padding);
-  DenseI64ArrayAttr strideAttr = rewriter.getDenseI64ArrayAttr(stride);
-  DenseI64ArrayAttr dilationAttr = rewriter.getDenseI64ArrayAttr(dilation);
-
   Value convOpResult;
   if (groups == 1) {
     // full convolution
     auto convOpTy =
         RankedTensorType::get(makeShapeLLVMCompatible(outputShape), biasElemTy);
     convOpResult =
         rewriter
-            .create<tosa::Conv2DOp>(op->getLoc(),
-                                    getTypeConverter()->convertType(convOpTy),
-                                    transposedInput, transformedWeight, bias,
-                                    paddingAttr, strideAttr, dilationAttr)
+            .create<tosa::Conv2DOp>(
+                op->getLoc(), getTypeConverter()->convertType(convOpTy),
+                transposedInput, transformedWeight, bias,
+                rewriter.getDenseI64ArrayAttr(padding),
+                rewriter.getDenseI64ArrayAttr(stride),
+                rewriter.getDenseI64ArrayAttr(dilation), accType)
             .getResult();
   } else if (weightShape[1] == 1) {
     // depthwise convolution
@@ -2548,14 +2552,18 @@ LogicalResult ConvertAtenOp<AtenConvolutionOp>::matchAndRewrite(
         rewriter
             .create<tosa::DepthwiseConv2DOp>(
                 op->getLoc(), getTypeConverter()->convertType(convOpTy),
-                transposedInput, transformedWeight, bias, paddingAttr,
-                strideAttr, dilationAttr)
+                transposedInput, transformedWeight, bias,
+                rewriter.getDenseI64ArrayAttr(padding),
+                rewriter.getDenseI64ArrayAttr(stride),
+                rewriter.getDenseI64ArrayAttr(dilation), accType)
             .getResult();
   } else {
     // general group convolution
     convOpResult = createConvInGroups(
         rewriter, op, outputTy, weightShape, transposedInput, transformedWeight,
-        bias, groups, paddingAttr, strideAttr, dilationAttr);
+        bias, groups, rewriter.getDenseI64ArrayAttr(padding),
+        rewriter.getDenseI64ArrayAttr(stride),
+        rewriter.getDenseI64ArrayAttr(dilation), accType);
   }
 
   std::optional<Value> nhwcToNchwTransposeConst =
@@ -4103,9 +4111,11 @@ LogicalResult ConvertAtenOp<AtenBroadcastToOp>::matchAndRewrite(
       }
     }
 
-    auto result = rewriter.create<tosa::TileOp>(
-        op->getLoc(), resultType, reshapedInput,
-        rewriter.getDenseI64ArrayAttr(tileOpShape));
+    auto tileOpMultiples =
+        tosa::getTosaConstShape(rewriter, op->getLoc(), tileOpShape);
+
+    auto result = rewriter.create<tosa::TileOp>(op->getLoc(), resultType,
+                                                reshapedInput, tileOpMultiples);
 
     rewriter.replaceOp(op, {result.getResult()});
   }
@@ -4298,9 +4308,11 @@ LogicalResult ConvertAtenOp<AtenIndexSelectOp>::matchAndRewrite(
       RankedTensorType::get(makeShapeLLVMCompatible(expandedIndicesShape),
                             rewriter.getIntegerType(32));
 
+  auto tileOpMultiples =
+      tosa::getTosaConstShape(rewriter, op->getLoc(), tileShape);
+
   auto expandedIndices = rewriter.create<tosa::TileOp>(
-      op->getLoc(), tileType, reshapedIndices.getResult(),
-      rewriter.getDenseI64ArrayAttr(tileShape));
+      op->getLoc(), tileType, reshapedIndices.getResult(), tileOpMultiples);
 
   // convert torch style index and dim into tf style indices
   // tensor<[1,4,2],si64> -> tensor<[1,4,2,3],si64>
@@ -4639,17 +4651,23 @@ LogicalResult ConvertAtenOp<AtenIndexTensorHackedTwinOp>::matchAndRewrite(
         if (needsTiling) {
           auto idxType =
               dyn_cast<RankedTensorType>(indicesTfConcatTensors[i].getType());
+
           // indicesTfConcatTensors has a trailing [1] dim for the final concat.
           auto maxRankMaxDimShapeTf(maxRankMaxDimShape);
           maxRankMaxDimShapeTf.push_back(1);
+
           auto tileOpShapeTf(tileOpShape);
           tileOpShapeTf.push_back(1);
+
           auto tileOutputTy = RankedTensorType::get(maxRankMaxDimShapeTf,
                                                     idxType.getElementType());
           auto reshapedIdxTensor = indicesTfConcatTensors[i];
+
+          auto tileOpMultiples =
+              tosa::getTosaConstShape(rewriter, op->getLoc(), tileOpShapeTf);
+
           indicesTfConcatTensors[i] = rewriter.create<tosa::TileOp>(
-              op->getLoc(), tileOutputTy, reshapedIdxTensor,
-              rewriter.getDenseI64ArrayAttr(tileOpShapeTf));
+              op->getLoc(), tileOutputTy, reshapedIdxTensor, tileOpMultiples);
         }
 
         // Every index tensor now has the same rank and shape
@@ -6220,12 +6238,14 @@ class ConvertAtenFillOp : public OpConversionPattern<AtenOpT> {
           op->getLoc(), fillValueMatchedInputRankType, fillValue,
           rewriter.getDenseI64ArrayAttr(fillValueMatchedInputRankShape));
 
+      auto tileOpMultiples =
+          tosa::getTosaConstShape(rewriter, op->getLoc(), outType.getShape());
+
       fillValueTargetTensor = rewriter.create<tosa::TileOp>(
           op->getLoc(),
           RankedTensorType::get(makeShapeTorchCompatible(outType.getShape()),
                                 fillValueElemTy),
-          fillValueMatchedInputRankTensor.getResult(),
-          makeShapeTorchCompatible(outType.getShape()));
+          fillValueMatchedInputRankTensor.getResult(), tileOpMultiples);
     } else {
       if (failed(torchScalarToTosaTensor(
               rewriter, op, op.getValue(), fillValueTargetTensor, outElemTy,
@@ -6376,7 +6396,7 @@ LogicalResult ConvertAtenOp<AtenConstantPadNdOp>::matchAndRewrite(
   }
 
   DenseElementsAttr paddingAttr = DenseIntElementsAttr::get(
-      RankedTensorType::get({rank, 2}, rewriter.getI64Type()),
+      RankedTensorType::get({2 * rank}, rewriter.getI64Type()),
       translatePadsList);
 
   Value padsList1 = rewriter.create<mlir::tosa::ConstOp>(
@@ -8033,9 +8053,11 @@ LogicalResult ConvertAtenOp<AtenOuterOp>::matchAndRewrite(
                             resultType.getElementType()),
       self, rewriter.getDenseI64ArrayAttr(resultShapeIndex1Replaced));
 
+  auto selfTileOpMultiples = tosa::getTosaConstShape(rewriter, op->getLoc(),
+                                                     resultShapeIndex0Replaced);
+
   auto selfTiled = rewriter.create<tosa::TileOp>(
-      op->getLoc(), resultType, selfReshaped.getResult(),
-      rewriter.getDenseI64ArrayAttr(resultShapeIndex0Replaced));
+      op->getLoc(), resultType, selfReshaped.getResult(), selfTileOpMultiples);
 
   // Reshape and tile vec2 to shape {resultShape[0], vec2Shape[0]}
   auto vec2Reshaped = rewriter.create<tosa::ReshapeOp>(
@@ -8044,9 +8066,11 @@ LogicalResult ConvertAtenOp<AtenOuterOp>::matchAndRewrite(
                             resultType.getElementType()),
       vec2, rewriter.getDenseI64ArrayAttr(resultShapeIndex0Replaced));
 
+  auto vec2TileOpMultiples = tosa::getTosaConstShape(rewriter, op->getLoc(),
+                                                     resultShapeIndex1Replaced);
+
   auto vec2Tiled = rewriter.create<tosa::TileOp>(
-      op->getLoc(), resultType, vec2Reshaped.getResult(),
-      rewriter.getDenseI64ArrayAttr(resultShapeIndex1Replaced));
+      op->getLoc(), resultType, vec2Reshaped.getResult(), vec2TileOpMultiples);
 
   auto result =
       tosa::createMulOpAndCast(rewriter, op, resultType, selfTiled.getResult(),
 
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "torch-mlir/Conversion/TorchToTosa/TosaLegalizeCommon.h"
+#include "mlir/Dialect/Tosa/Utils/ConversionUtils.h"
 #include "torch-mlir/Conversion/Utils/Utils.h"
 #include "torch-mlir/Dialect/Torch/IR/TorchOps.h"
 
@@ -566,11 +567,12 @@ std::optional<Value> convertScatterNdOp(PatternRewriter &rewriter,
 
     // [0] -> [0,0,0]
     SmallVector<int64_t, 1> tileShape({W}); // {3}
+    auto tileOpMultiples =
+        tosa::getTosaConstShape(rewriter, op->getLoc(), tileShape);
     auto tosaFillValuesTileOp = tosa::CreateOpAndInfer<tosa::TileOp>(
         rewriter, op->getLoc(),
         GetTypeFromTensorShape(tileShape, fillValuesType.getElementType()),
-        tosaFillValuesOneReshapeOp.getResult(),
-        rewriter.getDenseI64ArrayAttr(tileShape));
+        tosaFillValuesOneReshapeOp.getResult(), tileOpMultiples);
 
     // [0,0,0] -> [[0,0,0]]
     SmallVector<int64_t, 2> newTosaFillValuesShape({N, W}); // {1,3}
 
@@ -121,6 +121,14 @@ class BufferizeAnyTMTensorOp : public OpInterfaceConversionPattern<TMTensorOp> {
 };
 
 namespace {
+
+static Value materializeToTensor(OpBuilder &builder, TensorType type,
+                                 ValueRange inputs, Location loc) { // materialization hook: memref value -> tensor value
+  assert(inputs.size() == 1); // exactly one input value is expected
+  assert(isa<BaseMemRefType>(inputs[0].getType())); // the input must already have a memref type
+  return builder.create<bufferization::ToTensorOp>(loc, type, inputs[0]); // wrap the memref in a to_tensor op producing `type`
+}
+
 /// Converts TMTensor operations that work on tensor-type operands or results to
 /// work on buffers.
 struct TMTensorBufferizePass
@@ -133,7 +141,47 @@ struct TMTensorBufferizePass
   void runOnOperation() override {
     MLIRContext &context = getContext();
     ConversionTarget target(context);
-    bufferization::BufferizeTypeConverter typeConverter;
+    // The `BufferizeTypeConverter` was removed upstream in
+    // https://github.com/llvm/llvm-project/commit/2ff2e871f5e632ea493efaf4f2192f8b18a54ab1,
+    // so the converter is inlined here.
+    TypeConverter typeConverter;
+    typeConverter.addConversion([](Type type) { return type; });
+    // Convert RankedTensorType to MemRefType.
+    typeConverter.addConversion([](RankedTensorType type) -> Type {
+      return MemRefType::get(type.getShape(), type.getElementType());
+    });
+    // Convert UnrankedTensorType to UnrankedMemRefType.
+    typeConverter.addConversion([](UnrankedTensorType type) -> Type {
+      return UnrankedMemRefType::get(type.getElementType(), 0);
+    });
+    typeConverter.addArgumentMaterialization(materializeToTensor);
+    typeConverter.addSourceMaterialization(materializeToTensor);
+    typeConverter.addTargetMaterialization([](OpBuilder &builder,
+                                              BaseMemRefType type,
+                                              ValueRange inputs,
+                                              Location loc) -> Value {
+      assert(inputs.size() == 1 && "expected exactly one input");
+      if (auto inputType = dyn_cast<MemRefType>(inputs[0].getType())) {
+        // MemRef to MemRef cast.
+        assert(inputType != type && "expected different types");
+        // Ranked to unranked casts must be explicit.
+        auto rankedDestType = dyn_cast<MemRefType>(type);
+        if (!rankedDestType)
+          return nullptr;
+        bufferization::BufferizationOptions options;
+        options.bufferAlignment = 0;
+        FailureOr<Value> replacement = castOrReallocMemRefValue(
+            builder, inputs[0], rankedDestType, options);
+        if (failed(replacement))
+          return nullptr;
+        return *replacement;
+      }
+      if (isa<TensorType>(inputs[0].getType())) {
+        // Tensor to MemRef cast.
+        return builder.create<bufferization::ToMemrefOp>(loc, type, inputs[0]);
+      }
+      llvm_unreachable("only tensor/memref input types supported");
+    });
 
     // Mark all Standard operations legal.
     target.addLegalDialect<arith::ArithDialect, func::FuncDialect,
 
@@ -110,8 +110,7 @@ struct TMTensorToLoopsPass : public TMTensorToLoopsBase<TMTensorToLoopsPass> {
 
     RewritePatternSet patterns(context);
     patterns.insert<ScalarLoopOpInterfaceLowerToLoopsPattern>(context);
-    if (failed(applyPatternsAndFoldGreedily(getOperation(),
-                                            std::move(patterns)))) {
+    if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) {
       return signalPassFailure();
     }
   }
Original file line number	Diff line number	Diff line change
`@@ -110,8 +110,7 @@ struct TMTensorToLoopsPass : public TMTensorToLoopsBase<TMTensorToLoopsPass> {`
`110`	`110`
`111`	`111`	`RewritePatternSet patterns(context);`
`112`	`112`	`patterns.insert<ScalarLoopOpInterfaceLowerToLoopsPattern>(context);`
`113`		`- if (failed(applyPatternsAndFoldGreedily(getOperation(),`
`114`		`- std::move(patterns)))) {`
	`113`	`+ if (failed(applyPatternsGreedily(getOperation(), std::move(patterns)))) {`
`115`	`114`	`return signalPassFailure();`
`116`	`115`	`}`
`117`	`116`	`}`