Skip to content

Commit ffa970b

Browse files
bondhugula authored and mylai-mtk committed
[MLIR][Affine] Add test pass for affine isContiguousAccess (llvm#82923)
`isContiguousAccess` is an important affine analysis utility but is only tested very indirectly via passes like vectorization and is not exposed. Expose it and add a test pass for it that'll make it easier/feasible to write test cases. This is especially needed since the utility can be significantly enhanced in power, and we need a test pass to exercise it directly. This pass can in the future be used to test the utility of invariant accesses as well.
1 parent 4589e09 commit ffa970b

File tree

6 files changed

+197
-31
lines changed

6 files changed

+197
-31
lines changed

mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,26 @@ uint64_t getLargestDivisorOfTripCount(AffineForOp forOp);
6060
DenseSet<Value, DenseMapInfo<Value>>
6161
getInvariantAccesses(Value iv, ArrayRef<Value> indices);
6262

63+
/// Given:
/// 1. an induction variable `iv` of type AffineForOp;
/// 2. a `memoryOp` of type AffineReadOpInterface or AffineWriteOpInterface;
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
/// is defined as either invariant or varying only along a unique MemRef dim.
/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
/// convey the memRef access is invariant along `iv`).
///
/// Prerequisites:
/// 1. `memRefDim` != nullptr;
/// 2. `iv` of the proper type;
/// 3. the MemRef accessed by `memoryOp` has no layout map or at most an
///    identity layout map.
///
/// Currently only supports no layout map or identity layout map in the memref.
/// Returns false if the memref has a non-identity layoutMap. This behavior is
/// conservative.
///
/// NOTE: explicitly instantiated (in the implementation) only for
/// AffineReadOpInterface and AffineWriteOpInterface; other template arguments
/// will fail to link.
template <typename LoadOrStoreOp>
bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp, int *memRefDim);
82+
6383
using VectorizableLoopFun = std::function<bool(AffineForOp)>;
6484

6585
/// Checks whether the loop is structurally vectorizable; i.e.:

mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -195,43 +195,25 @@ DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
195195
return res;
196196
}
197197

198-
/// Given:
199-
/// 1. an induction variable `iv` of type AffineForOp;
200-
/// 2. a `memoryOp` of type const LoadOp& or const StoreOp&;
201-
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
202-
/// is defined as either invariant or varying only along a unique MemRef dim.
203-
/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
204-
/// convey the memRef access is invariant along `iv`).
205-
///
206-
/// Prerequisites:
207-
/// 1. `memRefDim` ~= nullptr;
208-
/// 2. `iv` of the proper type;
209-
/// 3. the MemRef accessed by `memoryOp` has no layout map or at most an
210-
/// identity layout map.
211-
///
212-
/// Currently only supports no layoutMap or identity layoutMap in the MemRef.
213-
/// Returns false if the MemRef has a non-identity layoutMap or more than 1
214-
/// layoutMap. This is conservative.
215-
///
216-
// TODO: check strides.
198+
// TODO: check access stride.
217199
template <typename LoadOrStoreOp>
218-
static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
219-
int *memRefDim) {
220-
static_assert(
221-
llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
222-
"Must be called on either LoadOp or StoreOp");
200+
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
201+
int *memRefDim) {
202+
static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
203+
AffineWriteOpInterface>::value,
204+
"Must be called on either an affine read or write op");
223205
assert(memRefDim && "memRefDim == nullptr");
224206
auto memRefType = memoryOp.getMemRefType();
225207

226208
if (!memRefType.getLayout().isIdentity())
227-
return memoryOp.emitError("NYI: non-trivial layoutMap"), false;
209+
return memoryOp.emitError("NYI: non-trivial layout map"), false;
228210

229211
int uniqueVaryingIndexAlongIv = -1;
230212
auto accessMap = memoryOp.getAffineMap();
231213
SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
232214
unsigned numDims = accessMap.getNumDims();
233215
for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
234-
// Gather map operands used result expr 'i' in 'exprOperands'.
216+
// Gather map operands used in result expr 'i' in 'exprOperands'.
235217
SmallVector<Value, 4> exprOperands;
236218
auto resultExpr = accessMap.getResult(i);
237219
resultExpr.walk([&](AffineExpr expr) {
@@ -241,7 +223,7 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
241223
exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
242224
});
243225
// Check access invariance of each operand in 'exprOperands'.
244-
for (auto exprOperand : exprOperands) {
226+
for (Value exprOperand : exprOperands) {
245227
if (!isAccessIndexInvariant(iv, exprOperand)) {
246228
if (uniqueVaryingIndexAlongIv != -1) {
247229
// 2+ varying indices -> do not vectorize along iv.
@@ -259,6 +241,13 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
259241
return true;
260242
}
261243

244+
// Explicit instantiations for the only two template arguments admitted by the
// static_assert in the implementation above. Fix of a copy-paste slip: the
// write-interface instantiation previously named its parameter `loadOp`.
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface storeOp,
                                               int *memRefDim);
250+
262251
template <typename LoadOrStoreOp>
263252
static bool isVectorElement(LoadOrStoreOp memoryOp) {
264253
auto memRefType = memoryOp.getMemRefType();
@@ -344,10 +333,13 @@ bool mlir::affine::isVectorizableLoopBody(
344333
auto load = dyn_cast<AffineLoadOp>(op);
345334
auto store = dyn_cast<AffineStoreOp>(op);
346335
int thisOpMemRefDim = -1;
347-
bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
348-
&thisOpMemRefDim)
349-
: isContiguousAccess(loop.getInductionVar(), store,
350-
&thisOpMemRefDim);
336+
bool isContiguous =
337+
load ? isContiguousAccess(loop.getInductionVar(),
338+
cast<AffineReadOpInterface>(*load),
339+
&thisOpMemRefDim)
340+
: isContiguousAccess(loop.getInductionVar(),
341+
cast<AffineWriteOpInterface>(*store),
342+
&thisOpMemRefDim);
351343
if (thisOpMemRefDim != -1) {
352344
// If memory accesses vary across different dimensions then the loop is
353345
// not vectorizable.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// RUN: mlir-opt %s -split-input-file -test-affine-access-analysis -verify-diagnostics | FileCheck %s

// Tests for the -test-affine-access-analysis pass: each affine access gets a
// remark naming the surrounding loops along which it is contiguous.

// CHECK-LABEL: func @loop_1d
func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %M = memref.dim %A, %c0 : memref<?x?xf32>
  affine.for %i = 0 to %M {
    affine.for %j = 0 to %M {
      affine.load %A[%c0, %i] : memref<?x?xf32>
      // expected-remark@above {{contiguous along loop 0}}
      affine.load %A[%c0, 8 * %i + %j] : memref<?x?xf32>
      // expected-remark@above {{contiguous along loop 1}}
      // Note/FIXME: access stride isn't being checked.
      // expected-remark@-3 {{contiguous along loop 0}}

      // These are all non-contiguous along both loops. Nothing is emitted.
      affine.load %A[%i, %c0] : memref<?x?xf32>
      // Note/FIXME: access stride isn't being checked.
      affine.load %A[%i, 8 * %j] : memref<?x?xf32>
      // expected-remark@above {{contiguous along loop 1}}
      affine.load %A[%j, 4 * %i] : memref<?x?xf32>
      // expected-remark@above {{contiguous along loop 0}}
    }
  }
  return
}

// -----

#map = affine_map<(d0) -> (d0 * 16)>
#map1 = affine_map<(d0) -> (d0 * 16 + 16)>
#map2 = affine_map<(d0) -> (d0)>
#map3 = affine_map<(d0) -> (d0 + 1)>

func.func @tiled(%arg0: memref<*xf32>) {
  %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x224x224x64xf32>
  %cast = memref.cast %arg0 : memref<*xf32> to memref<64xf32>
  affine.for %arg1 = 0 to 4 {
    affine.for %arg2 = 0 to 224 {
      affine.for %arg3 = 0 to 14 {
        %alloc_0 = memref.alloc() : memref<1x16x1x16xf32>
        affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
          affine.for %arg5 = #map(%arg3) to #map1(%arg3) {
            %0 = affine.load %cast[%arg4] : memref<64xf32>
            // expected-remark@above {{contiguous along loop 3}}
            affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32>
            // expected-remark@above {{contiguous along loop 4}}
            // expected-remark@above {{contiguous along loop 2}}
          }
        }
        affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
          affine.for %arg5 = #map2(%arg2) to #map3(%arg2) {
            affine.for %arg6 = #map(%arg3) to #map1(%arg3) {
              %0 = affine.load %alloc_0[0, %arg1 * -16 + %arg4, -%arg2 + %arg5, %arg3 * -16 + %arg6] : memref<1x16x1x16xf32>
              // expected-remark@above {{contiguous along loop 5}}
              // expected-remark@above {{contiguous along loop 2}}
              affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32>
              // expected-remark@above {{contiguous along loop 3}}
            }
          }
        }
        memref.dealloc %alloc_0 : memref<1x16x1x16xf32>
      }
    }
  }
  return
}

mlir/test/lib/Dialect/Affine/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ add_mlir_library(MLIRAffineTransformsTestPasses
33
TestAffineDataCopy.cpp
44
TestAffineLoopUnswitching.cpp
55
TestAffineLoopParametricTiling.cpp
6+
TestAccessAnalysis.cpp
67
TestDecomposeAffineOps.cpp
78
TestReifyValueBounds.cpp
89
TestLoopFusion.cpp
@@ -21,6 +22,7 @@ add_mlir_library(MLIRAffineTransformsTestPasses
2122

2223
LINK_LIBS PUBLIC
2324
MLIRArithTransforms
25+
MLIRAffineAnalysis
2426
MLIRAffineTransforms
2527
MLIRAffineUtils
2628
MLIRIR
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
//===- TestAccessAnalysis.cpp - Test affine access analysis utility -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements a pass to test affine access analysis utilities.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
13+
#include "mlir/Dialect/Affine/Analysis/Utils.h"
14+
#include "mlir/Dialect/Affine/LoopFusionUtils.h"
15+
#include "mlir/Dialect/Func/IR/FuncOps.h"
16+
#include "mlir/Pass/Pass.h"
17+
18+
#define PASS_NAME "test-affine-access-analysis"
19+
20+
using namespace mlir;
21+
using namespace mlir::affine;
22+
23+
namespace {

/// Test pass exercising the affine access-analysis utilities: walks each
/// function's affine loop nests and emits remarks for contiguous accesses
/// (implementation in runOnOperation, defined out of line below).
struct TestAccessAnalysis
    : public PassWrapper<TestAccessAnalysis, OperationPass<func::FuncOp>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAccessAnalysis)

  /// Command-line argument that invokes this pass ("test-affine-access-analysis").
  StringRef getArgument() const final { return PASS_NAME; }
  StringRef getDescription() const final {
    return "Tests affine memory access analysis utility";
  }

  void runOnOperation() override;
};

} // namespace
38+
39+
/// Gathers all affine load/store ops in loop nest rooted at 'forOp' into
40+
/// 'loadAndStoreOps'.
41+
static void
42+
gatherLoadsAndStores(AffineForOp forOp,
43+
SmallVectorImpl<Operation *> &loadAndStoreOps) {
44+
forOp.walk([&](Operation *op) {
45+
if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
46+
loadAndStoreOps.push_back(op);
47+
});
48+
}
49+
50+
void TestAccessAnalysis::runOnOperation() {
51+
SmallVector<Operation *> loadStores;
52+
SmallVector<AffineForOp> enclosingOps;
53+
// Go over all top-level affine.for ops and test each contained affine
54+
// access's contiguity along every surrounding loop IV.
55+
for (auto forOp : getOperation().getOps<AffineForOp>()) {
56+
loadStores.clear();
57+
gatherLoadsAndStores(forOp, loadStores);
58+
for (Operation *memOp : loadStores) {
59+
enclosingOps.clear();
60+
getAffineForIVs(*memOp, &enclosingOps);
61+
for (unsigned d = 0, e = enclosingOps.size(); d < e; d++) {
62+
int memRefDim;
63+
bool isContiguous;
64+
if (auto read = dyn_cast<AffineReadOpInterface>(memOp)) {
65+
isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
66+
read, &memRefDim);
67+
} else {
68+
isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
69+
cast<AffineWriteOpInterface>(memOp),
70+
&memRefDim);
71+
}
72+
if (isContiguous && memRefDim == 0)
73+
memOp->emitRemark("contiguous along loop ") << d << '\n';
74+
}
75+
}
76+
}
77+
}
78+
79+
namespace mlir {
/// Registers the test pass with the global pass registry; called from
/// mlir-opt's test-pass registration.
void registerTestAffineAccessAnalysisPass() {
  PassRegistration<TestAccessAnalysis>();
}
} // namespace mlir

mlir/tools/mlir-opt/mlir-opt.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ void registerSliceAnalysisTestPass();
4343
void registerSymbolTestPasses();
4444
void registerRegionTestPasses();
4545
void registerTestAffineDataCopyPass();
46+
void registerTestAffineAccessAnalysisPass();
4647
void registerTestAffineReifyValueBoundsPass();
4748
void registerTestAffineLoopUnswitchingPass();
4849
void registerTestAffineWalk();
@@ -169,6 +170,7 @@ void registerTestPasses() {
169170
registerSymbolTestPasses();
170171
registerRegionTestPasses();
171172
registerTestAffineDataCopyPass();
173+
registerTestAffineAccessAnalysisPass();
172174
registerTestAffineLoopUnswitchingPass();
173175
registerTestAffineReifyValueBoundsPass();
174176
registerTestAffineWalk();

0 commit comments

Comments (0)