Fix issues in lift-array-alloc (#2570)

annagrin · web-flow · commit a304c4fff3fc · 2025-02-05T19:39:51.000Z
* Fix issues in lift-array-alloc * DCO Remediation Commit for Anna Gringauze <agringauze@nvidia.com> I, Anna Gringauze <agringauze@nvidia.com>, hereby add my Signed-off-by to this commit: c1592b8 Signed-off-by: Anna Gringauze <agringauze@nvidia.com> * Addressed CR comments Signed-off-by: Anna Gringauze <agringauze@nvidia.com> * Address CR comments Signed-off-by: Anna Gringauze <agringauze@nvidia.com> * Add new pass Signed-off-by: Anna Gringauze <agringauze@nvidia.com> * Fix null deref Signed-off-by: Anna Gringauze <agringauze@nvidia.com> --------- Signed-off-by: Anna Gringauze <agringauze@nvidia.com>
diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp
@@ -40,8 +40,10 @@ class AllocaPattern : public OpRewritePattern<cudaq::cc::AllocaOp> {
       return failure();
 
     LLVM_DEBUG(llvm::dbgs() << "Candidate was found\n");
-    auto eleTy = alloc.getElementType();
-    auto arrTy = cast<cudaq::cc::ArrayType>(eleTy);
+    auto allocTy = alloc.getElementType();
+    auto arrTy = cast<cudaq::cc::ArrayType>(allocTy);
+    auto eleTy = arrTy.getElementType();
+
     SmallVector<Attribute> values;
 
     // Every element of `stores` must be a cc::StoreOp with a ConstantOp as the
@@ -89,6 +91,8 @@ class AllocaPattern : public OpRewritePattern<cudaq::cc::AllocaOp> {
         cannotEraseAlloc = isLive = true;
       } else {
         for (auto *useuser : user->getUsers()) {
+          if (!useuser)
+            continue;
           if (auto load = dyn_cast<cudaq::cc::LoadOp>(useuser)) {
             rewriter.setInsertionPointAfter(useuser);
             LLVM_DEBUG(llvm::dbgs() << "replaced load\n");
@@ -160,14 +164,13 @@ class AllocaPattern : public OpRewritePattern<cudaq::cc::AllocaOp> {
         if (!u)
           return nullptr;
         if (auto store = dyn_cast<cudaq::cc::StoreOp>(u)) {
-          if (op.getOperation() == store.getPtrvalue().getDefiningOp() &&
-              isa_and_present<arith::ConstantOp, complex::ConstantOp>(
-                  store.getValue().getDefiningOp())) {
+          if (op.getOperation() == store.getPtrvalue().getDefiningOp()) {
             if (theStore) {
               LLVM_DEBUG(llvm::dbgs()
                          << "more than 1 store to element of array\n");
               return nullptr;
             }
+            LLVM_DEBUG(llvm::dbgs() << "found store: " << store << "\n");
             theStore = u;
           }
           continue;
@@ -182,7 +185,13 @@ class AllocaPattern : public OpRewritePattern<cudaq::cc::AllocaOp> {
         }
         return nullptr;
       }
-      return theStore;
+      return theStore &&
+                     isa_and_present<arith::ConstantOp, complex::ConstantOp>(
+                         dyn_cast<cudaq::cc::StoreOp>(theStore)
+                             .getValue()
+                             .getDefiningOp())
+                 ? theStore
+                 : nullptr;
     };
 
     auto unsizedArrTy = cudaq::cc::ArrayType::get(arrEleTy);
diff --git a/targettests/execution/state_preparation_vector_sizes.cpp b/targettests/execution/state_preparation_vector_sizes.cpp
@@ -21,9 +21,6 @@
 #include <cudaq.h>
 #include <iostream>
 
-#include <cudaq.h>
-#include <iostream>
-
 __qpu__ void test(std::vector<cudaq::complex> inState) {
   cudaq::qvector q1 = inState;
 }
diff --git a/test/Quake/lift_array.qke b/test/Quake/lift_array.qke
@@ -125,3 +125,73 @@ func.func @test2() -> !quake.veq<2> {
 // GLOBAL-DAG:     cc.global constant private @__nvqpp__mlirgen__function_test_complex_constant_array._Z27test_complex_constant_arrayv.rodata_{{[0-9]+}} (dense<[(0.707106769,0.000000e+00), (0.707106769,0.000000e+00), (0.000000e+00,0.000000e+00), (0.000000e+00,0.000000e+00)]> : tensor<4xcomplex<f32>>) : !cc.array<complex<f32> x 4>
 // GLOBAL-DAG:     cc.global constant private @__nvqpp__mlirgen__function_custom_h_generator_1._Z20custom_h_generator_1v.rodata_{{[0-9]+}} (dense<[(0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (0.70710678118654757,0.000000e+00), (-0.70710678118654757,0.000000e+00)]> : tensor<4xcomplex<f64>>) : !cc.array<complex<f64> x 4>
 // GLOBAL-DAG:     cc.global constant private @test2.rodata_{{[0-9]+}} (dense<[1.000000e+00, 2.000000e+00, 6.000000e+00, 9.000000e+00]>" : tensor<4xf64>) : !cc.array<f64 x 4>
+
+func.func @test_two_stores() {
+  %c0_i64 = arith.constant 0 : i64
+  %c1_i64 = arith.constant 1 : i64
+
+  // qubits = cudaq.qvector(2)
+  %0 = quake.alloca !quake.veq<2>
+
+  // arr1 = [1]
+  %1 = cc.alloca !cc.array<i64 x 1>
+  %2 = cc.cast %1 : (!cc.ptr<!cc.array<i64 x 1>>) -> !cc.ptr<i64>
+  cc.store %c1_i64, %2 : !cc.ptr<i64>
+
+  // t = arr1[0]
+  %3 = cc.load %2 : !cc.ptr<i64>
+
+  // arr2 = [0]
+  %4 = cc.alloca !cc.array<i64 x 1>
+  %5 = cc.cast %4 : (!cc.ptr<!cc.array<i64 x 1>>) -> !cc.ptr<i64>
+  cc.store %c0_i64, %5 : !cc.ptr<i64> // Dominates the next store, don't lift
+
+  // arr2[0] = t
+  cc.store %3, %5 : !cc.ptr<i64>
+
+  // b = arr2[0]
+  %6 = cc.load %5 : !cc.ptr<i64>
+
+  // x(qubits[b])
+  %7 = quake.extract_ref %0[%6] : (!quake.veq<2>, i64) -> !quake.ref
+  quake.x %7 : (!quake.ref) -> ()
+  return
+}
+
+// CHECK-LABEL:   func.func @test_two_stores() {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_1:.*]] = quake.alloca !quake.veq<2>
+// CHECK:           %[[VAL_2:.*]] = cc.const_array [1] : !cc.array<i64 x 1>
+// CHECK:           %[[VAL_3:.*]] = cc.extract_value %[[VAL_2]][0] : (!cc.array<i64 x 1>) -> i64
+// CHECK:           %[[VAL_4:.*]] = cc.alloca !cc.array<i64 x 1>
+// CHECK:           %[[VAL_5:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr<!cc.array<i64 x 1>>) -> !cc.ptr<i64>
+// CHECK:           cc.store %[[VAL_0]], %[[VAL_5]] : !cc.ptr<i64>
+// CHECK:           cc.store %[[VAL_3]], %[[VAL_5]] : !cc.ptr<i64>
+// CHECK:           %[[VAL_6:.*]] = cc.load %[[VAL_5]] : !cc.ptr<i64>
+// CHECK:           %[[VAL_7:.*]] = quake.extract_ref %[[VAL_1]][%[[VAL_6]]] : (!quake.veq<2>, i64) -> !quake.ref
+// CHECK:           quake.x %[[VAL_7]] : (!quake.ref) -> ()
+// CHECK:           return
+// CHECK:         }
+
+func.func @test_complex_array() {
+  %cst = complex.constant [0.000000e+00 : f32, 1.000000e+00 : f32] : complex<f32>
+  %cst_0 = complex.constant [1.000000e+00 : f32, 0.000000e+00 : f32] : complex<f32>
+  %0 = cc.alloca !cc.array<complex<f32> x 2>
+  %1 = cc.cast %0 : (!cc.ptr<!cc.array<complex<f32> x 2>>) -> !cc.ptr<complex<f32>>
+  cc.store %cst_0, %1 : !cc.ptr<complex<f32>>
+  %2 = cc.compute_ptr %0[1] : (!cc.ptr<!cc.array<complex<f32> x 2>>) -> !cc.ptr<complex<f32>>
+  cc.store %cst, %2 : !cc.ptr<complex<f32>>
+  %3 = quake.alloca !quake.veq<1>
+  %4 = quake.init_state %3, %1 : (!quake.veq<1>, !cc.ptr<complex<f32>>) -> !quake.veq<1>
+  return
+}
+
+// CHECK-LABEL:   func.func @test_complex_array() {
+// CHECK:           %[[VAL_0:.*]] = cc.const_array {{\[}}[1.000000e+00 : f32, 0.000000e+00 : f32], [0.000000e+00 : f32, 1.000000e+00 : f32]{{\]}} : !cc.array<complex<f32> x 2>
+// CHECK:           %[[VAL_1:.*]] = cc.alloca !cc.array<complex<f32> x 2>
+// CHECK:           cc.store %[[VAL_0]], %[[VAL_1]] : !cc.ptr<!cc.array<complex<f32> x 2>>
+// CHECK:           %[[VAL_2:.*]] = cc.cast %[[VAL_1]] : (!cc.ptr<!cc.array<complex<f32> x 2>>) -> !cc.ptr<complex<f32>>
+// CHECK:           %[[VAL_3:.*]] = quake.alloca !quake.veq<1>
+// CHECK:           %[[VAL_4:.*]] = quake.init_state %[[VAL_3]], %[[VAL_2]] : (!quake.veq<1>, !cc.ptr<complex<f32>>) -> !quake.veq<1>
+// CHECK:           return
+// CHECK:         }

Original file line number	Diff line number	Diff line change
`@@ -21,9 +21,6 @@`
`21`	`21`	`#include <cudaq.h>`
`22`	`22`	`#include <iostream>`
`23`	`23`
`24`		`-#include <cudaq.h>`
`25`		`-#include <iostream>`
`26`		`-`
`27`	`24`	`__qpu__ void test(std::vector<cudaq::complex> inState) {`
`28`	`25`	`cudaq::qvector q1 = inState;`
`29`	`26`	`}`