NVIDIA
diff --git a/include/cudaq/Optimizer/CodeGen/Pipelines.h b/include/cudaq/Optimizer/CodeGen/Pipelines.h
Lines changed: 0 additions & 17 deletions
diff --git a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp
Lines changed: 14 additions & 4 deletions
diff --git a/lib/Optimizer/CodeGen/Pipelines.cpp b/lib/Optimizer/CodeGen/Pipelines.cpp
Lines changed: 0 additions & 31 deletions
diff --git a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp
Lines changed: 13 additions & 10 deletions
diff --git a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp
Lines changed: 69 additions & 16 deletions
diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py
Lines changed: 72 additions & 3 deletions
@@ -30,35 +30,18 @@ void commonPipelineConvertToQIR(mlir::PassManager &pm,
                                 mlir::StringRef codeGenFor = "qir",
                                 mlir::StringRef passConfigAs = "qir");
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-void commonPipelineConvertToQIR_PythonWorkaround(
-    mlir::PassManager &pm, const std::optional<mlir::StringRef> &convertTo);
-
 /// \brief Pipeline builder to convert Quake to QIR.
 /// Does not specify a particular QIR profile.
 inline void addPipelineConvertToQIR(mlir::PassManager &pm) {
   commonPipelineConvertToQIR(pm);
 }
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-inline void addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm) {
-  commonPipelineConvertToQIR_PythonWorkaround(pm, std::nullopt);
-}
-
 /// \brief Pipeline builder to convert Quake to QIR.
 /// Specifies a particular QIR profile in \p convertTo.
 /// \p pm Pass manager to append passes to
 /// \p convertTo name of QIR profile (e.g., `qir-base`, `qir-adaptive`, ...)
 void addPipelineConvertToQIR(mlir::PassManager &pm, mlir::StringRef convertTo);
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-inline void
-addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm,
-                                         mlir::StringRef convertTo) {
-  commonPipelineConvertToQIR_PythonWorkaround(pm, convertTo);
-  addQIRProfilePipeline(pm, convertTo);
-}
-
 void addLowerToCCPipeline(mlir::OpPassManager &pm);
 
 void addPipelineTranslateToOpenQASM(mlir::PassManager &pm);
 
@@ -1092,7 +1092,7 @@ struct QuantumGatePattern : public OpConversionPattern<OP> {
 
     // Process the controls, sorting them by type.
     for (auto pr : llvm::zip(op.getControls(), adaptor.getControls())) {
-      if (isa<quake::VeqType>(std::get<0>(pr).getType())) {
+      if (isaVeqArgument(std::get<0>(pr).getType())) {
         numArrayCtrls++;
         auto sizeCall = rewriter.create<func::CallOp>(
             loc, i64Ty, cudaq::opt::QIRArrayGetSize,
@@ -1155,6 +1155,18 @@ struct QuantumGatePattern : public OpConversionPattern<OP> {
     return forwardOrEraseOp();
   }
 
+  static bool isaVeqArgument(Type ty) {
+    // TODO: Need a way to identify arrays when using the opaque pointer
+    // variant. (In Python, the arguments may already be converted.)
+    auto alreadyConverted = [](Type ty) {
+      if (auto ptrTy = dyn_cast<cudaq::cc::PointerType>(ty))
+        if (auto strTy = dyn_cast<LLVM::LLVMStructType>(ptrTy.getElementType()))
+          return strTy.isIdentified() && strTy.getName() == "Array";
+      return false;
+    };
+    return isa<quake::VeqType>(ty) || alreadyConverted(ty);
+  }
+
   static bool conformsToIntendedCall(std::size_t numControls, Value ctrl, OP op,
                                      StringRef qirFunctionName) {
     if (numControls != 1)
@@ -1819,9 +1831,7 @@ struct QuakeToQIRAPIPrepPass
   }
 
   void guaranteeMzIsLabeled(quake::MzOp mz, int &counter, OpBuilder &builder) {
-    if (mz.getRegisterNameAttr() &&
-        /* FIXME: issue 2538: the name should never be empty. */
-        !mz.getRegisterNameAttr().getValue().empty()) {
+    if (mz.getRegisterNameAttr()) {
       mz->setAttr(cudaq::opt::MzAssignedNameAttrName, builder.getUnitAttr());
       return;
     }
 
@@ -51,37 +51,6 @@ void cudaq::opt::commonPipelineConvertToQIR(PassManager &pm,
   pm.addPass(createCCToLLVM());
 }
 
-void cudaq::opt::commonPipelineConvertToQIR_PythonWorkaround(
-    PassManager &pm, const std::optional<StringRef> &convertTo) {
-  pm.addNestedPass<func::FuncOp>(createApplyControlNegations());
-  addAggressiveEarlyInlining(pm);
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createUnwindLoweringPass());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addPass(createApplyOpSpecializationPass());
-  pm.addNestedPass<func::FuncOp>(createExpandMeasurementsPass());
-  pm.addNestedPass<func::FuncOp>(createClassicalMemToReg());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(createQuakeAddDeallocs());
-  pm.addNestedPass<func::FuncOp>(createQuakeAddMetadata());
-  pm.addNestedPass<func::FuncOp>(createLoopNormalize());
-  LoopUnrollOptions luo;
-  luo.allowBreak = convertTo && (*convertTo == "qir-adaptive");
-  pm.addNestedPass<func::FuncOp>(createLoopUnroll(luo));
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(createLowerToCFGPass());
-  pm.addNestedPass<func::FuncOp>(createCombineQuantumAllocations());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  if (convertTo && (*convertTo == "qir-base"))
-    pm.addNestedPass<func::FuncOp>(createDelayMeasurementsPass());
-  pm.addPass(createConvertMathToFuncs());
-  pm.addPass(createSymbolDCEPass());
-  pm.addPass(createConvertToQIR());
-}
-
 void cudaq::opt::addPipelineTranslateToOpenQASM(PassManager &pm) {
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(createCSEPass());
 
@@ -517,38 +517,41 @@ void quake::WrapOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
 //===----------------------------------------------------------------------===//
 
 // Common verification for measurement operations.
-static LogicalResult verifyMeasurements(Operation *const op,
-                                        TypeRange targetsType,
-                                        const Type bitsType) {
+template <typename MEAS>
+LogicalResult verifyMeasurements(MEAS op, TypeRange targetsType,
+                                 const Type bitsType) {
   if (failed(verifyWireResultsAreLinear(op)))
     return failure();
   bool mustBeStdvec =
       targetsType.size() > 1 ||
       (targetsType.size() == 1 && isa<quake::VeqType>(targetsType[0]));
   if (mustBeStdvec) {
-    if (!isa<cudaq::cc::StdvecType>(op->getResult(0).getType()))
-      return op->emitOpError("must return `!cc.stdvec<!quake.measure>`, when "
-                             "measuring a qreg, a series of qubits, or both");
+    if (!isa<cudaq::cc::StdvecType>(op.getMeasOut().getType()))
+      return op.emitOpError("must return `!cc.stdvec<!quake.measure>`, when "
+                            "measuring a qreg, a series of qubits, or both");
   } else {
-    if (!isa<quake::MeasureType>(op->getResult(0).getType()))
+    if (!isa<quake::MeasureType>(op.getMeasOut().getType()))
       return op->emitOpError(
           "must return `!quake.measure` when measuring exactly one qubit");
   }
+  if (op.getRegisterName())
+    if (op.getRegisterName()->empty())
+      return op->emitError("quake measurement name cannot be empty.");
   return success();
 }
 
 LogicalResult quake::MxOp::verify() {
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
 LogicalResult quake::MyOp::verify() {
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
 LogicalResult quake::MzOp::verify() {
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
 
@@ -87,6 +87,23 @@ convertArrayAttrToGlobalConstant(MLIRContext *ctx, Location loc,
 }
 
 namespace {
+
+// This pattern replaces a cc.const_array with a global constant. It can
+// recognize a couple of usage patterns and will generate efficient IR in those
+// cases.
+//
+// Pattern 1: The whole constant array is stored to one or more stack variables.
+// Here the stack allocations are eliminated and the global constant is used.
+//
+// Pattern 2: Individual elements at dynamic offsets are extracted from the
+// constant array and used. This can be replaced with a compute pointer
+// operation using the global constant and a load of the element at the computed
+// offset.
+//
+// Default: If the usage is not recognized, the constant array value is replaced
+// with a load of the entire global variable. In this case, LLVM's optimizations
+// are counted on to help demote the (large?) sequence value to primitive memory
+// address arithmetic.
 struct ConstantArrayPattern
     : public OpRewritePattern<cudaq::cc::ConstantArrayOp> {
   explicit ConstantArrayPattern(MLIRContext *ctx, ModuleOp module,
@@ -95,21 +112,30 @@ struct ConstantArrayPattern
 
   LogicalResult matchAndRewrite(cudaq::cc::ConstantArrayOp conarr,
                                 PatternRewriter &rewriter) const override {
+    auto func = conarr->getParentOfType<func::FuncOp>();
+    if (!func)
+      return failure();
+
     SmallVector<cudaq::cc::AllocaOp> allocas;
     SmallVector<cudaq::cc::StoreOp> stores;
+    SmallVector<cudaq::cc::ExtractValueOp> extracts;
+    bool loadAsValue = false;
     for (auto *usr : conarr->getUsers()) {
       auto store = dyn_cast<cudaq::cc::StoreOp>(usr);
-      if (!store)
-        return failure();
-      auto alloca = store.getPtrvalue().getDefiningOp<cudaq::cc::AllocaOp>();
-      if (!alloca)
-        return failure();
-      stores.push_back(store);
-      allocas.push_back(alloca);
+      auto extract = dyn_cast<cudaq::cc::ExtractValueOp>(usr);
+      if (store) {
+        auto alloca = store.getPtrvalue().getDefiningOp<cudaq::cc::AllocaOp>();
+        if (alloca) {
+          stores.push_back(store);
+          allocas.push_back(alloca);
+          continue;
+        }
+      } else if (extract) {
+        extracts.push_back(extract);
+        continue;
+      }
+      loadAsValue = true;
     }
-    auto func = conarr->getParentOfType<func::FuncOp>();
-    if (!func)
-      return failure();
     std::string globalName =
         func.getName().str() + ".rodata_" + std::to_string(counter++);
     auto *ctx = rewriter.getContext();
@@ -118,12 +144,39 @@ struct ConstantArrayPattern
     if (failed(convertArrayAttrToGlobalConstant(ctx, conarr.getLoc(), valueAttr,
                                                 module, globalName, eleTy)))
       return failure();
-    for (auto alloca : allocas)
-      rewriter.replaceOpWithNewOp<cudaq::cc::AddressOfOp>(
-          alloca, alloca.getType(), globalName);
-    for (auto store : stores)
-      rewriter.eraseOp(store);
-    rewriter.eraseOp(conarr);
+    auto loc = conarr.getLoc();
+    if (!extracts.empty()) {
+      auto base = rewriter.create<cudaq::cc::AddressOfOp>(
+          loc, cudaq::cc::PointerType::get(conarr.getType()), globalName);
+      auto elePtrTy = cudaq::cc::PointerType::get(eleTy);
+      for (auto extract : extracts) {
+        SmallVector<cudaq::cc::ComputePtrArg> args;
+        unsigned i = 0;
+        for (auto arg : extract.getRawConstantIndices()) {
+          if (arg == cudaq::cc::ExtractValueOp::getDynamicIndexValue())
+            args.push_back(extract.getDynamicIndices()[i++]);
+          else
+            args.push_back(arg);
+        }
+        OpBuilder::InsertionGuard guard(rewriter);
+        rewriter.setInsertionPoint(extract);
+        auto addrVal =
+            rewriter.create<cudaq::cc::ComputePtrOp>(loc, elePtrTy, base, args);
+        rewriter.replaceOpWithNewOp<cudaq::cc::LoadOp>(extract, addrVal);
+      }
+    }
+    if (!stores.empty()) {
+      for (auto alloca : allocas)
+        rewriter.replaceOpWithNewOp<cudaq::cc::AddressOfOp>(
+            alloca, alloca.getType(), globalName);
+      for (auto store : stores)
+        rewriter.eraseOp(store);
+    }
+    if (loadAsValue) {
+      auto base = rewriter.create<cudaq::cc::AddressOfOp>(
+          loc, cudaq::cc::PointerType::get(conarr.getType()), globalName);
+      rewriter.replaceOpWithNewOp<cudaq::cc::LoadOp>(conarr, base);
+    }
     return success();
   }
 
 
@@ -1749,9 +1749,11 @@ def bodyBuilder(iterVal):
                     self.ctx) if len(qubits) == 1 and quake.RefType.isinstance(
                         qubits[0].type) else cc.StdvecType.get(
                             self.ctx, quake.MeasureType.get(self.ctx))
-                measureResult = opCtor(measTy, [],
-                                       qubits,
-                                       registerName=registerName).result
+                label = registerName
+                if not label:
+                    label = None
+                measureResult = opCtor(measTy, [], qubits,
+                                       registerName=label).result
                 if pushResultToStack:
                     self.pushValue(
                         quake.DiscriminateOp(resTy, measureResult).result)
@@ -3152,6 +3154,73 @@ def bodyBuilder(iterVar):
                                             isDecrementing=isDecrementing)
                 return
 
+        # We can simplify `for i,j in enumerate(L)` MLIR code immensely
+        # by just building a for loop over the iterable object L and using
+        # the index into that iterable and the element.
+        if isinstance(node.iter, ast.Call):
+            if node.iter.func.id == 'enumerate':
+                [self.visit(arg) for arg in node.iter.args]
+                if len(self.valueStack) == 2:
+                    iterable = self.popValue()
+                    self.popValue()
+                else:
+                    assert len(self.valueStack) == 1
+                    iterable = self.popValue()
+                iterable = self.ifPointerThenLoad(iterable)
+                totalSize = None
+                extractFunctor = None
+                varNames = []
+                for elt in node.target.elts:
+                    varNames.append(elt.id)
+
+                beEfficient = False
+                if quake.VeqType.isinstance(iterable.type):
+                    totalSize = quake.VeqSizeOp(self.getIntegerType(),
+                                                iterable).result
+
+                    def functor(seq, idx):
+                        q = quake.ExtractRefOp(self.getRefType(),
+                                               seq,
+                                               -1,
+                                               index=idx).result
+                        return [idx, q]
+
+                    extractFunctor = functor
+                    beEfficient = True
+                elif cc.StdvecType.isinstance(iterable.type):
+                    totalSize = cc.StdvecSizeOp(self.getIntegerType(),
+                                                iterable).result
+
+                    def functor(seq, idx):
+                        vecTy = cc.StdvecType.getElementType(seq.type)
+                        dataTy = cc.PointerType.get(self.ctx, vecTy)
+                        arrTy = vecTy
+                        if not cc.ArrayType.isinstance(arrTy):
+                            arrTy = cc.ArrayType.get(self.ctx, vecTy)
+                        dataArrTy = cc.PointerType.get(self.ctx, arrTy)
+                        data = cc.StdvecDataOp(dataArrTy, seq).result
+                        v = cc.ComputePtrOp(
+                            dataTy, data, [idx],
+                            DenseI32ArrayAttr.get([kDynamicPtrIndex],
+                                                  context=self.ctx)).result
+                        return [idx, v]
+
+                    extractFunctor = functor
+                    beEfficient = True
+
+                if beEfficient:
+
+                    def bodyBuilder(iterVar):
+                        self.symbolTable.pushScope()
+                        values = extractFunctor(iterable, iterVar)
+                        for i, v in enumerate(values):
+                            self.symbolTable[varNames[i]] = v
+                        [self.visit(b) for b in node.body]
+                        self.symbolTable.popScope()
+
+                    self.createInvariantForLoop(totalSize, bodyBuilder)
+                    return
+
         self.visit(node.iter)
         assert len(self.valueStack) > 0 and len(self.valueStack) < 3