From 4897adcafcdb2ee4aa69be89d7d734879d474bdc Mon Sep 17 00:00:00 2001
From: Nimish Mishra <neelam.nimish@gmail.com>
Date: Thu, 17 Apr 2025 19:09:48 +0530
Subject: [PATCH] [flang][llvm][OpenMP][OpenACC] Add implicit casts to
 omp.atomic and acc.atomic

---
 flang/include/flang/Lower/DirectivesCommon.h  |  37 +++++-
 .../acc-atomic-capture-implicit-cast-todo.f90 |  48 ++++++++
 .../test/Lower/OpenACC/acc-atomic-capture.f90 | 115 ------------------
 flang/test/Lower/OpenACC/acc-atomic-read.f90  |   5 +-
 .../Todo/atomic-capture-implicit-cast.f90     |  48 ++++++++
 .../Lower/OpenMP/atomic-implicit-cast.f90     |  56 +++++++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  31 -----
 mlir/test/Target/LLVMIR/openmp-llvm.mlir      |  21 ++--
 8 files changed, 197 insertions(+), 164 deletions(-)
 create mode 100644 flang/test/Lower/OpenACC/acc-atomic-capture-implicit-cast-todo.f90
 create mode 100644 flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90
 create mode 100644 flang/test/Lower/OpenMP/atomic-implicit-cast.f90

diff --git a/flang/include/flang/Lower/DirectivesCommon.h b/flang/include/flang/Lower/DirectivesCommon.h
index 6e24343cebd3a..2013341e39630 100644
--- a/flang/include/flang/Lower/DirectivesCommon.h
+++ b/flang/include/flang/Lower/DirectivesCommon.h
@@ -386,9 +386,36 @@ void genOmpAccAtomicRead(Fortran::lower::AbstractConverter &converter,
       fir::getBase(converter.genExprAddr(fromExpr, stmtCtx));
   mlir::Value toAddress = fir::getBase(converter.genExprAddr(
       *Fortran::semantics::GetExpr(assignmentStmtVariable), stmtCtx));
-  genOmpAccAtomicCaptureStatement(converter, fromAddress, toAddress,
-                                  leftHandClauseList, rightHandClauseList,
-                                  elementType, loc);
+  if (fromAddress.getType() != toAddress.getType()) {
+    // Emit an implicit cast
+    mlir::Type toType = fir::unwrapRefType(toAddress.getType());
+    mlir::Type fromType = fir::unwrapRefType(fromAddress.getType());
+    fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+    auto oldIP = builder.saveInsertionPoint();
+    builder.setInsertionPointToStart(builder.getAllocaBlock());
+    mlir::Value alloca = builder.create<fir::AllocaOp>(loc, fromType);
+    builder.restoreInsertionPoint(oldIP);
+    genOmpAccAtomicCaptureStatement(converter, fromAddress, alloca,
+                                    leftHandClauseList, rightHandClauseList,
+                                    elementType, loc);
+    auto load = builder.create<fir::LoadOp>(loc, alloca);
+    if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
+      // Emit an additional `ExtractValueOp` if `fromAddress` is of complex
+      // type, but `toAddress` is not.
+      auto extract = builder.create<fir::ExtractValueOp>(
+          loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
+          builder.getArrayAttr(
+              builder.getIntegerAttr(builder.getIndexType(), 0)));
+      auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
+      builder.create<fir::StoreOp>(loc, cvt, toAddress);
+    } else {
+      auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
+      builder.create<fir::StoreOp>(loc, cvt, toAddress);
+    }
+  } else
+    genOmpAccAtomicCaptureStatement(converter, fromAddress, toAddress,
+                                    leftHandClauseList, rightHandClauseList,
+                                    elementType, loc);
 }
 
 /// Processes an atomic construct with update clause.
@@ -481,6 +508,10 @@ void genOmpAccAtomicCapture(Fortran::lower::AbstractConverter &converter,
   mlir::Type stmt2VarType =
       fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType();
 
+  // Check if implicit type is needed
+  if (stmt1VarType != stmt2VarType)
+    TODO(loc, "atomic capture requiring implicit type casts");
+
   mlir::Operation *atomicCaptureOp = nullptr;
   if constexpr (std::is_same<AtomicListT,
                              Fortran::parser::OmpAtomicClauseList>()) {
diff --git a/flang/test/Lower/OpenACC/acc-atomic-capture-implicit-cast-todo.f90 b/flang/test/Lower/OpenACC/acc-atomic-capture-implicit-cast-todo.f90
new file mode 100644
index 0000000000000..14e1c21a2aae7
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-atomic-capture-implicit-cast-todo.f90
@@ -0,0 +1,48 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenacc -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: atomic capture requiring implicit type casts 
+subroutine capture_with_convert_f32_to_i32()
+    implicit none
+    integer :: k, v, i
+
+    k = 1
+    v = 0
+
+    !$acc atomic capture
+    v = k
+    k = (i + 1) * 3.14
+    !$acc end atomic
+end subroutine
+
+subroutine capture_with_convert_i32_to_f64()
+    real(8) :: x
+    integer :: v
+    x = 1.0
+    v = 0
+    !$acc atomic capture
+    v = x
+    x = v
+    !$acc end atomic
+end subroutine capture_with_convert_i32_to_f64
+
+subroutine capture_with_convert_f64_to_i32()
+    integer :: x
+    real(8) :: v
+    x = 1
+    v = 0
+    !$acc atomic capture
+    x = v * v
+    v = x
+    !$acc end atomic
+end subroutine capture_with_convert_f64_to_i32
+
+subroutine capture_with_convert_i32_to_f32()
+    real(4) :: x
+    integer :: v
+    x = 1.0
+    v = 0
+    !$acc atomic capture
+    v = x
+    x = x + v
+    !$acc end atomic
+end subroutine capture_with_convert_i32_to_f32
diff --git a/flang/test/Lower/OpenACC/acc-atomic-capture.f90 b/flang/test/Lower/OpenACC/acc-atomic-capture.f90
index 6cf39cde6ca0d..4aeef0be95565 100644
--- a/flang/test/Lower/OpenACC/acc-atomic-capture.f90
+++ b/flang/test/Lower/OpenACC/acc-atomic-capture.f90
@@ -96,121 +96,6 @@ subroutine pointers_in_atomic_capture()
 end subroutine
 
 
-subroutine capture_with_convert_f32_to_i32()
-  implicit none
-  integer :: k, v, i
-
-  k = 1
-  v = 0
-
-  !$acc atomic capture
-  v = k
-  k = (i + 1) * 3.14
-  !$acc end atomic
-end subroutine
-
-! CHECK-LABEL: func.func @_QPcapture_with_convert_f32_to_i32()
-! CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFcapture_with_convert_f32_to_i32Ek"}
-! CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]] {uniq_name = "_QFcapture_with_convert_f32_to_i32Ek"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[V:.*]] = fir.alloca i32 {bindc_name = "v", uniq_name = "_QFcapture_with_convert_f32_to_i32Ev"}
-! CHECK: %[[V_DECL:.*]]:2 = hlfir.declare %[[V]] {uniq_name = "_QFcapture_with_convert_f32_to_i32Ev"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[CST:.*]] = arith.constant 3.140000e+00 : f32
-! CHECK: %[[MUL:.*]] = arith.mulf %{{.*}}, %[[CST]] fastmath<contract> : f32
-! CHECK: %[[CONV:.*]] = fir.convert %[[MUL]] : (f32) -> i32
-! CHECK: acc.atomic.capture {
-! CHECK:   acc.atomic.read %[[V_DECL]]#0 = %[[K_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>, i32
-! CHECK:   acc.atomic.write %[[K_DECL]]#0 = %[[CONV]] : !fir.ref<i32>, i32
-! CHECK: }
-
-subroutine capture_with_convert_i32_to_f64()
-  real(8) :: x
-  integer :: v
-  x = 1.0
-  v = 0
-  !$acc atomic capture
-  v = x
-  x = v
-  !$acc end atomic
-end subroutine capture_with_convert_i32_to_f64
-
-! CHECK-LABEL: func.func @_QPcapture_with_convert_i32_to_f64()
-! CHECK: %[[V:.*]] = fir.alloca i32 {bindc_name = "v", uniq_name = "_QFcapture_with_convert_i32_to_f64Ev"}
-! CHECK: %[[V_DECL:.*]]:2 = hlfir.declare %[[V]] {uniq_name = "_QFcapture_with_convert_i32_to_f64Ev"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[X:.*]] = fir.alloca f64 {bindc_name = "x", uniq_name = "_QFcapture_with_convert_i32_to_f64Ex"}
-! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFcapture_with_convert_i32_to_f64Ex"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-! CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f64
-! CHECK: hlfir.assign %[[CST]] to %[[X_DECL]]#0 : f64, !fir.ref<f64>
-! CHECK: %c0_i32 = arith.constant 0 : i32
-! CHECK: hlfir.assign %c0_i32 to %[[V_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK: %[[LOAD:.*]] = fir.load %[[V_DECL]]#0 : !fir.ref<i32>
-! CHECK: %[[CONV:.*]] = fir.convert %[[LOAD]] : (i32) -> f64
-! CHECK: acc.atomic.capture {
-! CHECK:   acc.atomic.read %[[V_DECL]]#0 = %[[X_DECL]]#0 : !fir.ref<i32>, !fir.ref<f64>, f64
-! CHECK:   acc.atomic.write %[[X_DECL]]#0 = %[[CONV]] : !fir.ref<f64>, f64
-! CHECK: }
-
-subroutine capture_with_convert_f64_to_i32()
-  integer :: x
-  real(8) :: v
-  x = 1
-  v = 0
-  !$acc atomic capture
-  x = v * v
-  v = x
-  !$acc end atomic
-end subroutine capture_with_convert_f64_to_i32
-
-! CHECK-LABEL: func.func @_QPcapture_with_convert_f64_to_i32()
-! CHECK: %[[V:.*]] = fir.alloca f64 {bindc_name = "v", uniq_name = "_QFcapture_with_convert_f64_to_i32Ev"}
-! CHECK: %[[V_DECL:.*]]:2 = hlfir.declare %[[V]] {uniq_name = "_QFcapture_with_convert_f64_to_i32Ev"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
-! CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFcapture_with_convert_f64_to_i32Ex"}
-! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFcapture_with_convert_f64_to_i32Ex"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %c1_i32 = arith.constant 1 : i32
-! CHECK: hlfir.assign %c1_i32 to %[[X_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f64
-! CHECK: hlfir.assign %[[CST]] to %[[V_DECL]]#0 : f64, !fir.ref<f64>
-! CHECK: %[[LOAD:.*]] = fir.load %[[V_DECL]]#0 : !fir.ref<f64>
-! CHECK: acc.atomic.capture {
-! CHECK:   acc.atomic.update %[[X_DECL]]#0 : !fir.ref<i32> {
-! CHECK:   ^bb0(%arg0: i32):
-! CHECK:     %[[MUL:.*]] = arith.mulf %[[LOAD]], %[[LOAD]] fastmath<contract> : f64
-! CHECK:     %[[CONV:.*]] = fir.convert %[[MUL]] : (f64) -> i32
-! CHECK:     acc.yield %[[CONV]] : i32
-! CHECK:   }
-! CHECK:   acc.atomic.read %[[V_DECL]]#0 = %[[X_DECL]]#0 : !fir.ref<f64>, !fir.ref<i32>, i32
-! CHECK: }
-
-subroutine capture_with_convert_i32_to_f32()
-  real(4) :: x
-  integer :: v
-  x = 1.0
-  v = 0
-  !$acc atomic capture
-  v = x
-  x = x + v
-  !$acc end atomic
-end subroutine capture_with_convert_i32_to_f32
-
-! CHECK-LABEL: func.func @_QPcapture_with_convert_i32_to_f32()
-! CHECK: %[[V:.*]] = fir.alloca i32 {bindc_name = "v", uniq_name = "_QFcapture_with_convert_i32_to_f32Ev"}
-! CHECK: %[[V_DECL:.*]]:2 = hlfir.declare %[[V]] {uniq_name = "_QFcapture_with_convert_i32_to_f32Ev"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[X:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFcapture_with_convert_i32_to_f32Ex"}
-! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFcapture_with_convert_i32_to_f32Ex"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-! CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f32
-! CHECK: hlfir.assign %[[CST]] to %[[X_DECL]]#0 : f32, !fir.ref<f32>
-! CHECK: %c0_i32 = arith.constant 0 : i32
-! CHECK: hlfir.assign %c0_i32 to %[[V_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK: %[[LOAD:.*]] = fir.load %[[V_DECL]]#0 : !fir.ref<i32>
-! CHECK: acc.atomic.capture {
-! CHECK:   acc.atomic.read %[[V_DECL]]#0 = %[[X_DECL]]#0 : !fir.ref<i32>, !fir.ref<f32>, f32
-! CHECK:   acc.atomic.update %[[X_DECL]]#0 : !fir.ref<f32> {
-! CHECK:   ^bb0(%arg0: f32):
-! CHECK:     %[[CONV:.*]] = fir.convert %[[LOAD]] : (i32) -> f32
-! CHECK:     %[[ADD:.*]] = arith.addf %arg0, %[[CONV]] fastmath<contract> : f32
-! CHECK:     acc.yield %[[ADD]] : f32
-! CHECK:   }
-! CHECK: }
-
 subroutine array_ref_in_atomic_capture1
   integer :: x(10), v
   !$acc atomic capture
diff --git a/flang/test/Lower/OpenACC/acc-atomic-read.f90 b/flang/test/Lower/OpenACC/acc-atomic-read.f90
index 639a98051e3a2..22270ed6379fb 100644
--- a/flang/test/Lower/OpenACC/acc-atomic-read.f90
+++ b/flang/test/Lower/OpenACC/acc-atomic-read.f90
@@ -51,8 +51,11 @@ subroutine atomic_read_with_cast()
 end
 
 ! CHECK-LABEL: func.func @_QPatomic_read_with_cast() {
+! CHECK: %[[ALLOCA:.*]] = fir.alloca i32
 ! CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFatomic_read_with_castEx"}
 ! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFatomic_read_with_castEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK: %[[Y:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFatomic_read_with_castEy"}
 ! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFatomic_read_with_castEy"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
-! CHECK: acc.atomic.read %[[Y_DECL]]#0 = %[[X_DECL]]#0 : !fir.ref<i64>, !fir.ref<i32>, i32
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref<i32>
+! CHECK: %[[CVT:.*]] = fir.convert %[[LOAD]] : (i32) -> i64
+! CHECK: fir.store %[[CVT]] to %[[Y_DECL]]#0 : !fir.ref<i64>
diff --git a/flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90 b/flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90
new file mode 100644
index 0000000000000..5b61f1169308f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90
@@ -0,0 +1,48 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: atomic capture requiring implicit type casts 
+subroutine capture_with_convert_f32_to_i32()
+    implicit none
+    integer :: k, v, i
+
+    k = 1
+    v = 0
+
+    !$omp atomic capture
+    v = k
+    k = (i + 1) * 3.14
+    !$omp end atomic
+end subroutine
+
+subroutine capture_with_convert_i32_to_f64()
+    real(8) :: x
+    integer :: v
+    x = 1.0
+    v = 0
+    !$omp atomic capture
+    v = x
+    x = v
+    !$omp end atomic
+end subroutine capture_with_convert_i32_to_f64
+
+subroutine capture_with_convert_f64_to_i32()
+    integer :: x
+    real(8) :: v
+    x = 1
+    v = 0
+    !$omp atomic capture
+    x = v
+    v = x
+    !$omp end atomic
+end subroutine capture_with_convert_f64_to_i32
+
+subroutine capture_with_convert_i32_to_f32()
+    real(4) :: x
+    integer :: v
+    x = 1.0
+    v = 0
+    !$omp atomic capture
+    v = x
+    x = x + v
+    !$omp end atomic
+end subroutine capture_with_convert_i32_to_f32
diff --git a/flang/test/Lower/OpenMP/atomic-implicit-cast.f90 b/flang/test/Lower/OpenMP/atomic-implicit-cast.f90
new file mode 100644
index 0000000000000..2fe4cc799e8de
--- /dev/null
+++ b/flang/test/Lower/OpenMP/atomic-implicit-cast.f90
@@ -0,0 +1,56 @@
+! REQUIRES : openmp_runtime
+
+! RUN: %flang_fc1 -emit-hlfir %openmp_flags %s -o - | FileCheck %s
+
+! CHECK: func.func @_QPatomic_implicit_cast_read() {
+subroutine atomic_implicit_cast_read
+! CHECK: %[[ALLOCA3:.*]] = fir.alloca complex<f32>
+! CHECK: %[[ALLOCA2:.*]] = fir.alloca complex<f32>
+! CHECK: %[[ALLOCA1:.*]] = fir.alloca i32
+! CHECK: %[[ALLOCA0:.*]] = fir.alloca f32
+
+! CHECK: %[[M:.*]] = fir.alloca complex<f64> {bindc_name = "m", uniq_name = "_QFatomic_implicit_cast_readEm"}
+! CHECK: %[[M_DECL:.*]]:2 = hlfir.declare %[[M]] {uniq_name = "_QFatomic_implicit_cast_readEm"} : (!fir.ref<complex<f64>>) -> (!fir.ref<complex<f64>>, !fir.ref<complex<f64>>)
+! CHECK: %[[W:.*]] = fir.alloca complex<f32> {bindc_name = "w", uniq_name = "_QFatomic_implicit_cast_readEw"}
+! CHECK: %[[W_DECL:.*]]:2 = hlfir.declare %[[W]] {uniq_name = "_QFatomic_implicit_cast_readEw"} : (!fir.ref<complex<f32>>) -> (!fir.ref<complex<f32>>, !fir.ref<complex<f32>>)
+! CHECK: %[[X:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFatomic_implicit_cast_readEx"}
+! CHECK: %[[X_DECL:.*]]:2 = hlfir.declare %[[X]] {uniq_name = "_QFatomic_implicit_cast_readEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[Y:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFatomic_implicit_cast_readEy"}
+! CHECK: %[[Y_DECL:.*]]:2 = hlfir.declare %[[Y]] {uniq_name = "_QFatomic_implicit_cast_readEy"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[Z:.*]] = fir.alloca f64 {bindc_name = "z", uniq_name = "_QFatomic_implicit_cast_readEz"}
+! CHECK: %[[Z_DECL:.*]]:2 = hlfir.declare %[[Z]] {uniq_name = "_QFatomic_implicit_cast_readEz"} : (!fir.ref<f64>) -> (!fir.ref<f64>, !fir.ref<f64>)
+    integer :: x
+    real :: y
+    double precision :: z
+    complex :: w
+    complex(8) :: m
+
+! CHECK: omp.atomic.read %[[ALLOCA0:.*]] = %[[Y_DECL]]#0 : !fir.ref<f32>, !fir.ref<f32>, f32
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA0]] : !fir.ref<f32>
+! CHECK: %[[CVT:.*]] = fir.convert %[[LOAD]] : (f32) -> i32
+! CHECK: fir.store %[[CVT]] to %[[X_DECL]]#0 : !fir.ref<i32>
+    !$omp atomic read
+        x = y
+
+! CHECK: omp.atomic.read %[[ALLOCA1:.*]] = %[[X_DECL]]#0 : !fir.ref<i32>, !fir.ref<i32>, i32
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA1]] : !fir.ref<i32>
+! CHECK: %[[CVT:.*]] = fir.convert %[[LOAD]] : (i32) -> f64
+! CHECK: fir.store %[[CVT]] to %[[Z_DECL]]#0 : !fir.ref<f64>
+    !$omp atomic read
+        z = x
+
+! CHECK: omp.atomic.read %[[ALLOCA2:.*]] = %[[W_DECL]]#0 : !fir.ref<complex<f32>>, !fir.ref<complex<f32>>, complex<f32>
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA2]] : !fir.ref<complex<f32>>
+! CHECK: %[[EXTRACT:.*]] = fir.extract_value %[[LOAD]], [0 : index] : (complex<f32>) -> f32
+! CHECK: %[[CVT:.*]] = fir.convert %[[EXTRACT]] : (f32) -> i32
+! CHECK: fir.store %[[CVT]] to %[[X_DECL]]#0 : !fir.ref<i32>
+    !$omp atomic read
+        x = w
+
+! CHECK: omp.atomic.read %[[ALLOCA3:.*]] = %[[W_DECL]]#0 : !fir.ref<complex<f32>>, !fir.ref<complex<f32>>, complex<f32>
+! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA3]] : !fir.ref<complex<f32>>
+! CHECK: %[[CVT:.*]] = fir.convert %[[LOAD]] : (complex<f32>) -> complex<f64>
+! CHECK: fir.store %[[CVT]] to %[[M_DECL]]#0 : !fir.ref<complex<f64>>
+    !$omp atomic read
+        m = w
+end subroutine
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 13b727d226738..fa0d82783d0fa 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -268,33 +268,6 @@ computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
   return Result;
 }
 
-/// Emit an implicit cast to convert \p XRead to type of variable \p V
-static llvm::Value *emitImplicitCast(IRBuilder<> &Builder, llvm::Value *XRead,
-                                     llvm::Value *V) {
-  // TODO: Add this functionality to the `AtomicInfo` interface
-  llvm::Type *XReadType = XRead->getType();
-  llvm::Type *VType = V->getType();
-  if (llvm::AllocaInst *vAlloca = dyn_cast<llvm::AllocaInst>(V))
-    VType = vAlloca->getAllocatedType();
-
-  if (XReadType->isStructTy() && VType->isStructTy())
-    // No need to extract or convert. A direct
-    // `store` will suffice.
-    return XRead;
-
-  if (XReadType->isStructTy())
-    XRead = Builder.CreateExtractValue(XRead, /*Idxs=*/0);
-  if (VType->isIntegerTy() && XReadType->isFloatingPointTy())
-    XRead = Builder.CreateFPToSI(XRead, VType);
-  else if (VType->isFloatingPointTy() && XReadType->isIntegerTy())
-    XRead = Builder.CreateSIToFP(XRead, VType);
-  else if (VType->isIntegerTy() && XReadType->isIntegerTy())
-    XRead = Builder.CreateIntCast(XRead, VType, true);
-  else if (VType->isFloatingPointTy() && XReadType->isFloatingPointTy())
-    XRead = Builder.CreateFPCast(XRead, VType);
-  return XRead;
-}
-
 /// Make \p Source branch to \p Target.
 ///
 /// Handles two situations:
@@ -8683,8 +8656,6 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
     }
   }
   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
-  if (XRead->getType() != V.Var->getType())
-    XRead = emitImplicitCast(Builder, XRead, V.Var);
   Builder.CreateStore(XRead, V.Var, V.IsVolatile);
   return Builder.saveIP();
 }
@@ -8969,8 +8940,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
     return AtomicResult.takeError();
   Value *CapturedVal =
       (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
-  if (CapturedVal->getType() != V.Var->getType())
-    CapturedVal = emitImplicitCast(Builder, CapturedVal, V.Var);
   Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
 
   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 15cd429c7cf9a..671d4d4e18771 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -1396,42 +1396,35 @@ llvm.func @omp_atomic_read_implicit_cast () {
 
 //CHECK: call void @__atomic_load(i64 8, ptr %[[X_ELEMENT]], ptr %[[ATOMIC_LOAD_TEMP]], i32 0)
 //CHECK: %[[LOAD:.*]] = load { float, float }, ptr %[[ATOMIC_LOAD_TEMP]], align 8
-//CHECK: %[[EXT:.*]] = extractvalue { float, float } %[[LOAD]], 0
-//CHECK: store float %[[EXT]], ptr %[[Y]], align 4
+//CHECK: store { float, float } %[[LOAD]], ptr %[[Y]], align 4
   omp.atomic.read %3 = %17 : !llvm.ptr, !llvm.ptr, !llvm.struct<(f32, f32)>
 
 //CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[Z]] monotonic, align 4
 //CHECK: %[[CAST:.*]] = bitcast i32 %[[ATOMIC_LOAD_TEMP]] to float
-//CHECK: %[[LOAD:.*]] = fpext float %[[CAST]] to double
-//CHECK: store double %[[LOAD]], ptr %[[Y]], align 8
+//CHECK: store float %[[CAST]], ptr %[[Y]], align 4
   omp.atomic.read %3 = %1 : !llvm.ptr, !llvm.ptr, f32
 
 //CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[W]] monotonic, align 4
-//CHECK: %[[LOAD:.*]] = sitofp i32 %[[ATOMIC_LOAD_TEMP]] to double
-//CHECK: store double %[[LOAD]], ptr %[[Y]], align 8
+//CHECK: store i32 %[[ATOMIC_LOAD_TEMP]], ptr %[[Y]], align 4
   omp.atomic.read %3 = %7 : !llvm.ptr, !llvm.ptr, i32
 
 //CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i64, ptr %[[Y]] monotonic, align 4
 //CHECK: %[[CAST:.*]] = bitcast i64 %[[ATOMIC_LOAD_TEMP]] to double
-//CHECK: %[[LOAD:.*]] = fptrunc double %[[CAST]] to float
-//CHECK: store float %[[LOAD]], ptr %[[Z]], align 4
+//CHECK: store double %[[CAST]], ptr %[[Z]], align 8
   omp.atomic.read %1 = %3 : !llvm.ptr, !llvm.ptr, f64
 
 //CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[W]] monotonic, align 4
-//CHECK: %[[LOAD:.*]] = sitofp i32 %[[ATOMIC_LOAD_TEMP]] to float
-//CHECK: store float %[[LOAD]], ptr %[[Z]], align 4
+//CHECK: store i32 %[[ATOMIC_LOAD_TEMP]], ptr %[[Z]], align 4
   omp.atomic.read %1 = %7 : !llvm.ptr, !llvm.ptr, i32
 
 //CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i64, ptr %[[Y]] monotonic, align 4
 //CHECK: %[[CAST:.*]] = bitcast i64 %[[ATOMIC_LOAD_TEMP]] to double
-//CHECK: %[[LOAD:.*]] = fptosi double %[[CAST]] to i32
-//CHECK: store i32 %[[LOAD]], ptr %[[W]], align 4
+//CHECK: store double %[[CAST]], ptr %[[W]], align 8
   omp.atomic.read %7 = %3 : !llvm.ptr, !llvm.ptr, f64
 
 //CHECK: %[[ATOMIC_LOAD_TEMP:.*]] = load atomic i32, ptr %[[Z]] monotonic, align 4
 //CHECK: %[[CAST:.*]] = bitcast i32 %[[ATOMIC_LOAD_TEMP]] to float
-//CHECK: %[[LOAD:.*]] = fptosi float %[[CAST]] to i32
-//CHECK: store i32 %[[LOAD]], ptr %[[W]], align 4
+//CHECK: store float %[[CAST]], ptr %[[W]], align 4
   omp.atomic.read %7 = %1 : !llvm.ptr, !llvm.ptr, f32
   llvm.return
 }