Skip to content

Commit 035cac0

Browse files
committed
[Flang][OpenMP] Minimize host ops remaining in device compilation
This patch updates the OpenMP function filtering pass, which removes host functions from the MLIR module created by Flang lowering when compiling for an OpenMP target device. Host functions that contain target regions must be kept so that those target regions can be translated for the device. However, the non-target operations inside these functions cannot all be discarded, because some of them hold information that is also needed during target device codegen; specifically, mapping information resides outside of `omp.target` regions. Previously, all host operations in such functions were preserved. With this patch, host operations that are not actually needed by target device codegen are removed instead, which in practice means keeping only the target regions and the mapping information needed by the device. Arguments of some of the remaining operations are replaced by placeholder allocations and `fir.undefined` values, since they are only actually defined inside the target regions themselves. As a result, target device codegen can later be simplified, because it no longer needs to handle host operations differently to avoid issues.
1 parent 0cd7e8a commit 035cac0

File tree

7 files changed

+996
-32
lines changed

7 files changed

+996
-32
lines changed

flang/include/flang/Optimizer/OpenMP/Passes.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ def FunctionFilteringPass : Pass<"omp-function-filtering"> {
4646
"for the target device.";
4747
let dependentDialects = [
4848
"mlir::func::FuncDialect",
49-
"fir::FIROpsDialect"
49+
"fir::FIROpsDialect",
50+
"mlir::omp::OpenMPDialect"
5051
];
5152
}
5253

flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp

Lines changed: 448 additions & 0 deletions
Large diffs are not rendered by default.

flang/test/Lower/OpenMP/declare-target-link-tarop-cap.f90

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
2-
!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s
3-
!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s
4-
!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
1+
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes=BOTH,HOST
2+
!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE
3+
!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s --check-prefixes=BOTH,HOST
4+
!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefixes=BOTH,DEVICE
55

66
program test_link
77

@@ -20,13 +20,14 @@ program test_link
2020
integer, pointer :: test_ptr2
2121
!$omp declare target link(test_ptr2)
2222

23-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_int"}
23+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_int"}
2424
!$omp target
2525
test_int = test_int + 1
2626
!$omp end target
2727

2828

29-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.array<3xi32>>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{%.*}}) -> !fir.ref<!fir.array<3xi32>> {name = "test_array_1d"}
29+
!HOST-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.array<3xi32>>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{%.*}}) -> !fir.ref<!fir.array<3xi32>> {name = "test_array_1d"}
30+
!DEVICE-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.array<3xi32>>, !fir.array<3xi32>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<!fir.array<3xi32>> {name = "test_array_1d"}
3031
!$omp target
3132
do i = 1,3
3233
test_array_1d(i) = i * 2
@@ -35,18 +36,18 @@ program test_link
3536

3637
allocate(test_ptr1)
3738
test_ptr1 = 1
38-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(implicit, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr1"}
39+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(implicit, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr1"}
3940
!$omp target
4041
test_ptr1 = test_ptr1 + 1
4142
!$omp end target
4243

43-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_target"}
44+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<i32>, i32) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref<i32> {name = "test_target"}
4445
!$omp target
4546
test_target = test_target + 1
4647
!$omp end target
4748

4849

49-
!CHECK-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(implicit, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr2"}
50+
!BOTH-DAG: {{%.*}} = omp.map.info var_ptr({{%.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(implicit, to) capture(ByRef) members({{%.*}} : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "test_ptr2"}
5051
test_ptr2 => test_target
5152
!$omp target
5253
test_ptr2 = test_ptr2 + 1

flang/test/Lower/OpenMP/host-eval.f90

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ subroutine teams()
2222

2323
!$omp end target
2424

25-
! BOTH: omp.teams
26-
! BOTH-SAME: num_teams({{.*}}) thread_limit({{.*}}) {
25+
! HOST: omp.teams
26+
! HOST-SAME: num_teams({{.*}}) thread_limit({{.*}}) {
27+
28+
! DEVICE-NOT: omp.teams
2729
!$omp teams num_teams(1) thread_limit(2)
2830
call foo()
2931
!$omp end teams
@@ -76,13 +78,18 @@ subroutine distribute_parallel_do()
7678
!$omp end distribute parallel do
7779
!$omp end target teams
7880

79-
! BOTH: omp.teams
81+
! HOST: omp.teams
82+
! DEVICE-NOT: omp.teams
8083
!$omp teams
8184

82-
! BOTH: omp.parallel
83-
! BOTH-SAME: num_threads({{.*}})
84-
! BOTH: omp.distribute
85-
! BOTH-NEXT: omp.wsloop
85+
! HOST: omp.parallel
86+
! HOST-SAME: num_threads({{.*}})
87+
! HOST: omp.distribute
88+
! HOST-NEXT: omp.wsloop
89+
90+
! DEVICE-NOT: omp.parallel
91+
! DEVICE-NOT: omp.distribute
92+
! DEVICE-NOT: omp.wsloop
8693
!$omp distribute parallel do num_threads(1)
8794
do i=1,10
8895
call foo()
@@ -140,14 +147,20 @@ subroutine distribute_parallel_do_simd()
140147
!$omp end distribute parallel do simd
141148
!$omp end target teams
142149

143-
! BOTH: omp.teams
150+
! HOST: omp.teams
151+
! DEVICE-NOT: omp.teams
144152
!$omp teams
145153

146-
! BOTH: omp.parallel
147-
! BOTH-SAME: num_threads({{.*}})
148-
! BOTH: omp.distribute
149-
! BOTH-NEXT: omp.wsloop
150-
! BOTH-NEXT: omp.simd
154+
! HOST: omp.parallel
155+
! HOST-SAME: num_threads({{.*}})
156+
! HOST: omp.distribute
157+
! HOST-NEXT: omp.wsloop
158+
! HOST-NEXT: omp.simd
159+
160+
! DEVICE-NOT: omp.parallel
161+
! DEVICE-NOT: omp.distribute
162+
! DEVICE-NOT: omp.wsloop
163+
! DEVICE-NOT: omp.simd
151164
!$omp distribute parallel do simd num_threads(1)
152165
do i=1,10
153166
call foo()
@@ -194,10 +207,12 @@ subroutine distribute()
194207
!$omp end distribute
195208
!$omp end target teams
196209

197-
! BOTH: omp.teams
210+
! HOST: omp.teams
211+
! DEVICE-NOT: omp.teams
198212
!$omp teams
199213

200-
! BOTH: omp.distribute
214+
! HOST: omp.distribute
215+
! DEVICE-NOT: omp.distribute
201216
!$omp distribute
202217
do i=1,10
203218
call foo()
@@ -246,11 +261,15 @@ subroutine distribute_simd()
246261
!$omp end distribute simd
247262
!$omp end target teams
248263

249-
! BOTH: omp.teams
264+
! HOST: omp.teams
265+
! DEVICE-NOT: omp.teams
250266
!$omp teams
251267

252-
! BOTH: omp.distribute
253-
! BOTH-NEXT: omp.simd
268+
! HOST: omp.distribute
269+
! HOST-NEXT: omp.simd
270+
271+
! DEVICE-NOT: omp.distribute
272+
! DEVICE-NOT: omp.simd
254273
!$omp distribute simd
255274
do i=1,10
256275
call foo()

flang/test/Lower/OpenMP/real10.f90

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
!CHECK: hlfir.declare %{{.*}} {uniq_name = "_QFEx"} : (!fir.ref<f80>) -> (!fir.ref<f80>, !fir.ref<f80>)
66

77
program p
8+
!$omp declare target
89
real(10) :: x
9-
!$omp target
10-
continue
11-
!$omp end target
1210
end
13-

0 commit comments

Comments
 (0)