|
5 | 5 | typedef int v4i __attribute__((ext_vector_type(4))); |
6 | 6 | typedef int v8i __attribute__((ext_vector_type(8))); |
7 | 7 |
|
| 8 | +static v4i v4i_zeros = (v4i){0,0,0,0}; |
| 9 | +static v8i v8i_zeros = (v8i){0,0,0,0,0,0,0,0}; |
| 10 | + |
8 | 11 | // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_load_to_lds( |
9 | 12 | // CHECK-GFX1250-NEXT: entry: |
10 | | -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], i32 0) |
| 13 | +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], <8 x i32> zeroinitializer, i32 0) |
11 | 14 | // CHECK-GFX1250-NEXT: ret void |
12 | 15 | // |
13 | 16 | void test_amdgcn_tensor_load_to_lds(v4i sg0, v8i sg1, v4i sg2, v4i sg3) |
14 | 17 | { |
15 | | - __builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, 0); |
| 18 | + __builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, v8i_zeros, 0); |
16 | 19 | } |
17 | 20 |
|
18 | 21 | // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_load_to_lds_d2( |
19 | 22 | // CHECK-GFX1250-NEXT: entry: |
20 | | -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], i32 27) |
| 23 | +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 27) |
21 | 24 | // CHECK-GFX1250-NEXT: ret void |
22 | 25 | // |
23 | 26 | void test_amdgcn_tensor_load_to_lds_d2(v4i sg0, v8i sg1) |
24 | 27 | { |
25 | | - __builtin_amdgcn_tensor_load_to_lds_d2(sg0, sg1, 27); |
| 28 | + __builtin_amdgcn_tensor_load_to_lds(sg0, sg1, v4i_zeros, v4i_zeros, v8i_zeros, 27); |
26 | 29 | } |
27 | 30 |
|
28 | 31 | // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_store_from_lds( |
29 | 32 | // CHECK-GFX1250-NEXT: entry: |
30 | | -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], i32 22) |
| 33 | +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], <8 x i32> zeroinitializer, i32 22) |
31 | 34 | // CHECK-GFX1250-NEXT: ret void |
32 | 35 | // |
33 | 36 | void test_amdgcn_tensor_store_from_lds(v4i sg0, v8i sg1, v4i sg2, v4i sg3) |
34 | 37 | { |
35 | | - __builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, 22); |
| 38 | + __builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, v8i_zeros, 22); |
36 | 39 | } |
37 | 40 |
|
38 | 41 | // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_store_from_lds_d2( |
39 | 42 | // CHECK-GFX1250-NEXT: entry: |
40 | | -// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds.d2(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], i32 0) |
| 43 | +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 0) |
41 | 44 | // CHECK-GFX1250-NEXT: ret void |
42 | 45 | // |
43 | 46 | void test_amdgcn_tensor_store_from_lds_d2(v4i sg0, v8i sg1) |
44 | 47 | { |
45 | | - __builtin_amdgcn_tensor_store_from_lds_d2(sg0, sg1, 0); |
| 48 | + __builtin_amdgcn_tensor_store_from_lds(sg0, sg1, v4i_zeros, v4i_zeros, v8i_zeros, 0); |
| 49 | +} |
//=========================================================================
// It is fine to pass five subgroup arguments as the tensor descriptor, but
// the fifth one will be ignored by LLVM CodeGen for gfx1250, which only
// supports D# descriptors of up to four groups.
//=========================================================================

| 58 | +// CHECK-GFX1250-LABEL: @test_amdgcn_tensor_load_to_lds_d5( |
| 59 | +// CHECK-GFX1250-NEXT: entry: |
| 60 | +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], <8 x i32> [[SG4:%.*]], i32 0) |
| 61 | +// CHECK-GFX1250-NEXT: ret void |
| 62 | +// |
| 63 | +void test_amdgcn_tensor_load_to_lds_d5(v4i sg0, v8i sg1, v4i sg2, v4i sg3, v8i sg4) |
| 64 | +{ |
| 65 | + __builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, sg4, 0); |
| 66 | +} |
| 67 | + |
| 68 | +// CHECK-GFX1250-LABEL: @test_amdgcn_tensor_store_from_lds_d5( |
| 69 | +// CHECK-GFX1250-NEXT: entry: |
| 70 | +// CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], <8 x i32> [[SG4:%.*]], i32 0) |
| 71 | +// CHECK-GFX1250-NEXT: ret void |
| 72 | +// |
| 73 | +void test_amdgcn_tensor_store_from_lds_d5(v4i sg0, v8i sg1, v4i sg2, v4i sg3, v8i sg4) |
| 74 | +{ |
| 75 | + __builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, sg4, 0); |
46 | 76 | } |