@@ -7,53 +7,53 @@ RWByteAddressBuffer rw_matrix_buffer;
7
7
[Shader ("compute" )]
8
8
[NumThreads (1 ,1 ,1 )]
9
9
void cs_main ()
10
- {
11
- vector <float , 4 > output_vector;
12
- static const uint is_output_unsigned = 0 ;
13
-
14
- vector <float , 4 > input_vector;
15
- const uint is_input_unsigned = 0 ;
16
- const uint input_interpretation = 9 ; /*F32*/
17
-
18
- const uint matrix_offset = 0 ;
19
- const uint matrix_interpretation = 9 ; /*F32*/
20
- const uint matrix_dimM = 4 ;
21
- const uint matrix_dimK = 4 ;
22
- const uint matrix_layout = 0 ; /*RowMajor*/
23
- const bool matrix_is_transposed = false ;
24
- const uint matrix_stride = 64 ;
25
-
26
- // CHECK: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 363,
27
- __builtin_MatVecMul (output_vector, is_output_unsigned, input_vector,
28
- is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
29
- matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
30
- matrix_is_transposed, matrix_stride);
31
-
32
- const uint bias_offset = 0 ;
33
- const uint bias_interpretation = 9 ; /*F32*/
34
-
35
- // CHECK: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 364,
36
- __builtin_MatVecMulAdd (output_vector, is_output_unsigned, input_vector,
37
- is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
38
- matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
39
- matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
40
- bias_interpretation);
41
-
42
- vector <uint , 8 > input_vector1;
43
- vector <uint , 8 > input_vector2;
44
- const uint opa_matrix_offset = 0 ;
45
- const uint opa_matrix_interpretation = 5 ; /*U32*/
46
- const uint opa_matrix_layout = 3 ; /*OuterProductOptimal*/
47
- const uint opa_matrix_stride = 64 ;
48
-
49
- // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 365,
50
- __builtin_OuterProductAccumulate (input_vector1, input_vector2,
51
- rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
52
- opa_matrix_layout, opa_matrix_stride);
53
-
54
- const uint va_matrix_offset = 0 ;
55
-
56
- // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 366,
57
- __builtin_VectorAccumulate (input_vector1, rw_matrix_buffer,
58
- va_matrix_offset);
10
+ {
11
+ vector <float , 4 > output_vector;
12
+ static const uint is_output_unsigned = 0 ;
13
+
14
+ vector <float , 4 > input_vector;
15
+ const uint is_input_unsigned = 0 ;
16
+ const uint input_interpretation = 9 ; /*F32*/
17
+
18
+ const uint matrix_offset = 0 ;
19
+ const uint matrix_interpretation = 9 ; /*F32*/
20
+ const uint matrix_dimM = 4 ;
21
+ const uint matrix_dimK = 4 ;
22
+ const uint matrix_layout = 0 ; /*RowMajor*/
23
+ const bool matrix_is_transposed = false ;
24
+ const uint matrix_stride = 64 ;
25
+
26
+ // CHECK: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 363,
27
+ __builtin_MatVecMul (output_vector, is_output_unsigned, input_vector,
28
+ is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
29
+ matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
30
+ matrix_is_transposed, matrix_stride);
31
+
32
+ const uint bias_offset = 0 ;
33
+ const uint bias_interpretation = 9 ; /*F32*/
34
+
35
+ // CHECK: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 364,
36
+ __builtin_MatVecMulAdd (output_vector, is_output_unsigned, input_vector,
37
+ is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
38
+ matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
39
+ matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
40
+ bias_interpretation);
41
+
42
+ vector <uint , 8 > input_vector1;
43
+ vector <uint , 8 > input_vector2;
44
+ const uint opa_matrix_offset = 0 ;
45
+ const uint opa_matrix_interpretation = 5 ; /*U32*/
46
+ const uint opa_matrix_layout = 3 ; /*OuterProductOptimal*/
47
+ const uint opa_matrix_stride = 64 ;
48
+
49
+ // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 365,
50
+ __builtin_OuterProductAccumulate (input_vector1, input_vector2,
51
+ rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
52
+ opa_matrix_layout, opa_matrix_stride);
53
+
54
+ const uint va_matrix_offset = 0 ;
55
+
56
+ // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 366,
57
+ __builtin_VectorAccumulate (input_vector1, rw_matrix_buffer,
58
+ va_matrix_offset);
59
59
}
0 commit comments