wmma tf32 tests

carstenbauer · carstenbauer · commit b93511f73137 · 2022-06-13T23:39:57.000+02:00
diff --git a/test/device/intrinsics/wmma.jl b/test/device/intrinsics/wmma.jl
@@ -6,8 +6,9 @@ map_ptx_to_jl_frag = Dict(
                             "u32" => UInt32(42),
                             "s32" => Int32(42),
                             "f16" => ntuple(i -> VecElement{Float16}(42), 2),
-                            "f32" => Float32(42)
-                            )  
+                            "f32" => Float32(42),
+                            "tf32" => Float32(42)
+                            )
 # Return specific matrix shape given operation configuration
 function get_array_shape(mat, mnk, layout)
     if !(mat in ["a","b","c","d"])
@@ -46,13 +47,13 @@ end
             # Type-dependent variables
             array_ty = CUDA.WMMA.map_ptx_to_jl_array[elem_type]
             expected = map_ptx_to_jl_frag[elem_type]
-            
+
             # Address-space dependent variables
             do_shared_test = (addr_space == "_shared")
 
             # Get the function name
             func = Symbol("llvm_wmma_load_$(mat)_$(layout)_$(shape)$(addr_space)_stride_$(elem_type)")
-            
+
             input_shape = get_array_shape(mat, mnk, layout)
             input       = array_ty(42) * ones(array_ty, input_shape)
             input_dev   = CuArray(input)
@@ -96,7 +97,7 @@ end
             elem_type in ops[3],
             addr_space in ["", "_global", "_shared"],
             stride in ["stride"]
-            
+
             # Skip all but d matrices
             if mat != "d"
                 continue
@@ -169,9 +170,9 @@ end
             ldc_func = getfield(Main, Symbol("llvm_wmma_load_c_col_$(shape)_global_stride_$(c_elem_type)"))
             # Account for half and int/subint mma different naming conventions
             # Int/subint mma functions are distinguished by the a/b element type
-            mma_sym = d_ty == Int32 ? Symbol("llvm_wmma_mma_$(a_layout)_$(b_layout)_$(shape)_$(ab_elem_type)") :
+            mma_sym = (d_ty == Int32 || ab_elem_type == "tf32") ? Symbol("llvm_wmma_mma_$(a_layout)_$(b_layout)_$(shape)_$(ab_elem_type)") :
                                       Symbol("llvm_wmma_mma_$(a_layout)_$(b_layout)_$(shape)_$(d_elem_type)_$(c_elem_type)")
-            mma_func = getfield(Main, mma_sym)               
+            mma_func = getfield(Main, mma_sym)
             std_func = getfield(Main, Symbol("llvm_wmma_store_d_col_$(shape)_global_stride_$(d_elem_type)"))
 
             a_shape   = get_array_shape("a", mnk, a_layout)
@@ -205,9 +206,9 @@ end
             new_a = (a_layout == "col" ? a : transpose(a))
             new_b = (b_layout == "col" ? b : transpose(b))
             # Alter test depending on a/b element Type
-            if ab_ty == Float16
+            if ab_ty == Float16 || ab_elem_type == "tf32"
                 @test new_a * new_b + c ≈ Array(d_dev) rtol=Base.rtoldefault(Float16)
-            else # Cast a and b to prevent UInt8 rollover of resultant data            
+            else # Cast a and b to prevent UInt8 rollover of resultant data
                 @test Int32.(new_a) * Int32.(new_b) + c == Array(d_dev)
             end
         end
@@ -344,4 +345,4 @@ end
         @test !occursin(r"wmma.store.d.sync(.aligned)?.col.m16n16k16.f32", ptx)
         @test  occursin(r"wmma.store.d.sync(.aligned)?.col.m16n16k16.shared.f32", ptx)
     end
-end
+end