From 672f2e3c96f4bc6e03a6d67668abcf87b5437c13 Mon Sep 17 00:00:00 2001
From: James Newling
Date: Fri, 28 Feb 2025 09:00:00 -0800
Subject: [PATCH] Parameterize trunci scale and shift in the matmul scale trunci test

---
 .../matmul_template/matmul_generator.py        | 17 ++++++-
 .../matmul_trunci_scaling_MxK_KxN.mlir         | 13 ++---
 build_tools/ci/cpu_comparison/run.py           | 50 +++++++++++--------
 3 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/build_tools/ci/cpu_comparison/matmul_template/matmul_generator.py b/build_tools/ci/cpu_comparison/matmul_template/matmul_generator.py
index f02e17023..6511b0b83 100644
--- a/build_tools/ci/cpu_comparison/matmul_template/matmul_generator.py
+++ b/build_tools/ci/cpu_comparison/matmul_template/matmul_generator.py
@@ -10,7 +10,19 @@ def get_higher_order_element_type(element_type):
 
 
 def generate_matmul_test(
-    output_fn, input_fn, m, n, k, lhs_rhs_type, acc_type, b=0, m0=0, n0=0, k0=0
+    output_fn,
+    input_fn,
+    m,
+    n,
+    k,
+    lhs_rhs_type,
+    acc_type,
+    b=0,
+    m0=0,
+    n0=0,
+    k0=0,
+    trunci_scale=None,
+    trunci_shift=None,
 ):
     """
     Generate mlir file (output_fn) from the template file (input_fn).
@@ -34,6 +46,9 @@ def generate_matmul_test(
     # This is only used for batch matmul.
     replace["B"] = b
 
+    replace["TRUNCI_SCALE"] = trunci_scale
+    replace["TRUNCI_SHIFT"] = trunci_shift
+
     # m0, n0, k0 are only used for matmul4d as inner dim sizes.
     replace["M0"] = m0
     replace["N0"] = n0
diff --git a/build_tools/ci/cpu_comparison/matmul_template/matmul_trunci_scaling_MxK_KxN.mlir b/build_tools/ci/cpu_comparison/matmul_template/matmul_trunci_scaling_MxK_KxN.mlir
index 5ed4a849b..8759d8938 100644
--- a/build_tools/ci/cpu_comparison/matmul_template/matmul_trunci_scaling_MxK_KxN.mlir
+++ b/build_tools/ci/cpu_comparison/matmul_template/matmul_trunci_scaling_MxK_KxN.mlir
@@ -2,15 +2,16 @@
 // input ${K}x${N}x${TYPE1}
 
 // Matmul + Trunci variant with scaling.
-// In an actual quantized model, truncating from a higher bitwidth to a lower precision bitwidth
-// won't work and we need to scale.
-// Since the output of the Matmul here is an integer cannot be multiplied with a floating point
-// scale factor, we need to represent the scale factor with a multiplier and a shift operator instead.
+// In an actual quantized model, truncating from a higher bitwidth to a lower
+// precision bitwidth won't work and we need to scale. Since the output of the
+// matmul here is an integer, it cannot be multiplied by a floating-point
+// scale factor, so we represent the scale factor with an integer multiplier
+// and a shift instead.
 
 func.func @matmul_trunci(%arg0: tensor<${M}x${K}x${TYPE1}>, %arg1: tensor<${K}x${N}x${TYPE1}>) -> tensor<${M}x${N}x${TYPE1}> {
   %cst = arith.constant ${ZERO} : ${TYPE2}
-  %cst_mul = arith.constant 10 : ${TYPE_MUL_RESULT}
-  %cst_shift = arith.constant 7 : ${TYPE_MUL_RESULT}
+  %cst_mul = arith.constant ${TRUNCI_SCALE} : ${TYPE_MUL_RESULT}
+  %cst_shift = arith.constant ${TRUNCI_SHIFT} : ${TYPE_MUL_RESULT}
   %0 = tensor.empty() : tensor<${M}x${N}x${TYPE2}>
   %i8out = tensor.empty() : tensor<${M}x${N}x${TYPE1}>
   %1 = linalg.fill ins(%cst : ${TYPE2}) outs(%0 : tensor<${M}x${N}x${TYPE2}>) -> tensor<${M}x${N}x${TYPE2}>
diff --git a/build_tools/ci/cpu_comparison/run.py b/build_tools/ci/cpu_comparison/run.py
index f081a2b4f..ccbb9bd9f 100755
--- a/build_tools/ci/cpu_comparison/run.py
+++ b/build_tools/ci/cpu_comparison/run.py
@@ -733,10 +733,9 @@ def __init__(
         K,
         input_type,
         acc_type,
-        lhs,
-        rhs,
-        expected_out,
         test_params=None,
+        trunci_scale=10,
+        trunci_shift=7,
     ):
         super().__init__(
             name=f"matmul_scale_trunci_{M}_{N}_{K}_{input_type}_{acc_type}",
@@ -749,23 +748,42 @@ def __init__(
         )
         self.labels.append("MatmulScaleTrunci")
 
-        # Assertions on shapes: Check that lhs is MxK, rhs is KxN, and expected_out is MxN
-        assert lhs.shape == (M, K)
-        assert rhs.shape == (K, N)
-        assert expected_out.shape == (M, N)
+        self.trunci_scale = trunci_scale
+        self.trunci_shift = trunci_shift
+        self.lhs = np.random.randint(0, 3, (self.M, self.K)).astype(np.int8)
+        self.rhs = np.random.randint(0, 3, (self.K, self.N)).astype(np.int8)
+        self.expected_out = np.right_shift(
+            (
+                (self.lhs.astype(np.int32) @ self.rhs.astype(np.int32))
+                * self.trunci_scale
+            ),
+            self.trunci_shift,
+        ).astype(np.int8)
 
-        self.lhs = lhs
-        self.rhs = rhs
-        self.expected_out = expected_out
+        # Sanity check on the shape of the computed expected output: it is MxN.
+        assert self.expected_out.shape == (M, N)
 
     def _execute(self, config):
         matmul_template_dir = config.file_dir / "matmul_template"
         template_name = matmul_template_dir / "matmul_trunci_scaling_MxK_KxN.mlir"
-        self.generate(config, template_name)
+
+        generate_matmul_test(
+            self.get_filename(config),
+            template_name,
+            k=self.K,
+            m=self.M,
+            n=self.N,
+            lhs_rhs_type=self.input_type,
+            acc_type=self.acc_type,
+            trunci_scale=self.trunci_scale,
+            trunci_shift=self.trunci_shift,
+        )
+
         filename = self.get_filename(config)
         input_args = generate_inputs(
             filename, self.get_dir(config), 1, {1: self.lhs, 2: self.rhs}
         )
+
         aie_vs_baseline(
             config=config,
             aie_compilation_flags=self.aie_compilation_flags,
@@ -1576,9 +1594,6 @@ def __init__(self):
                 128,
                 "i8",
                 "i32",
-                2 * np.ones([256, 128], dtype=np.int8),
-                3 * np.ones([128, 256], dtype=np.int8),
-                60 * np.ones([256, 256], dtype=np.int8),
                 test_params=TestParams(
                     name_suffix="scaling",
                     tile_pipeline="pack-peel-4-level-tiling",
@@ -1599,9 +1614,6 @@ def __init__(self):
                 128,
                 "i8",
                 "i32",
-                2 * np.ones([256, 128], dtype=np.int8),
-                3 * np.ones([128, 256], dtype=np.int8),
-                60 * np.ones([256, 256], dtype=np.int8),
                 test_params=TestParams(
                     tile_pipeline="pack-peel-4-level-tiling",
                     run_on_target=["npu1_4col"],
@@ -1620,9 +1632,6 @@ def __init__(self):
                 128,
                 "i8",
                 "i32",
-                2 * np.ones([256, 128], dtype=np.int8),
-                3 * np.ones([128, 256], dtype=np.int8),
-                60 * np.ones([256, 256], dtype=np.int8),
                 test_params=TestParams(
                     tile_pipeline="pack-peel-4-level-tiling",
                     run_on_target=["npu4"],
...
                 ),
             )
         )
+
         # Matmul with truncf test(s):
         for tile_pipeline in ["pack-peel", "pack-peel-4-level-tiling"]:
             self.register(
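
The expected output that the test now builds in its constructor follows the multiplier-plus-shift requantization described in the MLIR template comment: accumulate in int32, multiply by an integer scale, arithmetic right shift, then truncate to int8, so that scale / 2**shift (10 / 128, roughly 0.078, with the defaults above) stands in for a fractional scale factor. A minimal NumPy sketch of that reference computation; the helper name reference_matmul_trunci and the 256x128 shapes are illustrative, not part of the patch:

import numpy as np


def reference_matmul_trunci(lhs, rhs, trunci_scale=10, trunci_shift=7):
    # Accumulate the int8 matmul in int32, apply the integer scale, then an
    # arithmetic right shift, and truncate back to int8. The pair
    # (trunci_scale, trunci_shift) replaces a floating-point scale factor,
    # which cannot be applied directly to the integer accumulator.
    acc = lhs.astype(np.int32) @ rhs.astype(np.int32)
    return np.right_shift(acc * trunci_scale, trunci_shift).astype(np.int8)


# Example with the same small value range the test uses, so the scaled
# accumulator stays within int8 after the shift.
lhs = np.random.randint(0, 3, (256, 128)).astype(np.int8)
rhs = np.random.randint(0, 3, (128, 256)).astype(np.int8)
out = reference_matmul_trunci(lhs, rhs)
assert out.shape == (256, 256)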