From 55e81dc1f4d539a0164869a33738b0e0c4278e36 Mon Sep 17 00:00:00 2001 From: Michiel Derhaeg Date: Fri, 8 May 2026 20:53:23 +0200 Subject: [PATCH] arcv: FPU latencies for RHX-100 --- gcc/config/riscv/arcv-rhx100.md | 73 ++++++++++++++++--- gcc/config/riscv/arcv.cc | 67 +++++++++++------ gcc/config/riscv/riscv-protos.h | 1 + .../gcc.target/riscv/arcv-sched-fmadd.c | 45 ++++++++++++ 4 files changed, 152 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/arcv-sched-fmadd.c diff --git a/gcc/config/riscv/arcv-rhx100.md b/gcc/config/riscv/arcv-rhx100.md index 2724c4bd07d2..57664d022b44 100644 --- a/gcc/config/riscv/arcv-rhx100.md +++ b/gcc/config/riscv/arcv-rhx100.md @@ -28,6 +28,8 @@ (define_cpu_unit "arcv_rhx100_DIV" "arcv_rhx100") (define_cpu_unit "arcv_rhx100_DMP_fuse0" "arcv_rhx100") (define_cpu_unit "arcv_rhx100_DMP_fuse1" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_DMP_FPU" "arcv_rhx100") +(define_cpu_unit "arcv_rhx100_FPU" "arcv_rhx100") (define_cpu_unit "arcv_rhx100_fdivsqrt" "arcv_rhx100") (define_cpu_unit "arcv_rhx100_issueA_fuse0" "arcv_rhx100") (define_cpu_unit "arcv_rhx100_issueA_fuse1" "arcv_rhx100") @@ -69,29 +71,72 @@ (define_insn_reservation "arcv_rhx100_load_insn" 3 (and (eq_attr "tune" "arcv_rhx100") - (eq_attr "type" "load,fpload")) + (eq_attr "type" "load")) "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)") (define_insn_reservation "arcv_rhx100_store_insn" 1 (and (eq_attr "tune" "arcv_rhx100") - (eq_attr "type" "store,fpstore")) + (eq_attr "type" "store")) "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)") -;; (soft) floating points -(define_insn_reservation "arcv_rhx100_xfer" 3 +(define_insn_reservation "arcv_rhx100_fpload_insn" 1 (and (eq_attr "tune" "arcv_rhx100") - (eq_attr "type" "mfc,mtc,fcvt,fcvt_i2f,fcvt_f2i,fmove,fcmp")) - "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early), nothing*2") + (eq_attr "type" "fpload")) + "arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_FPU") -(define_insn_reservation "arcv_rhx100_fmul" 5 +(define_insn_reservation "arcv_rhx100_fpstore_insn" 1 (and (eq_attr "tune" "arcv_rhx100") - (eq_attr "type" "fadd,fmul,fmadd")) - "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early)") + (eq_attr "type" "fpstore")) + "arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_FPU") -(define_insn_reservation "arcv_rhx100_fdiv" 20 +(define_insn_reservation "arcv_rhx100_fmove" 2 (and (eq_attr "tune" "arcv_rhx100") - (eq_attr "type" "fdiv,fsqrt")) - "arcv_rhx100_fdivsqrt*20") + (eq_attr "type" "mtc,fmove")) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU") + +(define_insn_reservation "arcv_rhx100_fcvt" 3 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "fcvt,fcvt_i2f")) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU") + +(define_insn_reservation "arcv_rhx100_fmv_x" 5 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "mfc")) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU") + +(define_insn_reservation "arcv_rhx100_fcmp" 5 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "fcmp")) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU") + +(define_insn_reservation "arcv_rhx100_fcvt_f2i" 6 + (and (eq_attr "tune" "arcv_rhx100") + (eq_attr "type" "fcvt_f2i")) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU") + +(define_insn_reservation "arcv_rhx100_fmul_dp" 5 + (and (eq_attr "tune" "arcv_rhx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (eq_attr "mode" "DF"))) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU") + +(define_insn_reservation "arcv_rhx100_fmul" 4 + (and (eq_attr "tune" "arcv_rhx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (not (eq_attr "mode" "DF")))) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU") + +(define_insn_reservation "arcv_rhx100_fdiv_dp" 26 + (and (eq_attr "tune" "arcv_rhx100") + (and (eq_attr "type" "fdiv,fsqrt") + (eq_attr "mode" "DF"))) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU + arcv_rhx100_fdivsqrt, arcv_rhx100_fdivsqrt*6") + +(define_insn_reservation "arcv_rhx100_fdiv" 14 + (and (eq_attr "tune" "arcv_rhx100") + (and (eq_attr "type" "fdiv,fsqrt") + (not (eq_attr "mode" "DF")))) + "(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU + arcv_rhx100_fdivsqrt, arcv_rhx100_fdivsqrt*6") ;; Bypasses (define_bypass 1 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") @@ -107,3 +152,7 @@ (define_bypass 1 "arcv_rhx100_mpy32_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p") (define_bypass 7 "arcv_rhx100_mpy32_insn" "arcv_rhx100_store_insn") (define_bypass 7 "arcv_rhx100_mpy32_insn" "arcv_rhx100_load_insn") + +(define_bypass 3 "arcv_rhx100_fmul" "arcv_rhx100_fmul*") +(define_bypass 4 "arcv_rhx100_fmul_dp" "arcv_rhx100_fmul*") +(define_bypass 2 "arcv_rhx100_fmul*" "arcv_rhx100_fmul*" "arcv_fmadd_acc_bypass_p") diff --git a/gcc/config/riscv/arcv.cc b/gcc/config/riscv/arcv.cc index 2c54f0d97521..9f9d2137da73 100644 --- a/gcc/config/riscv/arcv.cc +++ b/gcc/config/riscv/arcv.cc @@ -204,6 +204,15 @@ arcv_arith_type_insn_p (rtx_insn *insn) || type == TYPE_CTZ); } + +static bool +arcv_memop_p (rtx_insn *insn) +{ + enum attr_type type = get_attr_type (insn); + return (type == TYPE_LOAD || type == TYPE_STORE + || type == TYPE_FPLOAD || type == TYPE_FPSTORE); +} + /* Helper function to check if the pair of instructions prev/curr * are comformant with pre- or post-update memory operation. Examples: load+add, add+load, store+add, add+store. */ @@ -645,8 +654,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn && ready && *n_readyp > 0 && !SCHED_GROUP_P (sched_state.last_scheduled_insn) - && (get_attr_type (sched_state.last_scheduled_insn) == TYPE_LOAD - || get_attr_type (sched_state.last_scheduled_insn) == TYPE_STORE)) + && arcv_memop_p (sched_state.last_scheduled_insn)) { for (int i = 1; i <= *n_readyp; i++) { @@ -673,8 +681,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) if ((!sched_state.alu_pipe_scheduled_p || !sched_state.pipeB_scheduled_p) && sched_state.last_scheduled_insn && ready && *n_readyp > 0 && !SCHED_GROUP_P (sched_state.last_scheduled_insn) - && (get_attr_type (sched_state.last_scheduled_insn) != TYPE_LOAD - && get_attr_type (sched_state.last_scheduled_insn) != TYPE_STORE)) + && !arcv_memop_p (sched_state.last_scheduled_insn)) { for (int i = 1; i <= *n_readyp; i++) { @@ -689,8 +696,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) if (GET_CODE (PATTERN (ready[*n_readyp - i])) == USE) continue; - if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD - || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE) + if (arcv_memop_p (ready[*n_readyp - i])) { if (sched_state.pipeB_scheduled_p) continue; @@ -715,8 +721,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) && NONDEBUG_INSN_P (ready[*n_readyp - 1]) && recog_memoized (ready[*n_readyp - 1]) >= 0 && !SCHED_GROUP_P (ready[*n_readyp - 1]) - && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + && arcv_memop_p (ready[*n_readyp - 1])) { if (sched_state.alu_pipe_scheduled_p) return 0; @@ -726,14 +731,12 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]); if ((NONDEBUG_INSN_P (ready[*n_readyp - i]) && recog_memoized (ready[*n_readyp - i]) >= 0 - && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD - && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE + && !arcv_memop_p (ready[*n_readyp - i]) && !SCHED_GROUP_P (ready[*n_readyp - i]) && (!next_insn || !SCHED_GROUP_P (next_insn))) || (next_insn && recog_memoized (next_insn) >= 0 - && get_attr_type (next_insn) != TYPE_LOAD - && get_attr_type (next_insn) != TYPE_STORE)) + && !arcv_memop_p (next_insn))) { std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]); sched_state.alu_pipe_scheduled_p = 1; @@ -748,19 +751,16 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp) if (ready && *n_readyp > 0 && NONDEBUG_INSN_P (ready[*n_readyp - 1]) && recog_memoized (ready[*n_readyp - 1]) >= 0 - && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD - && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + && !arcv_memop_p (ready[*n_readyp - 1])) { if (!sched_state.pipeB_scheduled_p - && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE)) + && arcv_memop_p (ready[*n_readyp - 1])) { sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1; sched_state.cached_can_issue_more = 1; return 1; } - else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD - || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE) + else if (!arcv_memop_p (ready[*n_readyp - 1])) { sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1; sched_state.cached_can_issue_more = 1; @@ -957,10 +957,7 @@ arcv_sched_variable_issue (rtx_insn *insn, int more) rtx_insn *next = arcv_next_fusible_insn (insn); if (next && SCHED_GROUP_P (next)) { - if (get_attr_type (insn) == TYPE_LOAD - || get_attr_type (insn) == TYPE_STORE - || get_attr_type (next) == TYPE_LOAD - || get_attr_type (next) == TYPE_STORE) + if (arcv_memop_p (insn) || arcv_memop_p (next)) sched_state.pipeB_scheduled_p = 1; else sched_state.alu_pipe_scheduled_p = 1; @@ -978,3 +975,29 @@ arcv_sched_variable_issue (rtx_insn *insn, int more) return sched_state.cached_can_issue_more; } + +/* Check whether out_insn's output reg is passed into input register 3 of + an fmadd instruction. */ +bool +arcv_fmadd_acc_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) +{ + rtx out_set = single_set (out_insn); + rtx in_set = single_set (in_insn); + + if (!out_set || !in_set || !REG_P (SET_DEST (out_set))) + return false; + + rtx src = SET_SRC (in_set); + + if (GET_CODE (src) == NEG) + src = XEXP (src, 0); + + if (GET_CODE (src) != FMA) + return false; + + rtx acc = XEXP (src, 2); + if (GET_CODE (acc) == NEG) + acc = XEXP (acc, 0); + + return REG_P (acc) && REGNO (acc) == REGNO (SET_DEST (out_set)); +} diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index f8510845210f..6a5d7b55c8a2 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -165,6 +165,7 @@ extern bool arcv_mpy_2c_bypass_p (rtx_insn *, rtx_insn *); extern bool arcv_mpy_10c_bypass_p (rtx_insn *, rtx_insn *); extern bool arcv_ld_1c_bypass_p (rtx_insn *, rtx_insn *); extern bool arcv_ld_2c_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_fmadd_acc_bypass_p (rtx_insn *, rtx_insn *); extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); extern void riscv_reinit (void); diff --git a/gcc/testsuite/gcc.target/riscv/arcv-sched-fmadd.c b/gcc/testsuite/gcc.target/riscv/arcv-sched-fmadd.c new file mode 100644 index 000000000000..d86043028799 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arcv-sched-fmadd.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-O1" "-O3" "-Oz" "-Os" } } */ +/* { dg-options "-O2 -ffp-contract=fast -mtune=arc-v-rhx-100-series -march=rv32ifd_zfh -mabi=ilp32d -fdump-rtl-sched2" } */ + +float fmadd_s (float a, float b, float c, float d) +{ return a * b + (c * d); } +float fmsub_s (float a, float b, float c, float d) +{ return a * b - (c * d); } +float fnmadd_s (float a, float b, float c, float d) +{ return -(a * b) - (c * d); } +float fnmsub_s (float a, float b, float c, float d) +{ return -(a * b) + (c * d); } + +double fmadd_d (double a, double b, double c, double d) +{ return a * b + (c * d); } +double fmsub_d (double a, double b, double c, double d) +{ return a * b - (c * d); } +double fnmadd_d (double a, double b, double c, double d) +{ return -(a * b) - (c * d); } +double fnmsub_d (double a, double b, double c, double d) +{ return -(a * b) + (c * d); } + +_Float16 fmadd_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d) +{ return a * b + (c * d); } +_Float16 fmsub_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d) +{ return a * b - (c * d); } +_Float16 fnmadd_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d) +{ return -(a * b) - (c * d); } +_Float16 fnmsub_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d) +{ return -(a * b) + (c * d); } + +/* { dg-final { scan-rtl-dump "(?s)Function fmadd_s .*?2-->.*\\{fa0\\*fa1\\+fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fmsub_s .*?2-->.*\\{fa0\\*fa1\\+-fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fnmadd_s .*?2-->.*\\{-fa0\\*fa1\\+-fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fnmsub_s .*?2-->.*\\{fa2\\*fa3\\+-fa0\\}" "sched2" } } */ + +/* { dg-final { scan-rtl-dump "(?s)Function fmadd_d .*?2-->.*\\{fa0\\*fa1\\+fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fmsub_d .*?2-->.*\\{fa0\\*fa1\\+-fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fnmadd_d .*?2-->.*\\{-fa0\\*fa1\\+-fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fnmsub_d .*?2-->.*\\{fa2\\*fa3\\+-fa0\\}" "sched2" } } */ + +/* { dg-final { scan-rtl-dump "(?s)Function fmadd_h .*?2-->.*\\{fa0\\*fa1\\+fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fmsub_h .*?2-->.*\\{fa0\\*fa1\\+-fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fnmadd_h .*?2-->.*\\{-fa0\\*fa1\\+-fa2\\}" "sched2" } } */ +/* { dg-final { scan-rtl-dump "(?s)Function fnmsub_h .*?2-->.*\\{fa2\\*fa3\\+-fa0\\}" "sched2" } } */