Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 61 additions & 12 deletions gcc/config/riscv/arcv-rhx100.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
;; Functional units of the "arcv_rhx100" automaton.
;; arcv_rhx100_DMP_FPU is reserved by the fpload/fpstore reservations,
;; arcv_rhx100_FPU by the floating-point move/convert/compare/arithmetic
;; reservations, and arcv_rhx100_fdivsqrt additionally by fdiv/fsqrt.
(define_cpu_unit "arcv_rhx100_DIV" "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_DMP_fuse0" "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_DMP_fuse1" "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_DMP_FPU" "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_FPU" "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_fdivsqrt" "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_issueA_fuse0" "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_issueA_fuse1" "arcv_rhx100")
Expand Down Expand Up @@ -69,29 +71,72 @@

(define_insn_reservation "arcv_rhx100_load_insn" 3
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "load,fpload"))
(eq_attr "type" "load"))
"(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)")

(define_insn_reservation "arcv_rhx100_store_insn" 1
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "store,fpstore"))
(eq_attr "type" "store"))
"(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)")

;; (soft) floating points
(define_insn_reservation "arcv_rhx100_xfer" 3
(define_insn_reservation "arcv_rhx100_fpload_insn" 1
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "mfc,mtc,fcvt,fcvt_i2f,fcvt_f2i,fmove,fcmp"))
"(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early), nothing*2")
(eq_attr "type" "fpload"))
"arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_FPU")

(define_insn_reservation "arcv_rhx100_fmul" 5
(define_insn_reservation "arcv_rhx100_fpstore_insn" 1
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "fadd,fmul,fmadd"))
"(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early)")
(eq_attr "type" "fpstore"))
"arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_FPU")

(define_insn_reservation "arcv_rhx100_fdiv" 20
(define_insn_reservation "arcv_rhx100_fmove" 2
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "fdiv,fsqrt"))
"arcv_rhx100_fdivsqrt*20")
(eq_attr "type" "mtc,fmove"))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU")

;; Insns of type fcvt or fcvt_i2f when tuning for arcv_rhx100.
;; Default latency 3.  Reserves one of the two issueA slots together
;; with the FPU unit in the issue cycle.
(define_insn_reservation "arcv_rhx100_fcvt" 3
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "fcvt,fcvt_i2f"))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU")

;; Insns of type mfc when tuning for arcv_rhx100 (presumably the
;; FP-register-to-integer-register moves, going by the reservation name;
;; confirm against the "type" attribute definition in riscv.md).
;; Default latency 5.  Reserves one of the two issueA slots together
;; with the FPU unit in the issue cycle.
(define_insn_reservation "arcv_rhx100_fmv_x" 5
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "mfc"))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU")

;; Insns of type fcmp when tuning for arcv_rhx100.
;; Default latency 5.  Reserves one of the two issueA slots together
;; with the FPU unit in the issue cycle.
(define_insn_reservation "arcv_rhx100_fcmp" 5
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "fcmp"))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU")

;; Insns of type fcvt_f2i when tuning for arcv_rhx100.
;; Default latency 6.  Reserves one of the two issueA slots together
;; with the FPU unit in the issue cycle.
(define_insn_reservation "arcv_rhx100_fcvt_f2i" 6
(and (eq_attr "tune" "arcv_rhx100")
(eq_attr "type" "fcvt_f2i"))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU")

;; DFmode insns of type fadd, fmul or fmadd when tuning for arcv_rhx100.
;; Default latency 5 (one more than the non-DF variant
;; "arcv_rhx100_fmul").  Reserves one of the two issueA slots together
;; with the FPU unit in the issue cycle.
(define_insn_reservation "arcv_rhx100_fmul_dp" 5
(and (eq_attr "tune" "arcv_rhx100")
(and (eq_attr "type" "fadd,fmul,fmadd")
(eq_attr "mode" "DF")))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU")

;; Insns of type fadd, fmul or fmadd whose mode is anything other than DF,
;; when tuning for arcv_rhx100.  Default latency 4.  Reserves one of the
;; two issueA slots together with the FPU unit in the issue cycle.
(define_insn_reservation "arcv_rhx100_fmul" 4
(and (eq_attr "tune" "arcv_rhx100")
(and (eq_attr "type" "fadd,fmul,fmadd")
(not (eq_attr "mode" "DF"))))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU")

;; DFmode insns of type fdiv or fsqrt when tuning for arcv_rhx100.
;; Default latency 26.  In the issue cycle this reserves one of the two
;; issueA slots, the FPU unit and the fdivsqrt unit; the fdivsqrt unit
;; alone then stays reserved for 6 further cycles.
;; NOTE(review): the fdivsqrt occupancy (7 cycles in total) is the same as
;; for the non-DF variant and much shorter than the latency -- confirm
;; this matches the hardware's divide/sqrt throughput.
(define_insn_reservation "arcv_rhx100_fdiv_dp" 26
(and (eq_attr "tune" "arcv_rhx100")
(and (eq_attr "type" "fdiv,fsqrt")
(eq_attr "mode" "DF")))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU + arcv_rhx100_fdivsqrt, arcv_rhx100_fdivsqrt*6")

;; Insns of type fdiv or fsqrt whose mode is anything other than DF, when
;; tuning for arcv_rhx100.  Default latency 14.  In the issue cycle this
;; reserves one of the two issueA slots, the FPU unit and the fdivsqrt
;; unit; the fdivsqrt unit alone then stays reserved for 6 further cycles.
(define_insn_reservation "arcv_rhx100_fdiv" 14
(and (eq_attr "tune" "arcv_rhx100")
(and (eq_attr "type" "fdiv,fsqrt")
(not (eq_attr "mode" "DF"))))
"(arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1) + arcv_rhx100_FPU + arcv_rhx100_fdivsqrt, arcv_rhx100_fdivsqrt*6")

;; Bypasses
;; Latency 1 from "arcv_rhx100_alu_early_arith" to "arcv_rhx100_store_insn"
;; when the guard riscv_store_data_bypass_p accepts the insn pair.
(define_bypass 1 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p")
Expand All @@ -107,3 +152,7 @@
;; Results of "arcv_rhx100_mpy32_insn": latency 1 to a store when the guard
;; riscv_store_data_bypass_p accepts the insn pair, otherwise latency 7 to
;; a store; latency 7 to a load.
(define_bypass 1 "arcv_rhx100_mpy32_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p")
(define_bypass 7 "arcv_rhx100_mpy32_insn" "arcv_rhx100_store_insn")
(define_bypass 7 "arcv_rhx100_mpy32_insn" "arcv_rhx100_load_insn")

;; Forwarding between fadd/fmul/fmadd insns.  The glob "arcv_rhx100_fmul*"
;; matches both the "arcv_rhx100_fmul" and "arcv_rhx100_fmul_dp"
;; reservations.
;;  - non-DF producer -> any fmul* consumer: latency 3
;;  - DF producer     -> any fmul* consumer: latency 4
;;  - any fmul* producer -> any fmul* consumer: latency 2 when the guard
;;    arcv_fmadd_acc_bypass_p (arcv.cc) accepts the pair, i.e. when the
;;    producer's destination register is the third (addend) operand of the
;;    consumer's FMA.
(define_bypass 3 "arcv_rhx100_fmul" "arcv_rhx100_fmul*")
(define_bypass 4 "arcv_rhx100_fmul_dp" "arcv_rhx100_fmul*")
(define_bypass 2 "arcv_rhx100_fmul*" "arcv_rhx100_fmul*" "arcv_fmadd_acc_bypass_p")
67 changes: 45 additions & 22 deletions gcc/config/riscv/arcv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ arcv_arith_type_insn_p (rtx_insn *insn)
|| type == TYPE_CTZ);
}


/* Return true if the "type" attribute of INSN classifies it as a memory
   operation: an integer or floating-point load or store.  */

static bool
arcv_memop_p (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_FPLOAD:
    case TYPE_FPSTORE:
      return true;

    default:
      return false;
    }
}

/* Helper function to check if the pair of instructions prev/curr
* are conformant with pre- or post-update memory operation.
Examples: load+add, add+load, store+add, add+store. */
Expand Down Expand Up @@ -645,8 +654,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
if (!sched_state.pipeB_scheduled_p && sched_state.last_scheduled_insn
&& ready && *n_readyp > 0
&& !SCHED_GROUP_P (sched_state.last_scheduled_insn)
&& (get_attr_type (sched_state.last_scheduled_insn) == TYPE_LOAD
|| get_attr_type (sched_state.last_scheduled_insn) == TYPE_STORE))
&& arcv_memop_p (sched_state.last_scheduled_insn))
{
for (int i = 1; i <= *n_readyp; i++)
{
Expand All @@ -673,8 +681,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
if ((!sched_state.alu_pipe_scheduled_p || !sched_state.pipeB_scheduled_p)
&& sched_state.last_scheduled_insn && ready && *n_readyp > 0
&& !SCHED_GROUP_P (sched_state.last_scheduled_insn)
&& (get_attr_type (sched_state.last_scheduled_insn) != TYPE_LOAD
&& get_attr_type (sched_state.last_scheduled_insn) != TYPE_STORE))
&& !arcv_memop_p (sched_state.last_scheduled_insn))
{
for (int i = 1; i <= *n_readyp; i++)
{
Expand All @@ -689,8 +696,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
if (GET_CODE (PATTERN (ready[*n_readyp - i])) == USE)
continue;

if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD
|| get_attr_type (ready[*n_readyp - i]) == TYPE_STORE)
if (arcv_memop_p (ready[*n_readyp - i]))
{
if (sched_state.pipeB_scheduled_p)
continue;
Expand All @@ -715,8 +721,7 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
&& NONDEBUG_INSN_P (ready[*n_readyp - 1])
&& recog_memoized (ready[*n_readyp - 1]) >= 0
&& !SCHED_GROUP_P (ready[*n_readyp - 1])
&& (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
|| get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
&& arcv_memop_p (ready[*n_readyp - 1]))
{
if (sched_state.alu_pipe_scheduled_p)
return 0;
Expand All @@ -726,14 +731,12 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
rtx_insn* next_insn = arcv_next_fusible_insn (ready[*n_readyp - i]);
if ((NONDEBUG_INSN_P (ready[*n_readyp - i])
&& recog_memoized (ready[*n_readyp - i]) >= 0
&& get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD
&& get_attr_type (ready[*n_readyp - i]) != TYPE_STORE
&& !arcv_memop_p (ready[*n_readyp - i])
&& !SCHED_GROUP_P (ready[*n_readyp - i])
&& (!next_insn || !SCHED_GROUP_P (next_insn)))
|| (next_insn
&& recog_memoized (next_insn) >= 0
&& get_attr_type (next_insn) != TYPE_LOAD
&& get_attr_type (next_insn) != TYPE_STORE))
&& !arcv_memop_p (next_insn)))
{
std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
sched_state.alu_pipe_scheduled_p = 1;
Expand All @@ -748,19 +751,16 @@ arcv_sched_reorder2 (rtx_insn **ready, int *n_readyp)
if (ready && *n_readyp > 0
&& NONDEBUG_INSN_P (ready[*n_readyp - 1])
&& recog_memoized (ready[*n_readyp - 1]) >= 0
&& get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
&& get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
&& !arcv_memop_p (ready[*n_readyp - 1]))
{
if (!sched_state.pipeB_scheduled_p
&& (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
|| get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
&& arcv_memop_p (ready[*n_readyp - 1]))
{
sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1;
sched_state.cached_can_issue_more = 1;
return 1;
}
else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
|| get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
else if (!arcv_memop_p (ready[*n_readyp - 1]))
{
sched_state.alu_pipe_scheduled_p = sched_state.pipeB_scheduled_p = 1;
sched_state.cached_can_issue_more = 1;
Expand Down Expand Up @@ -957,10 +957,7 @@ arcv_sched_variable_issue (rtx_insn *insn, int more)
rtx_insn *next = arcv_next_fusible_insn (insn);
if (next && SCHED_GROUP_P (next))
{
if (get_attr_type (insn) == TYPE_LOAD
|| get_attr_type (insn) == TYPE_STORE
|| get_attr_type (next) == TYPE_LOAD
|| get_attr_type (next) == TYPE_STORE)
if (arcv_memop_p (insn) || arcv_memop_p (next))
sched_state.pipeB_scheduled_p = 1;
else
sched_state.alu_pipe_scheduled_p = 1;
Expand All @@ -978,3 +975,29 @@ arcv_sched_variable_issue (rtx_insn *insn, int more)

return sched_state.cached_can_issue_more;
}

/* Bypass guard.  Return true when the register set by OUT_INSN is used as
   the third (addend) operand of the FMA computed by IN_INSN.

   Both insns must be single sets and OUT_INSN must set a plain register.
   A NEG wrapped around the whole FMA, or around its addend, is looked
   through before the comparison.  */
bool
arcv_fmadd_acc_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx producer = single_set (out_insn);
  rtx consumer = single_set (in_insn);

  if (!producer || !consumer)
    return false;

  rtx dest = SET_DEST (producer);
  if (!REG_P (dest))
    return false;

  /* Strip an outer negation so that a negated FMA is recognized too.  */
  rtx fma = SET_SRC (consumer);
  fma = GET_CODE (fma) == NEG ? XEXP (fma, 0) : fma;

  if (GET_CODE (fma) != FMA)
    return false;

  /* Operand 2 of the FMA is the addend; it may itself be negated.  */
  rtx addend = XEXP (fma, 2);
  addend = GET_CODE (addend) == NEG ? XEXP (addend, 0) : addend;

  if (!REG_P (addend))
    return false;

  return REGNO (addend) == REGNO (dest);
}
1 change: 1 addition & 0 deletions gcc/config/riscv/riscv-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ extern bool arcv_mpy_2c_bypass_p (rtx_insn *, rtx_insn *);
extern bool arcv_mpy_10c_bypass_p (rtx_insn *, rtx_insn *);
extern bool arcv_ld_1c_bypass_p (rtx_insn *, rtx_insn *);
extern bool arcv_ld_2c_bypass_p (rtx_insn *, rtx_insn *);
/* Defined in arcv.cc and named as the guard of a define_bypass in
   arcv-rhx100.md: true when the first insn's destination register is the
   addend operand of the second insn's FMA.  */
extern bool arcv_fmadd_acc_bypass_p (rtx_insn *, rtx_insn *);
extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
extern bool riscv_gpr_save_operation_p (rtx);
extern void riscv_reinit (void);
Expand Down
45 changes: 45 additions & 0 deletions gcc/testsuite/gcc.target/riscv/arcv-sched-fmadd.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/* { dg-do compile } */
/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-O1" "-O3" "-Oz" "-Os" } } */
/* { dg-options "-O2 -ffp-contract=fast -mtune=arc-v-rhx-100-series -march=rv32ifd_zfh -mabi=ilp32d -fdump-rtl-sched2" } */

/* float: a * b + c * d.  The sched2 dump of this function is scanned for
   an insn printed as {fa0*fa1+fa2}; keep the expression shape unchanged.  */
float fmadd_s (float a, float b, float c, float d)
{ return a * b + (c * d); }
/* float: a * b - c * d.  The sched2 dump of this function is scanned for
   an insn printed as {fa0*fa1+-fa2}; keep the expression shape unchanged.  */
float fmsub_s (float a, float b, float c, float d)
{ return a * b - (c * d); }
/* float: -(a * b) - c * d.  The sched2 dump of this function is scanned
   for an insn printed as {-fa0*fa1+-fa2}; keep the expression shape
   unchanged.  */
float fnmadd_s (float a, float b, float c, float d)
{ return -(a * b) - (c * d); }
/* float: -(a * b) + c * d.  The sched2 dump of this function is scanned
   for an insn printed as {fa2*fa3+-fa0}, i.e. c * d is the multiply inside
   the fused insn; keep the expression shape unchanged.  */
float fnmsub_s (float a, float b, float c, float d)
{ return -(a * b) + (c * d); }

/* double: a * b + c * d.  The sched2 dump of this function is scanned for
   an insn printed as {fa0*fa1+fa2}; keep the expression shape unchanged.  */
double fmadd_d (double a, double b, double c, double d)
{ return a * b + (c * d); }
/* double: a * b - c * d.  The sched2 dump of this function is scanned for
   an insn printed as {fa0*fa1+-fa2}; keep the expression shape unchanged.  */
double fmsub_d (double a, double b, double c, double d)
{ return a * b - (c * d); }
/* double: -(a * b) - c * d.  The sched2 dump of this function is scanned
   for an insn printed as {-fa0*fa1+-fa2}; keep the expression shape
   unchanged.  */
double fnmadd_d (double a, double b, double c, double d)
{ return -(a * b) - (c * d); }
/* double: -(a * b) + c * d.  The sched2 dump of this function is scanned
   for an insn printed as {fa2*fa3+-fa0}, i.e. c * d is the multiply inside
   the fused insn; keep the expression shape unchanged.  */
double fnmsub_d (double a, double b, double c, double d)
{ return -(a * b) + (c * d); }

/* _Float16: a * b + c * d.  The sched2 dump of this function is scanned
   for an insn printed as {fa0*fa1+fa2}; keep the expression shape
   unchanged.  */
_Float16 fmadd_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d)
{ return a * b + (c * d); }
/* _Float16: a * b - c * d.  The sched2 dump of this function is scanned
   for an insn printed as {fa0*fa1+-fa2}; keep the expression shape
   unchanged.  */
_Float16 fmsub_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d)
{ return a * b - (c * d); }
/* _Float16: -(a * b) - c * d.  The sched2 dump of this function is
   scanned for an insn printed as {-fa0*fa1+-fa2}; keep the expression
   shape unchanged.  */
_Float16 fnmadd_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d)
{ return -(a * b) - (c * d); }
/* _Float16: -(a * b) + c * d.  The sched2 dump of this function is
   scanned for an insn printed as {fa2*fa3+-fa0}, i.e. c * d is the
   multiply inside the fused insn; keep the expression shape unchanged.  */
_Float16 fnmsub_h (_Float16 a, _Float16 b, _Float16 c, _Float16 d)
{ return -(a * b) + (c * d); }

/* { dg-final { scan-rtl-dump "(?s)Function fmadd_s .*?2-->.*\\{fa0\\*fa1\\+fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fmsub_s .*?2-->.*\\{fa0\\*fa1\\+-fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fnmadd_s .*?2-->.*\\{-fa0\\*fa1\\+-fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fnmsub_s .*?2-->.*\\{fa2\\*fa3\\+-fa0\\}" "sched2" } } */

/* { dg-final { scan-rtl-dump "(?s)Function fmadd_d .*?2-->.*\\{fa0\\*fa1\\+fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fmsub_d .*?2-->.*\\{fa0\\*fa1\\+-fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fnmadd_d .*?2-->.*\\{-fa0\\*fa1\\+-fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fnmsub_d .*?2-->.*\\{fa2\\*fa3\\+-fa0\\}" "sched2" } } */

/* { dg-final { scan-rtl-dump "(?s)Function fmadd_h .*?2-->.*\\{fa0\\*fa1\\+fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fmsub_h .*?2-->.*\\{fa0\\*fa1\\+-fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fnmadd_h .*?2-->.*\\{-fa0\\*fa1\\+-fa2\\}" "sched2" } } */
/* { dg-final { scan-rtl-dump "(?s)Function fnmsub_h .*?2-->.*\\{fa2\\*fa3\\+-fa0\\}" "sched2" } } */
Loading