Skip to content

Commit 4951c8b

Browse files
RISC-V: Add instruction patterns for 32-bit multiply-add and bit-extract fusion.
This patch adds instruction patterns to support fusion of multiply-add and bit extraction sequences for the Synopsys RHX-100 processor. This increases the likelihood that fusible sequences are produced in more situations. gcc/ChangeLog: * config/riscv/arcv-rhx100.md (arcv_rhx100_imul_fused): New reservation. (arcv_rhx100_alu_fused): New reservation. * config/riscv/iterators.md (is_zero_extract): New code attribute. * config/riscv/riscv.cc (riscv_rtx_costs): Reduce cost for zero_extract for RHX-100. * config/riscv/riscv.md: Add imul_fused and alu_fused type attributes. (umaddhisi4): New expand. (madd_fused): New insn_and_split. (madd_fused_extended): New insn_and_split. (*zero_extract_fused): New insn_and_split. gcc/testsuite/ChangeLog: * gcc.target/riscv/arcv-fusion-limm-condbr.c: New test. * gcc.target/riscv/arcv-fusion-madd.c: New test. * gcc.target/riscv/arcv-fusion-xbfu.c: New test. Co-authored-by: Artemiy Volkov <artemiyv@acm.org> Co-authored-by: Michiel Derhaeg <michiel@synopsys.com> Signed-off-by: Luis Silva <luiss@synopsys.com>
1 parent ffd24fb commit 4951c8b

7 files changed

Lines changed: 185 additions & 3 deletions

File tree

gcc/config/riscv/arcv-rhx100.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,16 @@
4242
condmove,mvpair,zicond,cpop,clmul"))
4343
"((arcv_rhx100_issueA_fuse0 + arcv_rhx100_ALU_A_fuse0_early) | (arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse1_early)) | ((arcv_rhx100_issueB_fuse0 + arcv_rhx100_ALU_B_fuse0_early) | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse1_early))")
4444

45+
;; Reservation for insns of type "imul_fused" when tuning for arcv_rhx100.
;; Default latency is 4.  In the first cycle the insn reserves both fuse
;; slots of issue pipe A, both early ALU_A units and the MPY32 unit at the
;; same time ("+"); the following three cycles reserve no unit ("nothing*3").
;; There is no pipe B alternative for this type.
(define_insn_reservation "arcv_rhx100_imul_fused" 4
46+
(and (eq_attr "tune" "arcv_rhx100")
47+
(eq_attr "type" "imul_fused"))
48+
"(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early + arcv_rhx100_MPY32), nothing*3")
49+
50+
;; Reservation for insns of type "alu_fused" when tuning for arcv_rhx100.
;; Default latency is 1.  The insn reserves, for one cycle, both fuse slots
;; and both early ALU units of a single issue pipe: either all four pipe A
;; units or ("|") all four pipe B units.
(define_insn_reservation "arcv_rhx100_alu_fused" 1
51+
(and (eq_attr "tune" "arcv_rhx100")
52+
(eq_attr "type" "alu_fused"))
53+
"(arcv_rhx100_issueA_fuse0 + arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse0_early + arcv_rhx100_ALU_A_fuse1_early) | (arcv_rhx100_issueB_fuse0 + arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse0_early + arcv_rhx100_ALU_B_fuse1_early)")
54+
4555
(define_insn_reservation "arcv_rhx100_jmp_insn" 1
4656
(and (eq_attr "tune" "arcv_rhx100")
4757
(eq_attr "type" "branch,jump,call,jalr,ret,trap"))

gcc/config/riscv/iterators.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@
225225
(zero_extract "srliw")])
226226
;; Maps each extract code to the name of a right-shift RTL code:
;; "ashiftrt" for sign_extract and "lshiftrt" for zero_extract.
(define_code_attr extract_shift [(sign_extract "ashiftrt")
227227
(zero_extract "lshiftrt")])
228+
;; Expands to the text "true" for zero_extract and "false" for sign_extract,
;; so that <any_extract:is_zero_extract> can be spliced into a pattern's C
;; condition as a boolean literal.
(define_code_attr is_zero_extract [(sign_extract "false")
229+
(zero_extract "true")])
228230

229231
;; This code iterator allows the two right shift instructions to be
230232
;; generated from the same template.

gcc/config/riscv/riscv.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4476,7 +4476,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
44764476
}
44774477
gcc_fallthrough ();
44784478
case SIGN_EXTRACT:
4479-
if (TARGET_XTHEADBB && outer_code == SET
4479+
if ((TARGET_ARCV_RHX100 || TARGET_XTHEADBB)
4480+
&& outer_code == SET
44804481
&& CONST_INT_P (XEXP (x, 1))
44814482
&& CONST_INT_P (XEXP (x, 2)))
44824483
{

gcc/config/riscv/riscv.md

Lines changed: 133 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3073,6 +3073,7 @@
30733073
;; * Single-bit extraction (SFB)
30743074
;; * Extraction instruction th.ext(u) (XTheadBb)
30753075
;; * lshrsi3_extend_2 (see above)
3076+
;; * Zero extraction fusion (ARC-V)
30763077
(define_insn_and_split "*<any_extract:optab><GPR:mode>3"
30773078
[(set (match_operand:GPR 0 "register_operand" "=r")
30783079
(any_extract:GPR
@@ -3085,6 +3086,8 @@
30853086
&& (INTVAL (operands[2]) == 1))
30863087
&& !TARGET_XTHEADBB
30873088
&& !TARGET_XANDESPERF
3089+
&& !(TARGET_ARCV_RHX100
3090+
&& <any_extract:is_zero_extract>)
30883091
&& !(TARGET_64BIT
30893092
&& (INTVAL (operands[3]) > 0)
30903093
&& (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))"
@@ -4525,8 +4528,62 @@
45254528
(mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand"))
45264529
(sign_extend:SI (match_operand:HI 2 "register_operand")))
45274530
(match_operand:SI 3 "register_operand")))]
4528-
"TARGET_XTHEADMAC"
4529-
)
4531+
"TARGET_XTHEADMAC || (TARGET_ARCV_RHX100
4532+
&& !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL))"
4533+
{
4534+
if (TARGET_ARCV_RHX100)
4535+
{
4536+
rtx tmp0 = gen_reg_rtx (SImode), tmp1 = gen_reg_rtx (SImode);
4537+
emit_insn (gen_extendhisi2 (tmp0, operands[1]));
4538+
emit_insn (gen_extendhisi2 (tmp1, operands[2]));
4539+
4540+
if (TARGET_64BIT)
4541+
{
4542+
rtx op0 = gen_reg_rtx (DImode);
4543+
emit_insn (gen_madd_fused_extended (op0, tmp0, tmp1, operands[3]));
4544+
op0 = gen_lowpart (SImode, op0);
4545+
SUBREG_PROMOTED_VAR_P (op0) = 1;
4546+
SUBREG_PROMOTED_SET (op0, SRP_SIGNED);
4547+
emit_move_insn (operands[0], op0);
4548+
}
4549+
else
4550+
{
4551+
emit_insn (gen_madd_fused (operands[0], tmp0, tmp1, operands[3]));
4552+
}
4553+
4554+
DONE;
4555+
}
4556+
})
4557+
4558+
;; Unsigned widening multiply-accumulate:
;;   operand 0 = zero_extend (operand 1) * zero_extend (operand 2) + operand 3
;; with HImode multiplicands and an SImode accumulator and result.
;;
;; Only enabled when tuning for RHX-100 on a 32-bit target that has a
;; multiplier.  The halfword inputs are zero-extended into fresh SImode
;; pseudos and the multiply-add itself is emitted as a single madd_fused
;; insn.  The expander condition excludes TARGET_64BIT, so no DImode
;; (sign-extended) variant is needed here.
(define_expand "umaddhisi4"
  [(set (match_operand:SI 0 "register_operand")
	(plus:SI
	  (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand"))
		   (zero_extend:SI (match_operand:HI 2 "register_operand")))
	  (match_operand:SI 3 "register_operand")))]
  "TARGET_ARCV_RHX100
   && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
{
  /* Widen both multiplicands to SImode first.  */
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (SImode);
  emit_insn (gen_zero_extendhisi2 (tmp0, operands[1]));
  emit_insn (gen_zero_extendhisi2 (tmp1, operands[2]));

  /* operand 0 = tmp0 * tmp1 + operand 3, as one insn.  */
  emit_insn (gen_madd_fused (operands[0], tmp0, tmp1, operands[3]));
  DONE;
})
45304587

45314588
(define_expand "msubhisi4"
45324589
[(set (match_operand:SI 0 "register_operand")
@@ -4537,6 +4594,80 @@
45374594
"TARGET_XTHEADMAC"
45384595
)
45394596

4597+
;; SImode multiply-add, operand 0 = operand 1 * operand 2 + operand 3, for
;; RHX-100 on 32-bit targets.  It is kept as a single insn (output "#")
;; until after reload and is then split into a multiply immediately
;; followed by the add that consumes the product.
;;
;; Alternative 0: the destination is early-clobbered, so it is distinct
;; from every input and can hold the product.
;; Alternative 1: the accumulator is tied to the destination, so the
;; product has to go through the scratch register (operand 4).
(define_insn_and_split "madd_fused"
  [(set (match_operand:SI 0 "register_operand" "=&r,r")
	(plus:SI
	  (mult:SI (match_operand:SI 1 "register_operand" "r,r")
		   (match_operand:SI 2 "register_operand" "r,r"))
	  (match_operand:SI 3 "register_operand" "r,?0")))
   (clobber (match_scratch:SI 4 "=&r,&r"))]
  "TARGET_ARCV_RHX100
   && !TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Writing the product into the destination would destroy the
     accumulator when both live in the same register.  */
  bool acc_in_dest = REGNO (operands[0]) == REGNO (operands[3]);
  rtx product = acc_in_dest ? operands[4] : operands[0];

  emit_insn (gen_mulsi3 (product, operands[1], operands[2]));

  /* Same source operand order as before in each case.  */
  if (acc_in_dest)
    emit_insn (gen_addsi3 (operands[0], operands[3], product));
  else
    emit_insn (gen_addsi3 (operands[0], product, operands[3]));
  DONE;
}
  [(set_attr "type" "imul_fused")])
4623+
4624+
;; RV64 variant of madd_fused: the SImode multiply-add result is
;; sign-extended into a DImode destination.  It is kept as a single insn
;; (output "#") until after reload and is then split into a
;; mulsi3_extended insn immediately followed by an addsi3_extended insn.
;;
;; The split emits insns that produce a sign-extended DImode result, so:
;;  * the pattern is restricted to TARGET_64BIT;
;;  * the scratch that receives the intermediate product is DImode;
;;  * wherever a DImode register is fed back in as an SImode source it is
;;    narrowed with gen_lowpart.
;;
;; Alternative 0: the destination is early-clobbered, so it is distinct
;; from every input and can hold the product.
;; Alternative 1: the accumulator is tied to the destination, so the
;; product has to go through the scratch register (operand 4).
(define_insn_and_split "madd_fused_extended"
  [(set (match_operand:DI 0 "register_operand" "=&r,r")
	(sign_extend:DI
	  (plus:SI
	    (mult:SI (match_operand:SI 1 "register_operand" "r,r")
		     (match_operand:SI 2 "register_operand" "r,r"))
	    (match_operand:SI 3 "register_operand" "r,?0"))))
   (clobber (match_scratch:DI 4 "=&r,&r"))]
  "TARGET_ARCV_RHX100
   && TARGET_64BIT && (TARGET_ZMMUL || TARGET_MUL)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (REGNO (operands[0]) == REGNO (operands[3]))
    {
      /* The accumulator shares the destination register: compute the
	 product in the scratch so the accumulator survives the multiply.  */
      emit_insn (gen_mulsi3_extended (operands[4], operands[1], operands[2]));
      emit_insn (gen_addsi3_extended (operands[0], operands[3],
				      gen_lowpart (SImode, operands[4])));
    }
  else
    {
      /* The destination is free to hold the product.  */
      emit_insn (gen_mulsi3_extended (operands[0], operands[1], operands[2]));
      emit_insn (gen_addsi3_extended (operands[0],
				      gen_lowpart (SImode, operands[0]),
				      operands[3]));
    }
  DONE;
}
  [(set_attr "type" "imul_fused")])
4651+
4652+
;; Zero-extraction of a constant bit-field for RHX-100 on 32-bit targets.
;; Operand 2 is the field width and operand 3 the position of its lowest
;; bit.  Single-bit fields are left alone when Zbs is available.
;;
;; The insn stays whole (output "#") until after reload and is then split
;; into a left shift that discards the bits above the field, followed by a
;; logical right shift that discards the bits below it:
;;   left shift count  = BITS_PER_WORD - (position + width)
;;   right shift count = BITS_PER_WORD - width
;; E.g. width 14 at position 10 becomes a left shift by 8 and a logical
;; right shift by 18.
(define_insn_and_split "*zero_extract_fused"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extract:SI (match_operand:SI 1 "register_operand" "r")
			 (match_operand 2 "const_int_operand")
			 (match_operand 3 "const_int_operand")))]
  "TARGET_ARCV_RHX100 && !TARGET_64BIT
   && (INTVAL (operands[2]) > 1 || !TARGET_ZBS)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2)))
   (set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 3)))]
{
  int width = INTVAL (operands[2]);
  int top = INTVAL (operands[3]) + width;

  /* Reuse operands 2 and 3 as the two shift counts of the split.  */
  operands[2] = GEN_INT (BITS_PER_WORD - top);
  operands[3] = GEN_INT (BITS_PER_WORD - width);
}
  [(set_attr "type" "alu_fused")])
4670+
45404671
;; String compare with length insn.
45414672
;; Argument 0 is the target (result)
45424673
;; Argument 1 is the source1
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* Compile-only test: at -O2 with -mtune=arc-v-rhx-100-series, the "li" that
   loads the constant 3 must be emitted on the line directly before the
   "ble" that compares against it (see the scan-assembler pattern below).
   NOTE(review): f has no return statement on the x > 3 path; presumably
   irrelevant because only the generated assembly is inspected -- confirm.  */
/* { dg-do compile } */
2+
/* { dg-options "-O2 -mtune=arc-v-rhx-100-series" } */
3+
4+
int
5+
f (int x)
6+
{
7+
begin:
8+
if (x <= 3)
9+
goto begin;
10+
}
11+
12+
/* { dg-final { scan-assembler "\\sli\\sa5,3\n\\sble\\sa0,a5,.L\[0-9\]+\n" } } */
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* Compile-only test for rv32im with -mtune=arc-v-rhx-100-series: each of
   the two multiplies in f must be followed, on the very next line, by the
   add that consumes its product (mul/add, mul/add), as spelled out by the
   scan-assembler pattern below.  */
/* { dg-do compile } */
2+
/* { dg-require-effective-target rv32 } */
3+
/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
4+
/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im -mabi=ilp32" } */
5+
6+
int
7+
f (int x, int y, int z, int v, int w)
8+
{
9+
return x + y * z + v * w;
10+
}
11+
12+
/* { dg-final { scan-assembler {\smul\s([ast][0-9]+),a1,a2\n\sadd\s\1,\1,a0\n\smul\sa0,a3,a4\n\sadd\sa0,a0,\1\n} } } */
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/* Compile-only test for rv32im_zbs with -mtune=arc-v-rhx-100-series.
   bit_extract (x, start, amt) yields the amt-bit field of x whose lowest
   bit is at position start.  The scan-assembler pattern below expects the
   14-bit field at bit 10 to be extracted with an adjacent slli (by 8) /
   srli (by 18) pair, and the single-bit field at bit 1 with bexti.  */
/* { dg-do compile } */
2+
/* { dg-require-effective-target rv32 } */
3+
/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" "-Oz" "-Os" } } */
4+
/* { dg-options "-mtune=arc-v-rhx-100-series -march=rv32im_zbs -mabi=ilp32" } */
5+
6+
#define bit_extract(x,start,amt) (((x)>>(start)) & (~(0xffffffff << (amt))))
7+
8+
int
9+
f (int x)
10+
{
11+
return bit_extract(x,10,14) + bit_extract(x,1,1);
12+
}
13+
14+
/* { dg-final { scan-assembler {\sslli\s([ast][0-9]+),a0,8\n\ssrli\s([ast][0-9]+),\1,18\n\sbexti\sa0,a0,1.*\n\sadd\sa0,\2,a0.*\n} } } */

0 commit comments

Comments
 (0)