Skip to content

Commit 6643349

Browse files
committed
arcv: RPX-100 add mul-mem bypasses
1 parent 9c40215 commit 6643349

2 files changed

Lines changed: 90 additions & 3 deletions

File tree

gcc/config/riscv/arcv-rpx100.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
condmove,mvpair,zicond,cpop,clmul"))
4343
"((arcv_rpx100_issueA_fuse0 + arcv_rpx100_ALU_A_fuse0_early) | (arcv_rpx100_issueA_fuse1 + arcv_rpx100_ALU_A_fuse1_early)) | ((arcv_rpx100_issueB_fuse0 + arcv_rpx100_ALU_B_fuse0_early) | (arcv_rpx100_issueB_fuse1 + arcv_rpx100_ALU_B_fuse1_early))")
4444

45-
(define_insn_reservation "arcv_rpx100_imul_fused" 4
45+
;; Reservation for insns of type "imul_fused" when tuning for arcv_rpx100,
;; with a default latency of 4.  The first cycle reserves both issue-A fuse
;; slots, both ALU_A early units and MPY together; the following 3 cycles
;; reserve nothing.
(define_insn_reservation "arcv_rpx100_mpy32_fused" 4
4646
(and (eq_attr "tune" "arcv_rpx100")
4747
(eq_attr "type" "imul_fused"))
4848
"(arcv_rpx100_issueA_fuse0 + arcv_rpx100_issueA_fuse1 + arcv_rpx100_ALU_A_fuse0_early + arcv_rpx100_ALU_A_fuse1_early + arcv_rpx100_MPY), nothing*3")
@@ -127,5 +127,12 @@
127127
(define_bypass 2 "arcv_rpx100_load_insn" "arcv_rpx100_load_insn")
128128
(define_bypass 1 "arcv_rpx100_load_insn" "arcv_rpx100_div_insn")
129129

130-
(define_bypass 3 "arcv_rpx100_mpy32_insn" "arcv_rpx100_mpy*_insn")
131-
(define_bypass 3 "arcv_rpx100_mpy32_insn" "arcv_rpx100_div_insn")
130+
;; Multiply-result bypasses.  The number in each define_bypass is the latency
;; used between the producer reservation(s) and the consumer reservation(s):
;;   3       mpy32*  -> mpy*, div
;;   1       mpy*    -> store, only when riscv_store_data_bypass_p holds
;;   7/9/10  mpy32* / mpy64l / mpy64h -> load and (unguarded) store
;; The comments in the scheduling test added by the same commit describe the
;; 1-cycle case as a "STORE data" delay and the 7/9/10-cycle cases as
;; "LOAD/STORE address" delays.
;; NOTE(review): the producer pattern "arcv_rpx100_mpy32*_insn" ends in
;; "_insn", so it would not match a reservation named
;; "arcv_rpx100_mpy32_fused"; confirm whether the fused multiply is meant to
;; receive these bypasses.
(define_bypass 3 "arcv_rpx100_mpy32*_insn" "arcv_rpx100_mpy*_insn")
131+
(define_bypass 3 "arcv_rpx100_mpy32*_insn" "arcv_rpx100_div_insn")
132+
(define_bypass 1 "arcv_rpx100_mpy*_insn" "arcv_rpx100_store_insn" "riscv_store_data_bypass_p")
133+
(define_bypass 7 "arcv_rpx100_mpy32*_insn" "arcv_rpx100_load_insn")
134+
(define_bypass 7 "arcv_rpx100_mpy32*_insn" "arcv_rpx100_store_insn")
135+
(define_bypass 9 "arcv_rpx100_mpy64l_insn" "arcv_rpx100_load_insn")
136+
(define_bypass 9 "arcv_rpx100_mpy64l_insn" "arcv_rpx100_store_insn")
137+
(define_bypass 10 "arcv_rpx100_mpy64h_insn" "arcv_rpx100_load_insn")
138+
(define_bypass 10 "arcv_rpx100_mpy64h_insn" "arcv_rpx100_store_insn")
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/* { dg-do compile } */
2+
/* { dg-require-effective-target rv64 } */
3+
/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Oz" "-Os" "-Og" } } */
4+
/* { dg-options "-mtune=arc-v-rpx-100-series -march=rv64imd -mabi=lp64d -fdump-rtl-sched2 -Wno-int-to-pointer-cast" } */
5+
6+
/* Multiply the ints loaded from a[1] and a[0] and store the product to a[2],
   so the multiply result is the data operand of the store.  The scan for
   "Function foo" at the end of the file labels this the MULW to STORE data
   case (expected delay: 1 cycle).  */
void
7+
foo (int *a)
8+
{
9+
a[2] = a[1] * a[0];
10+
11+
}
12+
13+
/* Widen b[1] and b[0] to __int128, multiply, and store the product shifted
   right by 64 (its upper half) to b[2]; the multiply result is the store's
   data operand.  The scan for "Function foo_high" labels this the MULH to
   STORE data case (expected delay: 1 cycle).  */
void foo_high(long *b) {
14+
b[2] = ((__int128)b[1] * (__int128)b[0]) >> 64;
15+
}
16+
17+
/* Multiply the longs loaded from b[1] and b[0] and store the product to b[2];
   the multiply result is the store's data operand.  The scan for
   "Function foo_low" labels this the MUL to STORE data case (expected delay:
   1 cycle).  */
void foo_low(long *b) {
17+
b[2] = b[1] * b[0];
18+
}
20+
21+
/* Turn the int product a[1] * a[2] into a pointer and store 0 through it, so
   the multiply result is the address of the store rather than its data.  The
   scan for "Function bar" labels this the MULW to STORE address case
   (expected delay: 7 cycles).  The int-to-pointer cast is presumably why the
   options line passes -Wno-int-to-pointer-cast.  */
void
22+
bar (int *a)
23+
{
24+
int *addr = (int *) (a[1] * a[2]);
25+
*addr = 0;
26+
}
27+
28+
/* Turn the long product a[1] * a[2] into a pointer and store 0 through it, so
   the multiply result is the address of the store.  The scan for
   "Function bar_low" labels this the MUL to STORE address case (expected
   delay: 9 cycles).  */
void
29+
bar_low (long *a)
30+
{
31+
long *addr = (long *) (a[1] * a[2]);
32+
*addr = 0;
33+
}
34+
35+
/* Form the __int128 product of a[1] and a[2], shift it right by 64, use that
   value as a pointer and store 0 through it, so the high-half multiply result
   is the address of the store.  The scan for "Function bar_high" labels this
   the MULH to STORE address case (expected delay: 10 cycles).  */
void
36+
bar_high (long *a)
37+
{
38+
long *addr = (long *) (((__int128) a[1] * (__int128) a[2]) >> 64);
39+
*addr = 0;
40+
}
41+
42+
/* Turn the int product a[1] * a[2] into a pointer and return the int loaded
   through it, so the multiply result is the address of a load.  The scan for
   "Function baz" labels this the MULW to LOAD address case (expected delay:
   7 cycles).  */
int
43+
baz (int *a)
44+
{
45+
int *addr = (int *) (a[1] * a[2]);
46+
return *addr;
47+
}
48+
49+
/* Turn the long product a[1] * a[2] into a pointer and return the long loaded
   through it, so the multiply result is the address of a load.  The scan for
   "Function baz_low" labels this the MUL to LOAD address case (expected
   delay: 9 cycles).  */
long
50+
baz_low (long *a)
51+
{
52+
long *addr = (long *) (a[1] * a[2]);
53+
return *addr;
54+
}
55+
56+
/* Form the __int128 product of a[1] and a[2], shift it right by 64, use that
   value as a pointer and return the long loaded through it, so the high-half
   multiply result is the address of a load.  The scan for
   "Function baz_high" labels this the MULH to LOAD address case (expected
   delay: 10 cycles).  */
long baz_high(long *a) {
57+
long *addr = (long *)(((__int128) a[1] * (__int128) a[2]) >> 64);
58+
return *addr;
59+
}
60+
61+
/* Verify MULW to STORE data delay is 1 cycle. */
62+
/* { dg-final { scan-rtl-dump "(?s);; Function foo.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+2-->.*\\\[.*0x8\\\]=" "sched2" } } */
63+
/* Verify MULW to STORE address delay is 7 cycles. */
64+
/* { dg-final { scan-rtl-dump "(?s);; Function bar.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+8-->.*\\\[.*\\\]=0" "sched2" } } */
65+
/* Verify MULW to LOAD address delay is 7 cycles. */
66+
/* { dg-final { scan-rtl-dump "(?s);; Function baz.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+8-->.*=.*\\\[.*\\\]" "sched2" } } */
67+
68+
/* Verify MUL to STORE data delay is 1 cycle. */
69+
/* { dg-final { scan-rtl-dump "(?s);; Function foo_low.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+2-->.*\\\[.*0x10\\\]=" "sched2" } } */
70+
/* Verify MUL to STORE address delay is 9 cycles. */
71+
/* { dg-final { scan-rtl-dump "(?s);; Function bar_low.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+10-->.*\\\[.*\\\]=0" "sched2" } } */
72+
/* Verify MUL to LOAD address delay is 9 cycles. */
73+
/* { dg-final { scan-rtl-dump "(?s);; Function baz_low.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+10-->.*=.*\\\[.*\\\]" "sched2" } } */
74+
75+
/* Verify MULH to STORE data delay is 1 cycle. */
76+
/* { dg-final { scan-rtl-dump "(?s);; Function foo_high.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+2-->.*\\\[.*0x10\\\]=" "sched2" } } */
77+
/* Verify MULH to STORE address delay is 10 cycles. */
78+
/* { dg-final { scan-rtl-dump "(?s);; Function bar_high.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+11-->.*\\\[.*\\\]=0" "sched2" } } */
79+
/* Verify MULH to LOAD address delay is 10 cycles. */
80+
/* { dg-final { scan-rtl-dump "(?s);; Function baz_high.*?;;\\s+1-->.*i\\s+\\d+\\s+.*\\*.*:\\(arcv.*?;;\\s+11-->.*=.*\\\[.*\\\]" "sched2" } } */

0 commit comments

Comments
 (0)