Skip to content

Commit 24b39e3

Browse files
committed
initial fix for P10019563-77915
Fix to ensure that the hs5x target produces correct code despite lacking 64-bit load/store instructions. I made use of the split mechanism enabled by the "#" output template, which forces the compiler to look for a define_split. The split pattern then takes a single 64-bit memory move and breaks it into two 32-bit moves using gen_lowpart and gen_highpart. I then tried to run DejaGnu and found failures that were not due to my patch.
1 parent 544ca30 commit 24b39e3

5 files changed

Lines changed: 296 additions & 21 deletions

File tree

gcc/config/arc64/arc64.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,7 +1680,8 @@ arc64_get_effective_mode_for_address_scaling (const machine_mode mode)
16801680
{
16811681
if (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2))
16821682
{
1683-
gcc_assert (DOUBLE_LOAD_STORE);
1683+
if (!TARGET_LL64)
1684+
gcc_assert (DOUBLE_LOAD_STORE);
16841685
return Pmode;
16851686
}
16861687
return mode;
@@ -6117,7 +6118,7 @@ arc64_split_double_move_p (rtx *operands, machine_mode mode)
61176118
return false;
61186119
}
61196120

6120-
/* Evereything else is going for a split. */
6121+
/* Everything else is going for a split. */
61216122
return true;
61226123
}
61236124

@@ -6135,8 +6136,7 @@ arc64_split_double_move (rtx *operands, machine_mode mode)
61356136
machine_mode mvmode = smallest_int_mode_for_size (BITS_PER_WORD).require ();
61366137

61376138
/* Maximum size handled is twice UNITS_PER_WORD. */
6138-
gcc_assert (iregs <= 2);
6139-
6139+
gcc_assert(iregs <= 2);
61406140
/* This procedure works as long as the width of the fp regs is the
61416141
same as the width of r regs. */
61426142
if (FLOAT_MODE_P (mode))

gcc/config/arc64/arc64.md

Lines changed: 224 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -865,8 +865,8 @@ xorl"
865865

866866
(define_insn "*arc64_movsi"
867867
[(set
868-
(match_operand:SI 0 "arc64_dest_operand" "=qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst,RBLNKq,r, Ustk<,Ustor")
869-
(match_operand:SI 1 "arc64_movl_operand" "qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, Ustk>,m,RBLNKq, r"))
868+
(match_operand:SI 0 "arc64_dest_operand" "=qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst,RBLNKq,r, Ustk<,Ustor,w,r,w")
869+
(match_operand:SI 1 "arc64_movl_operand" "qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, Ustk>,m,RBLNKq, r,r,w,w"))
870870
]
871871
"register_operand (operands[0], SImode)
872872
|| register_operand (operands[1], SImode)
@@ -889,9 +889,12 @@ xorl"
889889
pop_s\\t%0
890890
ld%U1\\t%0,%1
891891
push_s\\t%1
892-
st%U0\\t%1,%0"
893-
[(set_attr "type" "move,move,move,move,add,move,move,ld,st,st,st,ld,ld,st,st")
894-
(set_attr "length" "2,4,2,4,8,6,8,2,2,*,8,2,*,2,*")]
892+
st%U0\\t%1,%0
893+
fmov\\t%0,%1
894+
fmov\\t%0,%1
895+
fmov\\t%0,%1"
896+
[(set_attr "type" "move,move,move,move,add,move,move,ld,st,st,st,ld,ld,st,st,fmov,fmov,fmov")
897+
(set_attr "length" "2,4,2,4,8,6,8,2,2,*,8,2,*,2,*,4,4,4")]
895898
)
896899

897900
(define_insn "*mov<mode>_cmp0"
@@ -927,24 +930,79 @@ xorl"
927930
;; For a fp move I use FSMOV.<cc> instruction. However, we can also
928931
;; use FSSGNJ.
929932
;; FIXME! add short instruction selection
933+
;;(define_insn "*mov<mode>_hardfp"
934+
;; [(set (match_operand:GPF_HF 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*r,*Ustor")
935+
;; (match_operand:GPF_HF 1 "arc64_movf_operand" "w,Ufpms, w,*w,*r,*r,*G,*m, *r"))]
936+
;; "ARC64_HAS_FP_BASE
937+
;; && (register_operand (operands[0], <MODE>mode)
938+
;; || register_operand (operands[1], <MODE>mode))"
939+
;; "@
940+
;; f<sfxtab>mov\\t%0,%1
941+
;; fld<sizef>%U1\\t%0,%1
942+
;; fst<sizef>%U0\\t%1,%0
943+
;; fmv<fmvftab>2<fmvitab>\\t%0,%1
944+
;; fmv<fmvitab>2<fmvftab>\\t%0,%1
945+
;; mov<mcctab>\\t%0,%1
946+
;; mov<mcctab>\\t%0,%1
947+
;; ld<slfp>%U1\\t%0,%1
948+
;; st<slfp>%U0\\t%1,%0"
949+
;; [(set_attr "type" "fmov,ld,st,move,move,move,move,ld,st")
950+
;; (set_attr "length" "4,*,*,4,4,4,8,*,*")])
951+
930952
(define_insn "*mov<mode>_hardfp"
931953
[(set (match_operand:GPF_HF 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*r,*Ustor")
932954
(match_operand:GPF_HF 1 "arc64_movf_operand" "w,Ufpms, w,*w,*r,*r,*G,*m, *r"))]
933955
"ARC64_HAS_FP_BASE
934956
&& (register_operand (operands[0], <MODE>mode)
935957
|| register_operand (operands[1], <MODE>mode))"
936-
"@
937-
f<sfxtab>mov\\t%0,%1
938-
fld<sizef>%U1\\t%0,%1
939-
fst<sizef>%U0\\t%1,%0
940-
fmv<fmvftab>2<fmvitab>\\t%0,%1
941-
fmv<fmvitab>2<fmvftab>\\t%0,%1
942-
mov<mcctab>\\t%0,%1
943-
mov<mcctab>\\t%0,%1
944-
ld<slfp>%U1\\t%0,%1
945-
st<slfp>%U0\\t%1,%0"
958+
{
959+
switch (which_alternative)
960+
{
961+
case 0:
962+
return "f<sfxtab>mov\\t%0,%1";
963+
case 1:
964+
return "fld<sizef>%U1\\t%0,%1";
965+
case 2:
966+
return "fst<sizef>%U0\\t%1,%0";
967+
case 3:
968+
return "fmv<fmvftab>2<fmvitab>\\t%0,%1";
969+
case 4:
970+
return "fmv<fmvitab>2<fmvftab>\\t%0,%1";
971+
case 5:
972+
return "mov<mcctab>\\t%0,%1";
973+
case 6:
974+
return "mov<mcctab>\\t%0,%1";
975+
case 7:
976+
/* opt 7 (*r, *m) - This is the Load.
977+
It moves 64 bits from Memory into a general purpose pair (e.g. r0:r1).
978+
*/
979+
case 8:
980+
/* option 8 (*Ustor, *r) - is a Store.
981+
It moves 64 bits from a GPR pair back into Memory.
982+
*/
983+
if (GET_MODE_SIZE (<MODE>mode) == 8 && !TARGET_LL64)
984+
/* for hs5x with an FP unit that supports double-precision FP,
985+
we trigger the splitter*/
986+
return "#";
987+
return (which_alternative == 7) ?
988+
"ld<slfp>%U1\\t%0,%1" :
989+
"st<slfp>%U0\\t%1,%0";
990+
default: gcc_unreachable ();
991+
}
992+
}
946993
[(set_attr "type" "fmov,ld,st,move,move,move,move,ld,st")
947-
(set_attr "length" "4,*,*,4,4,4,8,*,*")])
994+
(set_attr "length" "4,*,*,4,4,4,8,*,*")
995+
;; disable fpu to/from Memory 64-bit on hs5x.
996+
;; This forces the compiler to use general purpose registers as a buffer
997+
;; because the alternatives 1 and 2 are disabled.
998+
(set (attr "enabled")
999+
(if_then_else
1000+
(and (match_test "GET_MODE_SIZE (<MODE>mode) == 8")
1001+
(not (match_test "TARGET_LL64"))
1002+
(ior (eq_attr "alternative" "1")
1003+
(eq_attr "alternative" "2")))
1004+
(const_string "no")
1005+
(const_string "yes")))])
9481006

9491007
;; move 128bit
9501008
(define_insn_and_split "*mov<mode>_insn"
@@ -968,12 +1026,127 @@ xorl"
9681026
}
9691027
[(set_attr "type" "move,ld,ld,st,st")
9701028
(set_attr "length" "8,2,*,2,*")])
1029+
1030+
;;(define_insn "*arc64_movdi"
1031+
;; [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m")
1032+
;; (match_operand:DI 1 "general_operand" "r,m,r"))]
1033+
;; "register_operand (operands[0], DImode) || register_operand (operands[1], DImode)"
1034+
;; "@
1035+
;; mov %0,%1
1036+
;; #
1037+
;; #"
1038+
;; [(set_attr "type" "move,ld,st")
1039+
;; (set (attr "enabled")
1040+
;; (cond [(eq_attr "type" "move" )
1041+
;; (const_string "yes")
1042+
;; (match_test "TARGET_LL64")
1043+
;; (const_string "yes") ]
1044+
;; (const_string "no")))
1045+
;; (set_attr "length" "4,8,8")])
1046+
1047+
;; the split applies to targets where there is no 64-bit ld/st
1048+
;; e.g. -mfpu=fpud -mcpu=hs5x
1049+
1050+
(define_split
1051+
[(set (match_operand:GPF_HF 0 "nonimmediate_operand" "")
1052+
(match_operand:GPF_HF 1 "general_operand" ""))]
1053+
"reload_completed
1054+
&& GET_MODE_SIZE (<MODE>mode) == 8
1055+
&& !TARGET_LL64"
1056+
[(const_int 0)]
1057+
{
1058+
rtx dest = operands[0];
1059+
rtx src = operands[1];
1060+
rtx hi_dest, hi_src, lo_dest, lo_src;
1061+
1062+
/*extract source lo/hi */
1063+
if (MEM_P (src))
1064+
{
1065+
rtx addr = XEXP (src, 0);
1066+
if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
1067+
addr = XEXP (addr, 0);
1068+
1069+
rtx clean_mem = change_address (src, SImode, addr);
1070+
lo_src = adjust_address (clean_mem, SImode, 0);
1071+
hi_src = adjust_address (clean_mem, SImode, 4);
1072+
}
1073+
else if (REG_P (src))
1074+
{
1075+
/* Manually force SImode for source registers */
1076+
lo_src = gen_rtx_REG (SImode, REGNO (src));
1077+
hi_src = gen_rtx_REG (SImode, REGNO (src) + 1);
1078+
}
1079+
else
1080+
{
1081+
lo_src = operand_subword_force (src, 0, <MODE>mode);
1082+
hi_src = operand_subword_force (src, 1, <MODE>mode);
1083+
}
1084+
1085+
/*extract destination lo/hi */
1086+
if (MEM_P (dest))
1087+
{
1088+
rtx addr = XEXP (dest, 0);
1089+
if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
1090+
addr = XEXP (addr, 0);
1091+
1092+
rtx clean_mem = change_address (dest, SImode, addr);
1093+
lo_dest = adjust_address (clean_mem, SImode, 0);
1094+
hi_dest = adjust_address (clean_mem, SImode, 4);
1095+
}
1096+
else if (REG_P (dest))
1097+
{
1098+
/* Manually force SImode for destination registers */
1099+
lo_dest = gen_rtx_REG (SImode, REGNO (dest));
1100+
hi_dest = gen_rtx_REG (SImode, REGNO (dest) + 1);
1101+
}
1102+
else
1103+
{
1104+
lo_dest = operand_subword_force (dest, 0, <MODE>mode);
1105+
hi_dest = operand_subword_force (dest, 1, <MODE>mode);
1106+
}
1107+
1108+
/* preincr - r1 has DImode */
1109+
if (MEM_P (src) && GET_CODE (XEXP (src, 0)) == PRE_INC)
1110+
emit_insn (gen_adddi3 (XEXP (XEXP (src, 0), 0), XEXP (XEXP (src, 0), 0), GEN_INT (8)));
1111+
if (MEM_P (dest) && GET_CODE (XEXP (dest, 0)) == PRE_INC)
1112+
emit_insn (gen_adddi3 (XEXP (XEXP (dest, 0), 0), XEXP (XEXP (dest, 0), 0), GEN_INT (8)));
1113+
1114+
/* emit SI moves - both are now SImode */
1115+
if (reg_overlap_mentioned_p (lo_dest, hi_src))
1116+
{ /*move from r0:r1 into r1:r2
1117+
in this case:
1118+
lo_src = r0
1119+
hi_src = r1
1120+
lo_dest = r1
1121+
hi_dest = r2 so we follow the following order*/
1122+
emit_move_insn (hi_dest, hi_src);
1123+
emit_move_insn (lo_dest, lo_src);
1124+
}
1125+
else
1126+
{ /*move r1:r2 to r0:r1
1127+
lo_src = r1
1128+
hi_src = r2
1129+
lo_dest = r0
1130+
hi_dest = r1*/
1131+
emit_move_insn (lo_dest, lo_src);
1132+
emit_move_insn (hi_dest, hi_src);
1133+
}
1134+
1135+
/* emit post increments */
1136+
if (MEM_P (src) && GET_CODE (XEXP (src, 0)) == POST_INC)
1137+
emit_insn (gen_adddi3 (XEXP (XEXP (src, 0), 0), XEXP (XEXP (src, 0), 0), GEN_INT (8)));
1138+
if (MEM_P (dest) && GET_CODE (XEXP (dest, 0)) == POST_INC)
1139+
emit_insn (gen_adddi3 (XEXP (XEXP (dest, 0), 0), XEXP (XEXP (dest, 0), 0), GEN_INT (8)));
1140+
1141+
DONE;
1142+
})
1143+
9711144
;;
9721145
;; Short insns: movl_s g,h; movl_s b,u8
9731146
;; Long insns: movl, stl, ldl
9741147
;;
9751148
(define_insn "*arc64_movdi"
976-
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
1149+
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
9771150
(match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,BCLRX,BSETX,r,S12S0,S32S0SymMV,U38S0,SyPic,S32S0,Ustk>,m, r, r"))]
9781151
"TARGET_64BIT
9791152
&& (register_operand (operands[0], DImode)
@@ -999,6 +1172,40 @@ xorl"
9991172
(set_attr "length" "2,2,8,8,4,4,8,8,8,8,2,*,2,*")]
10001173
)
10011174

1175+
(define_insn "*arc64_movdi"
1176+
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
1177+
(match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,BCLRX,BSETX,r,S12S0,S32S0SymMV,U38S0,SyPic,S32S0,Ustk>,m, r, r"))]
1178+
"TARGET_64BIT
1179+
&& (register_operand (operands[0], DImode)
1180+
|| register_operand (operands[1], DImode)
1181+
|| (CONST_INT_P (operands[1])
1182+
&& satisfies_constraint_Ucnst (operands[0])))"
1183+
"@
1184+
movl_s\\t%0,%1
1185+
movl_s\\t%0,%1
1186+
bclrl\\t%0,%q1,%t1
1187+
bsetl\\t%0,%L1,%T1
1188+
movl\\t%0,%1
1189+
movl\\t%0,%1
1190+
movl\\t%0,%1
1191+
vpack2wl\\t%0,%L1,%H1
1192+
addl\\t%0,pcl,%1
1193+
#
1194+
popl_s\\t%0
1195+
#
1196+
pushl_s\\t%1
1197+
#"
1198+
[(set_attr "type" "move,move,bclr,bset,move,move,move,vpack,addl,st,ld,ld,st,st")
1199+
(set_attr "length" "2,2,8,8,4,4,8,8,8,8,2,*,2,*")
1200+
(set (attr "enabled")
1201+
(cond [(match_test "TARGET_LL64")
1202+
(const_string "yes")
1203+
1204+
(eq_attr "type" "ld,st")
1205+
(const_string "no")]
1206+
(const_string "yes")))
1207+
])
1208+
10021209
;; Hi/Low moves for constant and symbol loading.
10031210

10041211
(define_insn "*movdi_high"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* { dg-do run } */
2+
3+
#include <stdlib.h>
4+
5+
int test0(){
6+
float a, b;
7+
double c, d;
8+
a = (float)rand()/(float)(RAND_MAX);
9+
b = (float)rand()/(float)(RAND_MAX);
10+
c = (double)rand()/(double)(RAND_MAX);
11+
d = (double)rand()/(double)(RAND_MAX);
12+
return (int)((double)a + (double)b + c + d);
13+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* { dg-do run } */
2+
/* { dg-options "-O2 -mcpu=hs5x -mfpu=fpud" } */
3+
/* Standard success is return 0 */
4+
volatile double a = 10.0;
5+
volatile double b = 5.0;
6+
7+
int main() {
8+
double res = a - b;
9+
if (res == 5.0)
10+
return 0;
11+
return 1;
12+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/* { dg-do run } */
2+
/* { dg-additional-options "-ftree-slp-vectorize" } */
3+
4+
typedef double aligned_double __attribute__((aligned(2*sizeof(double))));
5+
6+
void __attribute__((noipa))
7+
bar (int aligned, double *p)
8+
{
9+
if (aligned)
10+
{
11+
*(aligned_double *)p = 3.;
12+
p[1] = 4.;
13+
}
14+
else
15+
{
16+
p[2] = 0.;
17+
p[3] = 1.;
18+
}
19+
}
20+
21+
void __attribute__((noipa))
22+
foo (int i)
23+
{
24+
if (i)
25+
__builtin_exit (0);
26+
}
27+
void __attribute__((noipa))
28+
baz (double *p)
29+
{
30+
p[0] = 0.;
31+
p[1] = 1.;
32+
foo (1);
33+
*(aligned_double *)p = 3.;
34+
p[1] = 4.;
35+
}
36+
37+
double x[8] __attribute__((aligned(2*sizeof (double))));
38+
int main()
39+
{
40+
bar (0, &x[1]);
41+
baz (&x[1]);
42+
return 0;
43+
}

0 commit comments

Comments
 (0)