Skip to content

Commit f62246a

Browse files
committed
fix for P10019563-77915
Fix to ensure that the hs5x target produces correct code despite lacking 64-bit load/store instructions. I made use of the split mechanism enabled by the "#" output template, which forces the compiler to use pairs of general-purpose registers and then apply the define_split. The split pattern takes a single 64-bit memory move and breaks it into two 32-bit moves using gen_lowpart and gen_highpart. Below are three other possible options: 1. It would have been possible to use a scratch register via match_scratch. However, I observed that this can lead to reload failures. 2. Alternatively, I could have reserved a register by declaring it as a fixed register, but then this register (in this case a pair of registers) would not be usable for other purposes. 3. There is yet another option that makes use of secondary reload. But in general the secondary reload mechanism introduces spills and restores, which hurt performance.
1 parent 544ca30 commit f62246a

5 files changed

Lines changed: 290 additions & 25 deletions

File tree

gcc/config/arc64/arc64.cc

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,6 @@ frame_save_reg (rtx reg, HOST_WIDE_INT offset, HOST_WIDE_INT displacement)
648648
}
649649

650650
/* ARC prologue saving regs routine. */
651-
652651
static HOST_WIDE_INT
653652
arc64_save_callee_saves (void)
654653
{
@@ -674,6 +673,26 @@ arc64_save_callee_saves (void)
674673
{
675674
save_mode = ARC64_HAS_FPUD ? DFmode : SFmode;
676675
disp = UNITS_PER_WORD;
676+
677+
/* TODO: still check tests where the stack is manually written. */
678+
/* If the mode is 64-bit DFmode but the hardware
679+
does not support 64-bit memory operations (!TARGET_LL64),
680+
decompose the save into two sequential 4-byte SFmode saves. */
681+
if (save_mode == DFmode && !TARGET_LL64)
682+
{
683+
/* Save the lower 4-byte sub-register piece (regno) */
684+
rtx reg_lo = gen_rtx_REG (SFmode, regno);
685+
frame_allocated += frame_save_reg (reg_lo, offset, disp);
686+
offset = 0;
687+
688+
/* Save the upper 4-byte sub-register piece (regno + 1).
689+
The displacement or offset is updated implicitly by
690+
the frame tracker inside frame_save_reg. */
691+
rtx reg_hi = gen_rtx_REG (SFmode, regno + 1);
692+
frame_allocated += frame_save_reg (reg_hi, offset, disp);
693+
694+
continue; /* Skip the original single-register save logic */
695+
}
677696
}
678697
else if (regno >= 1
679698
&& (((regno - 1) % 2) == 0)
@@ -1680,7 +1699,8 @@ arc64_get_effective_mode_for_address_scaling (const machine_mode mode)
16801699
{
16811700
if (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2))
16821701
{
1683-
gcc_assert (DOUBLE_LOAD_STORE);
1702+
if (!TARGET_LL64)
1703+
gcc_assert (DOUBLE_LOAD_STORE);
16841704
return Pmode;
16851705
}
16861706
return mode;
@@ -5546,9 +5566,19 @@ arc64_expand_prologue (void)
55465566

55475567
frame_allocated = frame->frame_size;
55485568

5549-
frame_allocated -= arc64_save_callee_saves ();
55505569

55515570
/* If something left, allocate. */
5571+
if (ARC_INTERRUPT_P (cfun->machine->fn_type) && !TARGET_LL64)
5572+
{
5573+
if (frame_allocated > 0)
5574+
{
5575+
frame_stack_add ((HOST_WIDE_INT) 0 - frame_allocated);
5576+
frame_allocated = 0;
5577+
}
5578+
}
5579+
5580+
frame_allocated -= arc64_save_callee_saves ();
5581+
55525582
if (frame_allocated > 0)
55535583
frame_stack_add ((HOST_WIDE_INT) 0 - frame_allocated);
55545584

@@ -6117,7 +6147,7 @@ arc64_split_double_move_p (rtx *operands, machine_mode mode)
61176147
return false;
61186148
}
61196149

6120-
/* Evereything else is going for a split. */
6150+
/* Everything else is going for a split. */
61216151
return true;
61226152
}
61236153

@@ -6135,8 +6165,7 @@ arc64_split_double_move (rtx *operands, machine_mode mode)
61356165
machine_mode mvmode = smallest_int_mode_for_size (BITS_PER_WORD).require ();
61366166

61376167
/* Maximum size handled is twice UNITS_PER_WORD. */
6138-
gcc_assert (iregs <= 2);
6139-
6168+
gcc_assert(iregs <= 2);
61406169
/* This procedure works as long as the width of the fp regs is the
61416170
same as the width of r regs. */
61426171
if (FLOAT_MODE_P (mode))

gcc/config/arc64/arc64.md

Lines changed: 187 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -865,8 +865,8 @@ xorl"
865865

866866
(define_insn "*arc64_movsi"
867867
[(set
868-
(match_operand:SI 0 "arc64_dest_operand" "=qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst,RBLNKq,r, Ustk<,Ustor")
869-
(match_operand:SI 1 "arc64_movl_operand" "qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, Ustk>,m,RBLNKq, r"))
868+
(match_operand:SI 0 "arc64_dest_operand" "=w,r,w,qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst,RBLNKq,r, Ustk<,Ustor")
869+
(match_operand:SI 1 "arc64_movl_operand" "r,w,w,qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, Ustk>,m,RBLNKq, r"))
870870
]
871871
"register_operand (operands[0], SImode)
872872
|| register_operand (operands[1], SImode)
@@ -875,6 +875,9 @@ xorl"
875875
|| (CONST_INT_P (operands[1])
876876
&& satisfies_constraint_Ucnst (operands[0]))"
877877
"@
878+
mov\\t%0,%1
879+
mov\\t%0,%1
880+
mov\\t%0,%1
878881
mov_s\\t%0,%1
879882
mov\\t%0,%1
880883
mov_s\\t%0,%1
@@ -890,8 +893,8 @@ xorl"
890893
ld%U1\\t%0,%1
891894
push_s\\t%1
892895
st%U0\\t%1,%0"
893-
[(set_attr "type" "move,move,move,move,add,move,move,ld,st,st,st,ld,ld,st,st")
894-
(set_attr "length" "2,4,2,4,8,6,8,2,2,*,8,2,*,2,*")]
896+
[(set_attr "type" "fmov,fmov,fmov,move,move,move,move,add,move,move,ld,st,st,st,ld,ld,st,st")
897+
(set_attr "length" "4,4,4,2,4,2,4,8,6,8,2,2,*,8,2,*,2,*")]
895898
)
896899

897900
(define_insn "*mov<mode>_cmp0"
@@ -924,27 +927,56 @@ xorl"
924927
[(set_attr "type" "move,move,move,move,ld,st,ld,st")
925928
(set_attr "length" "2,4,6,8,2,2,*,*")])
926929

927-
;; For a fp move I use FSMOV.<cc> instruction. However, we can also
928-
;; use FSSGNJ.
929-
;; FIXME! add short instruction selection
930930
(define_insn "*mov<mode>_hardfp"
931931
[(set (match_operand:GPF_HF 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*r,*Ustor")
932932
(match_operand:GPF_HF 1 "arc64_movf_operand" "w,Ufpms, w,*w,*r,*r,*G,*m, *r"))]
933933
"ARC64_HAS_FP_BASE
934934
&& (register_operand (operands[0], <MODE>mode)
935935
|| register_operand (operands[1], <MODE>mode))"
936-
"@
937-
f<sfxtab>mov\\t%0,%1
938-
fld<sizef>%U1\\t%0,%1
939-
fst<sizef>%U0\\t%1,%0
940-
fmv<fmvftab>2<fmvitab>\\t%0,%1
941-
fmv<fmvitab>2<fmvftab>\\t%0,%1
942-
mov<mcctab>\\t%0,%1
943-
mov<mcctab>\\t%0,%1
944-
ld<slfp>%U1\\t%0,%1
945-
st<slfp>%U0\\t%1,%0"
936+
{
937+
switch (which_alternative)
938+
{
939+
case 0:
940+
return "f<sfxtab>mov\\t%0,%1";
941+
case 3:
942+
return "fmv<fmvftab>2<fmvitab>\\t%0,%1";
943+
case 4:
944+
return "fmv<fmvitab>2<fmvftab>\\t%0,%1";
945+
case 5:
946+
return "mov<mcctab>\\t%0,%1";
947+
case 6:
948+
return "mov<mcctab>\\t%0,%1";
949+
case 7:
950+
/* opt 7 (*r, *m) - This is the Load.
951+
It moves 64 bits from Memory into a general purpose pair (e.g. r0:r1).
952+
*/
953+
case 8:
954+
/* option 8 (*Ustor, *r) - is a Store.
955+
It moves 64 bits from a GPR pair back into Memory.
956+
*/
957+
if (GET_MODE_SIZE (<MODE>mode) == 8 && !TARGET_LL64)
958+
/*for hs5x with fp unit that supports double precision fp
959+
we trigger the splitter*/
960+
return "#";
961+
return (which_alternative == 7) ?
962+
"ld<slfp>%U1\\t%0,%1" :
963+
"st<slfp>%U0\\t%1,%0";
964+
default: gcc_unreachable ();
965+
}
966+
}
946967
[(set_attr "type" "fmov,ld,st,move,move,move,move,ld,st")
947-
(set_attr "length" "4,*,*,4,4,4,8,*,*")])
968+
(set_attr "length" "4,*,*,4,4,4,8,*,*")
969+
;; disable fpu to/from Memory 64 bit on hs5x.
970+
;; This forces the compiler to use general purpose registers as a buffer
971+
;; because the alternatives 1 and 2 are disabled.
972+
(set (attr "enabled")
973+
(if_then_else
974+
(and (match_test "GET_MODE_SIZE (<MODE>mode) == 8")
975+
(not (match_test "TARGET_LL64"))
976+
(ior (eq_attr "alternative" "1")
977+
(eq_attr "alternative" "2")))
978+
(const_string "no")
979+
(const_string "yes")))])
948980

949981
;; move 128bit
950982
(define_insn_and_split "*mov<mode>_insn"
@@ -968,12 +1000,114 @@ xorl"
9681000
}
9691001
[(set_attr "type" "move,ld,ld,st,st")
9701002
(set_attr "length" "8,2,*,2,*")])
1003+
1004+
;; the split applies to targets where there is no 64-bit ld/st
1005+
;; e.g. -mfpu=fpud -mcpu=hs5x
1006+
1007+
(define_split
1008+
[(set (match_operand:GPF_HF 0 "nonimmediate_operand" "")
1009+
(match_operand:GPF_HF 1 "general_operand" ""))]
1010+
"reload_completed
1011+
&& GET_MODE_SIZE (<MODE>mode) == 8
1012+
&& !TARGET_LL64"
1013+
[(const_int 0)]
1014+
{
1015+
rtx dest = operands[0];
1016+
rtx src = operands[1];
1017+
rtx hi_dest, hi_src, lo_dest, lo_src;
1018+
1019+
/*extract source lo/hi */
1020+
if (MEM_P (src))
1021+
{
1022+
rtx addr = XEXP (src, 0);
1023+
if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
1024+
addr = XEXP (addr, 0);
1025+
1026+
rtx clean_mem = change_address (src, SImode, addr);
1027+
lo_src = adjust_address (clean_mem, SImode, 0);
1028+
hi_src = adjust_address (clean_mem, SImode, 4);
1029+
}
1030+
else if (REG_P (src))
1031+
{
1032+
/* Manually force SImode for source registers */
1033+
lo_src = gen_rtx_REG (SImode, REGNO (src));
1034+
hi_src = gen_rtx_REG (SImode, REGNO (src) + 1);
1035+
}
1036+
else
1037+
{
1038+
lo_src = operand_subword_force (src, 0, <MODE>mode);
1039+
hi_src = operand_subword_force (src, 1, <MODE>mode);
1040+
}
1041+
1042+
/*extract destination lo/hi */
1043+
if (MEM_P (dest))
1044+
{
1045+
rtx addr = XEXP (dest, 0);
1046+
if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
1047+
addr = XEXP (addr, 0);
1048+
1049+
rtx clean_mem = change_address (dest, SImode, addr);
1050+
lo_dest = adjust_address (clean_mem, SImode, 0);
1051+
hi_dest = adjust_address (clean_mem, SImode, 4);
1052+
}
1053+
else if (REG_P (dest))
1054+
{
1055+
/* Manually force SImode for destination registers */
1056+
lo_dest = gen_rtx_REG (SImode, REGNO (dest));
1057+
hi_dest = gen_rtx_REG (SImode, REGNO (dest) + 1);
1058+
}
1059+
else
1060+
{
1061+
lo_dest = operand_subword_force (dest, 0, <MODE>mode);
1062+
hi_dest = operand_subword_force (dest, 1, <MODE>mode);
1063+
}
1064+
1065+
/* preincr - r1 has DImode */
1066+
if (MEM_P (src) && GET_CODE (XEXP (src, 0)) == PRE_INC)
1067+
emit_insn (gen_adddi3 (XEXP (XEXP (src, 0), 0),
1068+
XEXP (XEXP (src, 0), 0), GEN_INT (8)));
1069+
if (MEM_P (dest) && GET_CODE (XEXP (dest, 0)) == PRE_INC)
1070+
emit_insn (gen_adddi3 (XEXP (XEXP (dest, 0), 0),
1071+
XEXP (XEXP (dest, 0), 0), GEN_INT (8)));
1072+
1073+
/* emit SI moves - both are now SImode */
1074+
if (reg_overlap_mentioned_p (lo_dest, hi_src))
1075+
{ /*move from r0:r1 into r1:r2
1076+
in this case:
1077+
lo_src = r0
1078+
hi_src = r1
1079+
lo_dest = r1
1080+
hi_dest = r2 so we follow the following order*/
1081+
emit_move_insn (hi_dest, hi_src);
1082+
emit_move_insn (lo_dest, lo_src);
1083+
}
1084+
else
1085+
{ /*move r1:r2 to r0:r1
1086+
lo_src = r1
1087+
hi_src = r2
1088+
lo_dest = r0
1089+
hi_dest = r1*/
1090+
emit_move_insn (lo_dest, lo_src);
1091+
emit_move_insn (hi_dest, hi_src);
1092+
}
1093+
1094+
/* emit post increments */
1095+
if (MEM_P (src) && GET_CODE (XEXP (src, 0)) == POST_INC)
1096+
emit_insn (gen_adddi3 (XEXP (XEXP (src, 0), 0),
1097+
XEXP (XEXP (src, 0), 0), GEN_INT (8)));
1098+
if (MEM_P (dest) && GET_CODE (XEXP (dest, 0)) == POST_INC)
1099+
emit_insn (gen_adddi3 (XEXP (XEXP (dest, 0), 0),
1100+
XEXP (XEXP (dest, 0), 0), GEN_INT (8)));
1101+
1102+
DONE;
1103+
})
1104+
9711105
;;
9721106
;; Short insns: movl_s g,h; movl_s b,u8
9731107
;; Long insns: movl, stl, ldl
9741108
;;
9751109
(define_insn "*arc64_movdi"
976-
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
1110+
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
9771111
(match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,BCLRX,BSETX,r,S12S0,S32S0SymMV,U38S0,SyPic,S32S0,Ustk>,m, r, r"))]
9781112
"TARGET_64BIT
9791113
&& (register_operand (operands[0], DImode)
@@ -999,6 +1133,40 @@ xorl"
9991133
(set_attr "length" "2,2,8,8,4,4,8,8,8,8,2,*,2,*")]
10001134
)
10011135

1136+
(define_insn "*arc64_movdi"
1137+
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
1138+
(match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,BCLRX,BSETX,r,S12S0,S32S0SymMV,U38S0,SyPic,S32S0,Ustk>,m, r, r"))]
1139+
"TARGET_64BIT
1140+
&& (register_operand (operands[0], DImode)
1141+
|| register_operand (operands[1], DImode)
1142+
|| (CONST_INT_P (operands[1])
1143+
&& satisfies_constraint_Ucnst (operands[0])))"
1144+
"@
1145+
movl_s\\t%0,%1
1146+
movl_s\\t%0,%1
1147+
bclrl\\t%0,%q1,%t1
1148+
bsetl\\t%0,%L1,%T1
1149+
movl\\t%0,%1
1150+
movl\\t%0,%1
1151+
movl\\t%0,%1
1152+
vpack2wl\\t%0,%L1,%H1
1153+
addl\\t%0,pcl,%1
1154+
#
1155+
popl_s\\t%0
1156+
#
1157+
pushl_s\\t%1
1158+
#"
1159+
[(set_attr "type" "move,move,bclr,bset,move,move,move,vpack,addl,st,ld,ld,st,st")
1160+
(set_attr "length" "2,2,8,8,4,4,8,8,8,8,2,*,2,*")
1161+
(set (attr "enabled")
1162+
(cond [(match_test "TARGET_LL64")
1163+
(const_string "yes")
1164+
1165+
(eq_attr "type" "ld,st")
1166+
(const_string "no")]
1167+
(const_string "yes")))
1168+
])
1169+
10021170
;; Hi/Low moves for constant and symbol loading.
10031171

10041172
(define_insn "*movdi_high"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* { dg-do run } */
2+
3+
#include <stdlib.h>
4+
5+
int test0(){
6+
float a, b;
7+
double c, d;
8+
a = (float)rand()/(float)(RAND_MAX);
9+
b = (float)rand()/(float)(RAND_MAX);
10+
c = (double)rand()/(double)(RAND_MAX);
11+
d = (double)rand()/(double)(RAND_MAX);
12+
return (int)((double)a + (double)b + c + d);
13+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* { dg-do run } */
2+
/* { dg-options "-O2 -mcpu=hs5x -mfpu=fpud" } */
3+
/* Standard success is return 0 */
4+
volatile double a = 10.0;
5+
volatile double b = 5.0;
6+
7+
int main() {
8+
double res = a - b;
9+
if (res == 5.0)
10+
return 0;
11+
return 1;
12+
}

0 commit comments

Comments
 (0)