Skip to content

Commit b86aa49

Browse files
committed
fix for P10019563-77915
Fix to ensure that the hs5x target produces correct code despite lacking 64-bit load/store instructions. I made use of the split mechanism enabled by the "#" output template, which forces the compiler to use pairs of general-purpose registers and then apply the define_split. The split pattern takes a single 64-bit memory move and breaks it into two 32-bit moves using gen_lowpart and gen_highpart. It would have been possible to use a scratch register via match_scratch; however, this can sometimes lead to a reload failure.
1 parent 544ca30 commit b86aa49

5 files changed

Lines changed: 289 additions & 25 deletions

File tree

gcc/config/arc64/arc64.cc

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,6 @@ frame_save_reg (rtx reg, HOST_WIDE_INT offset, HOST_WIDE_INT displacement)
648648
}
649649

650650
/* ARC prologue saving regs routine. */
651-
652651
static HOST_WIDE_INT
653652
arc64_save_callee_saves (void)
654653
{
@@ -674,6 +673,25 @@ arc64_save_callee_saves (void)
674673
{
675674
save_mode = ARC64_HAS_FPUD ? DFmode : SFmode;
676675
disp = UNITS_PER_WORD;
676+
677+
/* CRITICAL FIX: If the mode is 64-bit DFmode but the hardware
678+
does not support 64-bit memory operations (!TARGET_LL64),
679+
decompose the save into two sequential 4-byte SFmode saves. */
680+
if (save_mode == DFmode && !TARGET_LL64)
681+
{
682+
/* Save the lower 4-byte sub-register piece (regno) */
683+
rtx reg_lo = gen_rtx_REG (SFmode, regno);
684+
frame_allocated += frame_save_reg (reg_lo, offset, disp);
685+
offset = 0;
686+
687+
/* Save the upper 4-byte sub-register piece (regno + 1).
688+
The displacement or offset is updated implicitly by
689+
the frame tracker inside frame_save_reg. */
690+
rtx reg_hi = gen_rtx_REG (SFmode, regno + 1);
691+
frame_allocated += frame_save_reg (reg_hi, offset, disp);
692+
693+
continue; /* Skip the original single-register save logic */
694+
}
677695
}
678696
else if (regno >= 1
679697
&& (((regno - 1) % 2) == 0)
@@ -1680,7 +1698,8 @@ arc64_get_effective_mode_for_address_scaling (const machine_mode mode)
16801698
{
16811699
if (GET_MODE_SIZE (mode) == (UNITS_PER_WORD * 2))
16821700
{
1683-
gcc_assert (DOUBLE_LOAD_STORE);
1701+
if (!TARGET_LL64)
1702+
gcc_assert (DOUBLE_LOAD_STORE);
16841703
return Pmode;
16851704
}
16861705
return mode;
@@ -5546,9 +5565,19 @@ arc64_expand_prologue (void)
55465565

55475566
frame_allocated = frame->frame_size;
55485567

5549-
frame_allocated -= arc64_save_callee_saves ();
55505568

55515569
/* If something left, allocate. */
5570+
if (ARC_INTERRUPT_P (cfun->machine->fn_type) && !TARGET_LL64)
5571+
{
5572+
if (frame_allocated > 0)
5573+
{
5574+
frame_stack_add ((HOST_WIDE_INT) 0 - frame_allocated);
5575+
frame_allocated = 0;
5576+
}
5577+
}
5578+
5579+
frame_allocated -= arc64_save_callee_saves ();
5580+
55525581
if (frame_allocated > 0)
55535582
frame_stack_add ((HOST_WIDE_INT) 0 - frame_allocated);
55545583

@@ -6117,7 +6146,7 @@ arc64_split_double_move_p (rtx *operands, machine_mode mode)
61176146
return false;
61186147
}
61196148

6120-
/* Evereything else is going for a split. */
6149+
/* Everything else is going for a split. */
61216150
return true;
61226151
}
61236152

@@ -6135,8 +6164,7 @@ arc64_split_double_move (rtx *operands, machine_mode mode)
61356164
machine_mode mvmode = smallest_int_mode_for_size (BITS_PER_WORD).require ();
61366165

61376166
/* Maximum size handled is twice UNITS_PER_WORD. */
6138-
gcc_assert (iregs <= 2);
6139-
6167+
gcc_assert(iregs <= 2);
61406168
/* This procedure works as long as the width of the fp regs is the
61416169
same as the width of r regs. */
61426170
if (FLOAT_MODE_P (mode))

gcc/config/arc64/arc64.md

Lines changed: 187 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -865,8 +865,8 @@ xorl"
865865

866866
(define_insn "*arc64_movsi"
867867
[(set
868-
(match_operand:SI 0 "arc64_dest_operand" "=qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst,RBLNKq,r, Ustk<,Ustor")
869-
(match_operand:SI 1 "arc64_movl_operand" "qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, Ustk>,m,RBLNKq, r"))
868+
(match_operand:SI 0 "arc64_dest_operand" "=w,r,w,qh,r, q, r, r,h,r, q,Ustms,Ustor,Ucnst,RBLNKq,r, Ustk<,Ustor")
869+
(match_operand:SI 1 "arc64_movl_operand" "r,w,w,qhS03MV,r,U08S0,S12S0,SyPic,i,i,Uldms, q,S06S0, i, Ustk>,m,RBLNKq, r"))
870870
]
871871
"register_operand (operands[0], SImode)
872872
|| register_operand (operands[1], SImode)
@@ -875,6 +875,9 @@ xorl"
875875
|| (CONST_INT_P (operands[1])
876876
&& satisfies_constraint_Ucnst (operands[0]))"
877877
"@
878+
mov\\t%0,%1
879+
mov\\t%0,%1
880+
mov\\t%0,%1
878881
mov_s\\t%0,%1
879882
mov\\t%0,%1
880883
mov_s\\t%0,%1
@@ -890,8 +893,8 @@ xorl"
890893
ld%U1\\t%0,%1
891894
push_s\\t%1
892895
st%U0\\t%1,%0"
893-
[(set_attr "type" "move,move,move,move,add,move,move,ld,st,st,st,ld,ld,st,st")
894-
(set_attr "length" "2,4,2,4,8,6,8,2,2,*,8,2,*,2,*")]
896+
[(set_attr "type" "fmov,fmov,fmov,move,move,move,move,add,move,move,ld,st,st,st,ld,ld,st,st")
897+
(set_attr "length" "4,4,4,2,4,2,4,8,6,8,2,2,*,8,2,*,2,*")]
895898
)
896899

897900
(define_insn "*mov<mode>_cmp0"
@@ -924,27 +927,56 @@ xorl"
924927
[(set_attr "type" "move,move,move,move,ld,st,ld,st")
925928
(set_attr "length" "2,4,6,8,2,2,*,*")])
926929

927-
;; For a fp move I use FSMOV.<cc> instruction. However, we can also
928-
;; use FSSGNJ.
929-
;; FIXME! add short instruction selection
930930
(define_insn "*mov<mode>_hardfp"
931931
[(set (match_operand:GPF_HF 0 "arc64_dest_operand" "=w, w,Ufpms,*r,*w,*r,*r,*r,*Ustor")
932932
(match_operand:GPF_HF 1 "arc64_movf_operand" "w,Ufpms, w,*w,*r,*r,*G,*m, *r"))]
933933
"ARC64_HAS_FP_BASE
934934
&& (register_operand (operands[0], <MODE>mode)
935935
|| register_operand (operands[1], <MODE>mode))"
936-
"@
937-
f<sfxtab>mov\\t%0,%1
938-
fld<sizef>%U1\\t%0,%1
939-
fst<sizef>%U0\\t%1,%0
940-
fmv<fmvftab>2<fmvitab>\\t%0,%1
941-
fmv<fmvitab>2<fmvftab>\\t%0,%1
942-
mov<mcctab>\\t%0,%1
943-
mov<mcctab>\\t%0,%1
944-
ld<slfp>%U1\\t%0,%1
945-
st<slfp>%U0\\t%1,%0"
936+
{
937+
switch (which_alternative)
938+
{
939+
case 0:
940+
return "f<sfxtab>mov\\t%0,%1";
941+
case 3:
942+
return "fmv<fmvftab>2<fmvitab>\\t%0,%1";
943+
case 4:
944+
return "fmv<fmvitab>2<fmvftab>\\t%0,%1";
945+
case 5:
946+
return "mov<mcctab>\\t%0,%1";
947+
case 6:
948+
return "mov<mcctab>\\t%0,%1";
949+
case 7:
950+
/* opt 7 (*r, *m) - This is the Load.
951+
It moves 64 bits from Memory into a general purpose pair (e.g. r0:r1).
952+
*/
953+
case 8:
954+
/* option 8 (*Ustor, *r) - is a Store.
955+
It moves 64 bits from a GPR pair back into Memory.
956+
*/
957+
if (GET_MODE_SIZE (<MODE>mode) == 8 && !TARGET_LL64)
958+
/* For hs5x with an FP unit that supports double-precision FP,
959+
we trigger the splitter*/
960+
return "#";
961+
return (which_alternative == 7) ?
962+
"ld<slfp>%U1\\t%0,%1" :
963+
"st<slfp>%U0\\t%1,%0";
964+
default: gcc_unreachable ();
965+
}
966+
}
946967
[(set_attr "type" "fmov,ld,st,move,move,move,move,ld,st")
947-
(set_attr "length" "4,*,*,4,4,4,8,*,*")])
968+
(set_attr "length" "4,*,*,4,4,4,8,*,*")
969+
;; disable fpu to/from Memory 64 bit on hs5x.
970+
;; This forces the compiler to use general purpose registers as a buffer
971+
;; because alternatives 1 and 2 are disabled.
972+
(set (attr "enabled")
973+
(if_then_else
974+
(and (match_test "GET_MODE_SIZE (<MODE>mode) == 8")
975+
(not (match_test "TARGET_LL64"))
976+
(ior (eq_attr "alternative" "1")
977+
(eq_attr "alternative" "2")))
978+
(const_string "no")
979+
(const_string "yes")))])
948980

949981
;; move 128bit
950982
(define_insn_and_split "*mov<mode>_insn"
@@ -968,12 +1000,114 @@ xorl"
9681000
}
9691001
[(set_attr "type" "move,ld,ld,st,st")
9701002
(set_attr "length" "8,2,*,2,*")])
1003+
1004+
;; The split applies to targets that have no 64-bit ld/st,
1005+
;; e.g. -mfpu=fpud -mcpu=hs5x
1006+
1007+
(define_split
1008+
[(set (match_operand:GPF_HF 0 "nonimmediate_operand" "")
1009+
(match_operand:GPF_HF 1 "general_operand" ""))]
1010+
"reload_completed
1011+
&& GET_MODE_SIZE (<MODE>mode) == 8
1012+
&& !TARGET_LL64"
1013+
[(const_int 0)]
1014+
{
1015+
rtx dest = operands[0];
1016+
rtx src = operands[1];
1017+
rtx hi_dest, hi_src, lo_dest, lo_src;
1018+
1019+
/*extract source lo/hi */
1020+
if (MEM_P (src))
1021+
{
1022+
rtx addr = XEXP (src, 0);
1023+
if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
1024+
addr = XEXP (addr, 0);
1025+
1026+
rtx clean_mem = change_address (src, SImode, addr);
1027+
lo_src = adjust_address (clean_mem, SImode, 0);
1028+
hi_src = adjust_address (clean_mem, SImode, 4);
1029+
}
1030+
else if (REG_P (src))
1031+
{
1032+
/* Manually force SImode for source registers */
1033+
lo_src = gen_rtx_REG (SImode, REGNO (src));
1034+
hi_src = gen_rtx_REG (SImode, REGNO (src) + 1);
1035+
}
1036+
else
1037+
{
1038+
lo_src = operand_subword_force (src, 0, <MODE>mode);
1039+
hi_src = operand_subword_force (src, 1, <MODE>mode);
1040+
}
1041+
1042+
/*extract destination lo/hi */
1043+
if (MEM_P (dest))
1044+
{
1045+
rtx addr = XEXP (dest, 0);
1046+
if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
1047+
addr = XEXP (addr, 0);
1048+
1049+
rtx clean_mem = change_address (dest, SImode, addr);
1050+
lo_dest = adjust_address (clean_mem, SImode, 0);
1051+
hi_dest = adjust_address (clean_mem, SImode, 4);
1052+
}
1053+
else if (REG_P (dest))
1054+
{
1055+
/* Manually force SImode for destination registers */
1056+
lo_dest = gen_rtx_REG (SImode, REGNO (dest));
1057+
hi_dest = gen_rtx_REG (SImode, REGNO (dest) + 1);
1058+
}
1059+
else
1060+
{
1061+
lo_dest = operand_subword_force (dest, 0, <MODE>mode);
1062+
hi_dest = operand_subword_force (dest, 1, <MODE>mode);
1063+
}
1064+
1065+
/* preincr - r1 has DImode */
1066+
if (MEM_P (src) && GET_CODE (XEXP (src, 0)) == PRE_INC)
1067+
emit_insn (gen_adddi3 (XEXP (XEXP (src, 0), 0),
1068+
XEXP (XEXP (src, 0), 0), GEN_INT (8)));
1069+
if (MEM_P (dest) && GET_CODE (XEXP (dest, 0)) == PRE_INC)
1070+
emit_insn (gen_adddi3 (XEXP (XEXP (dest, 0), 0),
1071+
XEXP (XEXP (dest, 0), 0), GEN_INT (8)));
1072+
1073+
/* emit SI moves - both are now SImode */
1074+
if (reg_overlap_mentioned_p (lo_dest, hi_src))
1075+
{ /*move from r0:r1 into r1:r2
1076+
in this case:
1077+
lo_src = r0
1078+
hi_src = r1
1079+
lo_dest = r1
1080+
hi_dest = r2 so we follow the following order*/
1081+
emit_move_insn (hi_dest, hi_src);
1082+
emit_move_insn (lo_dest, lo_src);
1083+
}
1084+
else
1085+
{ /*move r1:r2 to r0:r1
1086+
lo_src = r1
1087+
hi_src = r2
1088+
lo_dest = r0
1089+
hi_dest = r1*/
1090+
emit_move_insn (lo_dest, lo_src);
1091+
emit_move_insn (hi_dest, hi_src);
1092+
}
1093+
1094+
/* emit post increments */
1095+
if (MEM_P (src) && GET_CODE (XEXP (src, 0)) == POST_INC)
1096+
emit_insn (gen_adddi3 (XEXP (XEXP (src, 0), 0),
1097+
XEXP (XEXP (src, 0), 0), GEN_INT (8)));
1098+
if (MEM_P (dest) && GET_CODE (XEXP (dest, 0)) == POST_INC)
1099+
emit_insn (gen_adddi3 (XEXP (XEXP (dest, 0), 0),
1100+
XEXP (XEXP (dest, 0), 0), GEN_INT (8)));
1101+
1102+
DONE;
1103+
})
1104+
9711105
;;
9721106
;; Short insns: movl_s g,h; movl_s b,u8
9731107
;; Long insns: movl, stl, ldl
9741108
;;
9751109
(define_insn "*arc64_movdi"
976-
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
1110+
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
9771111
(match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,BCLRX,BSETX,r,S12S0,S32S0SymMV,U38S0,SyPic,S32S0,Ustk>,m, r, r"))]
9781112
"TARGET_64BIT
9791113
&& (register_operand (operands[0], DImode)
@@ -999,6 +1133,40 @@ xorl"
9991133
(set_attr "length" "2,2,8,8,4,4,8,8,8,8,2,*,2,*")]
10001134
)
10011135

1136+
(define_insn "*arc64_movdi"
1137+
[(set (match_operand:DI 0 "arc64_dest_operand" "=qh, q, r, r,r, r, r, r, r,Ucnst, r,r,Ustk<,Ustor")
1138+
(match_operand:DI 1 "arc64_movl_operand" "qh,U08S0,BCLRX,BSETX,r,S12S0,S32S0SymMV,U38S0,SyPic,S32S0,Ustk>,m, r, r"))]
1139+
"TARGET_64BIT
1140+
&& (register_operand (operands[0], DImode)
1141+
|| register_operand (operands[1], DImode)
1142+
|| (CONST_INT_P (operands[1])
1143+
&& satisfies_constraint_Ucnst (operands[0])))"
1144+
"@
1145+
movl_s\\t%0,%1
1146+
movl_s\\t%0,%1
1147+
bclrl\\t%0,%q1,%t1
1148+
bsetl\\t%0,%L1,%T1
1149+
movl\\t%0,%1
1150+
movl\\t%0,%1
1151+
movl\\t%0,%1
1152+
vpack2wl\\t%0,%L1,%H1
1153+
addl\\t%0,pcl,%1
1154+
#
1155+
popl_s\\t%0
1156+
#
1157+
pushl_s\\t%1
1158+
#"
1159+
[(set_attr "type" "move,move,bclr,bset,move,move,move,vpack,addl,st,ld,ld,st,st")
1160+
(set_attr "length" "2,2,8,8,4,4,8,8,8,8,2,*,2,*")
1161+
(set (attr "enabled")
1162+
(cond [(match_test "TARGET_LL64")
1163+
(const_string "yes")
1164+
1165+
(eq_attr "type" "ld,st")
1166+
(const_string "no")]
1167+
(const_string "yes")))
1168+
])
1169+
10021170
;; Hi/Low moves for constant and symbol loading.
10031171

10041172
(define_insn "*movdi_high"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* { dg-do run } */
2+
3+
#include <stdlib.h>
4+
5+
int test0(){
6+
float a, b;
7+
double c, d;
8+
a = (float)rand()/(float)(RAND_MAX);
9+
b = (float)rand()/(float)(RAND_MAX);
10+
c = (double)rand()/(double)(RAND_MAX);
11+
d = (double)rand()/(double)(RAND_MAX);
12+
return (int)((double)a + (double)b + c + d);
13+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/* { dg-do run } */
2+
/* { dg-options "-O2 -mcpu=hs5x -mfpu=fpud" } */
3+
/* Standard success is return 0 */
4+
volatile double a = 10.0;
5+
volatile double b = 5.0;
6+
7+
int main() {
8+
double res = a - b;
9+
if (res == 5.0)
10+
return 0;
11+
return 1;
12+
}

0 commit comments

Comments
 (0)