Skip to content

[lld][LoongArch] GOT indirection to PC relative optimization #123743

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: users/ylzsx/r-tlsdesc-to-iele-relax
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ class LoongArch final : public TargetInfo {
void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
const Relocation &rLo12, uint64_t secAddr) const;
};
} // end anonymous namespace

Expand Down Expand Up @@ -1150,6 +1152,58 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
}
}

// Try GOT indirection to PC relative optimization when relaxation is enabled.
// From:
// * pcalau12i $a0, %got_pc_hi20(sym_got)
// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
// To:
// * pcalau12i $a0, %pc_hi20(sym)
// * addi.w/d $a0, $a0, %pc_lo12(sym)
//
// Note: Although the optimization has been performed, the GOT entries still
// exist, similarly to AArch64. Eliminating the entries would increase code
// complexity.
//
// Parameters:
// * loc: points at the first instruction of the pair; the second instruction
//   is read from and written to loc + 4.
// * rHi20, rLo12: the relocations that are rewritten for the first and the
//   second instruction respectively.
// * secAddr: added to rHi20.offset to form the address used for the page
//   computations of the first instruction.
//
// Returns true if both instructions were rewritten. Returns false, before any
// byte at loc has been modified, if one of the checks below fails.
//
// NOTE(review): this function itself does not verify that rLo12.offset equals
// rHi20.offset + 4, nor that rLo12 refers to the same symbol and addend as
// rHi20; only rHi20.sym and rHi20.addend are consulted. Confirm that the
// caller guarantees this.
bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
const Relocation &rLo12, uint64_t secAddr) const {
// Give up on symbols that are not defined, are preemptible or are GNU ifuncs,
// and, when ctx.arg.isPic is set, on Defined symbols that have no section
// (presumably absolute symbols, whose address is not PC-relative -- confirm).
if (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need symbol tests to test each condition here. aarch64-adrp-ldr-got-symbols.s has an example

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I have added this test in a previous patch(#123566).

rHi20.sym->isGnuIFunc() ||
(ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section))
return false;

Symbol &sym = *rHi20.sym;
// Final address of the symbol plus the hi20 addend. The same value is later
// used for the lo12 instruction as well.
uint64_t symLocal = sym.getVA(ctx) + rHi20.addend;
// Check if the address difference is within +/-2GB range.
// For simplicity, the range mentioned here is an approximate estimate and is
// not fully equivalent to the entire region that PC-relative addressing can
// cover.
int64_t pageOffset =
getLoongArchPage(symLocal) - getLoongArchPage(secAddr + rHi20.offset);
if (!isInt<20>(pageOffset >> 12))
return false;

// Replacement relocations: they keep the original offsets and addends but use
// the PCALA_HI20/PCALA_LO12 types and point at the symbol itself.
Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset,
rHi20.addend, &sym};
Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend,
&sym};

const uint32_t currInsn = read32le(loc);
const uint32_t nextInsn = read32le(loc + 4);
// Check that a single register is used throughout: the D5 field of the first
// instruction must equal both the J5 and the D5 field of the second one.
// NOTE(review): the opcodes of the two instructions are not inspected here;
// the pairing is assumed from the relocation types -- confirm with the caller.
if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
return false;

uint64_t pageDelta =
getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type);
// pcalau12i $a0, %pc_hi20
// The instruction is re-emitted with the original D5 register and the
// remaining operands zero, then relocate() is applied with the page delta.
write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0));
relocate(loc, newRHi20, pageDelta);
// addi.w/d $a0, $a0, %pc_lo12
// addi.d is chosen when ctx.arg.is64 is set, addi.w otherwise; the register
// fields are taken from the original second instruction.
write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn),
getJ5(nextInsn), 0));
relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64));
return true;
}

// During TLSDESC GD_TO_IE, the converted code sequence always includes an
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
Expand Down Expand Up @@ -1259,6 +1313,22 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
tlsdescToLe(loc, rel, val);
}
continue;
case RE_LOONGARCH_GOT_PAGE_PC:
// In LoongArch, we try GOT indirection to PC relative optimization only
// when relaxation is enabled. This approach avoids determining whether
// relocation types are paired and whether the destination register of
// pcalau12i is only used by the immediately following instruction.
// Moreover, if the original code sequence can be relaxed to a single
// instruction `pcaddi`, the first instruction will be removed and it will
// not reach here.
if (isPairRelaxable(relocs, i) && rel.type == R_LARCH_GOT_PC_HI20 &&
relocs[i + 2].type == R_LARCH_GOT_PC_LO12 &&
tryGotToPCRel(loc, rel, relocs[i + 2], secAddr)) {
i = i + 3; // skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12,
// R_LARCH_RELAX
continue;
}
break;
default:
break;
}
Expand Down
10 changes: 6 additions & 4 deletions lld/test/ELF/loongarch-relax-pc-hi20-lo12.s
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,26 @@
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
# NORELAX32-NEXT: 10000: pcalau12i $a0, 1024
# NORELAX32-NEXT: addi.w $a0, $a0, 0
## Not a relaxation; conversion to PCRel.
# NORELAX32-NEXT: pcalau12i $a0, 1024
# NORELAX32-NEXT: ld.w $a0, $a0, 4
# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX32-NEXT: pcalau12i $a0, 1024
# NORELAX32-NEXT: addi.w $a0, $a0, 0
# NORELAX32-NEXT: pcalau12i $a0, 1024
# NORELAX32-NEXT: ld.w $a0, $a0, 4
# NORELAX32-NEXT: addi.w $a0, $a0, 0

# NORELAX64-LABEL: <_start>:
## offset exceeds the range of pcaddi
## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0
# NORELAX64-NEXT: 10000: pcalau12i $a0, 1024
# NORELAX64-NEXT: addi.d $a0, $a0, 0
## Not a relaxation; conversion to PCRel.
# NORELAX64-NEXT: pcalau12i $a0, 1024
# NORELAX64-NEXT: ld.d $a0, $a0, 8
# NORELAX64-NEXT: addi.d $a0, $a0, 0
# NORELAX64-NEXT: pcalau12i $a0, 1024
# NORELAX64-NEXT: addi.d $a0, $a0, 0
# NORELAX64-NEXT: pcalau12i $a0, 1024
# NORELAX64-NEXT: ld.d $a0, $a0, 8
# NORELAX64-NEXT: addi.d $a0, $a0, 0

.section .text
.global _start
Expand Down
Loading