Skip to content

[lld][LoongArch] Support TLSDESC GD/LD to IE/LE #123715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 147 additions & 1 deletion lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,13 @@ class LoongArch final : public TargetInfo {
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
bool relaxOnce(int pass) const override;
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
void finalizeRelax(int passes) const override;

private:
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};
} // end anonymous namespace

Expand All @@ -58,6 +63,7 @@ enum Op {
LU12I_W = 0x14000000,
PCADDI = 0x18000000,
PCADDU12I = 0x1c000000,
PCALAU12I = 0x1a000000,
LD_W = 0x28800000,
LD_D = 0x28c00000,
JIRL = 0x4c000000,
Expand All @@ -69,6 +75,7 @@ enum Reg {
R_ZERO = 0,
R_RA = 1,
R_TP = 2,
R_A0 = 4,
R_T0 = 12,
R_T1 = 13,
R_T2 = 14,
Expand Down Expand Up @@ -961,7 +968,8 @@ static bool relax(Ctx &ctx, InputSection &sec) {
case R_LARCH_TLS_LD_PC_HI20:
case R_LARCH_TLS_DESC_PC_HI20:
// The overflow check for i+2 will be carried out in isPairRelaxable.
if (isPairRelaxable(relocs, i))
if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC &&
r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i))
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
break;
case R_LARCH_CALL36:
Expand Down Expand Up @@ -1046,6 +1054,103 @@ static void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
}
}

// Convert TLSDESC GD/LD to IE.
// In the normal or medium code model, there are two forms of code sequences:
//   * pcalau12i $a0, %desc_pc_hi20(sym_desc)
//   * addi.d    $a0, $a0, %desc_pc_lo12(sym_desc)
//   * ld.d      $ra, $a0, %desc_ld(sym_desc)
//   * jirl      $ra, $ra, %desc_call(sym_desc)
//   ------
//   * pcaddi $a0, %desc_pcrel_20(a)
//   * load   $ra, $a0, %desc_ld(a)
//   * jirl   $ra, $ra, %desc_call(a)
//
// The code sequence obtained is as follows:
//   * pcalau12i $a0, %ie_pc_hi20(sym_ie)
//   * ld.[wd]   $a0, $a0, %ie_pc_lo12(sym_ie)
//
// For simplicity, both tlsdescToIe and tlsdescToLe always turn the leading
// instructions into NOPs, so that the two forms of code sequence
// (corresponding to the relocation combinations
// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and
// R_LARCH_TLS_DESC_PCREL20_S2) are handled by the same process.
//
// When relaxation is enabled, the redundant NOPs can be removed.
//
// `loc` points at the instruction being rewritten, `rel` is the relocation
// attached to it, and `val` is the value forwarded to the IE relocation.
void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel,
                            uint64_t val) const {
  const auto type = rel.type;

  // Instructions that only materialize the descriptor address are dropped.
  if (type == R_LARCH_TLS_DESC_PC_HI20 || type == R_LARCH_TLS_DESC_PC_LO12 ||
      type == R_LARCH_TLS_DESC_PCREL20_S2) {
    write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
    return;
  }

  // The descriptor load slot becomes the high part of the IE sequence.
  if (type == R_LARCH_TLS_DESC_LD) {
    write32le(loc, insn(PCALAU12I, R_A0, 0, 0)); // pcalau12i $a0, %ie_pc_hi20
    relocateNoSym(loc, R_LARCH_TLS_IE_PC_HI20, val);
    return;
  }

  // The descriptor call slot becomes the load of the low part.
  if (type == R_LARCH_TLS_DESC_CALL) {
    const auto loadOp = ctx.arg.is64 ? LD_D : LD_W;
    write32le(loc, insn(loadOp, R_A0, R_A0,
                        0)); // ld.[wd] $a0, $a0, %ie_pc_lo12
    relocateNoSym(loc, R_LARCH_TLS_IE_PC_LO12, val);
    return;
  }

  llvm_unreachable("unsupported relocation for TLSDESC to IE");
}

// Convert TLSDESC GD/LD to LE.
// The code sequence obtained in the normal or medium code model is as follows:
// * lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0
// * ori $a0 $a0, %le_lo12(sym_le)
// See the comment in tlsdescToIe for detailed information.
void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
uint64_t val) const {
assert(isInt<32>(val) &&
"val exceeds the range of medium code model in tlsdescToLe");

bool isUInt12 = isUInt<12>(val);
switch (rel.type) {
case R_LARCH_TLS_DESC_PC_HI20:
case R_LARCH_TLS_DESC_PC_LO12:
case R_LARCH_TLS_DESC_PCREL20_S2:
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
break;
case R_LARCH_TLS_DESC_LD:
if (isUInt12)
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
else
write32le(loc, insn(LU12I_W, R_A0, extractBits(val, 31, 12),
0)); // lu12i.w $a0, %le_hi20
break;
case R_LARCH_TLS_DESC_CALL:
if (isUInt12)
write32le(loc, insn(ORI, R_A0, R_ZERO, val)); // ori $a0, $r0, %le_lo12
else
write32le(loc,
insn(ORI, R_A0, R_A0, lo12(val))); // ori $a0, $a0, %le_lo12
break;
default:
llvm_unreachable("unsupported relocation for TLSDESC to LE");
}
}

// During TLSDESC GD_TO_IE, the converted code sequence always includes an
// instruction related to the Lo12 relocation (ld.[wd]). To obtain the correct
// val in `getRelocTargetVA`, the expr of this instruction is adjusted to
// R_RELAX_TLS_GD_TO_IE_ABS, while the expr of the other instructions, related
// to the Hi20 relocation (pcalau12i), is adjusted to
// RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC. Specifically, in the normal or
// medium code model, the instruction with relocation R_LARCH_TLS_DESC_CALL is
// the candidate for the Lo12 relocation.
//
// Any expr other than R_RELAX_TLS_GD_TO_IE is returned unchanged.
RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
  if (expr != R_RELAX_TLS_GD_TO_IE)
    return expr;
  return type == R_LARCH_TLS_DESC_CALL
             ? R_RELAX_TLS_GD_TO_IE_ABS
             : RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC;
}

void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
const unsigned bits = ctx.arg.is64 ? 64 : 32;
uint64_t secAddr = sec.getOutputSection()->addr;
Expand Down Expand Up @@ -1088,6 +1193,47 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
tlsIeToLe(loc, rel, val);
}
continue;
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
// LoongArch does not support TLSDESC GD/LD to LE/IE optimization in the
// extreme code model. In these cases, the relocs are as follows:
//
// * i -- R_LARCH_TLS_DESC_PC_HI20
// * i+1 -- R_LARCH_TLS_DESC_PC_LO12
// * i+2 -- R_LARCH_TLS_DESC64_PC_LO20
// * i+3 -- R_LARCH_TLS_DESC64_PC_HI12
isExtreme =
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20);
}
[[fallthrough]];
case R_RELAX_TLS_GD_TO_IE_ABS:
if (isExtreme) {
if (rel.type == R_LARCH_TLS_DESC_CALL)
continue;
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
bits);
relocateNoSym(loc, rel.type, val);
} else {
tlsdescToIe(loc, rel, val);
}
continue;
case R_RELAX_TLS_GD_TO_LE:
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
isExtreme =
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20);
}
if (isExtreme) {
if (rel.type == R_LARCH_TLS_DESC_CALL)
continue;
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
bits);
relocateNoSym(loc, rel.type, val);
} else {
tlsdescToLe(loc, rel, val);
}
continue;
default:
break;
}
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
case R_GOTPLT_PC:
return r.sym->getGotPltVA(ctx) + a - p;
case RE_LOONGARCH_GOT_PAGE_PC:
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
if (r.sym->hasFlag(NEEDS_TLSGD))
return getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p,
r.type);
Expand Down
50 changes: 33 additions & 17 deletions lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1346,22 +1346,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
if (ctx.arg.emachine == EM_MIPS)
return handleMipsTlsRelocation(ctx, type, sym, *sec, offset, addend, expr);

// LoongArch does not yet implement transition from TLSDESC to LE/IE, so
// generate TLSDESC dynamic relocation for the dynamic linker to handle.
if (ctx.arg.emachine == EM_LOONGARCH &&
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
R_TLSDESC_CALL>(expr)) {
if (expr != R_TLSDESC_CALL) {
sym.setFlags(NEEDS_TLSDESC);
sec->addReloc({expr, type, offset, addend, &sym});
}
return 1;
}

bool isRISCV = ctx.arg.emachine == EM_RISCV;

if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
R_TLSDESC_GOTPLT>(expr) &&
R_TLSDESC_GOTPLT, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr) &&
ctx.arg.shared) {
// R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not
// set NEEDS_TLSDESC on the label.
Expand All @@ -1375,10 +1363,14 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
return 1;
}

// LoongArch supports IE to LE optimization in non-extreme code model.
// LoongArch supports IE to LE, DESC GD/LD to IE/LE optimizations in
// non-extreme code model.
bool execOptimizeInLoongArch =
ctx.arg.emachine == EM_LOONGARCH &&
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12 ||
type == R_LARCH_TLS_DESC_PC_HI20 || type == R_LARCH_TLS_DESC_PC_LO12 ||
type == R_LARCH_TLS_DESC_LD || type == R_LARCH_TLS_DESC_CALL ||
type == R_LARCH_TLS_DESC_PCREL20_S2);

// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
// optimizations.
Expand Down Expand Up @@ -1437,9 +1429,23 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
return 1;
}

// LoongArch does not support the transition from TLSDESC to LE/IE in the
// extreme code model. In that case NEEDS_TLSDESC should be set rather than
// NEEDS_TLSGD, so we check for it independently.
if (ctx.arg.emachine == EM_LOONGARCH &&
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
R_TLSDESC_CALL>(expr) &&
!execOptimize) {
if (expr != R_TLSDESC_CALL) {
sym.setFlags(NEEDS_TLSDESC);
sec->addReloc({expr, type, offset, addend, &sym});
}
return 1;
}

if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC,
RE_LOONGARCH_TLSGD_PAGE_PC>(expr)) {
RE_LOONGARCH_TLSGD_PAGE_PC, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr)) {
if (!execOptimize) {
sym.setFlags(NEEDS_TLSGD);
sec->addReloc({expr, type, offset, addend, &sym});
Expand All @@ -1453,7 +1459,17 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
// label, so TLSDESC=>IE will be categorized as R_RELAX_TLS_GD_TO_LE. We fix
// the categorization in RISCV::relocateAlloc.
if (sym.isPreemptible) {
sym.setFlags(NEEDS_TLSGD_TO_IE);
// In LoongArch, TLSDESC code sequences share relocations
// R_LARCH_TLS_DESC_PC_HI20 and R_LARCH_TLS_DESC_PC_LO12 in
// normal/medium/extreme code model. Since the extreme code model cannot
// be optimized to IE/LE, the flag NEEDS_TLSGD_TO_IE added previously
// needs to be cleared.
// In extreme code model, R_LARCH_TLS_DESC64_LO20 and
// R_LARCH_TLS_DESC64_HI12 will set NEEDS_TLSDESC flag.
if (ctx.arg.emachine == EM_LOONGARCH && sym.hasFlag(NEEDS_TLSDESC))
sym.clearFlags(NEEDS_TLSGD_TO_IE);
else
sym.setFlags(NEEDS_TLSGD_TO_IE);
sec->addReloc({ctx.target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE),
type, offset, addend, &sym});
} else {
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Relocations.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ enum RelExpr {
RE_LOONGARCH_GOT_PAGE_PC,
RE_LOONGARCH_TLSGD_PAGE_PC,
RE_LOONGARCH_TLSDESC_PAGE_PC,
RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
};

// Architecture-neutral representation of relocation.
Expand Down
3 changes: 3 additions & 0 deletions lld/ELF/Symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,9 @@ class Symbol {
// ORs `bits` into `flags` with a single fetch_or using relaxed memory
// ordering. The previous flag value returned by fetch_or is not needed.
void setFlags(uint16_t bits) {
  (void)flags.fetch_or(bits, std::memory_order_relaxed);
}
// Clears every bit of `bits` in `flags` by AND-ing with the complement mask,
// using a single fetch_and with relaxed memory ordering. The previous flag
// value returned by fetch_and is not needed.
void clearFlags(uint16_t bits) {
  const auto keepMask = ~bits;
  (void)flags.fetch_and(keepMask, std::memory_order_relaxed);
}
bool hasFlag(uint16_t bit) const {
assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2");
return flags.load(std::memory_order_relaxed) & bit;
Expand Down
Loading