Skip to content

Commit 076226f

Browse files
authored
[ELF] Separate relative and non-relative dynamic relocations (llvm#187959)
Previously, the flow was: (1) the parallel scan adds relative relocs to the per-thread `relocsVec`; (2) `mergeRels()` copies all of them into `relocs`; (3) `partitionRels()` uses `stable_partition` to separate relative from non-relative relocs. Now, relative relocs are routed at `addReloc` time by checking `reloc.type == relativeRel`. In `mergeRels`, sharded entries are classified through the same `addReloc` path rather than blindly appended, because `relocsVec` may contain non-relative entries such as `R_AARCH64_AUTH_RELATIVE`. This eliminates the `stable_partition` on the full relocation vector (543K entries for clang) and avoids copying relative relocations into `relocs` only to move them out again. Linking an x86_64 release+assertions build of clang is 1.04x as fast. `numRelativeRelocs` caches `relativeRelocs.size()` at `finalizeContents` time for `DT_RELACOUNT`. Using a live `relativeRelocs.size()` would cause `DynamicSection::writeTo` to emit an extra entry when thunks add relocs after `.dynamic` is sized, overflowing into adjacent sections. Tested by ppc64-long-branch-rel14.s.
1 parent 5567572 commit 076226f

File tree

2 files changed

+35
-31
lines changed

2 files changed

+35
-31
lines changed

lld/ELF/SyntheticSections.cpp

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,7 +1483,8 @@ RelocationBaseSection::RelocationBaseSection(Ctx &ctx, StringRef name,
14831483
unsigned concurrency)
14841484
: SyntheticSection(ctx, name, type, SHF_ALLOC, ctx.arg.wordsize),
14851485
dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag),
1486-
relocsVec(concurrency), combreloc(combreloc) {}
1486+
relocsVec(concurrency), relativeRel(ctx.target->relativeRel),
1487+
combreloc(combreloc) {}
14871488

14881489
void RelocationBaseSection::addSymbolReloc(
14891490
RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
@@ -1503,29 +1504,24 @@ void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
15031504
}
15041505

15051506
void RelocationBaseSection::mergeRels() {
1506-
size_t newSize = relocs.size();
1507+
size_t newSize = relativeRelocs.size();
15071508
for (const auto &v : relocsVec)
15081509
newSize += v.size();
1509-
relocs.reserve(newSize);
1510+
relativeRelocs.reserve(newSize);
1511+
// Classify relocsVec entries into relativeRelocs or relocs. Note that
1512+
// relocsVec may contain non-relative entries (e.g. R_AARCH64_AUTH_RELATIVE)
1513+
// so we must check the type.
15101514
for (const auto &v : relocsVec)
1511-
llvm::append_range(relocs, v);
1515+
for (const DynamicReloc &r : v)
1516+
addReloc(r);
15121517
relocsVec.clear();
15131518
}
15141519

1515-
void RelocationBaseSection::partitionRels() {
1516-
if (!combreloc)
1517-
return;
1518-
const RelType relativeRel = ctx.target->relativeRel;
1519-
numRelativeRelocs =
1520-
std::stable_partition(relocs.begin(), relocs.end(),
1521-
[=](auto &r) { return r.type == relativeRel; }) -
1522-
relocs.begin();
1523-
}
1524-
15251520
void RelocationBaseSection::finalizeContents() {
15261521
mergeRels();
1527-
// Compute DT_RELACOUNT to be used by part.dynamic.
1528-
partitionRels();
1522+
// Cache the count for DT_RELACOUNT. This must not change after
1523+
// DynamicSection::finalizeContents sizes the .dynamic section.
1524+
numRelativeRelocs = relativeRelocs.size();
15291525
SymbolTableBaseSection *symTab = getPartition(ctx).dynSymTab.get();
15301526

15311527
// When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE
@@ -1551,26 +1547,29 @@ void DynamicReloc::finalize(Ctx &ctx, SymbolTableBaseSection *symt) {
15511547

15521548
void RelocationBaseSection::computeRels() {
15531549
SymbolTableBaseSection *symTab = getPartition(ctx).dynSymTab.get();
1550+
parallelForEach(relativeRelocs, [&ctx = ctx, symTab](DynamicReloc &rel) {
1551+
rel.finalize(ctx, symTab);
1552+
});
15541553
parallelForEach(relocs, [&ctx = ctx, symTab](DynamicReloc &rel) {
15551554
rel.finalize(ctx, symTab);
15561555
});
15571556

1557+
// Place IRELATIVE relocations last so that other dynamic relocations are
1558+
// applied before IFUNC resolvers run.
15581559
auto irelative = std::stable_partition(
1559-
relocs.begin() + numRelativeRelocs, relocs.end(),
1560+
relocs.begin(), relocs.end(),
15601561
[t = ctx.target->iRelativeRel](auto &r) { return r.type != t; });
15611562

15621563
// Sort by (!IsRelative,SymIndex,r_offset). DT_REL[A]COUNT requires us to
15631564
// place R_*_RELATIVE first. SymIndex is to improve locality, while r_offset
15641565
// is to make results easier to read.
1565-
if (combreloc) {
1566-
auto nonRelative = relocs.begin() + numRelativeRelocs;
1567-
parallelSort(relocs.begin(), nonRelative,
1568-
[&](auto &a, auto &b) { return a.r_offset < b.r_offset; });
1569-
// Non-relative relocations are few, so don't bother with parallelSort.
1570-
llvm::sort(nonRelative, irelative, [&](auto &a, auto &b) {
1566+
parallelSort(relativeRelocs.begin(), relativeRelocs.end(),
1567+
[](auto &a, auto &b) { return a.r_offset < b.r_offset; });
1568+
// Non-relative relocations are few, so don't bother with parallelSort.
1569+
if (combreloc)
1570+
llvm::sort(relocs.begin(), irelative, [](auto &a, auto &b) {
15711571
return std::tie(a.r_sym, a.r_offset) < std::tie(b.r_sym, b.r_offset);
15721572
});
1573-
}
15741573
}
15751574

15761575
template <class ELFT>
@@ -1585,7 +1584,9 @@ RelocationSection<ELFT>::RelocationSection(Ctx &ctx, StringRef name,
15851584

15861585
template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) {
15871586
computeRels();
1588-
for (const DynamicReloc &rel : relocs) {
1587+
// Write relative relocations first for DT_REL[A]COUNT.
1588+
for (const DynamicReloc &rel :
1589+
llvm::concat<const DynamicReloc>(relativeRelocs, relocs)) {
15891590
auto *p = reinterpret_cast<Elf_Rela *>(buf);
15901591
p->r_offset = rel.r_offset;
15911592
p->setSymbolAndType(rel.r_sym, rel.type, ctx.arg.isMips64EL);

lld/ELF/SyntheticSections.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,10 @@ class RelocationBaseSection : public SyntheticSection {
495495
/// This overload can be used if the addends are written directly instead of
496496
/// using relocations on the input section (e.g. MipsGotSection::writeTo()).
497497
template <bool shard = false> void addReloc(const DynamicReloc &reloc) {
498-
relocs.push_back(reloc);
498+
if (combreloc && reloc.type == relativeRel)
499+
relativeRelocs.push_back(reloc);
500+
else
501+
relocs.push_back(reloc);
499502
}
500503
/// Add a dynamic relocation against \p sym with an optional addend.
501504
void addSymbolReloc(RelType dynType, InputSectionBase &isec,
@@ -532,11 +535,11 @@ class RelocationBaseSection : public SyntheticSection {
532535
{dynType, &sec, offsetInSec, isAgainstSymbol, sym, addend, expr});
533536
}
534537
bool isNeeded() const override {
535-
return !relocs.empty() ||
538+
return !relocs.empty() || !relativeRelocs.empty() ||
536539
llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
537540
}
538541
size_t getSize() const override {
539-
size_t count = relocs.size();
542+
size_t count = relocs.size() + relativeRelocs.size();
540543
for (const auto &v : relocsVec)
541544
count += v.size();
542545
return count * this->entsize;
@@ -545,16 +548,16 @@ class RelocationBaseSection : public SyntheticSection {
545548
void finalizeContents() override;
546549

547550
int32_t dynamicTag, sizeDynamicTag;
548-
SmallVector<DynamicReloc, 0> relocs;
551+
SmallVector<DynamicReloc, 0> relocs, relativeRelocs;
549552

550553
protected:
551554
void mergeRels();
552-
void partitionRels();
553555
void computeRels();
554556
// Used when parallel relocation scanning adds relocations. The elements
555-
// will be moved into relocs by mergeRel().
557+
// will be classified into relativeRelocs or relocs by mergeRels().
556558
SmallVector<SmallVector<DynamicReloc, 0>, 0> relocsVec;
557559
size_t numRelativeRelocs = 0; // used by -z combreloc
560+
RelType relativeRel;
558561
bool combreloc;
559562
};
560563

0 commit comments

Comments
 (0)