Skip to content

Commit 384527f

Browse files
rui314 and claude committed
Don't unconditionally retain C-identifier-named sections in --gc-sections
A section whose name is a valid C identifier was treated as a GC root, keeping it (and everything reachable from it) alive even when nothing referenced its synthesized __start_/__stop_ marker symbols. That could pull in unrelated code with undefined references and cause spurious "undefined symbol" errors that lld and ld.bfd don't produce. Build a multimap of c-identifier-named sections up front and, during marking, when a relocation targets __start_<name> or __stop_<name>, mark every section named <name> alive. Fixes #1571 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d947b1a commit 384527f

2 files changed

Lines changed: 78 additions & 10 deletions

File tree

src/gc-sections.cc

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "mold.h"
66

77
#include <fstream>
8+
#include <tbb/concurrent_unordered_map.h>
89
#include <tbb/concurrent_vector.h>
910
#include <tbb/parallel_for_each.h>
1011

@@ -28,8 +29,28 @@ static bool should_keep(const InputSection<E> &isec) {
2829
name.starts_with(".ctors") ||
2930
name.starts_with(".dtors") ||
3031
name.starts_with(".init") ||
31-
name.starts_with(".fini") ||
32-
is_c_identifier(name);
32+
name.starts_with(".fini");
33+
}
34+
35+
// Sections whose names are valid C identifiers can be referenced via
36+
// __start_<name>/__stop_<name> symbols, which the linker synthesizes.
37+
// Such sections must be kept alive only if such a marker symbol is
38+
// referenced from a live section. This map lets us find all sections
39+
// of a given name when we encounter such a reference during marking.
40+
template <typename E>
41+
using StartStopMap =
42+
tbb::concurrent_unordered_multimap<std::string_view, InputSection<E> *>;
43+
44+
template <typename E>
45+
static StartStopMap<E> build_start_stop_map(Context<E> &ctx) {
46+
StartStopMap<E> map;
47+
tbb::parallel_for_each(ctx.objs, [&](ObjectFile<E> *file) {
48+
for (std::unique_ptr<InputSection<E>> &isec : file->sections)
49+
if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC) &&
50+
is_c_identifier(isec->name()))
51+
map.insert({isec->name(), isec.get()});
52+
});
53+
return map;
3354
}
3455

3556
template <typename E>
@@ -96,17 +117,26 @@ collect_root_set(Context<E> &ctx) {
96117
return rootset;
97118
}
98119

120+
// Extracts the section name out of a start/stop marker symbol name.
//
// If `sym` is of the form __start_<name> or __stop_<name>, returns
// <name> as a view into `sym`. Otherwise returns an empty view. A bare
// "__start_" or "__stop_" also yields an empty view, so callers can use
// emptiness as the "not a marker symbol" test.
static std::string_view start_stop_name(std::string_view sym) {
  // The offset to strip is derived from each prefix itself so that the
  // literal and its length can never get out of sync.
  constexpr std::string_view prefixes[] = {"__start_", "__stop_"};

  for (std::string_view prefix : prefixes)
    if (sym.substr(0, prefix.size()) == prefix)
      return sym.substr(prefix.size());
  return "";
}
127+
99128
template <typename E>
100129
static void visit(Context<E> &ctx, InputSection<E> *isec,
101-
tbb::feeder<InputSection<E> *> &feeder, i64 depth) {
130+
tbb::feeder<InputSection<E> *> &feeder, i64 depth,
131+
const StartStopMap<E> &start_stop_map) {
102132
assert(isec->is_visited);
103133

104134
// Mark a section alive. For better performance, we don't call
105135
// `feeder.add` too often.
106136
auto mark = [&](InputSection<E> *sec) {
107137
if (mark_section(sec)) {
108138
if (depth < 3)
109-
visit(ctx, sec, feeder, depth + 1);
139+
visit(ctx, sec, feeder, depth + 1, start_stop_map);
110140
else
111141
feeder.add(sec);
112142
}
@@ -123,10 +153,21 @@ static void visit(Context<E> &ctx, InputSection<E> *isec,
123153
for (const ElfRel<E> &rel : isec->get_rels(ctx)) {
124154
// Symbol can refer to either a section fragment or an input section.
125155
Symbol<E> &sym = *isec->file.symbols[rel.r_sym];
126-
if (SectionFragment<E> *frag = sym.get_frag())
156+
if (SectionFragment<E> *frag = sym.get_frag()) {
127157
frag->is_alive = true;
128-
else
129-
mark(sym.get_input_section());
158+
continue;
159+
}
160+
161+
mark(sym.get_input_section());
162+
163+
// A reference to __start_<name> or __stop_<name> keeps every section
164+
// named <name> alive, mirroring how those symbols are defined.
165+
if (std::string_view sec = start_stop_name(sym.name());
166+
!sec.empty()) {
167+
auto [i, end] = start_stop_map.equal_range(sec);
168+
for (; i != end; ++i)
169+
mark(i->second);
170+
}
130171
}
131172

132173
if constexpr (is_arm32<E>)
@@ -136,12 +177,13 @@ static void visit(Context<E> &ctx, InputSection<E> *isec,
136177
// Mark all reachable sections
137178
template <typename E>
138179
static void mark(Context<E> &ctx,
139-
tbb::concurrent_vector<InputSection<E> *> &rootset) {
180+
tbb::concurrent_vector<InputSection<E> *> &rootset,
181+
const StartStopMap<E> &start_stop_map) {
140182
Timer t(ctx, "mark");
141183

142184
tbb::parallel_for_each(rootset, [&](InputSection<E> *isec,
143185
tbb::feeder<InputSection<E> *> &feeder) {
144-
visit(ctx, isec, feeder, 0);
186+
visit(ctx, isec, feeder, 0, start_stop_map);
145187
});
146188
}
147189

@@ -190,7 +232,8 @@ template <typename E>
190232
// Entry point of section garbage collection: collect the root set, index
// the C-identifier-named sections by name, mark what is reachable from
// the roots, then sweep.
void gc_sections(Context<E> &ctx) {
  Timer t(ctx, "gc");

  tbb::concurrent_vector<InputSection<E> *> roots = collect_root_set(ctx);
  const StartStopMap<E> sections_by_name = build_start_stop_map(ctx);

  mark(ctx, roots, sections_by_name);
  sweep(ctx);
}
196239

test/gc-sections-start-stop.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
. $(dirname $0)/common.inc

# With --gc-sections, a section named like a C identifier must be
# discarded when neither its contents nor its __start_/__stop_ marker
# symbols are referenced from a live section. Here "foo" is unreferenced,
# while "bar" is reached both directly and through its marker symbols.
$CC -c -o $t/a.o -xc - <<EOF
__attribute__((section("foo"))) int dead_data = 42;
__attribute__((section("bar"))) int live_data = 99;

extern int __start_bar[];
extern int __stop_bar[];

int main() {
  return __stop_bar - __start_bar + live_data;
}
EOF

$CC -B. -o $t/exe $t/a.o -Wl,-gc-sections
readelf --symbols $t/exe > $t/log

# The unreferenced section's symbol must be gone.
not grep dead_data $t/log

# The referenced section and both of its marker symbols must remain.
for sym in live_data __start_bar __stop_bar; do
  grep $sym $t/log
done

0 commit comments

Comments
 (0)