-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdetector.go
More file actions
475 lines (419 loc) · 14.9 KB
/
detector.go
File metadata and controls
475 lines (419 loc) · 14.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
package resurgo
import (
"cmp"
"debug/elf"
"fmt"
"io"
"slices"
"golang.org/x/arch/arm64/arm64asm"
"golang.org/x/arch/x86/x86asm"
)
// Confidence levels, listed from the most reliable to the least reliable.
const (
	ConfidenceHigh   Confidence = "high"
	ConfidenceMedium Confidence = "medium"
	ConfidenceLow    Confidence = "low"
	ConfidenceNone   Confidence = "none"
)

// Byte encodings of the CET indirect-branch-tracking markers.
//
// ENDBR64 is the four-byte sequence F3 0F 1E FA (endbr64Byte0..3). ENDBR32
// shares the first three bytes and ends with 0xFB (endbr32Byte3). These
// markers appear at function entries on binaries compiled with
// -fcf-protection=branch.
const (
	endbr64Byte0 byte = 0xF3
	endbr64Byte1 byte = 0x0F
	endbr64Byte2 byte = 0x1E
	endbr64Byte3 byte = 0xFA
	endbr32Byte3 byte = 0xFB
)
// endbr64Bytes holds the ENDBR64 encoding as a 4-element array, which makes
// it directly comparable against another [4]byte value. It is assembled from
// the per-byte constants so that the encoding is spelled out only once.
var endbr64Bytes = [...]byte{
	endbr64Byte0,
	endbr64Byte1,
	endbr64Byte2,
	endbr64Byte3,
}
// Confidence represents the reliability level of a detected function candidate.
// It is a string type; the levels declared in this file are ConfidenceHigh
// ("high"), ConfidenceMedium ("medium"), ConfidenceLow ("low") and
// ConfidenceNone ("none").
type Confidence string
// DetectionType represents the signal or combination of signals that
// produced a function candidate. It is a string type whose values are
// declared outside this section; DetectFunctions assigns
// DetectionPrologueOnly, DetectionPrologueCallSite, DetectionCallTarget,
// DetectionJumpTarget and DetectionAlignedEntry.
type DetectionType string
// FunctionCandidate represents a potential function entry point detected
// through one or more signals (prologue matching, call-site analysis,
// boundary analysis, or CFI).
//
// The struct tags define the JSON field names. Fields tagged omitempty
// (PrologueType, CalledFrom, JumpedFrom) are left out of the JSON encoding
// when they are empty.
type FunctionCandidate struct {
	// Address is the virtual address of the function entry point.
	Address uint64 `json:"address"`
	// DetectionType is the signal or combination of signals that produced
	// this candidate.
	DetectionType DetectionType `json:"detection_type"`
	// PrologueType is the matched prologue pattern, if any. It is left at
	// its zero value for candidates that were not found by prologue matching.
	PrologueType PrologueType `json:"prologue_type,omitempty"`
	// CalledFrom holds the virtual addresses of instructions that call this
	// candidate directly.
	CalledFrom []uint64 `json:"called_from,omitempty"`
	// JumpedFrom holds the virtual addresses of instructions that jump to
	// this candidate.
	JumpedFrom []uint64 `json:"jumped_from,omitempty"`
	// Confidence is the reliability level of this candidate.
	Confidence Confidence `json:"confidence"`
}
// isENDBR reports whether the 4 bytes at code[i:i+4] encode an ENDBR64
// (F3 0F 1E FA) or ENDBR32 (F3 0F 1E FB) instruction.
//
// It returns false, without panicking, when i is negative or when fewer
// than four bytes are available starting at i.
//
// golang.org/x/arch/x86/x86asm does not recognise these CET instructions,
// so callers must skip them explicitly before invoking the decoder.
func isENDBR(code []byte, i int) bool {
	// Compare against len(code)-4 instead of computing i+4 so that the
	// bounds check cannot overflow; a negative right-hand side (short
	// input) is rejected because i is known to be non-negative here.
	if i < 0 || i > len(code)-4 {
		return false
	}
	window := code[i : i+4]
	return window[0] == endbr64Byte0 &&
		window[1] == endbr64Byte1 &&
		window[2] == endbr64Byte2 &&
		(window[3] == endbr64Byte3 || window[3] == endbr32Byte3)
}
// DetectFunctions combines prologue detection, call site analysis, and
// alignment-based boundary detection to identify function entry points.
// Functions detected by multiple methods receive higher confidence ratings.
//
// code holds the raw machine code, baseAddr is the virtual address of
// code[0], and arch selects the architecture-specific detectors. An error
// from DetectPrologues or DetectCallSites is wrapped and returned together
// with a nil slice.
//
// Candidates are classified as follows:
//   - prologue only: DetectionPrologueOnly, ConfidenceMedium
//   - prologue that is also a call/jump target: DetectionPrologueCallSite,
//     ConfidenceHigh
//   - call or jump target without a prologue: DetectionCallTarget or
//     DetectionJumpTarget, ConfidenceMedium
//   - aligned entry with no other signal: DetectionAlignedEntry,
//     ConfidenceLow
//
// The returned slice is sorted by ascending address.
func DetectFunctions(code []byte, baseAddr uint64, arch Arch) ([]FunctionCandidate, error) {
	prologues, err := DetectPrologues(code, baseAddr, arch)
	if err != nil {
		return nil, fmt.Errorf("failed to detect prologues: %w", err)
	}

	edges, err := DetectCallSites(code, baseAddr, arch)
	if err != nil {
		return nil, fmt.Errorf("failed to detect call sites: %w", err)
	}

	// Candidates are keyed by entry address so that signals pointing at the
	// same address are merged into a single record.
	candidates := make(map[uint64]*FunctionCandidate, len(prologues))

	// Seed the map with prologue-based candidates.
	for _, p := range prologues {
		candidates[p.Address] = &FunctionCandidate{
			Address:       p.Address,
			DetectionType: DetectionPrologueOnly,
			PrologueType:  p.Type,
			Confidence:    ConfidenceMedium, // Upgraded below if also a call/jump target.
		}
	}

	// Process call site edges - include both high-confidence (direct calls)
	// and medium-confidence (unconditional jumps, which may be tail calls).
	for _, edge := range edges {
		if edge.Confidence != ConfidenceHigh && edge.Confidence != ConfidenceMedium {
			continue
		}
		isCall := edge.Type == CallSiteCall

		candidate, exists := candidates[edge.TargetAddr]
		if !exists {
			// First reference to an address that has no prologue.
			detType := DetectionCallTarget
			if edge.Type == CallSiteJump {
				detType = DetectionJumpTarget
			}
			fresh := &FunctionCandidate{
				Address:       edge.TargetAddr,
				DetectionType: detType,
				// Both slices start out non-nil, as they always have for
				// call-site-only candidates.
				CalledFrom: []uint64{},
				JumpedFrom: []uint64{},
				Confidence: ConfidenceMedium, // Call/jump target but no prologue.
			}
			if isCall {
				fresh.CalledFrom = append(fresh.CalledFrom, edge.SourceAddr)
			} else {
				fresh.JumpedFrom = append(fresh.JumpedFrom, edge.SourceAddr)
			}
			candidates[edge.TargetAddr] = fresh
			continue
		}

		// Record the referencing instruction on the existing candidate.
		if isCall {
			candidate.CalledFrom = append(candidate.CalledFrom, edge.SourceAddr)
		} else {
			candidate.JumpedFrom = append(candidate.JumpedFrom, edge.SourceAddr)
		}

		switch candidate.DetectionType {
		case DetectionPrologueOnly, DetectionPrologueCallSite:
			// Prologue plus call/jump reference: two independent signals,
			// highest confidence.
			candidate.DetectionType = DetectionPrologueCallSite
			candidate.Confidence = ConfidenceHigh
		case DetectionJumpTarget:
			// No prologue here, so a repeated reference must not be
			// reported as prologue-confirmed. A direct call is still
			// stronger evidence than a jump, so promote the label; this
			// also makes the outcome independent of edge order.
			if isCall {
				candidate.DetectionType = DetectionCallTarget
			}
		}
	}

	// Add alignment-based candidates for functions that have no prologue and
	// no call-site signal (e.g. pure-leaf functions with external linkage
	// that were never called due to inlining or compile-time evaluation).
	//
	// These receive ConfidenceLow because the pattern (ret + NOP padding →
	// 16-byte aligned address) is reliable for function separators but can
	// also match intra-function alignment at loop heads.
	var alignedEntries []uint64
	switch arch {
	case ArchAMD64:
		alignedEntries = detectAlignedEntriesAMD64(code, baseAddr)
	case ArchARM64:
		alignedEntries = detectAlignedEntriesARM64(code, baseAddr)
	}
	for _, addr := range alignedEntries {
		if _, exists := candidates[addr]; !exists {
			candidates[addr] = &FunctionCandidate{
				Address:       addr,
				DetectionType: DetectionAlignedEntry,
				Confidence:    ConfidenceLow,
			}
		}
	}

	// Post-filter applied in place on the candidate map (see
	// filterJumpTargetsByAnchorRange for the exact rule).
	filterJumpTargetsByAnchorRange(candidates)

	// Map iteration order is random, so flatten and sort by address to give
	// callers a deterministic result.
	result := make([]FunctionCandidate, 0, len(candidates))
	for _, candidate := range candidates {
		result = append(result, *candidate)
	}
	slices.SortFunc(result, func(a, b FunctionCandidate) int {
		return cmp.Compare(a.Address, b.Address)
	})
	return result, nil
}
// DetectFunctionsFromELF reads an ELF binary from r, runs DetectFunctions
// over its .text section, and then passes the candidates through a chain of
// filters. The architecture is taken from the ELF header; only x86-64 and
// AArch64 are accepted.
//
// Unless overridden through opts (e.g. WithFilters), the filter chain is
// CETFilter, EhFrameFilter, PLTFilter, applied in that order. PLTFilter is
// deliberately last so that PLT-section addresses reintroduced by
// EhFrameFilter (via FDE records for linker-generated stubs) are still
// removed.
//
// An error is returned when the ELF file cannot be parsed, has no .text
// section, the section cannot be read, the machine type is unsupported, or
// when detection or any filter fails.
func DetectFunctionsFromELF(r io.ReaderAt, opts ...Option) ([]FunctionCandidate, error) {
	cfg := &options{
		filters: []CandidateFilter{CETFilter, EhFrameFilter, PLTFilter},
	}
	for i := range opts {
		opts[i](cfg)
	}

	elfFile, err := elf.NewFile(r)
	if err != nil {
		return nil, fmt.Errorf("failed to parse ELF file: %w", err)
	}
	defer elfFile.Close()

	text := elfFile.Section(".text")
	if text == nil {
		return nil, fmt.Errorf("no .text section found")
	}

	// io.EOF from Data is tolerated; whatever bytes were returned are used.
	code, err := text.Data()
	if err != nil && err != io.EOF {
		return nil, fmt.Errorf("failed to read .text section: %w", err)
	}

	var arch Arch
	switch machine := elfFile.Machine; machine {
	case elf.EM_X86_64:
		arch = ArchAMD64
	case elf.EM_AARCH64:
		arch = ArchARM64
	default:
		return nil, fmt.Errorf("unsupported ELF machine: %s", machine)
	}

	found, err := DetectFunctions(code, text.Addr, arch)
	if err != nil {
		return nil, err
	}

	// Each filter receives the output of the previous one.
	for i := range cfg.filters {
		kept, ferr := cfg.filters[i](found, elfFile)
		if ferr != nil {
			return nil, ferr
		}
		found = kept
	}
	return found, nil
}
// DetectPrologues scans raw machine code for function prologues.
//
// code is the byte sequence to analyse, baseAddr is the virtual address of
// its first byte, and arch picks the architecture-specific scanner
// (ArchAMD64 or ArchARM64). Any other arch value yields an error.
// The function performs no I/O and is independent of the binary format.
func DetectPrologues(code []byte, baseAddr uint64, arch Arch) ([]Prologue, error) {
	if arch == ArchAMD64 {
		return detectProloguesAMD64(code, baseAddr)
	}
	if arch == ArchARM64 {
		return detectProloguesARM64(code, baseAddr)
	}
	return nil, fmt.Errorf("unsupported architecture: %s", arch)
}
// detectProloguesAMD64 walks code with a linear x86-64 disassembly and
// records every instruction (pair) that matches one of four prologue
// patterns. baseAddr is the virtual address of code[0]. The returned error
// is always nil.
//
// Bytes that fail to decode are skipped one at a time and reset the
// "previous instruction" state. ENDBR64/ENDBR32 markers are stepped over
// without touching that state.
func detectProloguesAMD64(code []byte, baseAddr uint64) ([]Prologue, error) {
	var found []Prologue

	// prev is the last successfully decoded instruction; it is only
	// meaningful while havePrev is true.
	var (
		prev     x86asm.Inst
		havePrev bool
	)

	for pos := 0; pos < len(code); {
		// golang.org/x/arch/x86/x86asm does not recognise the CET
		// instructions ENDBR64 / ENDBR32. They sit at function entries on
		// binaries compiled with -fcf-protection and are transparent to
		// prologue detection, so prev is intentionally left as it is.
		if isENDBR(code, pos) {
			pos += 4
			continue
		}

		cur, err := x86asm.Decode(code[pos:], 64)
		if err != nil {
			havePrev = false
			pos++
			continue
		}

		here := baseAddr + uint64(pos)
		// True when nothing decodable precedes cur or when cur directly
		// follows a RET.
		atBoundary := !havePrev || prev.Op == x86asm.RET

		// The four patterns key on distinct opcodes of cur, so at most one
		// of them can match per instruction.
		switch cur.Op {
		case x86asm.MOV:
			// Pattern 1: Classic frame pointer setup - push rbp; mov rbp, rsp
			pushedRBP := havePrev && prev.Op == x86asm.PUSH && prev.Args[0] == x86asm.RBP
			if pushedRBP && cur.Args[0] == x86asm.RBP && cur.Args[1] == x86asm.RSP {
				found = append(found, Prologue{
					Address:      here - uint64(prev.Len),
					Type:         PrologueClassic,
					Instructions: "push rbp; mov rbp, rsp",
				})
			}

		case x86asm.SUB:
			// Pattern 2: No-frame-pointer function - sub rsp, imm
			if cur.Args[0] == x86asm.RSP {
				imm, isImm := cur.Args[1].(x86asm.Imm)
				if isImm && imm > 0 && (atBoundary || prev.Op == x86asm.PUSH) {
					found = append(found, Prologue{
						Address:      here,
						Type:         PrologueNoFramePointer,
						Instructions: fmt.Sprintf("sub rsp, 0x%x", int64(imm)),
					})
				}
			}

		case x86asm.PUSH:
			// Pattern 3: Push callee-saved register at function boundary
			reg, isReg := cur.Args[0].(x86asm.Reg)
			if isReg && isCalleeSavedAMD64(reg) && atBoundary {
				found = append(found, Prologue{
					Address:      here,
					Type:         ProloguePushOnly,
					Instructions: fmt.Sprintf("push %s", reg),
				})
			}

		case x86asm.LEA:
			// Pattern 4: Stack allocation with lea - lea rsp, [rsp-imm]
			if cur.Args[0] == x86asm.RSP && atBoundary {
				found = append(found, Prologue{
					Address:      here,
					Type:         PrologueLEABased,
					Instructions: "lea rsp, [rsp-offset]",
				})
			}
		}

		prev, havePrev = cur, true
		pos += cur.Len
	}

	return found, nil
}
// isCalleeSavedAMD64 reports whether reg is one of RBX, RBP, R12, R13, R14
// or R15, the registers this detector treats as callee-saved when looking
// for push-based prologues.
func isCalleeSavedAMD64(reg x86asm.Reg) bool {
	return reg == x86asm.RBX ||
		reg == x86asm.RBP ||
		reg == x86asm.R12 ||
		reg == x86asm.R13 ||
		reg == x86asm.R14 ||
		reg == x86asm.R15
}
// isSTPx29x30PreIndex reports whether inst is the ARM64 instruction
// stp x29, x30, [sp, #-N]!, i.e. an STP whose first two operands are the
// registers X29 and X30 and whose third operand is an immediate-offset
// memory reference using pre-index addressing.
func isSTPx29x30PreIndex(inst arm64asm.Inst) bool {
	if inst.Op != arm64asm.STP {
		return false
	}
	if first, ok := inst.Args[0].(arm64asm.Reg); !ok || first != arm64asm.X29 {
		return false
	}
	if second, ok := inst.Args[1].(arm64asm.Reg); !ok || second != arm64asm.X30 {
		return false
	}
	target, ok := inst.Args[2].(arm64asm.MemImmediate)
	return ok && target.Mode == arm64asm.AddrPreIndex
}
// isMovX29SP reports whether inst is the ARM64 instruction mov x29, sp.
// The disassembler represents both operands of this form as RegSP values,
// so both are matched through that type.
func isMovX29SP(inst arm64asm.Inst) bool {
	if inst.Op != arm64asm.MOV {
		return false
	}
	dst, dstOK := inst.Args[0].(arm64asm.RegSP)
	if !dstOK || dst != arm64asm.RegSP(arm64asm.X29) {
		return false
	}
	src, srcOK := inst.Args[1].(arm64asm.RegSP)
	return srcOK && src == arm64asm.RegSP(arm64asm.SP)
}
// detectProloguesARM64 decodes code as a sequence of fixed-width 4-byte
// ARM64 instructions and records every match of the prologue patterns
// below. baseAddr is the virtual address of code[0]. Trailing bytes that do
// not form a complete 4-byte word are ignored. The returned error is always
// nil.
//
// A word that fails to decode is skipped and resets the "previous
// instruction" state. Note that the STP patterns are evaluated one
// instruction late (when the instruction after the STP is decoded), so an
// STP is reported only if the following word decodes successfully.
func detectProloguesARM64(code []byte, baseAddr uint64) ([]Prologue, error) {
	var result []Prologue

	const insnLen = 4
	var prevInsn *arm64asm.Inst

	for offset := 0; offset+insnLen <= len(code); offset += insnLen {
		inst, err := arm64asm.Decode(code[offset : offset+insnLen])
		if err != nil {
			prevInsn = nil
			continue
		}
		addr := baseAddr + uint64(offset)

		if prevInsn != nil && isSTPx29x30PreIndex(*prevInsn) {
			// The prologue starts at the STP, one instruction before addr.
			if isMovX29SP(inst) {
				// Pattern 1: STP frame pair - stp x29, x30, [sp, #-N]! ; mov x29, sp
				result = append(result, Prologue{
					Address:      addr - insnLen,
					Type:         PrologueSTPFramePair,
					Instructions: "stp x29, x30, [sp, #-N]!; mov x29, sp",
				})
			} else {
				// Pattern 3: STP-only - stp x29, x30, [sp, #-N]! without mov x29, sp
				result = append(result, Prologue{
					Address:      addr - insnLen,
					Type:         PrologueSTPOnly,
					Instructions: "stp x29, x30, [sp, #-N]!",
				})
			}
		}

		// Pattern 2: STR LR pre-index - str x30, [sp, #-N]! (Go-style prologue)
		if inst.Op == arm64asm.STR {
			if r0, ok := inst.Args[0].(arm64asm.Reg); ok && r0 == arm64asm.X30 {
				if mem, ok := inst.Args[1].(arm64asm.MemImmediate); ok && mem.Mode == arm64asm.AddrPreIndex {
					if prevInsn == nil || prevInsn.Op == arm64asm.RET {
						result = append(result, Prologue{
							Address:      addr,
							Type:         PrologueSTRLRPreIndex,
							Instructions: fmt.Sprintf("str x30, %s", inst.Args[1]),
						})
					}
				}
			}
		}

		// Pattern 4: Sub SP - sub sp, sp, #N (stack allocation without frame pointer)
		if inst.Op == arm64asm.SUB {
			if dst, ok := inst.Args[0].(arm64asm.RegSP); ok && dst == arm64asm.RegSP(arm64asm.SP) {
				if src, ok := inst.Args[1].(arm64asm.RegSP); ok && src == arm64asm.RegSP(arm64asm.SP) {
					if prevInsn == nil || prevInsn.Op == arm64asm.RET {
						result = append(result, Prologue{
							Address: addr,
							Type:    PrologueSubSP,
							// The operand's String method already renders
							// the leading '#' for immediates, so none is
							// added here (avoids "##0x20", and "#X16" for
							// the register form).
							Instructions: fmt.Sprintf("sub sp, sp, %s", inst.Args[2]),
						})
					}
				}
			}
		}

		prevInsn = &inst
	}

	return result, nil
}
// DetectProloguesFromELF reads an ELF binary from r and returns the function
// prologues found in its .text section. The scanner is chosen from the
// machine type in the ELF header; only x86-64 and AArch64 are accepted.
//
// An error is returned when the ELF file cannot be parsed, has no .text
// section, the section cannot be read, or the machine type is unsupported.
func DetectProloguesFromELF(r io.ReaderAt) ([]Prologue, error) {
	elfFile, err := elf.NewFile(r)
	if err != nil {
		return nil, fmt.Errorf("failed to parse ELF file: %w", err)
	}
	defer elfFile.Close()

	text := elfFile.Section(".text")
	if text == nil {
		return nil, fmt.Errorf("no .text section found")
	}

	// io.EOF from Data is tolerated; whatever bytes were returned are used.
	code, err := text.Data()
	if err != nil && err != io.EOF {
		return nil, fmt.Errorf("failed to read .text section: %w", err)
	}

	// Select the architecture-specific scanner first, then run it once.
	var scan func([]byte, uint64) ([]Prologue, error)
	switch machine := elfFile.Machine; machine {
	case elf.EM_X86_64:
		scan = detectProloguesAMD64
	case elf.EM_AARCH64:
		scan = detectProloguesARM64
	default:
		return nil, fmt.Errorf("unsupported ELF machine: %s", machine)
	}
	return scan(code, text.Addr)
}