Skip to content

Commit 1aa1e96

Browse files
committed
fix: function cloning on Darwin
- On Darwin, use MAP_JIT when allocating memory for cloned functions and call `pthread_jit_write_protect_np()` as needed. - On arm64, try to reserve memory within 128 MiB of the text/data segments, but allow addresses up to 4 GiB away. When a BL address is more than 128 MiB away, insert a trampoline at the end of the function to branch to the fixed address. - On amd64, the search code still applies, but we only need to be within range of a 32-bit signed int.
1 parent 4e31eb5 commit 1aa1e96

14 files changed

+256
-50
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
name: Test
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
jobs:
10+
test:
11+
#if: ${{ 0 == 1}} # Disable for now since Darwin on arm64 is still broken
12+
name: Test Darwin (go ${{ matrix.go }}, ${{ matrix.host }})
13+
runs-on: ${{ matrix.host }}
14+
timeout-minutes: 10
15+
strategy:
16+
matrix:
17+
#go: ['1.25', '1.26']
18+
go: ['1.26']
19+
host: ['macos-latest']
20+
21+
steps:
22+
- name: Checkout code
23+
uses: actions/checkout@v4
24+
25+
- name: Setup Go
26+
uses: actions/setup-go@v5
27+
with:
28+
go-version: ${{ matrix.go }}
29+
cache: true
30+
31+
- name: Run tests (buildmode=exe)
32+
run: |
33+
go test -buildmode=exe -run 'TestCloneFunc'
34+
35+
- name: Run tests (buildmode=pie)
36+
run: |
37+
go test -buildmode=pie -run 'TestCloneFunc'

asm_amd64.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ const (
1919
opcodeJMP = 0xe9 // JMP rel32
2020
)
2121

22-
// The maximum acceptable distance from the text and data segments.
22+
// Cloned functions need to be within range of a signed 32-bit JMP.
23+
const idealCloneDistance = 0
2324
const maxCloneDistance = math.MaxInt32
2425

2526
func insertJump(buf []byte, dest uintptr) error {

asm_arm64.go

Lines changed: 100 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,21 @@ const (
2222
// -----------------------------------
2323
_BL = uint32(1<<31 | _B)
2424

25+
// ----------------------------------------------
26+
// | 1101011000111111000000 | 5-bit reg | 00000 |
27+
// ----------------------------------------------
28+
_BLR = uint32(0xd63f0000)
29+
30+
// -----------------------------------------------------------
31+
// | 1-bit sf | 10100101 | 2-bit hw | 16-bit imm | 5-bit reg |
32+
// -----------------------------------------------------------
33+
_MOVZ = uint32(0xd2800000) // sf is 1
34+
35+
// -----------------------------------------------------------
36+
// | 1-bit sf | 11100101 | 2-bit hw | 16-bit imm | 5-bit reg |
37+
// -----------------------------------------------------------
38+
_MOVK = uint32(0xf2800000) // sf is 1
39+
2540
// ADR/ADRP is encoded as:
2641
// --------------------------------------------------
2742
// | P | lo 2 bits | 10000 | hi 19 bits | 5-bit reg |
@@ -30,8 +45,15 @@ const (
3045
adrAddressMask = uint32(3<<29 | 0x7ffff<<5)
3146
)
3247

33-
// The maximum acceptable distance from the text and data segments.
34-
const maxCloneDistance = 128 * 1024 * 1024
48+
const scratchRegister = 16
49+
50+
// Ideally, cloned functions will be within 128 MiB of the original function.
51+
// But it's acceptable to be within the 4 GiB range for ADRP because there's code
52+
// to generate trampolines for BLs.
53+
const idealCloneDistance = 0 //128 * 1024 * 1024
54+
const maxCloneDistance = 4 * 1024 * 1024 * 1024
55+
56+
var errAddressOutOfRange = errors.New("address out of range")
3557

3658
func insertJump(buf []byte, dest uintptr) error {
3759
if len(buf) < 4 {
@@ -45,8 +67,7 @@ func insertJump(buf []byte, dest uintptr) error {
4567
return fmt.Errorf("B target out of range: %d bytes exceeds 128MiB", offset)
4668
}
4769

48-
inst := _B | (uint32(offset>>2) & (1<<26 - 1))
49-
binary.LittleEndian.PutUint32(buf, inst)
70+
encodeB(buf, uint32(offset))
5071

5172
// Pad the rest of the buffer with nulls
5273
for i := 4; i < len(buf); i++ {
@@ -62,6 +83,7 @@ func insertJump(buf []byte, dest uintptr) error {
6283
// The data underlying the slices is assumed to be the same address the code
6384
// would execute from.
6485
func relocateFunc(src, dest []byte) ([]byte, error) {
86+
src = trimPadding(src)
6587
dest = dest[:len(src)]
6688
copy(dest, src)
6789

@@ -83,7 +105,15 @@ func relocateFunc(src, dest []byte) ([]byte, error) {
83105
if _, ok := arg.(arm64asm.PCRel); ok {
84106
err = fixPCRelAddress(instruction, srcPC, raw)
85107
if err != nil {
86-
return nil, err
108+
if errors.Is(err, errAddressOutOfRange) && instruction.Op == arm64asm.BL {
109+
var trErr error
110+
dest, trErr = makeBLTrampoline(instruction, srcPC, dest, i)
111+
if trErr != nil {
112+
return nil, fmt.Errorf("unable to make trampoline: %w (original error: %w)", trErr, err)
113+
}
114+
} else {
115+
return nil, err
116+
}
87117
}
88118
}
89119
}
@@ -93,6 +123,17 @@ func relocateFunc(src, dest []byte) ([]byte, error) {
93123
return dest, nil
94124
}
95125

126+
// trimPadding returns buf with its trailing run of all-zero 4-byte words
// removed. Words are measured back from the end of buf, so a buffer shorter
// than 4 bytes is returned unchanged. The result shares buf's backing array.
//
// Only the zero words at the very end are dropped: scanning stops at the
// first non-zero word, so a zero word in the middle of the buffer never
// truncates the data that follows it.
func trimPadding(buf []byte) []byte {
	end := len(buf)
	for end >= 4 &&
		buf[end-4] == 0 && buf[end-3] == 0 && buf[end-2] == 0 && buf[end-1] == 0 {
		end -= 4
	}

	return buf[:end]
}
136+
96137
func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
97138
destPC := uintptr(unsafe.Pointer(unsafe.SliceData(dest)))
98139

@@ -105,12 +146,12 @@ func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
105146
newOffsetPages := (int64(srcPC&^uintptr(0xfff)) + oldOffset - int64(destPC&^uintptr(0xfff))) >> 12
106147

107148
if newOffsetPages < -(1<<20) || newOffsetPages >= (1<<20) {
108-
return fmt.Errorf("ADRP target out of range: %d pages exceeds 4GiB", newOffsetPages)
149+
return fmt.Errorf("%w: ADRP target out of range: %d pages exceeds 4GiB", errAddressOutOfRange, newOffsetPages)
109150
}
110151

111152
p := uint32(newOffsetPages)
112153
encoded := binary.LittleEndian.Uint32(dest) &^ adrAddressMask
113-
encoded |= (p & 3) << 29 // Lowest 2 bits to bits 30 and 29
154+
encoded |= (p & 3) << 29 // Lowest 2 bits to bits 30 and 29
114155
encoded |= ((p >> 2) & 0x7ffff) << 5 // Highest 19 bits to bits 23 to 5
115156
binary.LittleEndian.PutUint32(dest, encoded)
116157

@@ -120,7 +161,7 @@ func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
120161

121162
// BL encodes a 26-bit signed instruction offset.
122163
if offset < -(1<<27) || offset >= (1<<27) {
123-
return fmt.Errorf("BL target out of range: %d bytes exceeds 128MiB", offset)
164+
return fmt.Errorf("%w: BL target out of range: %d bytes exceeds 128MiB", errAddressOutOfRange, offset)
124165
}
125166

126167
binary.LittleEndian.PutUint32(dest, _BL|(uint32(offset>>2)&(1<<26-1)))
@@ -133,6 +174,57 @@ func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
133174
return nil
134175
}
135176

177+
func makeBLTrampoline(inst arm64asm.Inst, srcPC uintptr, dest []byte, blOffset int) ([]byte, error) {
178+
if cap(dest)-len(dest) < 24 {
179+
return nil, errors.New("destination is too small for BL trampoline")
180+
}
181+
origLen := len(dest)
182+
dest = dest[:len(dest)+24]
183+
184+
//destPC := uintptr(unsafe.Pointer(unsafe.SliceData(dest))) + uintptr(blOffset)
185+
blrTarget := uintptr(int64(srcPC) + int64(inst.Args[0].(arm64asm.PCRel)))
186+
187+
// Encode the trampoline itself.
188+
// It uses 4 instructions to store a 64-bit number in x16, then calls BLR x16.
189+
trampoline := dest[origLen:]
190+
encodeMov(trampoline, true, 0, uint16(blrTarget), scratchRegister)
191+
encodeMov(trampoline[4:], false, 16, uint16(blrTarget>>16), scratchRegister)
192+
encodeMov(trampoline[8:], false, 32, uint16(blrTarget>>32), scratchRegister)
193+
encodeMov(trampoline[12:], false, 48, uint16(blrTarget>>48), scratchRegister)
194+
binary.LittleEndian.PutUint32(trampoline[16:], uint32(_BLR|uint32(scratchRegister<<5)))
195+
196+
// Replace the original BL with a B to the beginning of the trampoline
197+
blAddr := uintptr(unsafe.Pointer(unsafe.SliceData(dest))) + uintptr(blOffset)
198+
trampolineAddr := uintptr(unsafe.Pointer(unsafe.SliceData(trampoline)))
199+
encodeB(dest[blOffset:], uint32(int32(trampolineAddr)-int32(blAddr)))
200+
201+
// The last instruction in the trampoline needs to jump back to the
202+
// instruction after the original BL
203+
encodeB(trampoline[20:], uint32(int32(blAddr+4)-int32(trampolineAddr+20)))
204+
205+
return dest, nil
206+
}
207+
208+
func encodeB(dest []byte, offset uint32) {
209+
inst := _B | (uint32(offset>>2) & 0x3ffffff)
210+
binary.LittleEndian.PutUint32(dest, inst)
211+
}
212+
213+
func encodeMov(dest []byte, zero bool, lsl uint8, imm uint16, register uint8) {
214+
var mov uint32
215+
if zero {
216+
mov = _MOVZ
217+
} else {
218+
mov = _MOVK
219+
}
220+
221+
mov |= (uint32(lsl>>4) & 3) << 21
222+
mov |= uint32(imm) << 5
223+
mov |= uint32(register & 0x1f)
224+
225+
binary.LittleEndian.PutUint32(dest, mov)
226+
}
227+
136228
func disassemble(code []byte) (string, error) {
137229
var buf bytes.Buffer
138230

clone.go

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func cloneFunc[T any](fn T) (*clonedFunc[T], error) {
2929
cloneAllocator.BeginMutate()
3030
defer cloneAllocator.EndMutate()
3131

32-
newCode, err := cloneAllocator.Allocate(len(originalCode))
32+
newCode, err := cloneAllocator.Allocate(len(originalCode) * 2)
3333
if err != nil {
3434
return nil, err
3535
}
@@ -83,7 +83,7 @@ func (a *allocator) init(startSize int) error {
8383
}
8484

8585
if protBE, ok := be.(malloc.ProtectedArenaBackend); ok {
86-
a.mprotect = protBE.Protect
86+
a.mprotect = mprotectHook(protBE.Protect)
8787
} else {
8888
// No real mprotect for some reason. This shouldn't
8989
// really happen, but continue with a no-op mprotect.
@@ -126,35 +126,71 @@ func initMallocBackend() (malloc.ArenaBackend, error) {
126126
// instructions that the original function used. There's often enough
127127
// space right before the text segment but that's not guaranteed
128128
// (particularly when buildmode=pie).
129+
// The distances are calculated from the furthest address in the
130+
// text/data segment so that every address in the reserved block can
131+
// reach every address in the target.
132+
133+
// If there's an ideal range for the architecture, try that first.
134+
if idealCloneDistance > 0 {
135+
// Search before text
136+
minAddress := end - idealCloneDistance
137+
if minAddress > end || minAddress < absMinAddress {
138+
minAddress = absMinAddress
139+
}
140+
be := tryBackendRange(size, minAddress, text-pageSize-size)
141+
if be != nil {
142+
return be, nil
143+
}
144+
145+
// Search after end
146+
maxAddress := text + idealCloneDistance - size
147+
if maxAddress < text {
148+
maxAddress = math.MaxUint
149+
}
150+
be = tryBackendRange(size, end, maxAddress)
151+
if be != nil {
152+
return be, nil
153+
}
154+
}
129155

130-
// The minimum acceptable address is where the first
131-
// instruction in the code segment can still reach the final
132-
// address before end. These are unsigned so watch for wrap-around.
156+
// Nothing in the ideal range, so search within the acceptable range
133157
minAddress := end - maxCloneDistance
134158
if minAddress > end || minAddress < absMinAddress {
135159
minAddress = absMinAddress
136160
}
137-
for addr := text - pageSize - size; addr >= minAddress; addr -= 0x100000 {
138-
be, err := malloc.VirtBackend(size, malloc.MmapAddr(addr), malloc.MmapProt(mprotectExec), malloc.MmapFlags(_MAP_FIXED_NOREPLACE))
139-
if err == nil {
140-
return be, nil
141-
}
161+
be := tryBackendRange(size, minAddress, text-pageSize-size)
162+
if be != nil {
163+
return be, nil
142164
}
143165

144-
// Nothing was found before the text segment, repeat the process for
145-
// the space after end.
146166
maxAddress := text + maxCloneDistance - size
147167
if maxAddress < text {
148168
maxAddress = math.MaxUint
149169
}
150-
for addr := end; addr <= maxAddress; addr += 0x100000 {
151-
be, err := malloc.VirtBackend(size, malloc.MmapAddr(addr), malloc.MmapProt(mprotectExec), malloc.MmapFlags(_MAP_FIXED_NOREPLACE))
170+
be = tryBackendRange(size, end, maxAddress)
171+
if be != nil {
172+
return be, nil
173+
}
174+
175+
// Well, we tried. We tried really hard. There's nothing left to do but
176+
// take whatever address the OS gives us.
177+
return malloc.VirtBackend(size, malloc.MmapAddr(minAddress), malloc.MmapProt(mprotectExec), malloc.MmapFlags(_MMAP_FLAGS))
178+
}
179+
180+
func tryBackendRange(size, minAddress, maxAddress uintptr) malloc.ArenaBackend {
181+
for addr := minAddress; addr <= maxAddress; addr += 0x100000 {
182+
be, err := malloc.VirtBackend(size, malloc.MmapAddr(addr), malloc.MmapProt(mprotectExec), malloc.MmapFlags(_MMAP_FLAGS))
152183
if err == nil {
153-
return be, nil
184+
if be.Addr() < minAddress || be.Addr() > maxAddress {
185+
// No good, try again.
186+
be.Release()
187+
} else {
188+
return be
189+
}
154190
}
155191
}
156192

157-
return nil, errors.New("no suitable virtual memory space found")
193+
return nil
158194
}
159195

160196
func (a *allocator) BeginMutate() error {

clone_mprotect.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
//go:build !(darwin && arm64)
2+
3+
package redefine
4+
5+
// mprotectHook returns the function used to change memory protection. This
// variant is built everywhere except darwin/arm64 and adds nothing: the
// supplied function is handed back unchanged.
func mprotectHook(inner func(int) error) func(int) error {
	// No extra work is needed around the protection change in this build.
	return inner
}

clone_mprotect_darwin.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//go:build darwin && arm64
2+
3+
package redefine
4+
5+
import "golang.org/x/sys/unix"
6+
7+
/*
8+
#include <pthread.h>
9+
*/
10+
import "C"
11+
12+
func mprotectHook(inner func(int) error) func(int) error {
13+
return func(prot int) error {
14+
if prot&unix.PROT_WRITE != 0 {
15+
C.pthread_jit_write_protect_np(0)
16+
} else {
17+
C.pthread_jit_write_protect_np(1)
18+
}
19+
20+
err := inner(prot)
21+
22+
// Restore write protection after an error
23+
if err != nil && prot&unix.PROT_WRITE != 0 {
24+
C.pthread_jit_write_protect_np(1)
25+
}
26+
27+
return err
28+
}
29+
}

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ module github.com/pboyd/redefine
33
go 1.25.0
44

55
require (
6-
github.com/pboyd/malloc v1.2.0
6+
github.com/pboyd/malloc v1.2.1
77
github.com/stretchr/testify v1.11.1
88
golang.org/x/arch v0.23.0
9+
golang.org/x/sys v0.41.0
910
)
1011

1112
require (
1213
github.com/davecgh/go-spew v1.1.1 // indirect
1314
github.com/pmezard/go-difflib v1.0.0 // indirect
1415
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa // indirect
15-
golang.org/x/sys v0.41.0 // indirect
1616
gopkg.in/yaml.v3 v3.0.1 // indirect
1717
)

0 commit comments

Comments
 (0)