Skip to content

Commit 84da810

Browse files
committed
feat: implement function cloning for arm64
1 parent ad3f993 commit 84da810

File tree

8 files changed

+125
-19
lines changed

8 files changed

+125
-19
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ It's 5:00 PM somewhere
3636
| Linux | amd64 | Full | |
3737
| Windows | amd64 | Full | |
3838
| Darwin (macOS) | amd64 | Full | |
39-
| Linux | arm64 | Partial | `redefine.Func` and `redefine.Restore` work |
39+
| Linux | arm64 | Full | |
4040
| FreeBSD | amd64 | Untested | Compiles but untested |
4141
| OpenBSD | amd64 | Untested | Compiles but untested |
4242
| NetBSD | amd64 | Untested | Compiles but untested |
43-
| Windows | arm64 | Untested | No build environment available |
43+
| Windows | arm64 | Untested | |
4444
| Darwin (macOS) | arm64 | Broken | `mprotect` returns EACCES |
4545

4646
## FAQ

asm_amd64.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ const (
1919
opcodeJMP = 0xe9 // JMP rel32
2020
)
2121

22+
// The maximum acceptable distance from the text and data segments.
23+
const maxCloneDistance = math.MaxInt32
24+
2225
func insertJump(buf []byte, dest uintptr) error {
2326
const instructionSize = 5 // 1 byte opcode + 4 byte address
2427

@@ -45,12 +48,10 @@ func insertJump(buf []byte, dest uintptr) error {
4548
}
4649

4750
// relocateFunc copies machine instructions from src into dest translating
48-
// relative instructions as it goes. dest must be larger than src.
51+
// relative instructions as it goes. dest must be at least as large as src.
4952
//
5053
// The data underlying the slices is assumed to be the same address the code
5154
// would execute from.
52-
//
53-
// The dest slice is returned after being resized.
5455
func relocateFunc(src, dest []byte) ([]byte, error) {
5556
dest = dest[:len(src)]
5657

asm_arm64.go

Lines changed: 113 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,38 @@
11
package redefine
22

33
import (
4+
"bytes"
45
"encoding/binary"
6+
"encoding/hex"
57
"errors"
8+
"fmt"
69
"unsafe"
10+
11+
"golang.org/x/arch/arm64/arm64asm"
12+
)
13+
14+
const (
15+
// _B holds the opcode bits of the unconditional branch that insertJump
// emits. The low 26 bits carry the branch offset, counted in 4-byte words:
// -----------------------------------
16+
// | 000101 | ... 26 bit address ... |
17+
// -----------------------------------
18+
_B = uint32(5 << 26)
19+
20+
// _BL is _B with bit 31 set. fixPCRelAddress uses it to re-encode BL
// instructions, which carry the same 26-bit offset field:
// -----------------------------------
21+
// | 100101 | ... 26 bit address ... |
22+
// -----------------------------------
23+
_BL = uint32(1<<31 | _B)
24+
25+
// ADR/ADRP is encoded as:
26+
// --------------------------------------------------
27+
// | P | lo 2 bits | 10000 | hi 19 bits | 5-bit reg |
28+
// --------------------------------------------------
29+
// Mask selecting the address bits (lo 2 bits at 30:29, hi 19 bits at 23:5):
30+
adrAddressMask = uint32(3<<29 | 0x7ffff<<5)
731
)
832

33+
// The maximum acceptable distance from the text and data segments.
// 128 MiB is 2^27 bytes: the magnitude that a signed 26-bit offset counted in
// 4-byte words (the field insertJump fills in) can express.
34+
const maxCloneDistance = 128 * 1024 * 1024
35+
936
func insertJump(buf []byte, dest uintptr) error {
1037
if len(buf) < 4 {
1138
return errors.New("buffer too small")
@@ -14,11 +41,7 @@ func insertJump(buf []byte, dest uintptr) error {
1441
addr := uintptr(unsafe.Pointer(unsafe.SliceData(buf)))
1542
offset := int32(dest - addr)
1643

17-
// Encode the instruction:
18-
// -----------------------------------
19-
// | 000101 | ... 26 bit address ... |
20-
// -----------------------------------
21-
inst := (5 << 26) | (uint32(offset>>2) & (1<<26 - 1))
44+
inst := _B | (uint32(offset>>2) & (1<<26 - 1))
2245
binary.LittleEndian.PutUint32(buf, inst)
2346

2447
// Pad the rest of the buffer with nulls
@@ -28,3 +51,88 @@ func insertJump(buf []byte, dest uintptr) error {
2851

2952
return nil
3053
}
54+
55+
// relocateFunc copies machine instructions from src into dest translating
56+
// relative instructions as it goes. dest must be at least as large as src.
57+
//
58+
// The data underlying the slices is assumed to be the same address the code
59+
// would execute from.
60+
func relocateFunc(src, dest []byte) ([]byte, error) {
61+
dest = dest[:len(src)]
62+
copy(dest, src)
63+
64+
srcPC := uintptr(unsafe.Pointer(unsafe.SliceData(src)))
65+
66+
for i := 0; i < len(src); i += 4 {
67+
srcPC += 4
68+
raw := dest[i : i+4]
69+
70+
instruction, err := arm64asm.Decode(raw)
71+
if err != nil {
72+
// Stop if the bad instruction was padding
73+
if bytes.Equal(raw, []byte{0, 0, 0, 0}) {
74+
break
75+
}
76+
return nil, fmt.Errorf("decode error at offset %d %v: %w", i, raw, err)
77+
}
78+
79+
for _, arg := range instruction.Args {
80+
if _, ok := arg.(arm64asm.PCRel); ok {
81+
err = fixPCRelAddress(instruction, srcPC, raw)
82+
if err != nil {
83+
return nil, err
84+
}
85+
}
86+
}
87+
}
88+
89+
return dest, nil
90+
}
91+
92+
func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
93+
destPC := uintptr(unsafe.Pointer(unsafe.SliceData(dest)))
94+
95+
switch inst.Op {
96+
case arm64asm.ADRP:
97+
// Get the offset (arm64asm converts it to bytes)
98+
oldOffset := int64(inst.Args[1].(arm64asm.PCRel))
99+
100+
// Calculate the offset in bytes, then divide it by 4096 to get it in pages
101+
newOffsetPages := uint32((int64(srcPC)+oldOffset-int64(destPC))>>12) + 1
102+
103+
encoded := binary.LittleEndian.Uint32(dest) &^ adrAddressMask
104+
encoded |= (newOffsetPages & 3) << 29 // Lowest 2 bits to bits 30 and 29
105+
encoded |= (newOffsetPages >> 2) << 5 // Highest 19 bits to bits 23 to 5
106+
binary.LittleEndian.PutUint32(dest, encoded)
107+
108+
case arm64asm.BL:
109+
oldOffset := int64(inst.Args[0].(arm64asm.PCRel))
110+
offset := int64(srcPC) + oldOffset - int64(destPC)
111+
binary.LittleEndian.PutUint32(dest, _BL|(uint32(offset>>2)&(1<<26-1)))
112+
113+
default:
114+
// Most PC-relative addresses are local. Go only seems to
115+
// generate ADRP and BL that are external to the function.
116+
}
117+
118+
return nil
119+
}
120+
121+
func disassemble(code []byte) (string, error) {
122+
var buf bytes.Buffer
123+
124+
baseAddr := uintptr(unsafe.Pointer(unsafe.SliceData(code)))
125+
126+
for i := 0; i < len(code); i += 4 {
127+
var asm string
128+
instruction, err := arm64asm.Decode(code[i:])
129+
if err == nil {
130+
asm = instruction.String()
131+
} else {
132+
asm = "?"
133+
}
134+
fmt.Fprintf(&buf, "0x%08x\t%-20s\t%s\n", baseAddr+uintptr(i), hex.EncodeToString(code[i:i+4]), asm)
135+
}
136+
137+
return buf.String(), nil
138+
}

clone.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ func (a *allocator) init(startSize int) error {
7272
return err
7373
}
7474

75-
// The maximum acceptable distance from the text and data segments.
76-
// This is the value from amd64, arm64 will be less.
77-
const maxCloneDistance = math.MaxInt32
78-
7975
// The lowest address to consider for our cloned functions.
8076
const absMinAddress = 0x100000
8177

clone_fallback.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build !amd64
1+
//go:build !amd64 && !arm64
22

33
package redefine
44

clone_amd64.go renamed to clone_full.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build amd64
1+
//go:build amd64 || arm64
22

33
package redefine
44

@@ -25,6 +25,8 @@ func _cloneFunc[T any](fn T, originalCode []byte) (*clonedFunc[T], error) {
2525
return nil, err
2626
}
2727

28+
cacheflush(newCode)
29+
2830
//fmt.Println(disassemble(newCode))
2931

3032
// This seems too complicated. The idea is to take our newly allocated

clone_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build amd64
1+
//go:build amd64 || arm64
22

33
package redefine
44

doc.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
// is a fun experiment, but do not use it for production code.
66
//
77
// This project is fundamentally non-portable. OS/Arch support:
8-
// - Full support: Linux/amd64, Windows/amd64, Darwin/amd64
9-
// - Partial support: Linux/arm64 (redefine.Func and redefine.Restore work)
8+
// - Full support: Linux/amd64, Windows/amd64, Darwin/amd64, Linux/arm64
109
// - Might work (untested, but it compiles): FreeBSD/amd64, OpenBSD/amd64, NetBSD/amd64
1110
// - Also might work: Windows/arm64 (I lack a working build environment)
1211
// - Known broken: Darwin/arm64 (EACCES errors from mprotect)

0 commit comments

Comments
 (0)