@@ -22,6 +22,21 @@ const (
2222 // -----------------------------------
2323 _BL = uint32 (1 << 31 | _B )
2424
25+ // ----------------------------------------------
26+ // | 1101011000111111000000 | 5-bit reg | 00000 |
27+ // ----------------------------------------------
28+ _BLR = uint32 (0xd63f0000 )
29+
30+ // -----------------------------------------------------------
31+ // | 1-bit sf | 10100101 | 2-bit hw | 16-bit imm | 5-bit reg |
32+ // -----------------------------------------------------------
33+ _MOVZ = uint32 (0xd2800000 ) // sf is 1
34+
35+ // -----------------------------------------------------------
36+ // | 1-bit sf | 11100101 | 2-bit hw | 16-bit imm | 5-bit reg |
37+ // -----------------------------------------------------------
38+ _MOVK = uint32 (0xf2800000 ) // sf is 1
39+
2540 // ADR/ADRP is encoded as:
2641 // --------------------------------------------------
2742 // | P | lo 2 bits | 10000 | hi 19 bits | 5-bit reg |
@@ -30,8 +45,15 @@ const (
3045 adrAddressMask = uint32 (3 << 29 | 0x7ffff << 5 )
3146)
3247
33- // The maximum acceptable distance from the text and data segments.
34- const maxCloneDistance = 128 * 1024 * 1024
const (
	// scratchRegister is the number of the general-purpose register that
	// generated BL trampolines load the call target into before branching
	// through it (x16).
	scratchRegister = 16

	// idealCloneDistance is the preferred maximum distance between a cloned
	// function and its original: ideally clones sit within 128 MiB.
	//
	// NOTE(review): the value is currently 0 and the 128 MiB figure is
	// commented out; this looks like a temporary override and should be
	// confirmed before relying on it.
	idealCloneDistance = 0 // 128 * 1024 * 1024

	// maxCloneDistance is the hard limit. Anything inside the 4 GiB reach of
	// ADRP is acceptable because out-of-range BLs get a trampoline instead.
	maxCloneDistance = 4 << 30
)

// errAddressOutOfRange is the sentinel wrapped by relocation errors whose
// target cannot be reached from the new location. Match it with errors.Is.
var errAddressOutOfRange = errors.New("address out of range")
3557
3658func insertJump (buf []byte , dest uintptr ) error {
3759 if len (buf ) < 4 {
@@ -45,8 +67,7 @@ func insertJump(buf []byte, dest uintptr) error {
4567 return fmt .Errorf ("B target out of range: %d bytes exceeds 128MiB" , offset )
4668 }
4769
48- inst := _B | (uint32 (offset >> 2 ) & (1 << 26 - 1 ))
49- binary .LittleEndian .PutUint32 (buf , inst )
70+ encodeB (buf , uint32 (offset ))
5071
5172 // Pad the rest of the buffer with nulls
5273 for i := 4 ; i < len (buf ); i ++ {
@@ -62,6 +83,7 @@ func insertJump(buf []byte, dest uintptr) error {
6283// The data underlying the slices is assumed to be the same address the code
6384// would execute from.
6485func relocateFunc (src , dest []byte ) ([]byte , error ) {
86+ src = trimPadding (src )
6587 dest = dest [:len (src )]
6688 copy (dest , src )
6789
@@ -83,7 +105,15 @@ func relocateFunc(src, dest []byte) ([]byte, error) {
83105 if _ , ok := arg .(arm64asm.PCRel ); ok {
84106 err = fixPCRelAddress (instruction , srcPC , raw )
85107 if err != nil {
86- return nil , err
108+ if errors .Is (err , errAddressOutOfRange ) && instruction .Op == arm64asm .BL {
109+ var trErr error
110+ dest , trErr = makeBLTrampoline (instruction , srcPC , dest , i )
111+ if trErr != nil {
112+ return nil , fmt .Errorf ("unable to make trampoline: %w (original error: %w)" , trErr , err )
113+ }
114+ } else {
115+ return nil , err
116+ }
87117 }
88118 }
89119 }
@@ -93,6 +123,17 @@ func relocateFunc(src, dest []byte) ([]byte, error) {
93123 return dest , nil
94124}
95125
// trimPadding returns buf with its trailing zero padding removed.
//
// Padding is measured in 4-byte words, matching the 4-byte instructions this
// file encodes: while the last four bytes of the remaining slice are all
// zero, they are dropped. Scanning stops at the first word (counted from the
// end) that contains a non-zero byte, so an all-zero word in the middle of
// buf is kept and never truncates the code that follows it.
//
// The result aliases buf; nothing is copied. A buffer shorter than four
// bytes is returned unchanged.
func trimPadding(buf []byte) []byte {
	end := len(buf)
	for end >= 4 {
		word := buf[end-4 : end]
		if (word[0] | word[1] | word[2] | word[3]) != 0 {
			// First non-padding word from the end: everything before it is
			// real content, even if it contains zero words.
			break
		}
		end -= 4
	}

	return buf[:end]
}
136+
96137func fixPCRelAddress (inst arm64asm.Inst , srcPC uintptr , dest []byte ) error {
97138 destPC := uintptr (unsafe .Pointer (unsafe .SliceData (dest )))
98139
@@ -105,12 +146,12 @@ func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
105146 newOffsetPages := (int64 (srcPC &^uintptr (0xfff )) + oldOffset - int64 (destPC &^uintptr (0xfff ))) >> 12
106147
107148 if newOffsetPages < - (1 << 20 ) || newOffsetPages >= (1 << 20 ) {
108- return fmt .Errorf ("ADRP target out of range: %d pages exceeds 4GiB" , newOffsetPages )
149+ return fmt .Errorf ("%w: ADRP target out of range: %d pages exceeds 4GiB" , errAddressOutOfRange , newOffsetPages )
109150 }
110151
111152 p := uint32 (newOffsetPages )
112153 encoded := binary .LittleEndian .Uint32 (dest ) &^ adrAddressMask
113- encoded |= (p & 3 ) << 29 // Lowest 2 bits to bits 30 and 29
154+ encoded |= (p & 3 ) << 29 // Lowest 2 bits to bits 30 and 29
114155 encoded |= ((p >> 2 ) & 0x7ffff ) << 5 // Highest 19 bits to bits 23 to 5
115156 binary .LittleEndian .PutUint32 (dest , encoded )
116157
@@ -120,7 +161,7 @@ func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
120161
121162 // BL encodes a 26-bit signed instruction offset.
122163 if offset < - (1 << 27 ) || offset >= (1 << 27 ) {
123- return fmt .Errorf ("BL target out of range: %d bytes exceeds 128MiB" , offset )
164+ return fmt .Errorf ("%w: BL target out of range: %d bytes exceeds 128MiB" , errAddressOutOfRange , offset )
124165 }
125166
126167 binary .LittleEndian .PutUint32 (dest , _BL | (uint32 (offset >> 2 )& (1 << 26 - 1 )))
@@ -133,6 +174,57 @@ func fixPCRelAddress(inst arm64asm.Inst, srcPC uintptr, dest []byte) error {
133174 return nil
134175}
135176
177+ func makeBLTrampoline (inst arm64asm.Inst , srcPC uintptr , dest []byte , blOffset int ) ([]byte , error ) {
178+ if cap (dest )- len (dest ) < 24 {
179+ return nil , errors .New ("destination is too small for BL trampoline" )
180+ }
181+ origLen := len (dest )
182+ dest = dest [:len (dest )+ 24 ]
183+
184+ //destPC := uintptr(unsafe.Pointer(unsafe.SliceData(dest))) + uintptr(blOffset)
185+ blrTarget := uintptr (int64 (srcPC ) + int64 (inst .Args [0 ].(arm64asm.PCRel )))
186+
187+ // Encode the trampoline itself.
188+ // It uses 4 instructions to store a 64-bit number in x16, then calls BLR x16.
189+ trampoline := dest [origLen :]
190+ encodeMov (trampoline , true , 0 , uint16 (blrTarget ), scratchRegister )
191+ encodeMov (trampoline [4 :], false , 16 , uint16 (blrTarget >> 16 ), scratchRegister )
192+ encodeMov (trampoline [8 :], false , 32 , uint16 (blrTarget >> 32 ), scratchRegister )
193+ encodeMov (trampoline [12 :], false , 48 , uint16 (blrTarget >> 48 ), scratchRegister )
194+ binary .LittleEndian .PutUint32 (trampoline [16 :], uint32 (_BLR | uint32 (scratchRegister << 5 )))
195+
196+ // Replace the original BL with a B to the beginning of the trampoline
197+ blAddr := uintptr (unsafe .Pointer (unsafe .SliceData (dest ))) + uintptr (blOffset )
198+ trampolineAddr := uintptr (unsafe .Pointer (unsafe .SliceData (trampoline )))
199+ encodeB (dest [blOffset :], uint32 (int32 (trampolineAddr )- int32 (blAddr )))
200+
201+ // The last instruction in the trampoline needs to jump back to the
202+ // instruction after the original BL
203+ encodeB (trampoline [20 :], uint32 (int32 (blAddr + 4 )- int32 (trampolineAddr + 20 )))
204+
205+ return dest , nil
206+ }
207+
208+ func encodeB (dest []byte , offset uint32 ) {
209+ inst := _B | (uint32 (offset >> 2 ) & 0x3ffffff )
210+ binary .LittleEndian .PutUint32 (dest , inst )
211+ }
212+
213+ func encodeMov (dest []byte , zero bool , lsl uint8 , imm uint16 , register uint8 ) {
214+ var mov uint32
215+ if zero {
216+ mov = _MOVZ
217+ } else {
218+ mov = _MOVK
219+ }
220+
221+ mov |= (uint32 (lsl >> 4 ) & 3 ) << 21
222+ mov |= uint32 (imm ) << 5
223+ mov |= uint32 (register & 0x1f )
224+
225+ binary .LittleEndian .PutUint32 (dest , mov )
226+ }
227+
136228func disassemble (code []byte ) (string , error ) {
137229 var buf bytes.Buffer
138230
0 commit comments