|
4 | 4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | 6 | // |
7 | | -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates |
| 7 | +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates |
8 | 8 | // |
9 | 9 | //===----------------------------------------------------------------------===// |
10 | 10 |
|
@@ -136,64 +136,91 @@ static void writeNBytes(int N, uint8_t *Loc, uint64_t Value) { |
136 | 136 | /// aligns the field value to the right position, selects bits from the |
137 | 137 | /// image and the patch field using a selection mask and writes the result |
138 | 138 | /// back to the section data. |
139 | | -class Patch { |
140 | | - // Workspace |
141 | | - uint64_t val[2]; |
| 139 | +class RelocationPatch { |
| 140 | + // Workspace (256-bit = 4x64-bit lanes) |
| 141 | + uint64_t val[4]; |
142 | 142 |
|
143 | 143 | public: |
144 | | - Patch() { |
| 144 | + RelocationPatch() { |
145 | 145 | val[0] = 0; |
146 | 146 | val[1] = 0; |
| 147 | + val[2] = 0; |
| 148 | + val[3] = 0; |
147 | 149 | } |
148 | | - Patch(uint64_t vl, uint64_t vh = 0) { |
| 150 | + RelocationPatch(uint64_t vl, uint64_t vh = 0) { |
149 | 151 | val[0] = vl; |
150 | 152 | val[1] = vh; |
| 153 | + val[2] = 0; |
| 154 | + val[3] = 0; |
151 | 155 | } |
152 | 156 |
|
153 | 157 | /// Construct a value from the \p n bytes in memory pointed to by \p loc
154 | | - Patch(int n, uint8_t *loc) { |
155 | | - assert(n <= 16); |
| 158 | + /// Widened to support up to 32 bytes (256 bits). |
| 159 | + RelocationPatch(int n, uint8_t *loc) { |
| 160 | + assert(n <= 32); |
156 | 161 | int i = 0; |
157 | 162 | while (n > 8) { |
158 | 163 | val[i++] = readNBytes(8, loc); |
159 | 164 | n -= 8; |
160 | 165 | loc += 8; |
161 | 166 | } |
162 | 167 | val[i] = readNBytes(n, loc); |
| 168 | + // Zero any remaining lanes |
| 169 | + while (++i < 4) |
| 170 | + val[i] = 0; |
163 | 171 | } |
164 | 172 |
|
165 | | - /// Shift \p *this left by \p shift bits |
166 | | - Patch operator<<(int shift) { |
167 | | - Patch r; |
168 | | - assert(shift < 64); |
169 | | - if (shift >= 64) { |
170 | | - r.val[1] = val[0] << (shift - 64); |
| 173 | + /// Shift \p *this left by \p shift bits (0 <= shift < 256) |
| 174 | + RelocationPatch operator<<(int shift) const { |
| 175 | + RelocationPatch r = *this; |
| 176 | + assert(shift < 256); |
| 177 | + assert(shift >= 0); |
| 178 | + if (shift == 0) { |
| 179 | + // Avoid UB: val[0] >> (64 - shift) would right-shift by 64 when shift==0. |
| 180 | + return r; |
| 181 | + } |
| 182 | + |
| 183 | + // First shift by words |
| 184 | + while (shift >= 64) { |
| 185 | + for (int i = 3; i > 0; i--) { |
| 186 | + r.val[i] = r.val[i - 1]; |
| 187 | + } |
171 | 188 | r.val[0] = 0; |
172 | | - } else { |
173 | | - r.val[0] = val[0] << shift; |
174 | | - r.val[1] = (val[0] >> (64 - shift)) | (val[1] << shift); |
| 189 | + shift -= 64; |
175 | 190 | } |
| 191 | + |
| 192 | + // Then shift remaining bits across words |
| 193 | + if (shift != 0) { |
| 194 | + const int rshift = 64 - shift; |
| 195 | + for (int i = 3; i > 0; i--) { |
| 196 | + r.val[i] = (r.val[i] << shift) | (r.val[i - 1] >> rshift); |
| 197 | + } |
| 198 | + r.val[0] = r.val[0] << shift; |
| 199 | + } |
| 200 | + |
176 | 201 | return r; |
177 | 202 | } |
178 | 203 |
|
179 | 204 | /// Patch \p size bits of \p field at position \p shift in the workspace |
180 | | - void patch(Patch field, uint32_t size, uint32_t shift) { |
| 205 | + void patch(RelocationPatch field, uint32_t size, uint32_t shift) { |
181 | 206 | assert(size <= 64); |
182 | 207 | // Create a mask of the field size |
183 | | - Patch mask(~(~uint64_t(0) << size)); |
| 208 | + RelocationPatch mask(~(size == 64 ? uint64_t(0) : ~uint64_t(0) << size)); |
184 | 209 |
|
185 | 210 | // Shift both into position |
186 | 211 | field = field << shift; |
187 | 212 | mask = mask << shift; |
188 | 213 |
|
189 | | - // Do the insertion |
| 214 | + // Do the insertion across all lanes |
190 | 215 | val[0] = (val[0] & ~mask.val[0]) | (field.val[0] & mask.val[0]); |
191 | 216 | val[1] = (val[1] & ~mask.val[1]) | (field.val[1] & mask.val[1]); |
| 217 | + val[2] = (val[2] & ~mask.val[2]) | (field.val[2] & mask.val[2]); |
| 218 | + val[3] = (val[3] & ~mask.val[3]) | (field.val[3] & mask.val[3]); |
192 | 219 | } |
193 | 220 |
|
194 | 221 | /// Write the \p n bytes patch back to memory at location \p loc |
195 | 222 | void write(int n, uint8_t *loc) { |
196 | | - assert(n <= 16); |
| 223 | + assert(n <= 32); |
197 | 224 | int i = 0; |
198 | 225 | while (n > 8) { |
199 | 226 | writeNBytes(8, loc, val[i++]); |
@@ -230,14 +257,14 @@ static void patchNBytes(uint32_t N, uint8_t *Loc, uint64_t V, uint32_t Hi, |
230 | 257 | assert(Pos + FieldSize <= BitSize); |
231 | 258 |
|
232 | 259 | // Read bytes to be patched in wide representation |
233 | | - Patch Image(N, Loc); |
| 260 | + RelocationPatch Image(N, Loc); |
234 | 261 |
|
235 | 262 | // Pos is the msb, the shift count needs the lsb |
236 | | - uint32_t Shift = BitSize - Pos - FieldSize; |
| 263 | + const uint32_t Shift = BitSize - Pos - FieldSize; |
237 | 264 |
|
238 | 265 | // Align field with bit 0, and put it in a wide representation. |
239 | 266 | // Excess high bits will be masked off when patching |
240 | | - Patch Field(V >> Lo); |
| 267 | + RelocationPatch Field(V >> Lo); |
241 | 268 |
|
242 | 269 | // Patch it |
243 | 270 | Image.patch(Field, FieldSize, Shift); |
|
0 commit comments