Skip to content

Commit 3ca3e07

Browse files
lucylqfacebook-github-bot
authored andcommitted
Introduce ET_ALIGN_ALLOC for portable aligned alloc (pytorch#10660)
Summary: Pull Request resolved: pytorch#10660 Issue with aligned buffers: P1800967583 The alignment requested is 16, and std::max_align_t is also 16. This means we do not need to pad the size to meet any alignment. However, the buffer we get from malloc is aligned to 8, not 16. When we try to align the buffer, we overflow and error out. Seems like malloc is not guaranteed to return 8 or 16 byte-aligned buffers, so also a bit hard to test definitively. So far we've only seen this when the buffer size is small (size 2, 4) ``` The malloc(), calloc(), realloc(), and reallocarray() functions return a pointer to the allocated memory, which is suitably aligned for any type that fits into the requested size or less. ``` Use std::aligned_alloc (C++17) to ensure buffer is aligned. Reviewed By: larryliu0820, mcr229 Differential Revision: D74041198
1 parent 94f7b10 commit 3ca3e07

File tree

2 files changed

+102
-57
lines changed

2 files changed

+102
-57
lines changed

extension/data_loader/file_data_loader.cpp

+12-57
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,6 @@ namespace {
4949
static bool is_power_of_2(size_t value) {
5050
return value > 0 && (value & ~(value - 1)) == value;
5151
}
52-
53-
/**
54-
* Returns the next alignment for a given pointer.
55-
*/
56-
static uint8_t* align_pointer(void* ptr, size_t alignment) {
57-
intptr_t addr = reinterpret_cast<intptr_t>(ptr);
58-
if ((addr & (alignment - 1)) == 0) {
59-
// Already aligned.
60-
return reinterpret_cast<uint8_t*>(ptr);
61-
}
62-
// Bump forward.
63-
addr = (addr | (alignment - 1)) + 1;
64-
return reinterpret_cast<uint8_t*>(addr);
65-
}
6652
} // namespace
6753

6854
FileDataLoader::~FileDataLoader() {
@@ -129,13 +115,13 @@ namespace {
129115
/**
130116
* FreeableBuffer::FreeFn-compatible callback.
131117
*
132-
* `context` is actually a ptrdiff_t value (not a pointer) that contains the
133-
* offset in bytes between `data` and the actual pointer to free.
118+
* `context` is the original buffer pointer. It is allocated with
119+
* ET_ALIGNED_ALLOC, and must be freed with ET_ALIGNED_FREE.
120+
*
121+
* `data` and `size` are unused.
134122
*/
135123
void FreeSegment(void* context, void* data, ET_UNUSED size_t size) {
136-
ptrdiff_t offset = reinterpret_cast<ptrdiff_t>(context);
137-
ET_DCHECK_MSG(offset >= 0, "Unexpected offset %ld", (long int)offset);
138-
std::free(static_cast<uint8_t*>(data) - offset);
124+
ET_ALIGNED_FREE(context);
139125
}
140126
} // namespace
141127

@@ -163,57 +149,26 @@ Result<FreeableBuffer> FileDataLoader::load(
163149
}
164150

165151
// Allocate memory for the FreeableBuffer.
166-
size_t alloc_size = size;
167-
if (alignment_ > alignof(std::max_align_t)) {
168-
// malloc() will align to smaller values, but we must manually align to
169-
// larger values.
170-
alloc_size += alignment_;
171-
}
172-
void* buffer = std::malloc(alloc_size);
173-
if (buffer == nullptr) {
152+
void* aligned_buffer = ET_ALIGNED_ALLOC(alignment_, size);
153+
if (aligned_buffer == nullptr) {
174154
ET_LOG(
175155
Error,
176-
"Reading from %s at offset %zu: malloc(%zd) failed",
156+
"Reading from %s at offset %zu: ET_ALIGNED_ALLOC(%zd, %zd) failed",
177157
file_name_,
178158
offset,
159+
alignment_,
179160
size);
180161
return Error::MemoryAllocationFailed;
181162
}
182163

183-
// Align.
184-
void* aligned_buffer = align_pointer(buffer, alignment_);
185-
186-
// Assert that the alignment didn't overflow the buffer.
187-
ET_DCHECK_MSG(
188-
reinterpret_cast<uintptr_t>(aligned_buffer) + size <=
189-
reinterpret_cast<uintptr_t>(buffer) + alloc_size,
190-
"aligned_buffer %p + size %zu > buffer %p + alloc_size %zu",
191-
aligned_buffer,
192-
size,
193-
buffer,
194-
alloc_size);
195-
196164
auto err = load_into(offset, size, segment_info, aligned_buffer);
197165
if (err != Error::Ok) {
198-
// Free `buffer`, which is what malloc() gave us, not `aligned_buffer`.
199-
std::free(buffer);
166+
ET_ALIGNED_FREE(aligned_buffer);
200167
return err;
201168
}
202169

203-
// We can't naively free this pointer, since it may not be what malloc() gave
204-
// us. Pass the offset to the real buffer as context. This is the number of
205-
// bytes that need to be subtracted from the FreeableBuffer::data() pointer to
206-
// find the actual pointer to free.
207-
return FreeableBuffer(
208-
aligned_buffer,
209-
size,
210-
FreeSegment,
211-
/*free_fn_context=*/
212-
reinterpret_cast<void*>(
213-
// Using signed types here because it will produce a signed ptrdiff_t
214-
// value, though for us it will always be non-negative.
215-
reinterpret_cast<intptr_t>(aligned_buffer) -
216-
reinterpret_cast<intptr_t>(buffer)));
170+
// Pass the aligned_buffer pointer as context to FreeSegment.
171+
return FreeableBuffer(aligned_buffer, size, FreeSegment, aligned_buffer);
217172
}
218173

219174
Result<size_t> FileDataLoader::size() const {

runtime/platform/compiler.h

+90
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,96 @@
171171
using ssize_t = ptrdiff_t;
172172
#endif
173173

174+
/**
175+
* Platform-specific aligned memory allocation and deallocation.
176+
*
177+
* Usage:
178+
* void* ptr = ET_ALIGNED_ALLOC(alignment, size);
179+
* // use ptr...
180+
* ET_ALIGNED_FREE(ptr);
181+
*
182+
* Note: alignment must be a power of 2 and size must be an integral multiple of
183+
* alignment.
184+
*/
185+
#if defined(_MSC_VER)
186+
#include <malloc.h>
187+
#define ET_ALIGNED_ALLOC(alignment, size) \
188+
_aligned_malloc(((size + alignment - 1) & ~(alignment - 1)), (alignment))
189+
#define ET_ALIGNED_FREE(ptr) _aligned_free(ptr)
190+
#elif defined(__APPLE__)
191+
#include <stdlib.h> // For posix_memalign and free
192+
inline void* et_apple_aligned_alloc(size_t alignment, size_t size) {
193+
void* ptr = nullptr;
194+
// The address of the allocated memory must be a multiple of sizeof(void*).
195+
if (alignment < sizeof(void*)) {
196+
alignment = sizeof(void*);
197+
}
198+
if (posix_memalign(
199+
&ptr, alignment, (size + alignment - 1) & ~(alignment - 1)) != 0) {
200+
return nullptr;
201+
}
202+
return ptr;
203+
}
204+
#define ET_ALIGNED_ALLOC(alignment, size) \
205+
et_apple_aligned_alloc((alignment), (size))
206+
#define ET_ALIGNED_FREE(ptr) free(ptr)
207+
#elif __has_builtin(__builtin_aligned_alloc) || defined(_ISOC11_SOURCE)
208+
// Linux and other posix systems.
209+
#include <cstdlib>
210+
#define ET_ALIGNED_ALLOC(alignment, size) \
211+
::aligned_alloc(alignment, (size + alignment - 1) & ~(alignment - 1))
212+
#define ET_ALIGNED_FREE(ptr) free(ptr)
213+
#else
214+
// If the platform doesn't support aligned_alloc, fallback to malloc.
215+
#include <stdint.h>
216+
#include <cstdlib>
217+
inline void* et_aligned_malloc(size_t alignment, size_t size) {
218+
// place to store original ptr offset.
219+
size_t offset_size = sizeof(uint16_t);
220+
221+
// malloc extra space for offset + alignment.
222+
size_t alloc_size = size + offset_size + alignment - 1;
223+
void* ptr = std::malloc(alloc_size);
224+
225+
if (ptr == nullptr) {
226+
// malloc failed.
227+
return nullptr;
228+
}
229+
230+
uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
231+
uintptr_t aligned_ptr =
232+
(addr + offset_size + alignment - 1) & ~(alignment - 1);
233+
234+
// Check that alignment didn't overflow the buffer.
235+
if (reinterpret_cast<uintptr_t>(aligned_ptr) + size >
236+
reinterpret_cast<uintptr_t>(ptr) + alloc_size) {
237+
std::free(ptr);
238+
return nullptr;
239+
}
240+
241+
// Store the original ptr right before the aligned_ptr, so that it can be
242+
// freed later.
243+
*((uint16_t*)aligned_ptr - 1) =
244+
(uint16_t)((uintptr_t)aligned_ptr - (uintptr_t)ptr);
245+
246+
return (void*)aligned_ptr;
247+
}
248+
249+
inline void et_aligned_free(void* ptr) {
250+
if (ptr == nullptr) {
251+
return;
252+
}
253+
254+
// get original ptr.
255+
void* original_ptr = (void*)((uintptr_t)ptr - *((uint16_t*)ptr - 1));
256+
std::free(original_ptr);
257+
}
258+
259+
#define ET_ALIGNED_ALLOC(alignment, size) et_aligned_malloc((alignment), (size))
260+
#define ET_ALIGNED_FREE(ptr) et_aligned_free(ptr)
261+
262+
#endif
263+
174264
// DEPRECATED: Use the non-underscore-prefixed versions instead.
175265
// TODO(T199005537): Remove these once all users have stopped using them.
176266
#define __ET_DEPRECATED ET_DEPRECATED

0 commit comments

Comments
 (0)