Skip to content

Commit c967a41

Browse files
Khaalgebraic-dev
authored andcommitted
perf: avoid double-open per .olean file (#11507)
This PR optimizes the filesystem accesses during importing for a ~3% win on Linux, potentially more on other platforms.
1 parent 5f2b62a commit c967a41

File tree

1 file changed

+69
-25
lines changed

1 file changed

+69
-25
lines changed

src/library/module.cpp

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
3232

3333
#ifdef LEAN_WINDOWS
3434
#include <windows.h>
35+
#include <io.h>
36+
#include <fcntl.h>
3537
#else
3638
#include <sys/mman.h>
3739
#include <unistd.h>
@@ -46,6 +48,31 @@ Authors: Leonardo de Moura, Gabriel Ebner, Sebastian Ullrich
4648

4749
namespace lean {
4850

51+
/** Trivial RAII wrapper for file descriptors so we don't have to worry about `close` management. */
52+
class file_descriptor {
53+
private:
54+
int m_fd;
55+
public:
56+
explicit file_descriptor(int fd) : m_fd(fd) {}
57+
58+
// It should not be copyable
59+
file_descriptor(const file_descriptor&) = delete;
60+
file_descriptor& operator=(const file_descriptor&) = delete;
61+
62+
file_descriptor(file_descriptor&& other) noexcept : m_fd(other.m_fd) {
63+
other.m_fd = -1;
64+
}
65+
66+
~file_descriptor() {
67+
if (m_fd != -1) {
68+
close(m_fd);
69+
}
70+
}
71+
72+
int get() const { return m_fd; }
73+
operator bool() const { return m_fd != -1; }
74+
};
75+
4976
/** On-disk format of a .olean file. */
5077
struct olean_header {
5178
// 5 bytes: magic number
@@ -169,7 +196,10 @@ extern "C" LEAN_EXPORT object * lean_save_module_data_parts(b_obj_arg mod, b_obj
169196

170197
struct module_file {
171198
std::string m_fname;
172-
std::ifstream m_in;
199+
file_descriptor m_fd;
200+
#ifdef LEAN_WINDOWS
201+
HANDLE m_handle; // store the original Windows for mmap
202+
#endif
173203
char * m_base_addr;
174204
size_t m_size;
175205
char * m_buffer;
@@ -184,22 +214,40 @@ extern "C" LEAN_EXPORT object * lean_read_module_data_parts(b_obj_arg ofnames, o
184214
for (auto const & fname : fnames) {
185215
std::string olean_fn = fname.to_std_string();
186216
try {
187-
std::ifstream in(olean_fn, std::ios_base::binary);
188-
if (in.fail()) {
189-
return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "'").str());
217+
#ifdef LEAN_WINDOWS
218+
// Use CreateFile with proper sharing flags, then convert to POSIX fd for shared code
219+
// `FILE_SHARE_DELETE` is necessary to allow the file to (be marked to) be deleted while in use
220+
HANDLE h_file = CreateFile(olean_fn.c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
221+
if (h_file == INVALID_HANDLE_VALUE) {
222+
return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << GetLastError()).str());
223+
}
224+
int raw_fd = _open_osfhandle((intptr_t)h_file, _O_RDONLY);
225+
if (raw_fd == -1) {
226+
CloseHandle(h_file);
227+
return io_result_mk_error((sstream() << "failed to convert handle to fd for '" << olean_fn << "'").str());
228+
}
229+
file_descriptor fd(raw_fd);
230+
#else
231+
file_descriptor fd(open(olean_fn.c_str(), O_RDONLY));
232+
if (!fd) {
233+
return io_result_mk_error((sstream() << "failed to open file '" << olean_fn << "': " << strerror(errno)).str());
190234
}
235+
#endif
236+
191237
/* Get file size */
192-
in.seekg(0, in.end);
193-
size_t size = in.tellg();
194-
in.seekg(0);
238+
struct stat st;
239+
if (fstat(fd.get(), &st) == -1) {
240+
return io_result_mk_error((sstream() << "failed to stat file '" << olean_fn << "': " << strerror(errno)).str());
241+
}
242+
size_t size = st.st_size;
195243

196244
olean_header default_header = {};
197245
olean_header header;
198-
if (!in.read(reinterpret_cast<char *>(&header), sizeof(header))
246+
if (read(fd.get(), &header, sizeof(header)) != sizeof(header)
199247
|| memcmp(header.marker, default_header.marker, sizeof(header.marker)) != 0) {
200248
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', invalid header").str());
201249
}
202-
in.seekg(0);
250+
lseek(fd.get(), 0, SEEK_SET);
203251
if (header.version != default_header.version || header.flags != default_header.flags
204252
#ifdef LEAN_CHECK_OLEAN_VERSION
205253
|| strncmp(header.githash, LEAN_GITHASH, sizeof(header.githash)) != 0
@@ -208,7 +256,11 @@ extern "C" LEAN_EXPORT object * lean_read_module_data_parts(b_obj_arg ofnames, o
208256
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "', incompatible header").str());
209257
}
210258
char * base_addr = reinterpret_cast<char *>(header.base_addr);
211-
files.push_back({olean_fn, std::move(in), base_addr, size, nullptr, nullptr});
259+
#ifdef LEAN_WINDOWS
260+
files.push_back({olean_fn, std::move(fd), h_file, base_addr, size, nullptr, nullptr});
261+
#else
262+
files.push_back({olean_fn, std::move(fd), base_addr, size, nullptr, nullptr});
263+
#endif
212264
} catch (exception & ex) {
213265
return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str());
214266
}
@@ -224,18 +276,15 @@ extern "C" LEAN_EXPORT object * lean_read_module_data_parts(b_obj_arg ofnames, o
224276
char * base_addr = file.m_base_addr;
225277
try {
226278
#ifdef LEAN_WINDOWS
227-
// `FILE_SHARE_DELETE` is necessary to allow the file to (be marked to) be deleted while in use
228-
HANDLE h_olean_fn = CreateFile(olean_fn.c_str(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
229-
if (h_olean_fn == INVALID_HANDLE_VALUE) {
230-
return io_result_mk_error((sstream() << "failed to open '" << olean_fn << "': " << GetLastError()).str());
231-
}
279+
// Use the stored handle that was created with proper sharing flags
280+
HANDLE h_olean_fn = file.m_handle;
232281
HANDLE h_map = CreateFileMapping(h_olean_fn, NULL, PAGE_READONLY, 0, 0, NULL);
233-
if (h_olean_fn == NULL) {
282+
if (h_map == NULL) {
234283
return io_result_mk_error((sstream() << "failed to map '" << olean_fn << "': " << GetLastError()).str());
235284
}
236285
char * buffer = static_cast<char *>(MapViewOfFileEx(h_map, FILE_MAP_READ, 0, 0, 0, base_addr));
237286
lean_always_assert(CloseHandle(h_map));
238-
lean_always_assert(CloseHandle(h_olean_fn));
287+
// NOTE: no need to close `h_olean_fn` as it's owned by `file.m_fd`
239288
if (!buffer) {
240289
is_mmap = false;
241290
break;
@@ -244,16 +293,13 @@ extern "C" LEAN_EXPORT object * lean_read_module_data_parts(b_obj_arg ofnames, o
244293
lean_always_assert(UnmapViewOfFile(base_addr));
245294
};
246295
#else
247-
int fd = open(olean_fn.c_str(), O_RDONLY);
248-
if (fd == -1) {
249-
return io_result_mk_error((sstream() << "failed to open '" << olean_fn << "': " << strerror(errno)).str());
250-
}
296+
int fd = file.m_fd.get();
297+
// NOTE: `file.m_fd` does NOT need to outlive `buffer` after this call
251298
char * buffer = static_cast<char *>(mmap(base_addr, file.m_size, PROT_READ, MAP_PRIVATE, fd, 0));
252299
if (buffer == MAP_FAILED) {
253300
is_mmap = false;
254301
break;
255302
}
256-
close(fd);
257303
size_t size = file.m_size;
258304
file.m_free_data = [=]() {
259305
lean_always_assert(munmap(buffer, size) == 0);
@@ -287,11 +333,9 @@ extern "C" LEAN_EXPORT object * lean_read_module_data_parts(b_obj_arg ofnames, o
287333
std::string const & olean_fn = file.m_fname;
288334
try {
289335
file.m_buffer = big_buffer + (file.m_base_addr - files[0].m_base_addr);
290-
file.m_in.read(file.m_buffer, file.m_size);
291-
if (!file.m_in) {
336+
if (read(file.m_fd.get(), file.m_buffer, file.m_size) != static_cast<ssize_t>(file.m_size)) {
292337
return io_result_mk_error((sstream() << "failed to read file '" << olean_fn << "'").str());
293338
}
294-
file.m_in.close();
295339
} catch (exception & ex) {
296340
return io_result_mk_error((sstream() << "failed to read '" << olean_fn << "': " << ex.what()).str());
297341
}

0 commit comments

Comments
 (0)