Skip to content

Commit f1203f3

Browse files
hargoniXKha
andauthored
perf: used hashmaps for symbol lookup in the interpreter (#10927)
This PR uses hashmaps for the symbol lookups in the IR interpreter instead of the existing rbmaps. Thus reducing the constant overhead per function call. --------- Co-authored-by: Sebastian Ullrich <[email protected]>
1 parent 881a131 commit f1203f3

File tree

2 files changed

+38
-28
lines changed

2 files changed

+38
-28
lines changed

src/library/ir_interpreter.cpp

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ functions, which have a (relatively) homogeneous ABI that we can use without run
4949
#include "library/init_attribute.h"
5050
#include "util/nat.h"
5151
#include "util/option_declarations.h"
52+
#include "util/name_hash_map.h"
5253

5354
#ifndef LEAN_DEFAULT_INTERPRETER_PREFER_NATIVE
5455
#define LEAN_DEFAULT_INTERPRETER_PREFER_NATIVE true
@@ -204,7 +205,8 @@ static string_ref * g_boxed_mangled_suffix = nullptr;
204205
static name * g_interpreter_prefer_native = nullptr;
205206

206207
// constants (lacking native declarations) initialized by `lean_run_init`
207-
static name_map<object *> * g_init_globals;
208+
// We can assume this variable is never written to and read from in parallel; see `enableInitializersExecution`.
209+
static name_hash_map<object *> * g_init_globals;
208210

209211
// reuse the compiler's name mangling to compute native symbol names
210212
extern "C" object * lean_name_mangle(object * n, object * pre);
@@ -364,7 +366,7 @@ struct native_symbol_cache_entry {
364366

365367
// Caches native symbol lookup successes _and_ failures; we assume no native code is loaded or
366368
// unloaded after the interpreter is first invoked, so this can be a global cache.
367-
name_map<native_symbol_cache_entry> * g_native_symbol_cache;
369+
name_hash_map<native_symbol_cache_entry> * g_native_symbol_cache;
368370
// could be `shared_mutex` with C++17
369371
std::shared_timed_mutex * g_native_symbol_cache_mutex;
370372

@@ -391,15 +393,15 @@ class interpreter {
391393
value m_val;
392394
};
393395
// caches values of nullary functions ("constants")
394-
name_map<constant_cache_entry> m_constant_cache;
396+
name_hash_map<constant_cache_entry> m_constant_cache;
395397
struct symbol_cache_entry {
396398
// looking up IR from .oleans is slow enough to warrant its own cache; but as local IR can
397399
// be backtracked, this cache needs to be local as well.
398400
decl m_decl;
399401
native_symbol_cache_entry m_native;
400402
};
401403
// caches symbol lookup successes _and_ failures
402-
name_map<symbol_cache_entry> m_symbol_cache;
404+
name_hash_map<symbol_cache_entry> m_symbol_cache;
403405

404406
/** \brief Get current stack frame */
405407
inline frame & get_frame() {
@@ -816,20 +818,23 @@ class interpreter {
816818

817819
/** \brief Return cached lookup result for given unmangled function name in the current binary. */
818820
symbol_cache_entry lookup_symbol(name const & fn) {
819-
if (symbol_cache_entry const * e = m_symbol_cache.find(fn)) {
820-
return *e;
821+
auto e = m_symbol_cache.find(fn);
822+
if (e != m_symbol_cache.end()) {
823+
return e->second;
821824
}
822825
std::shared_lock<std::shared_timed_mutex> lock(*g_native_symbol_cache_mutex);
823-
if (native_symbol_cache_entry const * ne = g_native_symbol_cache->find(fn)) {
824-
symbol_cache_entry e_new { get_decl(fn), *ne };
825-
m_symbol_cache.insert(fn, e_new);
826+
auto ne = g_native_symbol_cache->find(fn);
827+
if (ne != g_native_symbol_cache->end()) {
828+
symbol_cache_entry e_new { get_decl(fn), ne->second };
829+
m_symbol_cache.insert({ fn, e_new });
826830
return e_new;
827831
}
828832
lock.unlock();
829833
std::unique_lock<std::shared_timed_mutex> unique_lock(*g_native_symbol_cache_mutex);
830-
if (native_symbol_cache_entry const * ne = g_native_symbol_cache->find(fn)) {
831-
symbol_cache_entry e_new { get_decl(fn), *ne };
832-
m_symbol_cache.insert(fn, e_new);
834+
ne = g_native_symbol_cache->find(fn);
835+
if (ne != g_native_symbol_cache->end()) {
836+
symbol_cache_entry e_new { get_decl(fn), ne->second };
837+
m_symbol_cache.insert({ fn, e_new });
833838
return e_new;
834839
}
835840
symbol_cache_entry e_new { get_decl(fn), {nullptr, false} };
@@ -851,8 +856,8 @@ class interpreter {
851856
}
852857
}
853858
}
854-
g_native_symbol_cache->insert(fn, e_new.m_native);
855-
m_symbol_cache.insert(fn, e_new);
859+
g_native_symbol_cache->insert({ fn, e_new.m_native });
860+
m_symbol_cache.insert({ fn, e_new });
856861
return e_new;
857862
}
858863

@@ -867,15 +872,19 @@ class interpreter {
867872

868873
/** \brief Evaluate nullary function ("constant"). */
869874
value load(name const & fn, type t) {
870-
if (constant_cache_entry const * cached = m_constant_cache.find(fn)) {
871-
if (!cached->m_is_scalar) {
872-
inc(cached->m_val.m_obj);
875+
auto cached_entry = m_constant_cache.find(fn);
876+
if (cached_entry != m_constant_cache.end()) {
877+
auto cached = cached_entry->second;
878+
if (!cached.m_is_scalar) {
879+
inc(cached.m_val.m_obj);
873880
}
874-
return cached->m_val;
881+
return cached.m_val;
875882
}
876-
if (object * const * o = g_init_globals->find(fn)) {
883+
auto o_entry = g_init_globals->find(fn);
884+
if (o_entry != g_init_globals->end()) {
877885
// persistent, so no `inc` needed
878-
return type_is_scalar(t) ? unbox_t(*o, t) : *o;
886+
auto o = o_entry->second;
887+
return type_is_scalar(t) ? unbox_t(o, t) : o;
879888
}
880889

881890
symbol_cache_entry e = lookup_symbol(fn);
@@ -918,7 +927,7 @@ class interpreter {
918927
if (!type_is_scalar(t)) {
919928
inc(r.m_obj);
920929
}
921-
m_constant_cache.insert(fn, constant_cache_entry { type_is_scalar(t), r });
930+
m_constant_cache.insert({ fn, constant_cache_entry { type_is_scalar(t), r } });
922931
return r;
923932
}
924933

@@ -1040,11 +1049,12 @@ class interpreter {
10401049
interpreter(interpreter const &) = delete;
10411050

10421051
~interpreter() {
1043-
for_each(m_constant_cache, [](name const &, constant_cache_entry const & e) {
1052+
for (auto& it: m_constant_cache) {
1053+
auto e = it.second;
10441054
if (!e.m_is_scalar) {
10451055
dec(e.m_val.m_obj);
10461056
}
1047-
});
1057+
}
10481058
}
10491059

10501060
/** A variant of `call` designed for external uses.
@@ -1121,7 +1131,7 @@ class interpreter {
11211131
if (e.m_native.m_addr) {
11221132
*((object **)e.m_native.m_addr) = o;
11231133
} else {
1124-
g_init_globals->insert(decl, o);
1134+
g_init_globals->insert({ decl, o });
11251135
}
11261136
return lean_io_result_mk_ok(box(0));
11271137
} else {
@@ -1207,14 +1217,14 @@ void initialize_ir_interpreter() {
12071217
ir::g_boxed_mangled_suffix = new string_ref("___boxed");
12081218
mark_persistent(ir::g_boxed_mangled_suffix->raw());
12091219
ir::g_interpreter_prefer_native = new name({"interpreter", "prefer_native"});
1210-
ir::g_init_globals = new name_map<object *>();
1220+
ir::g_init_globals = new name_hash_map<object *>();
12111221
register_bool_option(*ir::g_interpreter_prefer_native, LEAN_DEFAULT_INTERPRETER_PREFER_NATIVE, "(interpreter) whether to use precompiled code where available");
12121222
DEBUG_CODE({
12131223
register_trace_class({"interpreter"});
12141224
register_trace_class({"interpreter", "call"});
12151225
register_trace_class({"interpreter", "step"});
12161226
});
1217-
ir::g_native_symbol_cache = new name_map<ir::native_symbol_cache_entry>();
1227+
ir::g_native_symbol_cache = new name_hash_map<ir::native_symbol_cache_entry>();
12181228
ir::g_native_symbol_cache_mutex = new std::shared_timed_mutex();
12191229
}
12201230

src/util/name_hash_map.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ Released under Apache 2.0 license as described in the file LICENSE.
55
Author: Leonardo de Moura
66
*/
77
#pragma once
8-
#include <unordered_map>
8+
#include "util/alloc.h"
99
#include "util/name.h"
1010
namespace lean {
11-
template<typename T> using name_hash_map = std::unordered_map<name, T, name_hash_fn, name_eq_fn>;
11+
template<typename T> using name_hash_map = lean::unordered_map<name, T, name_hash_fn, name_eq_fn>;
1212
}

0 commit comments

Comments
 (0)