Skip to content

Commit 5defce3

Browse files
victoryang00sakria9KikiSpaceBrian Zhao
committed
Enhance wasm with checkpoint and restore support (#2333)
- Add wasm_runtime_checkpoint/wasm_runtime_restore API - Support checkpoint and debugging through OS signals in AOT and classic interpreter modes; tested on Windows/macOS/Linux, aarch64/x64 - Statically instrument the AOT code with checkpoint and restore switches - Add an extra library sub-folder implementing ckpt-restore - Add yalantinglib as an extra dependency Co-authored-by: Aibo Hu <[email protected]> Co-authored-by: kikispace <[email protected]> Co-authored-by: Brian Zhao <[email protected]> Signed-off-by: victoryang00 <[email protected]>
1 parent 1b9fbb1 commit 5defce3

File tree

86 files changed

+20785
-346
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

86 files changed

+20785
-346
lines changed

build-scripts/config_common.cmake

+4
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1)
304304
add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1)
305305
message (" AOT stack frame enabled")
306306
endif ()
307+
if (WAMR_BUILD_CHECKPOINT_RESTORE EQUAL 1)
308+
add_definitions (-DWASM_ENABLE_CHECKPOINT_RESTORE=1)
309+
message (" Checkpoint Restore enabled")
310+
endif ()
307311
if (WAMR_BUILD_MEMORY_PROFILING EQUAL 1)
308312
add_definitions (-DWASM_ENABLE_MEMORY_PROFILING=1)
309313
message (" Memory profiling enabled")

build-scripts/runtime_lib.cmake

+5
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ if (WAMR_BUILD_LIBC_BUILTIN EQUAL 1)
8686
include (${IWASM_DIR}/libraries/libc-builtin/libc_builtin.cmake)
8787
endif ()
8888

89+
if (WAMR_BUILD_CHECKPOINT_RESTORE EQUAL 1)
90+
include (${IWASM_DIR}/libraries/ckpt-restore/ckpt_restore.cmake)
91+
endif ()
92+
8993
if (WAMR_BUILD_LIBC_UVWASI EQUAL 1)
9094
include (${IWASM_DIR}/libraries/libc-uvwasi/libc_uvwasi.cmake)
9195
set (WAMR_BUILD_MODULE_INST_CONTEXT 1)
@@ -193,6 +197,7 @@ set (source_all
193197
${LIBC_EMCC_SOURCE}
194198
${LIB_RATS_SOURCE}
195199
${DEBUG_ENGINE_SOURCE}
200+
${CKPT_RESTORE_SOURCE}
196201
)
197202

198203
set (WAMR_RUNTIME_LIB_SOURCE ${source_all})

core/config.h

+5
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,11 @@
335335
#define WASM_ENABLE_AOT_STACK_FRAME 0
336336
#endif
337337

338+
/* Checkpoint Restore */
339+
#ifndef WASM_ENABLE_CHECKPOINT_RESTORE
340+
#define WASM_ENABLE_CHECKPOINT_RESTORE 0
341+
#endif
342+
338343
/* Heap verification */
339344
#ifndef BH_ENABLE_GC_VERIFY
340345
#define BH_ENABLE_GC_VERIFY 0

core/iwasm/aot/aot_reloc.h

+7-2
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,13 @@ typedef struct {
5757
#define REG_AOT_TRACE_SYM() \
5858
REG_SYM(aot_alloc_frame), \
5959
REG_SYM(aot_free_frame), \
60+
REG_SYM(aot_raise), \
6061
REG_SYM(aot_frame_update_profile_info),
6162
#else
6263
#define REG_AOT_TRACE_SYM()
6364
#endif
6465

66+
#if WASM_ENABLE_AOT_INTRINSICS != 0
6567
#define REG_INTRINSIC_SYM() \
6668
REG_SYM(aot_intrinsic_fabs_f32), \
6769
REG_SYM(aot_intrinsic_fabs_f64), \
@@ -124,7 +126,10 @@ typedef struct {
124126
REG_SYM(aot_intrinsic_i32_div_s), \
125127
REG_SYM(aot_intrinsic_i32_div_u), \
126128
REG_SYM(aot_intrinsic_i32_rem_s), \
127-
REG_SYM(aot_intrinsic_i32_rem_u), \
129+
REG_SYM(aot_intrinsic_i32_rem_u),
130+
#else
131+
#define REG_INTRINSIC_SYM()
132+
#endif
128133

129134
#if WASM_ENABLE_STATIC_PGO != 0
130135
#define REG_LLVM_PGO_SYM() \
@@ -241,4 +246,4 @@ apply_relocation(AOTModule *module,
241246
}
242247
#endif
243248

244-
#endif /* end of _AOT_RELOC_H_ */
249+
#endif /* end of _AOT_RELOC_H_ */

core/iwasm/aot/aot_runtime.c

+76-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
#include "../libraries/thread-mgr/thread_manager.h"
1717
#endif
1818

19+
#include "wasm_interp.h"
20+
#if WASM_ENABLE_CHECKPOINT_RESTORE != 0
21+
#include "../libraries/ckpt-restore/ckpt_restore.h"
22+
#endif
23+
1924
/*
2025
* Note: These offsets need to match the values hardcoded in
2126
* AoT compilation code: aot_create_func_context, check_suspend_flags.
@@ -72,6 +77,11 @@ bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5);
7277
bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6);
7378
bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7);
7479

80+
bh_static_assert(offsetof(AOTFrame, ip_offset) == sizeof(uintptr_t) * 4);
81+
bh_static_assert(offsetof(AOTFrame, sp) == sizeof(uintptr_t) * 5);
82+
bh_static_assert(offsetof(AOTFrame, frame_ref) == sizeof(uintptr_t) * 6);
83+
bh_static_assert(offsetof(AOTFrame, lp) == sizeof(uintptr_t) * 7);
84+
7585
static void
7686
set_error_buf(char *error_buf, uint32 error_buf_size, const char *string)
7787
{
@@ -804,6 +814,7 @@ memory_instantiate(AOTModuleInstance *module_inst, AOTModuleInstance *parent,
804814
bh_assert(memory_idx == 0);
805815
bh_assert(parent->memory_count > memory_idx);
806816
shared_memory_instance = parent->memories[memory_idx];
817+
shared_memory_instance->ref_count++;
807818
shared_memory_inc_reference(shared_memory_instance);
808819
return shared_memory_instance;
809820
}
@@ -992,6 +1003,9 @@ memories_instantiate(AOTModuleInstance *module_inst, AOTModuleInstance *parent,
9921003
AOTMemoryInstance *memories, *memory_inst;
9931004
AOTMemInitData *data_seg;
9941005
uint64 total_size;
1006+
#if WASM_ENABLE_SHARED_MEMORY != 0
1007+
bool is_shared_memory;
1008+
#endif
9951009

9961010
module_inst->memory_count = memory_count;
9971011
total_size = sizeof(AOTMemoryInstance *) * (uint64)memory_count;
@@ -1019,6 +1033,15 @@ memories_instantiate(AOTModuleInstance *module_inst, AOTModuleInstance *parent,
10191033
return true;
10201034
}
10211035

1036+
#if WASM_ENABLE_SHARED_MEMORY != 0
1037+
/* Currently we have only one memory instance */
1038+
is_shared_memory = module->memories[0].memory_flags & 0x02 ? true : false;
1039+
if (is_shared_memory && parent != NULL) {
1040+
/* Ignore setting memory init data if the memory has been initialized */
1041+
return true;
1042+
}
1043+
#endif
1044+
10221045
for (i = 0; i < module->mem_init_data_count; i++) {
10231046
data_seg = module->mem_init_data_list[i];
10241047
#if WASM_ENABLE_BULK_MEMORY != 0
@@ -1860,6 +1883,7 @@ destroy_c_api_frames(Vector *frames)
18601883
void
18611884
aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
18621885
{
1886+
#if WASM_ENABLE_CHECKPOINT_RESTORE == 0
18631887
WASMModuleInstanceExtraCommon *common =
18641888
&((AOTModuleInstanceExtra *)module_inst->e)->common;
18651889
if (module_inst->exec_env_singleton) {
@@ -1934,6 +1958,7 @@ aot_deinstantiate(AOTModuleInstance *module_inst, bool is_sub_inst)
19341958
#endif
19351959

19361960
wasm_runtime_free(module_inst);
1961+
#endif
19371962
}
19381963

19391964
AOTFunctionInstance *
@@ -2225,6 +2250,7 @@ aot_call_function(WASMExecEnv *exec_env, AOTFunctionInstance *function,
22252250
while (exec_env->cur_frame != prev_frame)
22262251
aot_free_frame(exec_env);
22272252
#endif
2253+
// checkpoint
22282254
if (!ret) {
22292255
if (argv1 != argv1_buf)
22302256
wasm_runtime_free(argv1);
@@ -2789,6 +2815,17 @@ aot_call_indirect(WASMExecEnv *exec_env, uint32 tbl_idx, uint32 table_elem_idx,
27892815
}
27902816

27912817
tbl_elem_val = ((table_elem_type_t *)tbl_inst->elems)[table_elem_idx];
2818+
#if WASM_ENABLE_CHECKPOINT_RESTORE != 0
2819+
if (exec_env->is_restore && exec_env->restore_call_chain) {
2820+
struct AOTFrame *rcc = *(exec_env->restore_call_chain);
2821+
while (rcc->prev_frame) {
2822+
rcc = rcc->prev_frame;
2823+
}
2824+
LOG_DEBUG("func_idx: %d instead of %d of thread %ld\n", rcc->func_index,
2825+
func_idx, exec_env->handle);
2826+
func_idx = rcc->func_index;
2827+
}
2828+
#endif
27922829
if (tbl_elem_val == NULL_REF) {
27932830
aot_set_exception_with_id(module_inst, EXCE_UNINITIALIZED_ELEMENT);
27942831
goto fail;
@@ -3449,6 +3486,12 @@ get_func_name_from_index(const AOTModuleInstance *module_inst,
34493486
#endif /* end of WASM_ENABLE_DUMP_CALL_STACK != 0 || \
34503487
WASM_ENABLE_PERF_PROFILING != 0 */
34513488

3489+
/*
 * Deliver signal `sig` to the running program through the C library's
 * raise().
 *
 * exec_env: not referenced in the body.
 * sig:      signal number handed to raise() unchanged.
 *
 * The value returned by raise() is discarded, so a failed delivery is not
 * reported to the caller.
 * NOTE(review): presumably invoked from instrumented AOT code to trigger the
 * checkpoint signal handler -- confirm against the AOT compiler side.
 */
void
3490+
aot_raise(WASMExecEnv *exec_env, int sig)
3491+
{
3492+
raise(sig);
3493+
}
3494+
34523495
#if WASM_ENABLE_GC == 0
34533496
bool
34543497
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
@@ -3528,6 +3571,9 @@ aot_free_frame(WASMExecEnv *exec_env)
35283571
bool
35293572
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
35303573
{
3574+
#if WASM_ENABLE_CHECKPOINT_RESTORE != 0
3575+
LOG_DEBUG("aot_alloc_frame %u thread %d\n", func_index, exec_env->handle);
3576+
#endif
35313577
AOTModuleInstance *module_inst = (AOTModuleInstance *)exec_env->module_inst;
35323578
AOTModule *module = (AOTModule *)module_inst->module;
35333579
#if WASM_ENABLE_PERF_PROFILING != 0
@@ -3537,6 +3583,27 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
35373583
AOTFrame *frame;
35383584
uint32 max_local_cell_num, max_stack_cell_num, all_cell_num;
35393585
uint32 aot_func_idx, frame_size;
3586+
#if WASM_ENABLE_CHECKPOINT_RESTORE != 0
3587+
if (exec_env->restore_call_chain) {
3588+
frame = exec_env->restore_call_chain[exec_env->call_chain_size - 1];
3589+
LOG_DEBUG("frame restored, func idx %zu\n", frame->func_index);
3590+
exec_env->call_chain_size--;
3591+
frame->prev_frame = (AOTFrame *)exec_env->cur_frame;
3592+
exec_env->cur_frame = (struct WASMInterpFrame *)frame;
3593+
if (exec_env->call_chain_size == 0) {
3594+
// TODO: fix memory leak
3595+
exec_env->restore_call_chain = NULL;
3596+
}
3597+
LOG_DEBUG("restore call chain %zu==%u, %p, %p, %d\n",
3598+
((AOTFrame *)exec_env->cur_frame)->func_index, func_index,
3599+
exec_env, exec_env->restore_call_chain, exec_env->handle);
3600+
if (((AOTFrame *)exec_env->cur_frame)->func_index != func_index) {
3601+
LOG_DEBUG("NOT MATCH!!!\n");
3602+
exit(1);
3603+
}
3604+
return true;
3605+
}
3606+
#endif
35403607

35413608
if (func_index >= module->import_func_count) {
35423609
aot_func_idx = func_index - module->import_func_count;
@@ -3568,6 +3635,11 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
35683635
frame->time_started = (uintptr_t)os_time_thread_cputime_us();
35693636
frame->func_perf_prof_info = func_perf_prof;
35703637
#endif
3638+
frame->ip_offset = 0;
3639+
frame->sp = frame->lp + max_local_cell_num;
3640+
#if WASM_ENABLE_GC != 0
3641+
frame->frame_ref = frame->sp + max_stack_cell_num;
3642+
#endif
35713643

35723644
#if WASM_ENABLE_GC != 0
35733645
frame->sp = frame->lp + max_local_cell_num;
@@ -3584,6 +3656,10 @@ aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index)
35843656
static inline void
35853657
aot_free_frame_internal(WASMExecEnv *exec_env)
35863658
{
3659+
#if WASM_ENABLE_CHECKPOINT_RESTORE != 0
3660+
int func_index = ((AOTFrame *)exec_env->cur_frame)->func_index;
3661+
LOG_DEBUG("aot_free_frame %zu %d\n", func_index, exec_env->handle);
3662+
#endif
35873663
AOTFrame *cur_frame = (AOTFrame *)exec_env->cur_frame;
35883664
AOTFrame *prev_frame = cur_frame->prev_frame;
35893665

@@ -3598,7 +3674,6 @@ aot_free_frame_internal(WASMExecEnv *exec_env)
35983674
if (prev_frame)
35993675
prev_frame->func_perf_prof_info->children_exec_time += time_elapsed;
36003676
#endif
3601-
36023677
wasm_exec_env_free_wasm_frame(exec_env, cur_frame);
36033678
exec_env->cur_frame = (struct WASMInterpFrame *)prev_frame;
36043679
}

core/iwasm/aot/aot_runtime.h

+3
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,9 @@ aot_table_grow(AOTModuleInstance *module_inst, uint32 tbl_idx,
673673
bool
674674
aot_alloc_frame(WASMExecEnv *exec_env, uint32 func_index);
675675

676+
void
677+
aot_raise(WASMExecEnv *exec_env, int exception);
678+
676679
void
677680
aot_free_frame(WASMExecEnv *exec_env);
678681

core/iwasm/common/wasm_exec_env.c

+6
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst,
7272
exec_env->wasm_stack.top_boundary =
7373
exec_env->wasm_stack.bottom + stack_size;
7474
exec_env->wasm_stack.top = exec_env->wasm_stack.bottom;
75+
exec_env->is_checkpoint = false;
7576

7677
#if WASM_ENABLE_AOT != 0
7778
if (module_inst->module_type == Wasm_Module_AoT) {
@@ -85,6 +86,10 @@ wasm_exec_env_create_internal(struct WASMModuleInstanceCommon *module_inst,
8586
wasm_runtime_dump_exec_env_mem_consumption(exec_env);
8687
#endif
8788

89+
exec_env->is_checkpoint = false;
90+
exec_env->is_restore = false;
91+
exec_env->call_chain_size = 0;
92+
exec_env->restore_call_chain = NULL;
8893
return exec_env;
8994

9095
#ifdef OS_ENABLE_HW_BOUND_CHECK
@@ -174,6 +179,7 @@ wasm_exec_env_create(struct WASMModuleInstanceCommon *module_inst,
174179
}
175180
#endif
176181

182+
177183
#if WASM_ENABLE_THREAD_MGR != 0
178184
/* Create a new cluster for this exec_env */
179185
if (!(cluster = wasm_cluster_create(exec_env))) {

core/iwasm/common/wasm_exec_env.h

+6
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,12 @@ typedef struct WASMExecEnv {
158158

159159
/* The WASM stack size */
160160
uint32 wasm_stack_size;
161+
/* Whether is checkpoint */
162+
bool is_checkpoint;
163+
/* Whether is restore */
164+
bool is_restore;
165+
size_t call_chain_size;
166+
struct AOTFrame **restore_call_chain;
161167

162168
/* The WASM stack of current thread */
163169
union {

core/iwasm/common/wasm_memory.c

+29-24
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ wasm_runtime_get_mem_alloc_info(mem_alloc_info_t *mem_alloc_info)
280280
return false;
281281
}
282282

283+
283284
bool
284285
wasm_runtime_validate_app_addr(WASMModuleInstanceCommon *module_inst_comm,
285286
uint32 app_offset, uint32 size)
@@ -553,6 +554,8 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
553554
return false;
554555
}
555556

557+
SHARED_MEMORY_LOCK(memory_inst);
558+
556559
native_addr = memory_inst->memory_data + app_buf_addr;
557560

558561
bounds_checks = is_bounds_checks_enabled((wasm_module_inst_t)module_inst);
@@ -566,32 +569,34 @@ wasm_check_app_addr_and_convert(WASMModuleInstance *module_inst, bool is_str,
566569

567570
/* No need to check the app_offset and buf_size if memory access
568571
boundary check with hardware trap is enabled */
569-
#ifndef OS_ENABLE_HW_BOUND_CHECK
570-
SHARED_MEMORY_LOCK(memory_inst);
571-
572-
if (app_buf_addr >= memory_inst->memory_data_size) {
573-
goto fail;
574-
}
575-
576-
if (!is_str) {
577-
if (app_buf_size > memory_inst->memory_data_size - app_buf_addr) {
578-
goto fail;
579-
}
580-
}
581-
else {
582-
const char *str, *str_end;
583-
584-
/* The whole string must be in the linear memory */
585-
str = (const char *)native_addr;
586-
str_end = (const char *)memory_inst->memory_data_end;
587-
while (str < str_end && *str != '\0')
588-
str++;
589-
if (str == str_end)
590-
goto fail;
591-
}
572+
// #ifndef OS_ENABLE_HW_BOUND_CHECK
573+
// SHARED_MEMORY_LOCK(memory_inst);
574+
575+
// if (app_buf_addr >= memory_inst->memory_data_size) {
576+
// goto fail;
577+
// }
578+
579+
// if (!is_str) {
580+
// if (app_buf_size > memory_inst->memory_data_size - app_buf_addr) {
581+
// goto fail;
582+
// }
583+
// }
584+
// else {
585+
// const char *str, *str_end;
586+
587+
// /* The whole string must be in the linear memory */
588+
// str = (const char *)native_addr;
589+
// str_end = (const char *)memory_inst->memory_data_end;
590+
// while (str < str_end && *str != '\0')
591+
// str++;
592+
// if (str == str_end)
593+
// goto fail;
594+
// }
595+
596+
// SHARED_MEMORY_UNLOCK(memory_inst);
597+
// #endif
592598

593599
SHARED_MEMORY_UNLOCK(memory_inst);
594-
#endif
595600

596601
success:
597602
*p_native_addr = (void *)native_addr;

0 commit comments

Comments
 (0)