Skip to content

i#5383: macOS a64 client threads and private TLS #7300

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ set(ARCH_SRCS
arch/${ARCH_NAME_SHARED}/mangle.c
arch/clean_call_opt_shared.c
arch/${ARCH_NAME}/clean_call_opt.c
arch/x86_code.c
arch/asm_aux.c
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, I had thoughts about renaming this file in the past.

${arch_core_asm_src}
${archshared_core_asm_src}
arch/${ARCH_NAME}/optimize.c
Expand Down Expand Up @@ -421,7 +421,11 @@ if (UNIX)
set(OS_SRCS ${OS_SRCS} unix/memquery.c)
set(OS_SRCS ${OS_SRCS} unix/module_macho.c)
set(OS_SRCS ${OS_SRCS} unix/ksynch_macos.c)
set(OS_SRCS ${OS_SRCS} unix/tls_macos.c)
if (AARCH64)
set(OS_SRCS ${OS_SRCS} unix/tls_macos_aarch64.c)
elseif (X86)
set(OS_SRCS ${OS_SRCS} unix/tls_macos_x86.c)
endif()
set(OS_SRCS ${OS_SRCS} unix/signal_macos.c)
set(OS_SRCS ${OS_SRCS} unix/native_macho.c)
# XXX i#1286: implement nudge_macos.c
Expand Down
21 changes: 17 additions & 4 deletions core/arch/aarch64/aarch64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,9 @@ cat_thread_only:
CALLC0(GLOBAL_REF(dynamo_thread_exit))
cat_no_thread:
/* switch to d_r_initstack for cleanup of dstack */
AARCH64_ADRP_GOT(GLOBAL_REF(initstack_mutex), x26)
AARCH64_ADRP_GOT(GLOBAL_REF(initstack_mutex), x0)
cat_spin:
CALLC2(GLOBAL_REF(atomic_swap), x26, #1)
CALLC2(GLOBAL_REF(atomic_swap), x0, #1)
cbz w0, cat_have_lock
yield
b cat_spin
Expand Down Expand Up @@ -560,8 +560,21 @@ GLOBAL_LABEL(dynamorio_sys_exit:)

DECLARE_FUNC(new_bsdthread_intercept)
GLOBAL_LABEL(new_bsdthread_intercept:)
/* TODO i#5383: Get correct syscall number for svc. */
brk 0xb003 /* For now we break with a unique code. */
/* We assume we can clobber callee-saved */
mov x9, ARG1 /* This is the clone_rec argument set in pre_system_call */

/* Push a priv_mcontext on the stack */
sub sp, sp, #priv_mcontext_t_SIZE
stp x0, x1, [sp, #(0 * ARG_SZ*2)]
add x0, sp, #(priv_mcontext_t_SIZE + 16) /* compute original SP */
stp x30, x0, [sp, #(15 * ARG_SZ*2)]
str x30, [sp, #(16 * ARG_SZ*2)] /* save LR as PC */

CALLC1(save_priv_mcontext_helper, sp)

CALLC1(GLOBAL_REF(new_bsdthread_setup), sp)
/* Should not return */
bl GLOBAL_REF(unexpected_return)
END_FUNC(new_bsdthread_intercept)
#endif

Expand Down
2 changes: 1 addition & 1 deletion core/arch/arch_exports.h
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ dr_fxrstor32(byte *buf_aligned);
# define DYNAMO_START_XSP_ADJUST 0
#endif

/* x86_code.c */
/* asm_aux.c */
void
dynamo_start(priv_mcontext_t *mc);

Expand Down
23 changes: 15 additions & 8 deletions core/arch/x86_code.c → core/arch/asm_aux.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
/* Copyright (c) 2001 Hewlett-Packard Company */

/*
* x86_code.c - auxiliary C routines to assembly routines in x86.asm
* asm_aux.c - auxiliary C routines to assembly routines in <arch>.asm
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/x86/<arch>.asm/

*/
#include "../globals.h"
#include "../fragment.h"
Expand Down Expand Up @@ -325,11 +325,11 @@ new_thread_setup(priv_mcontext_t *mc)
ASSERT_NOT_REACHED();
}

# if defined(MACOS) && defined(X86)
/* Called from new_bsdthread_intercept for targeting a bsd thread user function.
# if defined(MACOS) && (defined(X86) || defined(AARCH64))
/* Called from new_bsdthread_intercept (asm) for targeting a bsd thread user function.
* new_bsdthread_intercept stored the arg to the user thread func in
* mc->xax. We're on the app stack -- but this is a temporary solution.
* i#1403 covers intercepting in an earlier and better manner.
* mc->xax (X86) or mc->r9 (ARM64). We're on the app stack -- but this
* is a temporary solution. i#1403 covers intercepting in an earlier and better manner.
*/
void
new_bsdthread_setup(priv_mcontext_t *mc)
Expand All @@ -341,7 +341,11 @@ new_bsdthread_setup(priv_mcontext_t *mc)
*/
ENTERING_DR();

# if defined(X86)
crec = (void *)mc->xax; /* placed there by new_bsdthread_intercept */
# elif defined(AARCH64)
crec = (void *)mc->r9; /* placed there by new_bsdthread_intercept */
# endif
func_arg = (void *)get_clone_record_thread_arg(crec);
LOG(GLOBAL, LOG_INTERP, 1,
"new_thread_setup: thread " TIDFMT ", dstack " PFX " clone record " PFX "\n",
Expand All @@ -352,13 +356,16 @@ new_bsdthread_setup(priv_mcontext_t *mc)
dcontext = get_thread_private_dcontext();
ASSERT(dcontext != NULL);
crec = NULL; /* now freed */
thread_starting(dcontext);

dynamo_thread_under_dynamo(dcontext);

/* We assume that the only state that matters is the arg to the function. */
# ifdef X64
# if defined(X86) && defined(X64)
mc->rdi = (reg_t)func_arg;
# else
# elif defined(X86)
*(reg_t *)(mc->xsp + sizeof(reg_t)) = (reg_t)func_arg;
# elif defined(AARCH64)
mc->r0 = (reg_t)func_arg;
# endif

call_switch_stack(dcontext, dcontext->dstack, (void (*)(void *))d_r_dispatch,
Expand Down
2 changes: 1 addition & 1 deletion core/arch/x86/x86.asm
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ DECL_EXTERN(load_dynamo_failure)
* required for non early follow children, we have to explicitly tell the
* linker to do so. This is done in the Makefile.
* Note that if it weren't for wanting local go-native code we would have
* auto_setup in x86_code.c be dynamo_auto_start.
* auto_setup in asm_aux.c be dynamo_auto_start.
*/
DECLARE_FUNC(dynamo_auto_start)
GLOBAL_LABEL(dynamo_auto_start:)
Expand Down
67 changes: 41 additions & 26 deletions core/drlibc/drlibc_aarch64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -75,31 +75,32 @@ GLOBAL_LABEL(dynamorio_syscall:)
* x1 = number of arguments
* sp+8*n = argument n
*/
mov x16, x1
ldr x1, [sp]
sub x16, x16, 1
cbz x16, do_svc
ldr x2, [sp, #8]
sub x16, x16, 1
cbz x16, do_svc
ldr x3, [sp, #16]
sub x16, x16, 1
cbz x16, do_svc
ldr x4, [sp, #24]
sub x16, x16, 1
cbz x16, do_svc
ldr x5, [sp, #32]
sub x16, x16, 1
cbz x16, do_svc
ldr x6, [sp, #40]
sub x16, x16, 1
cbz x16, do_svc
ldr x7, [sp, #48]
sub x16, x16, 1
cbz x16, do_svc
ldr x8, [sp, #56]
mov x16, x0 /* syscall number goes in x16 */
mov x17, x1
cbz x17, do_svc
ldr x0, [sp]
sub x17, x17, 1
cbz x17, do_svc
ldr x1, [sp, #8]
sub x17, x17, 1
cbz x17, do_svc
ldr x2, [sp, #16]
sub x17, x17, 1
cbz x17, do_svc
ldr x3, [sp, #24]
sub x17, x17, 1
cbz x17, do_svc
ldr x4, [sp, #32]
sub x17, x17, 1
cbz x17, do_svc
ldr x5, [sp, #40]
sub x17, x17, 1
cbz x17, do_svc
ldr x6, [sp, #48]
sub x17, x17, 1
cbz x17, do_svc
ldr x7, [sp, #56]
do_svc:
mov x16, #0
svc #0x80
b.cs err_cf
ret
Expand All @@ -121,12 +122,26 @@ GLOBAL_LABEL(dr_fpu_exception_init:)
#ifdef MACOS
DECLARE_FUNC(dynamorio_mach_dep_syscall)
GLOBAL_LABEL(dynamorio_mach_dep_syscall:)
/* TODO i#5383: Use proper gateway. */
brk 0xc001 /* For now we break with a unique code. */
/* mach_dep syscalls use x16=0x80000000 and x3=num; we assume at most 3 args. */
mov x3, x0
mov x16, #0x80000000
mov x17, x1
cbz x17, mach_dep_svc
ldr x0, [sp]
sub x17, x17, 1
cbz x17, mach_dep_svc
ldr x1, [sp, #8]
sub x17, x17, 1
cbz x17, mach_dep_svc
ldr x2, [sp, #16]
mach_dep_svc:
svc #0x80
ret
END_FUNC(dynamorio_mach_dep_syscall)

/* dynamorio_mach_syscall: entry point for issuing a Mach trap.
 * Takes the same arguments as dynamorio_syscall (x0 = syscall number,
 * x1 = number of arguments, remaining arguments on the stack); it only
 * rewrites the number and then reuses dynamorio_syscall's argument
 * marshalling and svc sequence.
 */
DECLARE_FUNC(dynamorio_mach_syscall)
GLOBAL_LABEL(dynamorio_mach_syscall:)
/* x0 = 0 - x0: negate the incoming number in place. */
sub x0, xzr, x0 /* On ARM64 the mach syscalls use negated numbers */
/* Plain branch (not bl), so LR is untouched and dynamorio_syscall's ret
 * returns directly to our caller.
 * NOTE(review): the target is spelled with a literal leading underscore
 * rather than through GLOBAL_REF() as in the other call sites -- confirm
 * this is intentional.
 */
b _dynamorio_syscall
END_FUNC(dynamorio_mach_syscall)
#endif
Expand Down
15 changes: 15 additions & 0 deletions core/dynamo.c
Original file line number Diff line number Diff line change
Expand Up @@ -2253,6 +2253,11 @@ dynamo_thread_init(byte *dstack_in, priv_mcontext_t *mc, void *os_data,
return SUCCESS;
}

/* macOS aarch64 will sometimes crash when acquiring locks if TLS is NULL */
#if defined(MACOS) && defined(AARCH64)
void *tmp_tls = os_tls_thread_init_temp();
#endif

/* note that ENTERING_DR is assumed to have already happened: in apc handler
* for win32, in new_thread_setup for linux, in main init for 1st thread
*/
Expand Down Expand Up @@ -2307,6 +2312,11 @@ dynamo_thread_init(byte *dstack_in, priv_mcontext_t *mc, void *os_data,
}

os_tls_init();
#if defined(MACOS) && defined(AARCH64)
if (tmp_tls) {
os_tls_thread_free_temp(tmp_tls);
}
#endif
dcontext = create_new_dynamo_context(true /*initial*/, dstack_in, mc);
initialize_dynamo_context(dcontext);
set_thread_private_dcontext(dcontext);
Expand Down Expand Up @@ -2583,7 +2593,12 @@ dynamo_thread_exit_common(dcontext_t *dcontext, thread_id_t id,
* we called event callbacks.
*/
if (!other_thread) {
#if !(defined(MACOS) && defined(AARCH64))
/* i#5383: on macOS a64 the app TLS has already been freed, so we must remain
* on priv TLS until os_tls_exit below, when we can zero the thread reg.
*/
dynamo_thread_not_under_dynamo(dcontext);
#endif
#ifdef WINDOWS
/* We don't do this inside os_thread_not_under_dynamo b/c we do it in
* context switches. os_loader_exit() will call this, but it has no
Expand Down
5 changes: 5 additions & 0 deletions core/unix/include/syscall_mach.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,9 @@
#define MACH_mk_timer_cancel_trap 94
#define MACH_iokit_user_client_trap 100

#ifdef AARCH64
# define MACHDEP_thread_set_tsd 2
# define MACHDEP_thread_get_tsd 3
#endif

#endif /* _SYSCALL_MACH_H_ */
7 changes: 4 additions & 3 deletions core/unix/loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,10 @@ privload_create_os_privmod_data(privmod_t *privmod, bool dyn_reloc);
static void
privload_delete_os_privmod_data(privmod_t *privmod);

#ifdef LINUX
void
privload_mod_tls_init(privmod_t *mod);

#ifdef LINUX
void
privload_mod_tls_primary_thread_init(privmod_t *mod);
#endif
Expand Down Expand Up @@ -1406,8 +1406,8 @@ privload_relocate_os_privmod_data(os_privmod_data_t *opd, byte *mod_base)
static void
privload_relocate_mod(privmod_t *mod)
{
#ifdef LINUX
os_privmod_data_t *opd = (os_privmod_data_t *)mod->os_privmod_data;
#ifdef LINUX

ASSERT_OWN_RECURSIVE_LOCK(true, &privload_lock);

Expand All @@ -1428,7 +1428,8 @@ privload_relocate_mod(privmod_t *mod)
if (opd->tls_block_size != 0)
privload_mod_tls_primary_thread_init(mod);
#else
/* XXX i#1285: implement MacOS private loader */
if (opd->tls_block_size != 0)
privload_mod_tls_init(mod);
#endif
}

Expand Down
Loading
Loading