diff --git a/core/app.cc b/core/app.cc index c5d152b7df..d9a4c6a629 100644 --- a/core/app.cc +++ b/core/app.cc @@ -169,7 +169,31 @@ application::application(const std::string& command, } merge_in_environ(new_program, env); - _lib = current_program->get_library(_command); + prepare_argv(current_program); + // + // Some applications (specifically Golang ones) during initialization of their ELF object + // invoke init functions that access variables from TLS (Thread Local Storage) memory + // area by offset determined during compilation. This type of TLS access is called + // "initial exec" (or static) and is best explained by Ulrich Drepper's paper - + // https://www.uclibc.org/docs/tls.pdf. + // + // In essence, library code accesses thread-local variables located in the application static + // TLS memory area that belongs to the current thread. The static TLS memory area is + // set up by thread::setup_tcb() during thread construction phase based on TLS templates + // of the corresponding application ELF object. The TLS templates are captured by + // object::init_static_tls() (called by program::get_library()) but only used when + // creating new threads to run the application code. So the TLS memory area of the current + // thread is not affected. + // + // Hopefully it is clear by now that the TLS memory area of the thread invoking this constructor, + // which typically is a parent to the new application thread yet to be started, is not set up + // and therefore should not be accessed by init functions during ELF object initialization. + // In order to overcome this constraint, which by the way is well explained in + // issue #810 and very hard to fix correctly, we need to delay initialization of the application + // ELF object and pass delay_init set to true (3rd argument) to the get_library method below. + // The ELF object will be initialized by explicitly calling program::init_library() from + // application::main() invoked by the new thread later.
+ _lib = current_program->get_library(_command, {}, true); } catch (const launch_error &e) { throw; } catch (const std::exception &e) { @@ -274,7 +298,12 @@ TRACEPOINT(trace_app_main_ret, "return_code=%d", int); void application::main() { __libc_stack_end = __builtin_frame_address(0); - + // + // Explicitly initialize the application ELF object which would have been + // loaded earlier, most likely by the parent thread in the application constructor. + // Effectively the ELF initialization has been delayed until this moment + // for reasons explained in application::application(). + elf::get_program()->init_library(_args.size(), _argv.get()); sched::thread::current()->set_name(_command); if (_main) { @@ -292,65 +321,81 @@ void application::main() // _entry_point() doesn't return } -void application::run_main(std::string path, int argc, char** argv) +void application::prepare_argv(elf::program *program) { - char *c_path = (char *)(path.c_str()); - // path is guaranteed to keep existing this function + // Prepare the program_* variables used by libc + char *c_path = (char *)(_command.c_str()); program_invocation_name = c_path; program_invocation_short_name = basename(c_path); - unsigned sz = argc; // for the trailing 0's. - for (int i = 0; i < argc; ++i) { - sz += strlen(argv[i]); + // Allocate a contiguous buffer for arguments: _argv_buf + // First count the trailing NUL terminators (one per argument) + auto sz = _args.size(); + // Then add the sum of each argument size to sz + for (auto &str: _args) { + sz += str.size(); } + _argv_buf.reset(new char[sz]); - std::unique_ptr argv_buf(new char[sz]); - char *ab = argv_buf.get(); // In Linux, the pointer arrays argv[] and envp[] are continguous. // Unfortunately, some programs rely on this fact (e.g., libgo's // runtime_goenvs_unix()) so it is useful that we do this too.
+ + // First count the number of environment variables int envcount = 0; while (environ[envcount]) { envcount++; } - char *contig_argv[argc + 1 + envcount + 1]; - for (int i = 0; i < argc; ++i) { - size_t asize = strlen(argv[i]); - memcpy(ab, argv[i], asize); - ab[asize] = '\0'; + // Allocate the contiguous buffer for argv[], envp[] and the auxiliary vector + _argv.reset(new char*[_args.size() + 1 + envcount + 1 + sizeof(Elf64_auxv_t) * 3]); + + // Fill the argv part of these buffers + char *ab = _argv_buf.get(); + char **contig_argv = _argv.get(); + for (size_t i = 0; i < _args.size(); i++) { + auto &str = _args[i]; + memcpy(ab, str.c_str(), str.size()); + ab[str.size()] = '\0'; contig_argv[i] = ab; - ab += asize + 1; + ab += str.size() + 1; } - contig_argv[argc] = nullptr; + contig_argv[_args.size()] = nullptr; + // Do the same for environ for (int i = 0; i < envcount; i++) { - contig_argv[argc + 1 + i] = environ[i]; + contig_argv[_args.size() + 1 + i] = environ[i]; } - contig_argv[argc + 1 + envcount] = nullptr; - // make sure to have a fresh optind across calls - // FIXME: fails if run() is executed in parallel - int old_optind = optind; - optind = 0; - _return_code = _main(argc, contig_argv); - optind = old_optind; + _libvdso = program->get_library("libvdso.so"); + if (!_libvdso) { + abort("could not load libvdso.so\n"); + } + + // Pass the VDSO library to the application.
+ Elf64_auxv_t* _auxv = + reinterpret_cast(&contig_argv[_args.size() + 1 + envcount + 1]); + _auxv[0].a_type = AT_SYSINFO_EHDR; + _auxv[0].a_un.a_val = reinterpret_cast(_libvdso->base()); + + _auxv[1].a_type = AT_PAGESZ; + _auxv[1].a_un.a_val = sysconf(_SC_PAGESIZE); + + _auxv[2].a_type = AT_NULL; + _auxv[2].a_un.a_val = 0; } void application::run_main() { trace_app_main(this, _command.c_str()); - // C main wants mutable arguments, so we have can't use strings directly - std::vector> mut_args; - transform(_args, back_inserter(mut_args), - [](std::string s) { return std::vector(s.data(), s.data() + s.size() + 1); }); - std::vector argv; - transform(mut_args.begin(), mut_args.end(), back_inserter(argv), - [](std::vector& s) { return s.data(); }); - auto argc = argv.size(); - argv.push_back(nullptr); - run_main(_command, argc, argv.data()); + // make sure to have a fresh optind across calls + // FIXME: fails if run() is executed in parallel + int old_optind = optind; + optind = 0; + _return_code = _main(_args.size(), _argv.get()); + optind = old_optind; if (_return_code) { debug("program %s returned %d\n", _command.c_str(), _return_code); diff --git a/core/elf.cc b/core/elf.cc index 2a46633d06..1ec841fbed 100644 --- a/core/elf.cc +++ b/core/elf.cc @@ -937,19 +937,21 @@ std::string object::pathname() } // Run the object's static constructors or similar initialization -void object::run_init_funcs() +void object::run_init_funcs(int argc, char** argv) { + // Invoke any init functions if present and pass in argc and argv + // The reason why we pass argv and argc is explained in issue #795 if (dynamic_exists(DT_INIT)) { auto func = dynamic_ptr(DT_INIT); if (func) { - reinterpret_cast(func)(); + reinterpret_cast(func)(argc, argv); } } if (dynamic_exists(DT_INIT_ARRAY)) { - auto funcs = dynamic_ptr(DT_INIT_ARRAY); + auto funcs = dynamic_ptr(DT_INIT_ARRAY); auto nr = dynamic_val(DT_INIT_ARRAYSZ) / sizeof(*funcs); for (auto i = 0u; i < nr; ++i) { - funcs[i](); + funcs[i](argc, 
argv); } } } @@ -1176,27 +1178,60 @@ program::load_object(std::string name, std::vector extra_path, } std::shared_ptr -program::get_library(std::string name, std::vector extra_path) +program::get_library(std::string name, std::vector extra_path, bool delay_init) { SCOPE_LOCK(_mutex); + // + // A shared library needs to be initialized before any of its symbols (function or variable) + // is accessed. The initialization involves invoking so-called init functions and is handled by + // the init_library() method below. The parameter delay_init determines whether init_library is + // called right away or at some arbitrary time later. + // + // Because init_library() needs to access the library object itself and its dependencies, possibly + // later, we push the loaded objects list onto the _loaded_objects_stack member variable of the program. + // + // Since a library can load another one like java.so does in OSv, we want a stack + // structure so each init_library call gets its corresponding list of objects to operate on. + // std::vector> loaded_objects; auto ret = load_object(name, extra_path, loaded_objects); + _loaded_objects_stack.push(loaded_objects); + if (ret) { ret->init_static_tls(); } - // After loading the object and all its needed objects, run these objects' - // init functions in reverse order (so those of deepest needed object runs - // first) and finally make the loaded objects visible in search order.
- auto size = loaded_objects.size(); - for (int i = size - 1; i >= 0; i--) { - loaded_objects[i]->run_init_funcs(); - } - for (unsigned i = 0; i < size; i++) { - loaded_objects[i]->setprivate(false); + + if (!delay_init) { + init_library(); } + return ret; } +void program::init_library(int argc, char** argv) +{ + // Get the list of pointers to shared objects from stack before iterating on them + if(!_loaded_objects_stack.empty()) { + std::vector> loaded_objects = + _loaded_objects_stack.top(); + // + // After loading the object and all its needed objects, run these objects' + // init functions in reverse order (so those of deepest needed object runs + // first) and finally make the loaded objects visible in search order. + auto size = loaded_objects.size(); + for (unsigned i = 0; i < size; i++) { + loaded_objects[i]->setprivate(true); + } + for (int i = size - 1; i >= 0; i--) { + loaded_objects[i]->run_init_funcs(argc, argv); + } + for (unsigned i = 0; i < size; i++) { + loaded_objects[i]->setprivate(false); + } + _loaded_objects_stack.pop(); + } +} + void program::remove_object(object *ef) { SCOPE_LOCK(_mutex); diff --git a/include/osv/app.hh b/include/osv/app.hh index 1a39e327c5..73a1b38f8b 100644 --- a/include/osv/app.hh +++ b/include/osv/app.hh @@ -20,6 +20,10 @@ #include #include +#include "musl/include/elf.h" +#undef AT_UID // prevent collisions +#undef AT_GID + extern "C" void __libc_start_main(int(*)(int, char**), int, char**, void(*)(), void(*)(), void(*)(), void*); @@ -203,6 +207,7 @@ private: void start_and_join(waiter* setup_waiter); void main(); void run_main(std::string path, int argc, char** argv); + void prepare_argv(elf::program *program); void run_main(); friend void ::__libc_start_main(int(*)(int, char**), int, char**, void(*)(), void(*)(), void(*)(), void*); @@ -219,10 +224,18 @@ private: mutex _termination_mutex; std::shared_ptr _lib; std::shared_ptr _libenviron; + std::shared_ptr _libvdso; main_func_t* _main; void (*_entry_point)(); static 
app_registry apps; + // _argv is set by prepare_argv() called from the constructor and needs to be + // retained as member variable so that it later can be passed as argument by either + // application::main and application::run_main() or application::run_main() called + // from __libc_start_main() + std::unique_ptr _argv; + std::unique_ptr _argv_buf; // actual arguments content _argv points to + // Must be destroyed before _lib, because contained function objects may // have destructors which are part of the application's code. std::list> _termination_request_callbacks; diff --git a/include/osv/elf.hh b/include/osv/elf.hh index fabc6e6881..e0966c4029 100644 --- a/include/osv/elf.hh +++ b/include/osv/elf.hh @@ -11,6 +11,7 @@ #include "fs/fs.hh" #include #include +#include #include #include #include @@ -334,7 +335,7 @@ public: const std::vector *phdrs(); std::string soname(); std::string pathname(); - void run_init_funcs(); + void run_init_funcs(int argc, char** argv); void run_fini_funcs(); template T* lookup(const char* name); @@ -523,9 +524,22 @@ public: * \param[in] extra_path Additional directories to search in addition to * the default search path which is set with * set_search_path(). + * \param[in] delay_init If true the init functions in the library and its + * dependencies will not be executed until some later + * time when the init_library() is called. By default + * the init functions are executed right away. */ std::shared_ptr - get_library(std::string lib, std::vector extra_path = {}); + get_library(std::string lib, std::vector extra_path = {}, bool delay_init = false); + + /** + * Execute init functions of the library itself and its dependencies. + * + * Any arguments passed in are relayed to the init functions. Right now + * the only place that explicitly invokes init_library is application::main() + * method which also passes any argv passed to the application. 
+ */ + void init_library(int argc = 0, char **argv = nullptr); /** * Set the default search path for get_library(). @@ -596,6 +610,9 @@ private: friend elf::file::~file(); friend class object; + // this allows the objects resolved by get_library() to be initialized + // by init_library() at an arbitrary later time - the delayed initialization scenario + std::stack>> _loaded_objects_stack; }; void create_main_program();