Initial support for runtime dlopen in multi-threaded applications (#15317)

This change maintains a shared global list of loaded DSOs and adds an
`_emscripten_thread_sync_code` helper function which can be used to bring
the current thread up to date by loading all the modules in this
list.

The static table region allocated for a particular module is allocated
by the first loading thread.  All other threads will use the same
pre-allocated region.

This change does not yet deal with synchronizing pointers loaded by
`dlsym`.
This commit is contained in:
Sam Clegg 2021-11-01 10:47:06 -07:00 коммит произвёл GitHub
Родитель 0e68b98e85
Коммит bbc208cfdf
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
11 изменённых файлов: 329 добавлений и 23 удалений

Просмотреть файл

@ -419,7 +419,11 @@ var LibraryDylink = {
// Loads a side module from binary data or compiled Module. Returns the module's exports or a
// promise that resolves to its exports if the loadAsync flag is set.
$loadWebAssemblyModule__deps: ['$loadDynamicLibrary', '$createInvokeFunction', '$getMemory', '$relocateExports', '$resolveGlobalSymbol', '$GOTHandler', '$getDylinkMetadata', '$alignMemory'],
$loadWebAssemblyModule__deps: [
'$loadDynamicLibrary', '$createInvokeFunction', '$getMemory',
'$relocateExports', '$resolveGlobalSymbol', '$GOTHandler',
'$getDylinkMetadata', '$alignMemory', '$zeroMemory',
],
$loadWebAssemblyModule: function(binary, flags, handle) {
var metadata = getDylinkMetadata(binary);
#if ASSERTIONS
@ -429,17 +433,42 @@ var LibraryDylink = {
// loadModule loads the wasm module after all its dependencies have been loaded.
// can be called both sync/async.
function loadModule() {
// alignments are powers of 2
var memAlign = Math.pow(2, metadata.memoryAlign);
// finalize alignments and verify them
memAlign = Math.max(memAlign, STACK_ALIGN); // we at least need stack alignment
// prepare memory
var memoryBase = metadata.memorySize ? alignMemory(getMemory(metadata.memorySize + memAlign), memAlign) : 0; // TODO: add to cleanups
// The first thread to load a given module needs to allocate the static
// table and memory regions. Later threads re-use the same table region
// and can ignore the memory region (since memory is shared between
// threads already).
var needsAllocation = !handle || !{{{ makeGetValue('handle', C_STRUCTS.dso.mem_allocated, 'i8') }}};
if (needsAllocation) {
// alignments are powers of 2
var memAlign = Math.pow(2, metadata.memoryAlign);
// finalize alignments and verify them
memAlign = Math.max(memAlign, STACK_ALIGN); // we at least need stack alignment
// prepare memory
var memoryBase = metadata.memorySize ? alignMemory(getMemory(metadata.memorySize + memAlign), memAlign) : 0; // TODO: add to cleanups
var tableBase = metadata.tableSize ? wasmTable.length : 0;
if (handle) {
{{{ makeSetValue('handle', C_STRUCTS.dso.mem_allocated, '1', 'i8') }}};
{{{ makeSetValue('handle', C_STRUCTS.dso.mem_addr, 'memoryBase', '*') }}};
{{{ makeSetValue('handle', C_STRUCTS.dso.mem_size, 'metadata.memorySize', 'i32') }}};
{{{ makeSetValue('handle', C_STRUCTS.dso.table_addr, 'tableBase', '*') }}};
{{{ makeSetValue('handle', C_STRUCTS.dso.table_size, 'metadata.tableSize', 'i32') }}};
}
} else {
memoryBase = {{{ makeGetValue('handle', C_STRUCTS.dso.mem_addr, '*') }}};
tableBase = {{{ makeGetValue('handle', C_STRUCTS.dso.table_addr, '*') }}};
}
var tableGrowthNeeded = tableBase + metadata.tableSize - wasmTable.length;
if (tableGrowthNeeded > 0) {
#if DYLINK_DEBUG
err("loadModule: memoryBase=" + memoryBase);
err("loadModule: growing table: " + tableGrowthNeeded);
#endif
wasmTable.grow(tableGrowthNeeded);
}
#if DYLINK_DEBUG
err("loadModule: memory[" + memoryBase + ":" + (memoryBase + metadata.memorySize) + "]" +
" table[" + tableBase + ":" + (tableBase + metadata.tableSize) + "]");
#endif
var tableBase = metadata.tableSize ? wasmTable.length : 0;
wasmTable.grow(metadata.tableSize);
// This is the export map that we ultimately return. We declare it here
// so it can be used within resolveSymbol. We resolve symbols against

Просмотреть файл

@ -33,6 +33,11 @@
"structs": {
"dso": [
"flags",
"mem_allocated",
"mem_addr",
"mem_size",
"table_addr",
"table_size",
"name"
]
}

Просмотреть файл

@ -349,6 +349,9 @@ int emscripten_pthread_attr_settransferredcanvases(pthread_attr_t *a, const char
// blocking is not enabled, see ALLOW_BLOCKING_ON_MAIN_THREAD.
void emscripten_check_blocking_allowed(void);
// Experimental API for syncing loaded code between pthreads.
void _emscripten_thread_sync_code();
#ifdef __cplusplus
}
#endif

Просмотреть файл

@ -12,6 +12,7 @@
#include <assert.h>
#include <dlfcn.h>
#include <pthread.h>
#include <threads.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@ -26,7 +27,8 @@ extern void _emscripten_dlopen_js(struct dso* handle,
em_arg_callback_func onsuccess,
em_arg_callback_func onerror);
static struct dso *head, *tail;
static struct dso * _Atomic head, * _Atomic tail;
static thread_local struct dso* thread_local_tail;
static pthread_rwlock_t lock;
void __dl_vseterr(const char*, va_list);
@ -48,12 +50,17 @@ int __dl_invalid_handle(void* h) {
}
static void load_library_done(struct dso* p) {
#ifdef DYLINK_DEBUG
fprintf(stderr, "%p: load_library_done: dso=%p mem_addr=%p mem_size=%d table_addr=%p table_size=%d\n", pthread_self(), p, p->mem_addr, p->mem_size, p->table_addr, p->table_size);
#endif
// insert into linked list
p->prev = tail;
if (tail) {
tail->next = p;
}
tail = p;
thread_local_tail = p;
if (!head) {
head = p;
@ -78,7 +85,7 @@ static struct dso* load_library_start(const char* name, int flags) {
static void dlopen_js_onsuccess(void* handle) {
struct dso* p = (struct dso*)handle;
#ifdef DYLINK_DEBUG
fprintf(stderr, "%p: dlopen_js_onsuccess: dso=%p\n", pthread_self(), p);
fprintf(stderr, "%p: dlopen_js_onsuccess: dso=%p mem_addr=%p mem_size=%p\n", pthread_self(), p, p->mem_addr, p->mem_size);
#endif
load_library_done(p);
pthread_rwlock_unlock(&lock);
@ -95,7 +102,12 @@ static void dlopen_js_onerror(void* handle) {
free(p);
}
static void init_dso_list() {
// This function is called at the start of all entry points so that the dso
// list gets initialized on first use.
static void ensure_init() {
if (head) {
return;
}
// Initialize the dso list. This happens on first run.
pthread_rwlock_wrlock(&lock);
if (!head) {
@ -111,9 +123,7 @@ static void init_dso_list() {
}
void* dlopen(const char* file, int flags) {
if (!head) {
init_dso_list();
}
ensure_init();
if (!file) {
return head;
}
@ -161,9 +171,7 @@ end:
void emscripten_dlopen(const char* filename, int flags, void* user_data,
em_dlopen_callback onsuccess, em_arg_callback_func onerror) {
if (!head) {
init_dso_list();
}
ensure_init();
if (!filename) {
onsuccess(user_data, head);
return;
@ -208,3 +216,33 @@ int dladdr(const void* addr, Dl_info* info) {
info->dli_saddr = NULL;
return 1;
}
// Brings the calling thread up to date with the shared DSO list: every entry
// after this thread's `thread_local_tail` is loaded with `_dlopen_js`, and
// `thread_local_tail` is advanced past each entry once it has been loaded.
void _emscripten_thread_sync_code() {
  ensure_init();
  // Nothing to do if this thread has already processed the newest entry.
  if (thread_local_tail == tail) {
#ifdef DYLINK_DEBUG
    fprintf(stderr, "%p: emscripten_thread_sync_code: already in sync\n", pthread_self());
#endif
    return;
  }
  // The list is walked with the read lock held.
  pthread_rwlock_rdlock(&lock);
  if (!thread_local_tail) {
    // No position recorded for this thread yet: start from the list head.
    thread_local_tail = head;
  }
  while (thread_local_tail->next) {
    struct dso* p = thread_local_tail->next;
#ifdef DYLINK_DEBUG
    // mem_size and table_size are size_t, so they must be printed with %zu;
    // %d would consume the wrong number of bytes where size_t is wider than int.
    fprintf(stderr, "%p: emscripten_thread_sync_code: %s mem_addr=%p mem_size=%zu table_addr=%p table_size=%zu\n", pthread_self(), p->name, p->mem_addr, p->mem_size, p->table_addr, p->table_size);
#endif
    void* success = _dlopen_js(p);
    if (!success) {
      fprintf(stderr, "dlerror: %s\n", dlerror());
    }
    assert(success);
    // Only advance once the entry has been loaded on this thread.
    thread_local_tail = p;
  }
  pthread_rwlock_unlock(&lock);
#ifdef DYLINK_DEBUG
  fprintf(stderr, "%p: emscripten_thread_sync_code done\n", pthread_self());
#endif
}

Просмотреть файл

@ -16,10 +16,19 @@ struct dso {
em_arg_callback_func onerror;
void* user_data;
// Flags used to open the library. We need to cache these so that
// (in the future) other threads can mirror the open library state.
// Flags used to open the library. We need to cache these so that other
// threads can mirror the open library state.
int flags;
// Location in memory/table of static data/static function addresses.
// The first thread to load a given module allocates the memory and table
// address space and then sets this field to non-zero.
uint8_t mem_allocated;
void* mem_addr;
size_t mem_size;
void* table_addr;
size_t table_size;
// Flexible array; must be final element of struct
char name[];
};

Просмотреть файл

@ -0,0 +1,86 @@
#include <assert.h>
#include <stdbool.h>
#include <stdatomic.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>
#include <emscripten/threading.h>
typedef int* (*sidey_data_type)();
typedef int (*func_t)();
typedef func_t (*sidey_func_type)();
static sidey_data_type p_side_data_address;
static sidey_func_type p_side_func_address;
static int* expected_data_addr;
static func_t expected_func_addr;
static pthread_cond_t ready_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t ready_mutex = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool ready = false;
// Thread entry point. Waits until main() has set `ready`, syncs this thread's
// loaded code with _emscripten_thread_sync_code(), then calls the side-module
// functions through the pointers main() stored and checks that they report
// the same addresses main() recorded.
static void* thread_main(void* arg) {
  (void)arg;
  // The predicate is tested while holding the mutex. main() sets `ready` and
  // signals under the same mutex, so the wakeup cannot land between the test
  // and the wait (which would leave this thread blocked forever).
  pthread_mutex_lock(&ready_mutex);
  while (!ready) {
    pthread_cond_wait(&ready_cond, &ready_mutex);
  }
  pthread_mutex_unlock(&ready_mutex);

  printf("in thread_main\n");
  _emscripten_thread_sync_code();

  printf("calling p_side_data_address=%p\n", p_side_data_address);
  int* data_addr = p_side_data_address();
  assert(data_addr == expected_data_addr);

  printf("calling p_side_func_address=%p\n", p_side_func_address);
  func_t func_addr = p_side_func_address();
  assert(expected_func_addr == func_addr);
  assert(func_addr() == 43);

  printf("thread_main done\n");
  return NULL;
}
// Test driver: starts the worker thread first, then loads the side module,
// records the addresses it reports, and finally releases the worker so it can
// compare what it sees against those recorded values.
int main() {
printf("in main\n");
// Start a thread before loading the shared library
pthread_t t;
int rc = pthread_create(&t, NULL, thread_main, NULL);
assert(rc == 0);
printf("loading dylib\n");
void* handle = dlopen("liblib.so", RTLD_NOW|RTLD_GLOBAL);
if (!handle) {
printf("dlerror: %s\n", dlerror());
}
assert(handle);
// Store the looked-up entry points in file-scope variables; thread_main
// calls through the same pointers.
p_side_data_address = dlsym(handle, "side_data_address");
printf("p_side_data_address=%p\n", p_side_data_address);
p_side_func_address = dlsym(handle, "side_func_address");
printf("p_side_func_address=%p\n", p_side_func_address);
// Address reported on this thread; thread_main asserts it gets the same one.
expected_data_addr = p_side_data_address();
// side_func_address returns the address of a function
// internal to the side module (i.e. part of its static
// table region).
expected_func_addr = p_side_func_address();
printf("p_side_func_address -> %p\n", expected_func_addr);
assert(expected_func_addr() == 43);
// Set `ready` and signal the condition variable while holding ready_mutex.
pthread_mutex_lock(&ready_mutex);
ready = true;
pthread_cond_signal(&ready_cond);
pthread_mutex_unlock(&ready_mutex);
printf("joining\n");
rc = pthread_join(t, NULL);
assert(rc == 0);
printf("done join\n");
dlclose(handle);
return 0;
}

Просмотреть файл

@ -0,0 +1,22 @@
typedef int (*myfunc_type)();

// Only element 0 is given an explicit initializer.
static int mydata[10] = { 44 };

static int myfunc() {
  return 43;
}

// Returning the address of `mydata` forces it to be present in the .data
// segment, with its address calculated relative to the `__memory_base` at
// which the DSO is loaded.
int* side_data_address() {
  return &mydata[0];
}

// Returning the address of `myfunc` forces it to be present in the static
// table region of the DSO, with its address calculated relative to the
// `__table_base` at which the DSO is loaded.
myfunc_type side_func_address() {
  return myfunc;
}

Просмотреть файл

@ -0,0 +1,72 @@
#include <assert.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>
#include <emscripten/threading.h>
typedef int (*func_t)();
func_t g_one;
func_t g_two;
func_t g_three;
// Opens "liblib.so" and returns the handle from dlopen. On failure the
// dlerror text is printed and the assertion fires.
void* open_lib() {
  const int flags = RTLD_NOW | RTLD_GLOBAL;
  void* handle = dlopen("liblib.so", flags);
  if (handle == NULL) {
    printf("dlerror: %s\n", dlerror());
    assert(handle);
  }
  printf("dlopen returned: %p\n", handle);
  return handle;
}
// Looks up "one", "two", "three" in that order, stores the results in the
// file-scope g_one/g_two/g_three pointers, and checks what each one returns.
static void test_order1() {
  void* handle = open_lib();
  g_one = dlsym(handle, "one");
  g_two = dlsym(handle, "two");
  g_three = dlsym(handle, "three");
  // Fail with a readable assertion if a lookup failed, instead of calling
  // through a null function pointer below.
  assert(g_one);
  assert(g_two);
  assert(g_three);
  assert(g_one() == 1);
  assert(g_two() == 2);
  assert(g_three() == 3);
}
// Looks up the same three symbols in the reverse order ("three", "two",
// "one"), checks what each returns, and checks that every pointer equals the
// one test_order1 stored in g_one/g_two/g_three.
static void test_order2() {
  void* handle = open_lib();
  func_t three = dlsym(handle, "three");
  func_t two = dlsym(handle, "two");
  func_t one = dlsym(handle, "one");
  // Check the lookups before the printf calls below invoke the pointers.
  assert(three);
  assert(two);
  assert(one);
  printf("one: %p -> %d\n", one, one());
  printf("two: %p -> %d\n", two, two());
  printf("three: %p -> %d\n", three, three());
  assert(one() == 1);
  assert(two() == 2);
  assert(three() == 3);
  assert(one == g_one);
  assert(two == g_two);
  assert(three == g_three);
}
// Thread entry point: syncs this thread's loaded code with
// _emscripten_thread_sync_code() and then runs test_order2.
// The explicit `void*` parameter matches the start-routine type that
// pthread_create expects; the argument itself is unused.
static void* thread_main(void* arg) {
  (void)arg;
  printf("in thread_main\n");
  _emscripten_thread_sync_code();
  test_order2();
  printf("thread_main done\n");
  return NULL;
}
// Runs test_order1 on the main thread, then starts a thread running
// thread_main and waits for it to finish.
int main() {
  printf("in main\n");
  test_order1();
  pthread_t t;
  int rc = pthread_create(&t, NULL, thread_main, NULL);
  assert(rc == 0);
  // Check the join result as well, so a failed join is not silently ignored.
  rc = pthread_join(t, NULL);
  assert(rc == 0);
  return 0;
}

Просмотреть файл

@ -0,0 +1,11 @@
// Returns the constant 1.
int one() { return 1; }
// Returns the constant 2.
int two() { return 2; }
// Returns the constant 3.
int three() { return 3; }

9
tests/reference_struct_info.json поставляемый
Просмотреть файл

@ -1228,9 +1228,14 @@
"d_type": 18
},
"dso": {
"__size__": 24,
"__size__": 44,
"flags": 20,
"name": 24
"mem_addr": 28,
"mem_allocated": 24,
"mem_size": 32,
"name": 44,
"table_addr": 36,
"table_size": 40
},
"emscripten_fetch_attr_t": {
"__size__": 92,

26
tests/test_core.py поставляемый
Просмотреть файл

@ -8543,6 +8543,32 @@ NODEFS is no longer included by default; build with -lnodefs.js
self.dylink_testf(main, so_name=very_long_name,
need_reverse=False)
@needs_dylink
@node_pthreads
def test_pthread_dlopen(self):
# Passes core/pthread/test_pthread_dlopen_side.c to build_dlfcn_lib and then
# runs core/pthread/test_pthread_dlopen.c with do_runf.
self.set_setting('USE_PTHREADS')
# NOTE(review): presumably silences the warning for this still-experimental
# feature -- confirm.
self.emcc_args.append('-Wno-experimental')
self.build_dlfcn_lib(test_file('core/pthread/test_pthread_dlopen_side.c'))
self.prep_dlfcn_main()
self.set_setting('EXIT_RUNTIME')
# The test program creates one thread of its own.
self.set_setting('PTHREAD_POOL_SIZE', 2)
self.set_setting('PROXY_TO_PTHREAD')
self.do_runf(test_file('core/pthread/test_pthread_dlopen.c'))
@needs_dylink
@node_pthreads
def test_pthread_dlsym(self):
# Passes core/pthread/test_pthread_dlsym_side.c to build_dlfcn_lib and then
# runs core/pthread/test_pthread_dlsym.c with do_runf.
self.set_setting('USE_PTHREADS')
# NOTE(review): presumably silences the warning for this still-experimental
# feature -- confirm.
self.emcc_args.append('-Wno-experimental')
self.build_dlfcn_lib(test_file('core/pthread/test_pthread_dlsym_side.c'))
self.prep_dlfcn_main()
self.set_setting('EXIT_RUNTIME')
# The test program creates one thread of its own.
self.set_setting('PTHREAD_POOL_SIZE', 2)
self.set_setting('PROXY_TO_PTHREAD')
self.do_runf(test_file('core/pthread/test_pthread_dlsym.c'))
@needs_dylink
@node_pthreads
def test_pthread_dylink_tls(self):