From 685a4e5be77ec376f29a180c7ed9fbee23e05bac Mon Sep 17 00:00:00 2001 From: Koichi Sasada Date: Wed, 3 Jul 2024 11:56:49 +0900 Subject: [PATCH] VM barrier needs to store GC root On the VM barrier waiting, it needs to store machine context as a GC root. Also it needs to wait for barrier synchronization correctly by `while` (for continuous barrier sync by another ractor). This is why GC marking misses are occerred on ARM machines. like: https://rubyci.s3.amazonaws.com/fedora40-arm/ruby-master/log/20240702T063002Z.fail.html.gz --- thread_pthread.c | 3 ++- vm_sync.c | 40 +++++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/thread_pthread.c b/thread_pthread.c index b9421559f2..1046a8aabe 100644 --- a/thread_pthread.c +++ b/thread_pthread.c @@ -592,7 +592,7 @@ thread_sched_setup_running_threads(struct rb_thread_sched *sched, rb_ractor_t *c } if (add_th) { - if (UNLIKELY(vm->ractor.sched.barrier_waiting)) { + while (UNLIKELY(vm->ractor.sched.barrier_waiting)) { RUBY_DEBUG_LOG("barrier-wait"); ractor_sched_barrier_join_signal_locked(vm); @@ -605,6 +605,7 @@ thread_sched_setup_running_threads(struct rb_thread_sched *sched, rb_ractor_t *c ccan_list_add(&vm->ractor.sched.running_threads, &add_th->sched.node.running_threads); vm->ractor.sched.running_cnt++; sched->is_running = true; + VM_ASSERT(!vm->ractor.sched.barrier_waiting); } if (add_timeslice_th) { diff --git a/vm_sync.c b/vm_sync.c index 4bef232f20..b57bd86647 100644 --- a/vm_sync.c +++ b/vm_sync.c @@ -40,6 +40,26 @@ rb_vm_locked_p(void) return vm_locked(GET_VM()); } +static bool +vm_need_barrier_waiting(const rb_vm_t *vm) +{ +#ifdef RUBY_THREAD_PTHREAD_H + return vm->ractor.sched.barrier_waiting; +#else + return vm->ractor.sync.barrier_waiting; +#endif +} + +static bool +vm_need_barrier(bool no_barrier, const rb_ractor_t *cr, const rb_vm_t *vm) +{ +#ifdef RUBY_THREAD_PTHREAD_H + return !no_barrier && cr->threads.sched.running != NULL && vm_need_barrier_waiting(vm); // ractor has running threads. +#else + return !no_barrier && vm_need_barrier_waiting(vm); +#endif +} + static void vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsigned int *lev APPEND_LOCATION_ARGS) { @@ -58,23 +78,17 @@ vm_lock_enter(rb_ractor_t *cr, rb_vm_t *vm, bool locked, bool no_barrier, unsign VM_ASSERT(vm->ractor.sync.lock_owner == NULL); VM_ASSERT(vm->ractor.sync.lock_rec == 0); -#ifdef RUBY_THREAD_PTHREAD_H - if (!no_barrier && - cr->threads.sched.running != NULL // ractor has running threads. - ) { + // barrier + if (vm_need_barrier(no_barrier, cr, vm)) { + rb_execution_context_t *ec = GET_EC(); + RB_VM_SAVE_MACHINE_CONTEXT(rb_ec_thread_ptr(ec)); - while (vm->ractor.sched.barrier_waiting) { + do { + VM_ASSERT(vm_need_barrier_waiting(vm)); RUBY_DEBUG_LOG("barrier serial:%u", vm->ractor.sched.barrier_serial); rb_ractor_sched_barrier_join(vm, cr); - } + } while (vm_need_barrier_waiting(vm)); } -#else - if (!no_barrier) { - while (vm->ractor.sync.barrier_waiting) { - rb_ractor_sched_barrier_join(vm, cr); - } - } -#endif VM_ASSERT(vm->ractor.sync.lock_rec == 0); VM_ASSERT(vm->ractor.sync.lock_owner == NULL);