// Patch summary (commentary only; everything from the first "diff --git" header onward is the unmodified patch text).
//
// Purpose: move the set-up for parallel stack-pool sweeping out of _jl_gc_collect into a
// dedicated function, and give stack sweeping its own per-thread request counter.
//
// src/Makefile
//   - Adds a dependency rule: gc-stacks.o and gc-stacks.dbg.obj depend on gc-common.h and gc-stock.h.
//
// src/gc-stacks.c
//   - Includes "gc-stock.h".
//   - Removes the file-local extern declarations of gc_ptls_sweep_idx, gc_n_threads_sweeping
//     and gc_stack_free_idx.
//   - Renames sweep_stack_pools(void) to sweep_stack_pool_loop(void); the body shown in the
//     hunk context is otherwise untouched.
//
// src/gc-stock.c
//   - Adds sweep_stack_pools(jl_ptls_t ptls). It advances gc_stack_free_idx by one, wrapping to 0
//     when stack_free_idx + 1 == gc_n_threads; stores gc_n_threads - 1 into gc_ptls_sweep_idx with
//     release ordering; then calls gc_sweep_wake_all_stacks(ptls), sweep_stack_pool_loop() and
//     gc_sweep_wait_for_all_stacks(), in that order.
//   - _jl_gc_collect: the same inline sequence is removed and replaced by sweep_stack_pools(ptls),
//     still between sweep_weak_refs() and gc_sweep_other(ptls, sweep_full).
//   - Adds may_sweep_stack(ptls): returns non-zero when ptls->gc_tls.gc_stack_sweep_requested > 0.
//   - jl_parallel_gc_threadfun: after gc_mark_loop_parallel(ptls, 0), a new branch guarded by
//     may_sweep_stack(ptls) runs sweep_stack_pool_loop() and then decrements
//     gc_stack_sweep_requested. The stack sweep call is removed from the may_sweep(ptls) branch,
//     which now only runs gc_sweep_pool_parallel(ptls) and decrements gc_sweeps_requested.
//
// src/gc-stock.h
//   - Declares extern _Atomic(int) gc_ptls_sweep_idx and gc_stack_free_idx.
//   - Prototype renamed from sweep_stack_pools(void) to sweep_stack_pool_loop(void).
//
// src/gc-tls.h
//   - Adds the field _Atomic(uint8_t) gc_stack_sweep_requested to jl_gc_tls_states_t, placed
//     after gc_sweeps_requested.
//
// NOTE(review): this copy of the patch looks whitespace-collapsed (all hunks run together on a
//   few long lines, indentation and blank context lines missing). It will presumably not apply
//   with patch/git apply until the original line structure is restored -- confirm against the
//   upstream commit.
// NOTE(review): the "# include" context line in the first gc-stacks.c hunk has no header name;
//   it was presumably lost during extraction -- confirm against the original file.
// NOTE(review): nothing in this patch increments gc_stack_sweep_requested; presumably
//   gc_sweep_wake_all_stacks() does so -- verify, since the new branch in
//   jl_parallel_gc_threadfun only ever decrements it.
// NOTE(review): a Makefile context line pairs gc-stock.o with gc.dbg.obj rather than
//   gc-stock.dbg.obj; it is outside the scope of this patch, but may be worth checking.
diff --git a/src/Makefile b/src/Makefile index 52e673aa6cc1aa..7fcc2cfa2ffebb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -318,6 +318,7 @@ $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,de $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h +$(BUILDDIR)/gc-stacks.o $(BUILDDIR)/gc-stacks.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 0d17eafbcfd1e5..c7798dbb34a8b4 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -1,6 +1,7 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license #include "gc-common.h" +#include "gc-stock.h" #include "threading.h" #ifndef _OS_WINDOWS_ # include @@ -202,11 +203,8 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO return stk; } -extern _Atomic(int) gc_ptls_sweep_idx; -extern _Atomic(int) gc_n_threads_sweeping; -extern _Atomic(int) gc_stack_free_idx; -void sweep_stack_pools(void) JL_NOTSAFEPOINT +void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT { // Stack sweeping algorithm: // // deallocate stacks if we have too many sitting around unused diff --git a/src/gc-stock.c b/src/gc-stock.c index ba0ef74515e8f6..00c936be5f601d 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -1027,6 +1027,20 @@ void gc_sweep_wait_for_all_stacks(void) } } +void sweep_stack_pools(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + // initialize ptls index for parallel sweeping of stack pools + int stack_free_idx = jl_atomic_load_relaxed(&gc_stack_free_idx); + if (stack_free_idx + 1 == gc_n_threads) + jl_atomic_store_relaxed(&gc_stack_free_idx, 0); + else + jl_atomic_store_relaxed(&gc_stack_free_idx, stack_free_idx + 1); + jl_atomic_store_release(&gc_ptls_sweep_idx, gc_n_threads - 1); // idx == gc_n_threads = release stacks to the OS so it's serial + gc_sweep_wake_all_stacks(ptls); + sweep_stack_pool_loop(); + gc_sweep_wait_for_all_stacks(); +} + static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT { assert(pg->fl_begin_offset != UINT16_MAX); @@ -3095,16 +3109,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) #endif current_sweep_full = sweep_full; sweep_weak_refs(); - // initialize ptls index for parallel sweeping of stack pools - int stack_free_idx = jl_atomic_load_relaxed(&gc_stack_free_idx); - if (stack_free_idx + 1 == gc_n_threads) - jl_atomic_store_relaxed(&gc_stack_free_idx, 0); - else - jl_atomic_store_relaxed(&gc_stack_free_idx, stack_free_idx + 1); - jl_atomic_store_release(&gc_ptls_sweep_idx, 
gc_n_threads - 1); // idx == gc_n_threads = release stacks to the OS so it's serial - gc_sweep_wake_all_stacks(ptls); - sweep_stack_pools(); - gc_sweep_wait_for_all_stacks(); + sweep_stack_pools(ptls); gc_sweep_other(ptls, sweep_full); gc_scrub(); gc_verify_tags(); @@ -3516,6 +3521,10 @@ STATIC_INLINE int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT return (jl_atomic_load(&ptls->gc_tls.gc_sweeps_requested) > 0); } +STATIC_INLINE int may_sweep_stack(jl_ptls_t ptls) JL_NOTSAFEPOINT +{ + return (jl_atomic_load(&ptls->gc_tls.gc_stack_sweep_requested) > 0); +} // parallel gc thread function void jl_parallel_gc_threadfun(void *arg) { @@ -3544,10 +3553,14 @@ void jl_parallel_gc_threadfun(void *arg) uv_mutex_unlock(&gc_threads_lock); assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD); gc_mark_loop_parallel(ptls, 0); + if (may_sweep_stack(ptls)) { + assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD); + sweep_stack_pool_loop(); + jl_atomic_fetch_add(&ptls->gc_tls.gc_stack_sweep_requested, -1); + } if (may_sweep(ptls)) { assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD); gc_sweep_pool_parallel(ptls); - sweep_stack_pools(); jl_atomic_fetch_add(&ptls->gc_tls.gc_sweeps_requested, -1); } } diff --git a/src/gc-stock.h b/src/gc-stock.h index 45c93bf4289ae9..afed1d99c8d911 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -511,6 +511,8 @@ extern uv_cond_t gc_threads_cond; extern uv_sem_t gc_sweep_assists_needed; extern _Atomic(int) gc_n_threads_marking; extern _Atomic(int) gc_n_threads_sweeping; +extern _Atomic(int) gc_ptls_sweep_idx; +extern _Atomic(int) gc_stack_free_idx; extern _Atomic(int) n_threads_running; extern uv_barrier_t thread_init_done; void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq); @@ -521,7 +523,7 @@ void gc_mark_loop_serial(jl_ptls_t ptls); void gc_mark_loop_parallel(jl_ptls_t ptls, int master); void gc_sweep_pool_parallel(jl_ptls_t ptls); void 
gc_free_pages(void); -void sweep_stack_pools(void) JL_NOTSAFEPOINT; +void sweep_stack_pool_loop(void) JL_NOTSAFEPOINT; void jl_gc_debug_init(void); // GC pages diff --git a/src/gc-tls.h b/src/gc-tls.h index 9e4b09404db846..183016cd915151 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -82,6 +82,7 @@ typedef struct { jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; _Atomic(size_t) gc_sweeps_requested; + _Atomic(uint8_t) gc_stack_sweep_requested; arraylist_t sweep_objs; } jl_gc_tls_states_t;