Provide GC.config to disable major GC collections

This feature provides a new method `GC.config` that configures internal
GC configuration variables provided by an individual GC implementation.

Implemented in this PR is the option `full_mark`: a boolean value that
will determine whether the Ruby GC is allowed to run a major collection
while the process is running.

It has the following semantics

This feature configures Ruby's GC to run only minor GCs. It's designed
to give users relying on Out of Band GC complete control over when a
major GC is run. Configuring `full_mark: false` does two main things:

* Never runs a Major GC. When the heap runs out of space during a minor
  and when a major would traditionally be run, instead we allocate more
  heap pages, and mark objspace as needing a major GC.
* Don't increment object ages. We don't promote objects during GC, this
  will cause every object to be scanned on every minor. This is an
  intentional trade-off between minor GC's doing more work every time,
  and potentially promoting objects that will then never be GC'd.

The intention behind not aging objects is that users of this feature
should use a preforking web server, or some other method of pre-warming
the oldgen (like Nakayoshi fork) before disabling Majors. That way most
objects that are going to be old will have already been promoted.

This will interleave major and minor GC collections in exactly the same
way that the Ruby GC ran in versions prior to this change. This is the
default behaviour.

* This new method has the following extra semantics:
  - `GC.config` with no arguments returns a hash of the keys of the
    currently configured GC
  - `GC.config` with a key pair (e.g. `GC.config(full_mark: true)`) sets
    the matching config key to the corresponding value and returns the
    entire known config hash, including the new values. If a given key
    does not exist, reading it back from the hash returns `nil`

* When a minor GC is run, Ruby sets an internal status flag to determine
  whether the next GC will be a major or a minor. When `full_mark:
  false` this flag is ignored and every GC will be a minor.

  This status flag can be accessed at
  `GC.latest_gc_info(:needs_major_by)`. Any value other than `nil` means
  that the next collection would have been a major.

  Thus it's possible to use this feature to check at a predetermined
  time, whether a major GC is necessary and run one if it is. eg. After
  a request has finished processing.

  ```ruby
  if GC.latest_gc_info(:needs_major_by)
    GC.start(full_mark: true)
  end
  ```

[Feature #20443]
This commit is contained in:
Matt Valentine-House 2024-07-04 15:21:09 +01:00
Родитель 00d0ddd48a
Коммит f543c68e1c
6 изменённых файлов: 235 добавлений и 5 удалений

Просмотреть файл

@ -7511,8 +7511,10 @@ gc_impl.$(OBJEXT): $(top_srcdir)/gc/default.c
gc_impl.$(OBJEXT): $(top_srcdir)/gc/gc_impl.h
gc_impl.$(OBJEXT): $(top_srcdir)/internal/bits.h
gc_impl.$(OBJEXT): $(top_srcdir)/internal/compilers.h
gc_impl.$(OBJEXT): $(top_srcdir)/internal/hash.h
gc_impl.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
gc_impl.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
gc_impl.$(OBJEXT): $(top_srcdir)/internal/string.h
gc_impl.$(OBJEXT): $(top_srcdir)/internal/warnings.h
gc_impl.$(OBJEXT): {$(VPATH)}assert.h
gc_impl.$(OBJEXT): {$(VPATH)}atomic.h
@ -7530,6 +7532,7 @@ gc_impl.$(OBJEXT): {$(VPATH)}darray.h
gc_impl.$(OBJEXT): {$(VPATH)}debug.h
gc_impl.$(OBJEXT): {$(VPATH)}debug_counter.h
gc_impl.$(OBJEXT): {$(VPATH)}defines.h
gc_impl.$(OBJEXT): {$(VPATH)}encoding.h
gc_impl.$(OBJEXT): {$(VPATH)}intern.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/abi.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/anyargs.h
@ -7603,8 +7606,14 @@ gc_impl.$(OBJEXT): {$(VPATH)}internal/ctype.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/dllexport.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/dosish.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/coderange.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/ctype.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/encoding.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/pathname.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/re.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/sprintf.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/string.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/symbol.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/encoding/transcode.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/error.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/eval.h
gc_impl.$(OBJEXT): {$(VPATH)}internal/event.h

18
gc.c
Просмотреть файл

@ -618,6 +618,8 @@ typedef struct gc_function_map {
void (*gc_enable)(void *objspace_ptr);
void (*gc_disable)(void *objspace_ptr, bool finish_current_gc);
bool (*gc_enabled_p)(void *objspace_ptr);
VALUE (*config_get)(void *objpace_ptr);
VALUE (*config_set)(void *objspace_ptr, VALUE hash);
void (*stress_set)(void *objspace_ptr, VALUE flag);
VALUE (*stress_get)(void *objspace_ptr);
// Object allocation
@ -747,6 +749,8 @@ ruby_external_gc_init(void)
load_external_gc_func(gc_enable);
load_external_gc_func(gc_disable);
load_external_gc_func(gc_enabled_p);
load_external_gc_func(config_set);
load_external_gc_func(config_get);
load_external_gc_func(stress_set);
load_external_gc_func(stress_get);
// Object allocation
@ -824,6 +828,8 @@ ruby_external_gc_init(void)
# define rb_gc_impl_gc_enable rb_gc_functions.gc_enable
# define rb_gc_impl_gc_disable rb_gc_functions.gc_disable
# define rb_gc_impl_gc_enabled_p rb_gc_functions.gc_enabled_p
# define rb_gc_impl_config_get rb_gc_functions.config_get
# define rb_gc_impl_config_set rb_gc_functions.config_set
# define rb_gc_impl_stress_set rb_gc_functions.stress_set
# define rb_gc_impl_stress_get rb_gc_functions.stress_get
// Object allocation
@ -3567,6 +3573,18 @@ gc_stat_heap(rb_execution_context_t *ec, VALUE self, VALUE heap_name, VALUE arg)
}
}
static VALUE
gc_config_get(rb_execution_context_t *ec, VALUE self)
{
return rb_gc_impl_config_get(rb_gc_get_objspace());
}
static VALUE
gc_config_set(rb_execution_context_t *ec, VALUE self, VALUE hash)
{
return rb_gc_impl_config_set(rb_gc_get_objspace(), hash);
}
static VALUE
gc_stress_get(rb_execution_context_t *ec, VALUE self)
{

24
gc.rb
Просмотреть файл

@ -253,6 +253,30 @@ module GC
Primitive.gc_stat_heap heap_name, hash_or_key
end
# call-seq:
# GC.config -> hash
# GC.config(hash) -> hash
#
# Sets or gets information about the current GC config.
#
# The contents of the hash are implementation specific and may change in
# the future without notice.
#
# If the optional argument, hash, is given, it is overwritten and returned.
#
# This method is only expected to work on CRuby.
#
# The hash includes the following keys about the internal information in
# the \GC:
#
# [slot_size]
# The slot size of the heap in bytes.
def self.config hash = nil
return Primitive.gc_config_get unless hash
Primitive.gc_config_set hash
end
# call-seq:
# GC.latest_gc_info -> hash
# GC.latest_gc_info(hash) -> hash

Просмотреть файл

@ -12,6 +12,9 @@
# include <sys/user.h>
#endif
#include "internal/string.h"
#include "internal/hash.h"
#include "ruby/ruby.h"
#include "ruby/atomic.h"
#include "ruby/debug.h"
@ -512,6 +515,10 @@ typedef struct rb_objspace {
#endif
} malloc_params;
struct rb_gc_config {
bool full_mark;
} gc_config;
struct {
unsigned int mode : 2;
unsigned int immediate_sweep : 1;
@ -904,6 +911,9 @@ VALUE *ruby_initial_gc_stress_ptr = &ruby_initial_gc_stress;
#define dont_gc_val() (objspace->flags.dont_gc)
#endif
#define gc_config_full_mark_set(b) (((int)b), objspace->gc_config.full_mark = (b))
#define gc_config_full_mark_val (objspace->gc_config.full_mark)
#define DURING_GC_COULD_MALLOC_REGION_START() \
assert(rb_during_gc()); \
bool _prev_enabled = rb_gc_impl_gc_enabled_p(objspace); \
@ -1818,6 +1828,22 @@ heap_page_add_freeobj(rb_objspace_t *objspace, struct heap_page *page, VALUE obj
gc_report(3, objspace, "heap_page_add_freeobj: add %p to freelist\n", (void *)obj);
}
static size_t
heap_extend_pages(rb_objspace_t *objspace, rb_size_pool_t *size_pool,
size_t free_slots, size_t total_slots, size_t used);
static void
size_pool_allocatable_pages_expand(rb_objspace_t *objspace,
rb_size_pool_t *size_pool, size_t swept_slots, size_t total_slots, size_t total_pages)
{
size_t extend_page_count = heap_extend_pages(objspace, size_pool, swept_slots,
total_slots, total_pages);
if (extend_page_count > size_pool->allocatable_pages) {
size_pool_allocatable_pages_set(objspace, size_pool, extend_page_count);
}
}
static inline void
heap_add_freepage(rb_heap_t *heap, struct heap_page *page)
{
@ -2297,6 +2323,13 @@ heap_prepare(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap
rb_memerror();
}
else {
if (size_pool->allocatable_pages == 0 && !gc_config_full_mark_val) {
size_pool_allocatable_pages_expand(objspace, size_pool,
size_pool->freed_slots + size_pool->empty_slots,
heap->total_slots + SIZE_POOL_TOMB_HEAP(size_pool)->total_slots,
heap->total_pages + SIZE_POOL_TOMB_HEAP(size_pool)->total_pages);
GC_ASSERT(size_pool->allocatable_pages > 0);
}
/* Do steps of incremental marking or lazy sweeping if the GC run permits. */
gc_continue(objspace, size_pool, heap);
@ -4037,11 +4070,8 @@ gc_sweep_finish_size_pool(rb_objspace_t *objspace, rb_size_pool_t *size_pool)
}
if (grow_heap) {
size_t extend_page_count = heap_extend_pages(objspace, size_pool, swept_slots, total_slots, total_pages);
if (extend_page_count > size_pool->allocatable_pages) {
size_pool_allocatable_pages_set(objspace, size_pool, extend_page_count);
}
size_pool_allocatable_pages_expand(objspace, size_pool, swept_slots,
total_slots, total_pages);
}
}
}
@ -4596,6 +4626,16 @@ gc_mark_set(rb_objspace_t *objspace, VALUE obj)
static void
gc_aging(rb_objspace_t *objspace, VALUE obj)
{
/* Disable aging if Major GC's are disabled. This will prevent longish lived
* objects filling up the heap at the expense of marking many more objects.
*
* We should always pre-warm our process when disabling majors, by running
* GC manually several times so that most objects likely to become oldgen
* are already oldgen.
*/
if(!gc_config_full_mark_val)
return;
struct heap_page *page = GET_HEAP_PAGE(obj);
GC_ASSERT(RVALUE_MARKING(objspace, obj) == FALSE);
@ -6597,6 +6637,10 @@ gc_start(rb_objspace_t *objspace, unsigned int reason)
do_full_mark = TRUE;
}
/* if major gc has been disabled, never do a full mark */
if (!gc_config_full_mark_val) {
do_full_mark = FALSE;
}
gc_needs_major_flags = GPR_FLAG_NONE;
if (do_full_mark && (reason & GPR_FLAG_MAJOR_MASK) == 0) {
@ -6991,6 +7035,9 @@ rb_gc_impl_start(void *objspace_ptr, bool full_mark, bool immediate_mark, bool i
GPR_FLAG_IMMEDIATE_SWEEP |
GPR_FLAG_METHOD);
int full_marking_p = gc_config_full_mark_val;
gc_config_full_mark_set(TRUE);
/* For now, compact implies full mark / sweep, so ignore other flags */
if (compact) {
GC_ASSERT(GC_COMPACTION_SUPPORTED);
@ -7005,6 +7052,8 @@ rb_gc_impl_start(void *objspace_ptr, bool full_mark, bool immediate_mark, bool i
garbage_collect(objspace, reason);
gc_finalize_deferred(objspace);
gc_config_full_mark_set(full_marking_p);
}
static void
@ -8007,6 +8056,51 @@ rb_gc_impl_stat_heap(void *objspace_ptr, VALUE heap_name, VALUE hash_or_sym)
return 0;
}
/* I could include internal.h for this, but doing so undefines some Array macros
* necessary for initialising objects, and I don't want to include all the array
* headers to get them back
* TODO: Investigate why RARRAY_AREF gets undefined in internal.h
*/
#ifndef RBOOL
#define RBOOL(v) (v ? Qtrue : Qfalse)
#endif
VALUE
rb_gc_impl_config_get(void *objspace_ptr)
{
#define sym(name) ID2SYM(rb_intern_const(name))
rb_objspace_t *objspace = objspace_ptr;
VALUE hash = rb_hash_new();
rb_hash_aset(hash, sym("full_mark"), RBOOL(gc_config_full_mark_val));
return hash;
}
static int
gc_config_set_key(st_data_t key, st_data_t value, st_data_t data)
{
rb_objspace_t *objspace = (rb_objspace_t *)data;
if (!strcmp(rb_str_to_cstr(rb_sym2str(key)), "full_mark")) {
gc_rest(objspace);
gc_config_full_mark_set(RBOOL(value));
}
return ST_CONTINUE;
}
VALUE
rb_gc_impl_config_set(void *objspace_ptr, VALUE hash)
{
rb_objspace_t *objspace = objspace_ptr;
if(!RB_TYPE_P(hash, T_HASH)) {
rb_raise(rb_eArgError, "expected keyword arguments");
}
rb_hash_stlike_foreach(hash, gc_config_set_key, (st_data_t)objspace);
return rb_gc_impl_config_get(objspace_ptr);
}
VALUE
rb_gc_impl_stress_get(void *objspace_ptr)
{
@ -9405,8 +9499,13 @@ gc_compact_stats(VALUE self)
static VALUE
gc_compact(VALUE self)
{
rb_objspace_t *objspace = rb_gc_get_objspace();
int full_marking_p = gc_config_full_mark_val;
gc_config_full_mark_set(TRUE);
/* Run GC with compaction enabled */
rb_gc_impl_start(rb_gc_get_objspace(), true, true, true, true);
gc_config_full_mark_set(full_marking_p);
return gc_compact_stats(self);
}
@ -9639,6 +9738,8 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
{
rb_objspace_t *objspace = objspace_ptr;
gc_config_full_mark_set(TRUE);
objspace->flags.gc_stressful = RTEST(initial_stress);
objspace->gc_stress_mode = initial_stress;

Просмотреть файл

@ -24,6 +24,8 @@ void rb_gc_impl_gc_disable(void *objspace_ptr, bool finish_current_gc);
bool rb_gc_impl_gc_enabled_p(void *objspace_ptr);
void rb_gc_impl_stress_set(void *objspace_ptr, VALUE flag);
VALUE rb_gc_impl_stress_get(void *objspace_ptr);
VALUE rb_gc_impl_config_get(void *objspace_ptr);
VALUE rb_gc_impl_config_set(void *objspace_ptr, VALUE hash);
// Object allocation
VALUE rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags, VALUE v1, VALUE v2, VALUE v3, bool wb_protected, size_t alloc_size);
size_t rb_gc_impl_obj_slot_size(VALUE obj);
@ -77,4 +79,5 @@ bool rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE obj);
void rb_gc_impl_set_event_hook(void *objspace_ptr, const rb_event_flag_t event);
void rb_gc_impl_copy_attributes(void *objspace_ptr, VALUE dest, VALUE obj);
#endif

Просмотреть файл

@ -52,6 +52,81 @@ class TestGc < Test::Unit::TestCase
GC.enable
end
def test_gc_config_full_mark_by_default
omit "unsupoported platform/GC" unless defined?(GC.config)
config = GC.config
assert_not_empty(config)
assert_true(config[:full_mark])
end
def test_gc_config_invalid_args
omit "unsupoported platform/GC" unless defined?(GC.config)
assert_raise(ArgumentError) { GC.config(0) }
end
def test_gc_config_setting_returns_updated_config_hash
omit "unsupoported platform/GC" unless defined?(GC.config)
old_value = GC.config[:full_mark]
assert_true(old_value)
new_value = GC.config(full_mark: false)[:full_mark]
assert_false(new_value)
ensure
GC.config(full_mark: true)
GC.start
end
def test_gc_config_setting_returns_nil_for_missing_keys
omit "unsupoported platform/GC" unless defined?(GC.config)
missing_value = GC.config(no_such_key: true)[:no_such_key]
assert_nil(missing_value)
ensure
GC.config(full_mark: true)
GC.start
end
def test_gc_config_disable_major
omit "unsupoported platform/GC" unless defined?(GC.config)
GC.enable
GC.start
GC.config(full_mark: false)
major_count = GC.stat[:major_gc_count]
minor_count = GC.stat[:minor_gc_count]
arr = []
(GC.stat_heap[0][:heap_eden_slots] * 2).times do
arr << Object.new
Object.new
end
assert_equal(major_count, GC.stat[:major_gc_count])
assert_operator(minor_count, :<=, GC.stat[:minor_gc_count])
assert_nil(GC.start)
ensure
GC.config(full_mark: true)
GC.start
end
def test_gc_config_disable_major_gc_start_always_works
omit "unsupoported platform/GC" unless defined?(GC.config)
GC.config(full_mark: false)
major_count = GC.stat[:major_gc_count]
GC.start
assert_operator(major_count, :<, GC.stat[:major_gc_count])
ensure
GC.config(full_mark: true)
GC.start
end
def test_start_full_mark
return unless use_rgengc?
omit 'stress' if GC.stress