[Feature #19236]

When building a large hash, pre-allocating it with enough
capacity can save many re-hashes and significantly improve
performance.

```
/opt/rubies/3.3.0/bin/ruby --disable=gems -rrubygems -I./benchmark/lib ./benchmark/benchmark-driver/exe/benchmark-driver \
	            --executables="compare-ruby::../miniruby-master -I.ext/common --disable-gem" \
	            --executables="built-ruby::./miniruby --disable-gem" \
	            --output=markdown --output-compare -v $(find ./benchmark -maxdepth 1 -name 'hash_new' -o -name '*hash_new*.yml' -o -name '*hash_new*.rb' | sort)
compare-ruby: ruby 3.4.0dev (2024-03-25T11:48:11Z master f53209f023) +YJIT dev [arm64-darwin23]
last_commit=[ruby/irb] Cache RDoc::RI::Driver.new (https://github.com/ruby/irb/pull/911)
built-ruby: ruby 3.4.0dev (2024-03-25T15:29:40Z hash-new-rb 77652b08a2) +YJIT dev [arm64-darwin23]
warming up...

|                    |compare-ruby|built-ruby|
|:-------------------|-----------:|---------:|
|new                 |      7.614M|    5.976M|
|                    |       1.27x|         -|
|new_with_capa_1k    |     13.931k|   15.698k|
|                    |           -|     1.13x|
|new_with_capa_100k  |     124.746|   148.283|
|                    |           -|     1.19x|
```
This commit is contained in:
Jean Boussier 2024-03-25 13:03:14 +01:00 коммит произвёл Jean Boussier
Родитель bfb8cad771
Коммит 9594db0cf2
6 изменённых файлов: 99 добавлений и 46 удалений

16
benchmark/hash_new.yml Normal file
Просмотреть файл

@ -0,0 +1,16 @@
# Benchmark for Hash.new with and without the `capacity:` keyword.
#
# The prelude checks whether Hash#initialize declares a `capacity` keyword,
# so the same file can run against a ruby that lacks it: in that case the
# `new_with_capa_*` cases fall back to a plain `{}` literal and measure the
# cost of filling a hash that was not pre-sized.
#
# The key arrays are built once in the prelude so that only hash creation
# and insertion are measured inside each benchmark body.
prelude: |
has_hash_with_capa = Hash.instance_method(:initialize).parameters.include?([:key, :capacity])
strings_1k = 1_000.times.map { |i| -i.to_s.freeze }
strings_100k = 100_000.times.map { |i| -i.to_s.freeze }
benchmark:
new: Hash.new
new_with_capa_1k: |
h = has_hash_with_capa ? Hash.new(capacity: strings_1k.size) : {}
strings_1k.each do |x|
h[x] = true
end
new_with_capa_100k: |
h = has_hash_with_capa ? Hash.new(capacity: strings_100k.size) : {}
strings_100k.each do |x|
h[x] = true
end

Просмотреть файл

@ -1204,6 +1204,7 @@ BUILTIN_RB_SRCS = \
$(srcdir)/trace_point.rb \
$(srcdir)/warning.rb \
$(srcdir)/array.rb \
$(srcdir)/hash.rb \
$(srcdir)/kernel.rb \
$(srcdir)/ractor.rb \
$(srcdir)/symbol.rb \
@ -7968,12 +7969,14 @@ hash.$(OBJEXT): {$(VPATH)}backward/2/limits.h
hash.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
hash.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
hash.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
hash.$(OBJEXT): {$(VPATH)}builtin.h
hash.$(OBJEXT): {$(VPATH)}config.h
hash.$(OBJEXT): {$(VPATH)}constant.h
hash.$(OBJEXT): {$(VPATH)}debug_counter.h
hash.$(OBJEXT): {$(VPATH)}defines.h
hash.$(OBJEXT): {$(VPATH)}encoding.h
hash.$(OBJEXT): {$(VPATH)}hash.c
hash.$(OBJEXT): {$(VPATH)}hash.rbinc
hash.$(OBJEXT): {$(VPATH)}id.h
hash.$(OBJEXT): {$(VPATH)}id_table.h
hash.$(OBJEXT): {$(VPATH)}intern.h

65
hash.c
Просмотреть файл

@ -48,6 +48,7 @@
#include "ruby/thread_native.h"
#include "ruby/ractor.h"
#include "vm_sync.h"
#include "builtin.h"
/* Flags of RHash
*
@ -1762,58 +1763,31 @@ set_proc_default(VALUE hash, VALUE proc)
RHASH_SET_IFNONE(hash, proc);
}
/*
* call-seq:
* Hash.new(default_value = nil) -> new_hash
* Hash.new {|hash, key| ... } -> new_hash
*
* Returns a new empty +Hash+ object.
*
* The initial default value and initial default proc for the new hash
* depend on which form above was used. See {Default Values}[rdoc-ref:Hash@Default+Values].
*
* If neither an argument nor a block given,
* initializes both the default value and the default proc to <tt>nil</tt>:
* h = Hash.new
* h.default # => nil
* h.default_proc # => nil
*
* If argument <tt>default_value</tt> given but no block given,
* initializes the default value to the given <tt>default_value</tt>
* and the default proc to <tt>nil</tt>:
* h = Hash.new(false)
* h.default # => false
* h.default_proc # => nil
*
* If a block given but no argument, stores the block as the default proc
* and sets the default value to <tt>nil</tt>:
* h = Hash.new {|hash, key| "Default value for #{key}" }
* h.default # => nil
* h.default_proc.class # => Proc
* h[:nosuch] # => "Default value for nosuch"
*/
static VALUE
rb_hash_initialize(int argc, VALUE *argv, VALUE hash)
rb_hash_init(rb_execution_context_t *ec, VALUE hash, VALUE capa_value, VALUE ifnone_unset, VALUE ifnone, VALUE block)
{
rb_hash_modify(hash);
if (rb_block_given_p()) {
rb_check_arity(argc, 0, 0);
SET_PROC_DEFAULT(hash, rb_block_proc());
if (capa_value != INT2FIX(0)) {
long capa = NUM2LONG(capa_value);
if (capa > 0 && RHASH_SIZE(hash) == 0 && RHASH_AR_TABLE_P(hash)) {
hash_st_table_init(hash, &objhash, capa);
}
}
if (!NIL_P(block)) {
if (ifnone_unset != Qtrue) {
rb_check_arity(1, 0, 0);
}
else {
SET_PROC_DEFAULT(hash, block);
}
}
else {
rb_check_arity(argc, 0, 1);
VALUE options, ifnone;
rb_scan_args(argc, argv, "01:", &ifnone, &options);
if (NIL_P(ifnone) && !NIL_P(options)) {
ifnone = options;
rb_warn_deprecated_to_remove("3.4", "Calling Hash.new with keyword arguments", "Hash.new({ key: value })");
}
RHASH_SET_IFNONE(hash, ifnone);
RHASH_SET_IFNONE(hash, ifnone_unset == Qtrue ? Qnil : ifnone);
}
hash_verify(hash);
return hash;
}
@ -7150,7 +7124,6 @@ Init_Hash(void)
rb_define_alloc_func(rb_cHash, empty_hash_alloc);
rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1);
rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1);
rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1);
rb_define_method(rb_cHash, "initialize_copy", rb_hash_replace, 1);
rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0);
@ -7477,3 +7450,5 @@ Init_Hash(void)
HASH_ASSERT(sizeof(ar_hint_t) * RHASH_AR_TABLE_MAX_SIZE == sizeof(VALUE));
}
#include "hash.rbinc"

40
hash.rb Normal file
Просмотреть файл

@ -0,0 +1,40 @@
class Hash
# call-seq:
# Hash.new(default_value = nil) -> new_hash
# Hash.new(default_value = nil, capacity: size) -> new_hash
# Hash.new {|hash, key| ... } -> new_hash
# Hash.new(capacity: size) {|hash, key| ... } -> new_hash
#
# Returns a new empty +Hash+ object.
#
# The initial default value and initial default proc for the new hash
# depend on which form above was used. See {Default Values}[rdoc-ref:Hash@Default+Values].
#
# If neither an argument nor a block is given,
# initializes both the default value and the default proc to <tt>nil</tt>:
# h = Hash.new
# h.default # => nil
# h.default_proc # => nil
#
# If argument <tt>default_value</tt> is given but no block is given,
# initializes the default value to the given <tt>default_value</tt>
# and the default proc to <tt>nil</tt>:
# h = Hash.new(false)
# h.default # => false
# h.default_proc # => nil
#
# If a block is given but no <tt>default_value</tt>, stores the block as the default proc
# and sets the default value to <tt>nil</tt>:
# h = Hash.new {|hash, key| "Default value for #{key}" }
# h.default # => nil
# h.default_proc.class # => Proc
# h[:nosuch] # => "Default value for nosuch"
#
# If both a block and a <tt>default_value</tt> are given, raises an +ArgumentError+.
#
# If the optional keyword argument +capacity+ is given, the hash will be allocated
# with enough capacity to accommodate this many keys without having to be resized.
# A zero or negative +capacity+ is ignored.
def initialize(ifnone = (ifnone_unset = true), capacity: 0, &block)
# The default expression for +ifnone+ is evaluated only when no positional
# argument is passed, so +ifnone_unset+ is +true+ in that case and +nil+
# otherwise. The C implementation uses it to tell <tt>Hash.new</tt> apart
# from <tt>Hash.new(nil)</tt>: it rejects a block combined with an explicit
# default value, and stores +nil+ as the default when +ifnone+ was omitted.
Primitive.rb_hash_init(capacity, ifnone_unset, ifnone, block)
end
end

Просмотреть файл

@ -94,6 +94,7 @@ rb_call_builtin_inits(void)
BUILTIN(pack);
BUILTIN(warning);
BUILTIN(array);
BUILTIN(hash);
BUILTIN(kernel);
BUILTIN(symbol);
BUILTIN(timev);

Просмотреть файл

@ -34,7 +34,7 @@ describe "Hash.new" do
-> { Hash.new(nil) { 0 } }.should raise_error(ArgumentError)
end
ruby_version_is "3.3" do
ruby_version_is "3.3"..."3.4" do
it "emits a deprecation warning if keyword arguments are passed" do
-> { Hash.new(unknown: true) }.should complain(
Regexp.new(Regexp.escape("Calling Hash.new with keyword arguments is deprecated and will be removed in Ruby 3.4; use Hash.new({ key: value }) instead"))
@ -46,4 +46,22 @@ describe "Hash.new" do
Hash.new({ unknown: true }).default.should == { unknown: true }
end
end
# From Ruby 3.4 on, Hash.new takes a real +capacity:+ keyword and any other
# keyword is rejected instead of being treated as the default value.
ruby_version_is "3.4" do
it "accepts a capacity: argument" do
# capacity: must combine with a default value, with no default, and with a block.
Hash.new(5, capacity: 42).default.should == 5
Hash.new(capacity: 42).default.should == nil
(Hash.new(capacity: 42) { 1 }).default_proc.should_not == nil
end
it "ignores negative capacity" do
# A negative size hint is accepted without raising.
-> { Hash.new(capacity: -42) }.should_not raise_error
end
it "raises an error if unknown keyword arguments are passed" do
# Unknown keywords are rejected with or without a default value or a block.
-> { Hash.new(unknown: true) }.should raise_error(ArgumentError)
-> { Hash.new(1, unknown: true) }.should raise_error(ArgumentError)
-> { Hash.new(unknown: true) { 0 } }.should raise_error(ArgumentError)
end
end
end