зеркало из https://github.com/github/ruby.git
Implement Hash.new(capacity:)
[Feature #19236]
When building a large hash, pre-allocating it with enough
capacity can save many re-hashes and significantly improve
performance.
```
/opt/rubies/3.3.0/bin/ruby --disable=gems -rrubygems -I./benchmark/lib ./benchmark/benchmark-driver/exe/benchmark-driver \
--executables="compare-ruby::../miniruby-master -I.ext/common --disable-gem" \
--executables="built-ruby::./miniruby --disable-gem" \
--output=markdown --output-compare -v $(find ./benchmark -maxdepth 1 -name 'hash_new' -o -name '*hash_new*.yml' -o -name '*hash_new*.rb' | sort)
compare-ruby: ruby 3.4.0dev (2024-03-25T11:48:11Z master f53209f023) +YJIT dev [arm64-darwin23]
last_commit=[ruby/irb] Cache RDoc::RI::Driver.new (https://github.com/ruby/irb/pull/911)
built-ruby: ruby 3.4.0dev (2024-03-25T15:29:40Z hash-new-rb 77652b08a2) +YJIT dev [arm64-darwin23]
warming up...
| |compare-ruby|built-ruby|
|:-------------------|-----------:|---------:|
|new | 7.614M| 5.976M|
| | 1.27x| -|
|new_with_capa_1k | 13.931k| 15.698k|
| | -| 1.13x|
|new_with_capa_100k | 124.746| 148.283|
| | -| 1.19x|
```
This commit is contained in:
Родитель
bfb8cad771
Коммит
9594db0cf2
|
@ -0,0 +1,16 @@
|
|||
prelude: |
|
||||
has_hash_with_capa = Hash.instance_method(:initialize).parameters.include?([:key, :capacity])
|
||||
strings_1k = 1_000.times.map { |i| -i.to_s.freeze }
|
||||
strings_100k = 100_000.times.map { |i| -i.to_s.freeze }
|
||||
benchmark:
|
||||
new: Hash.new
|
||||
new_with_capa_1k: |
|
||||
h = has_hash_with_capa ? Hash.new(capacity: strings_1k.size) : {}
|
||||
strings_1k.each do |x|
|
||||
h[x] = true
|
||||
end
|
||||
new_with_capa_100k: |
|
||||
h = has_hash_with_capa ? Hash.new(capacity: strings_100k.size) : {}
|
||||
strings_100k.each do |x|
|
||||
h[x] = true
|
||||
end
|
|
@ -1204,6 +1204,7 @@ BUILTIN_RB_SRCS = \
|
|||
$(srcdir)/trace_point.rb \
|
||||
$(srcdir)/warning.rb \
|
||||
$(srcdir)/array.rb \
|
||||
$(srcdir)/hash.rb \
|
||||
$(srcdir)/kernel.rb \
|
||||
$(srcdir)/ractor.rb \
|
||||
$(srcdir)/symbol.rb \
|
||||
|
@ -7968,12 +7969,14 @@ hash.$(OBJEXT): {$(VPATH)}backward/2/limits.h
|
|||
hash.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
|
||||
hash.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
|
||||
hash.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
|
||||
hash.$(OBJEXT): {$(VPATH)}builtin.h
|
||||
hash.$(OBJEXT): {$(VPATH)}config.h
|
||||
hash.$(OBJEXT): {$(VPATH)}constant.h
|
||||
hash.$(OBJEXT): {$(VPATH)}debug_counter.h
|
||||
hash.$(OBJEXT): {$(VPATH)}defines.h
|
||||
hash.$(OBJEXT): {$(VPATH)}encoding.h
|
||||
hash.$(OBJEXT): {$(VPATH)}hash.c
|
||||
hash.$(OBJEXT): {$(VPATH)}hash.rbinc
|
||||
hash.$(OBJEXT): {$(VPATH)}id.h
|
||||
hash.$(OBJEXT): {$(VPATH)}id_table.h
|
||||
hash.$(OBJEXT): {$(VPATH)}intern.h
|
||||
|
|
65
hash.c
65
hash.c
|
@ -48,6 +48,7 @@
|
|||
#include "ruby/thread_native.h"
|
||||
#include "ruby/ractor.h"
|
||||
#include "vm_sync.h"
|
||||
#include "builtin.h"
|
||||
|
||||
/* Flags of RHash
|
||||
*
|
||||
|
@ -1762,58 +1763,31 @@ set_proc_default(VALUE hash, VALUE proc)
|
|||
RHASH_SET_IFNONE(hash, proc);
|
||||
}
|
||||
|
||||
/*
|
||||
* call-seq:
|
||||
* Hash.new(default_value = nil) -> new_hash
|
||||
* Hash.new {|hash, key| ... } -> new_hash
|
||||
*
|
||||
* Returns a new empty +Hash+ object.
|
||||
*
|
||||
* The initial default value and initial default proc for the new hash
|
||||
* depend on which form above was used. See {Default Values}[rdoc-ref:Hash@Default+Values].
|
||||
*
|
||||
* If neither an argument nor a block given,
|
||||
* initializes both the default value and the default proc to <tt>nil</tt>:
|
||||
* h = Hash.new
|
||||
* h.default # => nil
|
||||
* h.default_proc # => nil
|
||||
*
|
||||
* If argument <tt>default_value</tt> given but no block given,
|
||||
* initializes the default value to the given <tt>default_value</tt>
|
||||
* and the default proc to <tt>nil</tt>:
|
||||
* h = Hash.new(false)
|
||||
* h.default # => false
|
||||
* h.default_proc # => nil
|
||||
*
|
||||
* If a block given but no argument, stores the block as the default proc
|
||||
* and sets the default value to <tt>nil</tt>:
|
||||
* h = Hash.new {|hash, key| "Default value for #{key}" }
|
||||
* h.default # => nil
|
||||
* h.default_proc.class # => Proc
|
||||
* h[:nosuch] # => "Default value for nosuch"
|
||||
*/
|
||||
|
||||
static VALUE
|
||||
rb_hash_initialize(int argc, VALUE *argv, VALUE hash)
|
||||
rb_hash_init(rb_execution_context_t *ec, VALUE hash, VALUE capa_value, VALUE ifnone_unset, VALUE ifnone, VALUE block)
|
||||
{
|
||||
rb_hash_modify(hash);
|
||||
|
||||
if (rb_block_given_p()) {
|
||||
rb_check_arity(argc, 0, 0);
|
||||
SET_PROC_DEFAULT(hash, rb_block_proc());
|
||||
if (capa_value != INT2FIX(0)) {
|
||||
long capa = NUM2LONG(capa_value);
|
||||
if (capa > 0 && RHASH_SIZE(hash) == 0 && RHASH_AR_TABLE_P(hash)) {
|
||||
hash_st_table_init(hash, &objhash, capa);
|
||||
}
|
||||
}
|
||||
|
||||
if (!NIL_P(block)) {
|
||||
if (ifnone_unset != Qtrue) {
|
||||
rb_check_arity(1, 0, 0);
|
||||
}
|
||||
else {
|
||||
SET_PROC_DEFAULT(hash, block);
|
||||
}
|
||||
}
|
||||
else {
|
||||
rb_check_arity(argc, 0, 1);
|
||||
|
||||
VALUE options, ifnone;
|
||||
rb_scan_args(argc, argv, "01:", &ifnone, &options);
|
||||
if (NIL_P(ifnone) && !NIL_P(options)) {
|
||||
ifnone = options;
|
||||
rb_warn_deprecated_to_remove("3.4", "Calling Hash.new with keyword arguments", "Hash.new({ key: value })");
|
||||
}
|
||||
RHASH_SET_IFNONE(hash, ifnone);
|
||||
RHASH_SET_IFNONE(hash, ifnone_unset == Qtrue ? Qnil : ifnone);
|
||||
}
|
||||
|
||||
hash_verify(hash);
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
@ -7150,7 +7124,6 @@ Init_Hash(void)
|
|||
rb_define_alloc_func(rb_cHash, empty_hash_alloc);
|
||||
rb_define_singleton_method(rb_cHash, "[]", rb_hash_s_create, -1);
|
||||
rb_define_singleton_method(rb_cHash, "try_convert", rb_hash_s_try_convert, 1);
|
||||
rb_define_method(rb_cHash, "initialize", rb_hash_initialize, -1);
|
||||
rb_define_method(rb_cHash, "initialize_copy", rb_hash_replace, 1);
|
||||
rb_define_method(rb_cHash, "rehash", rb_hash_rehash, 0);
|
||||
|
||||
|
@ -7477,3 +7450,5 @@ Init_Hash(void)
|
|||
|
||||
HASH_ASSERT(sizeof(ar_hint_t) * RHASH_AR_TABLE_MAX_SIZE == sizeof(VALUE));
|
||||
}
|
||||
|
||||
#include "hash.rbinc"
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
class Hash
|
||||
# call-seq:
|
||||
# Hash.new(default_value = nil) -> new_hash
|
||||
# Hash.new(default_value = nil, capacity: size) -> new_hash
|
||||
# Hash.new {|hash, key| ... } -> new_hash
|
||||
# Hash.new(capacity: size) {|hash, key| ... } -> new_hash
|
||||
#
|
||||
# Returns a new empty +Hash+ object.
|
||||
#
|
||||
# The initial default value and initial default proc for the new hash
|
||||
# depend on which form above was used. See {Default Values}[rdoc-ref:Hash@Default+Values].
|
||||
#
|
||||
# If neither an argument nor a block is given,
|
||||
# initializes both the default value and the default proc to <tt>nil</tt>:
|
||||
# h = Hash.new
|
||||
# h.default # => nil
|
||||
# h.default_proc # => nil
|
||||
#
|
||||
# If argument <tt>default_value</tt> is given but no block is given,
|
||||
# initializes the default value to the given <tt>default_value</tt>
|
||||
# and the default proc to <tt>nil</tt>:
|
||||
# h = Hash.new(false)
|
||||
# h.default # => false
|
||||
# h.default_proc # => nil
|
||||
#
|
||||
# If a block is given but no <tt>default_value</tt>, stores the block as the default proc
|
||||
# and sets the default value to <tt>nil</tt>:
|
||||
# h = Hash.new {|hash, key| "Default value for #{key}" }
|
||||
# h.default # => nil
|
||||
# h.default_proc.class # => Proc
|
||||
# h[:nosuch] # => "Default value for nosuch"
|
||||
#
|
||||
# If both a block and a <tt>default_value</tt> are given, raises an +ArgumentError+.
|
||||
#
|
||||
# If the optional keyword argument +capacity+ is given, the hash will be allocated
|
||||
# with enough capacity to accommodate this many keys without having to be resized.
|
||||
def initialize(ifnone = (ifnone_unset = true), capacity: 0, &block)
|
||||
Primitive.rb_hash_init(capacity, ifnone_unset, ifnone, block)
|
||||
end
|
||||
end
|
1
inits.c
1
inits.c
|
@ -94,6 +94,7 @@ rb_call_builtin_inits(void)
|
|||
BUILTIN(pack);
|
||||
BUILTIN(warning);
|
||||
BUILTIN(array);
|
||||
BUILTIN(hash);
|
||||
BUILTIN(kernel);
|
||||
BUILTIN(symbol);
|
||||
BUILTIN(timev);
|
||||
|
|
|
@ -34,7 +34,7 @@ describe "Hash.new" do
|
|||
-> { Hash.new(nil) { 0 } }.should raise_error(ArgumentError)
|
||||
end
|
||||
|
||||
ruby_version_is "3.3" do
|
||||
ruby_version_is "3.3"..."3.4" do
|
||||
it "emits a deprecation warning if keyword arguments are passed" do
|
||||
-> { Hash.new(unknown: true) }.should complain(
|
||||
Regexp.new(Regexp.escape("Calling Hash.new with keyword arguments is deprecated and will be removed in Ruby 3.4; use Hash.new({ key: value }) instead"))
|
||||
|
@ -46,4 +46,22 @@ describe "Hash.new" do
|
|||
Hash.new({ unknown: true }).default.should == { unknown: true }
|
||||
end
|
||||
end
|
||||
|
||||
ruby_version_is "3.4" do
|
||||
it "accepts a capacity: argument" do
|
||||
Hash.new(5, capacity: 42).default.should == 5
|
||||
Hash.new(capacity: 42).default.should == nil
|
||||
(Hash.new(capacity: 42) { 1 }).default_proc.should_not == nil
|
||||
end
|
||||
|
||||
it "ignores negative capacity" do
|
||||
-> { Hash.new(capacity: -42) }.should_not raise_error
|
||||
end
|
||||
|
||||
it "raises an error if unknown keyword arguments are passed" do
|
||||
-> { Hash.new(unknown: true) }.should raise_error(ArgumentError)
|
||||
-> { Hash.new(1, unknown: true) }.should raise_error(ArgumentError)
|
||||
-> { Hash.new(unknown: true) { 0 } }.should raise_error(ArgumentError)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Загрузка…
Ссылка в новой задаче