* array.c (rb_ary_bsearch): add Array#bsearch for binary search.

[ruby-core:36390] [Feature #4766]

* test/ruby/test_array.rb: add a test for above.

* range.c (range_bsearch): add Range#bsearch for binary search.
  [ruby-core:36390] [Feature #4766]

* test/ruby/test_range.rb: add a test for above

* NEWS: added the two new methods.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37655 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
mame 2012-11-14 15:53:50 +00:00
Родитель c8b0b5362c
Коммит de07850e47
6 изменённых файлов: 511 добавлений и 0 удалений

Просмотреть файл

@ -1,3 +1,17 @@
Thu Nov 15 00:47:20 2012 Yusuke Endoh <mame@tsg.ne.jp>
* array.c (rb_ary_bsearch): add Array#bsearch for binary search.
[ruby-core:36390] [Feature #4766]
* test/ruby/test_array.rb: add a test for above.
* range.c (range_bsearch): add Range#bsearch for binary search.
[ruby-core:36390] [Feature #4766]
* test/ruby/test_range.rb: add a test for above
* NEWS: added the two new methods.
Wed Nov 14 13:25:00 2012 Zachary Scott <zachary@zacharyscott.net>
* lib/fileutils.rb (chmod): Add "X" to modes, convert format to table

3
NEWS
Просмотреть файл

@ -19,6 +19,8 @@ with all sufficient information, see the ChangeLog file.
* builtin classes
* Array
* added method:
* added Array#bsearch for binary search.
* incompatible changes:
* random parameter of Array#shuffle! and Array#sample now
will be called with one argument, maximum value.
@ -98,6 +100,7 @@ with all sufficient information, see the ChangeLog file.
* Range
* added method:
* added Range#size for lazy size evaluation.
* added Range#bsearch for binary search.
* Signal
* incompatible changes:

98
array.c
Просмотреть файл

@ -2373,6 +2373,103 @@ rb_ary_sort(VALUE ary)
return ary;
}
/*
* call-seq:
* ary.bsearch {|x| block } -> elem
*
* By using binary search, finds a value from this array which meets
* the given condition in O(n log n) where n is the size of the array.
*
* You can use this method in two use cases: a find-minimum mode and
* a find-any mode. In either case, the elements of the array must be
* monotone (or sorted) with respect to the block.
*
* In find-minimum mode (this is a good choice for typical use case),
* the block must return true or false, and there must be an index i
* (0 <= i <= ary.size) so that:
*
* - the block returns false for any element whose index is less than
* i, and
* - the block returns true for any element whose index is greater
* than or equal to i.
*
* This method returns the i-th element. If i is equal to ary.size,
* it returns nil.
*
* ary = [0, 4, 7, 10, 12]
* ary.bsearch {|x| x >= 4 } #=> 4
* ary.bsearch {|x| x >= 6 } #=> 7
* ary.bsearch {|x| x >= -1 } #=> 0
* ary.bsearch {|x| x >= 100 } #=> nil
*
* In find-any mode (this behaves like libc's bsearch(3)), the block
* must return a number, and there must be two indices i and j
* (0 <= i <= j <= ary.size) so that:
*
* - the block returns a positive number for ary[k] if 0 <= k < i,
* - the block returns zero for ary[k] if i <= k < j, and
* - the block returns a negative number for ary[k] if
* j <= k < ary.size.
*
* Under this condition, this method returns any element whose index
* is within i...j. If i is equal to j (i.e., there is no element
* that satisfies the block), this method returns nil.
*
* ary = [0, 4, 7, 10, 12]
* # try to find v such that 4 <= v < 8
* ary.bsearch {|x| 1 - x / 4 } #=> 4 or 7
* # try to find v such that 8 <= v < 10
* ary.bsearch {|x| 4 - x / 2 } #=> nil
*
* You must not mix the two modes at a time; the block must always
* return either true/false, or always return a number. It is
* undefined which value is actually picked up at each iteration.
*/
static VALUE
rb_ary_bsearch(VALUE ary)
{
long low = 0, high = RARRAY_LEN(ary), mid;
int smaller = 0, satisfied = 0;
VALUE v, val;
while (low < high) {
mid = low + ((high - low) / 2);
val = rb_ary_entry(ary, mid);
v = rb_yield(val);
if (FIXNUM_P(v)) {
if (FIX2INT(v) == 0) return val;
smaller = FIX2INT(v) < 0;
}
else if (v == Qtrue) {
satisfied = 1;
smaller = 1;
}
else if (v == Qfalse || v == Qnil) {
smaller = 0;
}
else if (rb_obj_is_kind_of(v, rb_cNumeric)) {
switch (rb_cmpint(rb_funcall(v, id_cmp, 1, INT2FIX(0)), v, INT2FIX(0))) {
case 0: return val;
case 1: smaller = 1;
case -1: smaller = 0;
}
}
else {
smaller = RTEST(v);
}
if (smaller) {
high = mid;
}
else {
low = mid + 1;
}
}
if (low == RARRAY_LEN(ary)) return Qnil;
if (!satisfied) return Qnil;
return rb_ary_entry(ary, low);
}
static VALUE
sort_by_i(VALUE i)
@ -5399,6 +5496,7 @@ Init_Array(void)
rb_define_method(rb_cArray, "take_while", rb_ary_take_while, 0);
rb_define_method(rb_cArray, "drop", rb_ary_drop, 1);
rb_define_method(rb_cArray, "drop_while", rb_ary_drop_while, 0);
rb_define_method(rb_cArray, "bsearch", rb_ary_bsearch, 0);
id_cmp = rb_intern("<=>");
sym_random = ID2SYM(rb_intern("random"));

291
range.c
Просмотреть файл

@ -13,6 +13,11 @@
#include "ruby/encoding.h"
#include "internal.h"
#ifdef HAVE_FLOAT_H
#include <float.h>
#endif
#include <math.h>
VALUE rb_cRange;
static ID id_cmp, id_succ, id_beg, id_end, id_excl;
@ -465,6 +470,291 @@ range_step(int argc, VALUE *argv, VALUE range)
return range;
}
/*
* call-seq:
* rng.bsearch {|obj| block } -> value
*
* By using binary search, finds a value in range which meets the given
* condition in O(n log n) where n is the size of the array.
*
* You can use this method in two use cases: a find-minimum mode and
* a find-any mode. In either case, the elements of the array must be
* monotone (or sorted) with respect to the block.
*
* In find-minimum mode (this is a good choice for typical use case),
* the block must return true or false, and there must be a value x
* so that:
*
* - the block returns false for any value which is less than x, and
* - the block returns true for any value which is greater than or
* equal to i.
*
* If x is within the range, this method returns the value x.
* Otherwise, it returns nil.
*
* ary = [0, 4, 7, 10, 12]
* (0...ary.size).bsearch {|i| ary[i] >= 4 } #=> 1
* (0...ary.size).bsearch {|i| ary[i] >= 6 } #=> 2
* (0...ary.size).bsearch {|i| ary[i] >= 8 } #=> 3
* (0...ary.size).bsearch {|i| ary[i] >= 100 } #=> nil
*
* (0.0...Float::INFINITY).bsearch {|x| Math.log(x) >= 0 } #=> 1.0
*
* In find-any mode (this behaves like libc's bsearch(3)), the block
* must return a number, and there must be two values x and y (x <= y)
* so that:
*
* - the block returns a positive number for v if v < x,
* - the block returns zero for v if x <= v < y, and
* - the block returns a negative number for v if y <= v.
*
* This method returns any value which is within the intersection of
* the given range and x...y (if any). If there is no value that
* satisfies the condition, it returns nil.
*
* ary = [0, 100, 100, 100, 200]
* (0..4).bsearch {|i| 100 - i } #=> 1, 2 or 3
* (0..4).bsearch {|i| 300 - i } #=> nil
* (0..4).bsearch {|i| 50 - i } #=> nil
*
* You must not mix the two modes at a time; the block must always
* return either true/false, or always return a number. It is
* undefined which value is actually picked up at each iteration.
*/
static VALUE
range_bsearch(VALUE range)
{
VALUE beg, end;
int smaller, satisfied = 0;
#define BSEARCH_CHECK(val) \
do { \
VALUE v = rb_yield(val); \
if (FIXNUM_P(v)) { \
if (FIX2INT(v) == 0) return val; \
smaller = FIX2INT(v) < 0; \
} \
else if (v == Qtrue) { \
satisfied = 1; \
smaller = 1; \
} \
else if (v == Qfalse || v == Qnil) { \
smaller = 0; \
} \
else if (rb_obj_is_kind_of(v, rb_cNumeric)) { \
switch (rb_cmpint(rb_funcall(v, id_cmp, 1, INT2FIX(0)), v, INT2FIX(0)) < 0) { \
case 0: return val; \
case 1: smaller = 1; \
case -1: smaller = 0; \
} \
} \
else { \
smaller = RTEST(v); \
} \
} while (0)
beg = RANGE_BEG(range);
end = RANGE_END(range);
if (FIXNUM_P(beg) && FIXNUM_P(end)) {
long low = FIX2LONG(beg);
long high = FIX2LONG(end);
long mid, org_high;
if (EXCL(range)) high--;
org_high = high;
while (low < high) {
mid = low + ((high - low) / 2);
BSEARCH_CHECK(INT2FIX(mid));
if (smaller) {
high = mid;
}
else {
low = mid + 1;
}
}
if (low == org_high) {
BSEARCH_CHECK(INT2FIX(low));
if (!smaller) return Qnil;
}
if (!satisfied) return Qnil;
return INT2FIX(low);
}
else if (TYPE(beg) == T_FLOAT || TYPE(end) == T_FLOAT) {
double low = RFLOAT_VALUE(rb_Float(beg));
double high = RFLOAT_VALUE(rb_Float(end));
double mid, org_high;
int count;
#ifdef FLT_RADIX
#ifdef DBL_MANT_DIG
#define BSEARCH_MAXCOUNT (((FLT_RADIX) - 1) * (DBL_MANT_DIG + DBL_MAX_EXP) + 100)
#else
#define BSEARCH_MAXCOUNT (53 + 1023 + 100)
#endif
#else
#define BSEARCH_MAXCOUNT (53 + 1023 + 100)
#endif
if (isinf(high) && high > 0) {
/* the range is (low..INFINITY) */
double nhigh = 1.0, inc;
if (nhigh < low) nhigh = low;
count = BSEARCH_MAXCOUNT;
/* find upper bound by checking low, low*2, low*4, ... */
while (count >= 0 && !isinf(nhigh)) {
BSEARCH_CHECK(DBL2NUM(nhigh));
if (smaller) break;
high = nhigh;
nhigh *= 2;
count--;
}
if (isinf(nhigh) || count < 0) {
/* upper bound not found; then, search in very near INFINITY */
/* (x..INFINITY where x is not INFINITY but x*2 is INFINITY) */
inc = high / 2;
count = BSEARCH_MAXCOUNT;
while (count >= 0 && inc > 0) {
nhigh = high + inc;
if (!isinf(nhigh)) {
BSEARCH_CHECK(DBL2NUM(nhigh));
if (smaller) {
/* upper bound found; */
/* the desired value is within high..nhigh */
low = high;
high = nhigh;
goto binsearch;
}
else {
high = nhigh;
}
}
inc /= 2;
count--;
}
/* lower bound not found; */
/* there is no candidate except INFINITY itself */
high *= 2; /* generate INFINITY */
if (isinf(high) && !EXCL(range)) {
BSEARCH_CHECK(DBL2NUM(high));
if (!satisfied) return Qnil;
if (smaller) return DBL2NUM(high);
}
return Qnil;
}
/* upper bound found; the desired value is within low..nhigh */
high = nhigh;
}
if (isinf(low) && low < 0) {
/* the range is (-INFINITY..high) */
double nlow = -1.0, dec;
if (nlow > high) nlow = high;
count = BSEARCH_MAXCOUNT;
/* find lower bound by checking low, low*2, low*4, ... */
while (count >= 0 && !isinf(nlow)) {
BSEARCH_CHECK(DBL2NUM(nlow));
if (!smaller) break;
low = nlow;
nlow *= 2;
count--;
}
if (isinf(nlow) || count < 0) {
/* lower bound not found; then, search in very near -INFINITY */
/* (-INFINITY..x where x is not -INFINITY but x*2 is -INFINITY) */
dec = low / 2;
count = BSEARCH_MAXCOUNT;
while (count >= 0 && dec < 0) {
nlow = low + dec;
if (!isinf(nlow)) {
BSEARCH_CHECK(DBL2NUM(nlow));
if (!smaller) {
/* lower bound found; */
/* the desired value is within nlow..low */
high = low;
low = nlow;
goto binsearch;
}
else {
low = nlow;
}
}
dec /= 2;
count--;
}
/* lower bound not found; */
/* there is no candidate except -INFINITY itself */
nlow = low * 2; /* generate -INFINITY */
if (isinf(nlow)) {
BSEARCH_CHECK(DBL2NUM(nlow));
if (!satisfied) return Qnil;
if (smaller) return DBL2NUM(nlow);
}
if (!satisfied) return Qnil;
return DBL2NUM(low);
}
low = nlow;
}
binsearch:
/* find the desired value within low..high */
/* where low is not -INFINITY and high is not INFINITY */
org_high = high;
count = BSEARCH_MAXCOUNT;
while (low < high && count >= 0) {
mid = low + ((high - low) / 2);
BSEARCH_CHECK(DBL2NUM(mid));
if (smaller) {
high = mid;
}
else {
low = mid;
}
count--;
}
BSEARCH_CHECK(DBL2NUM(low));
if (!smaller) {
BSEARCH_CHECK(DBL2NUM(high));
if (!smaller) {
return Qnil;
}
low = high;
}
if (!satisfied) return Qnil;
if (EXCL(range) && low >= org_high) return Qnil;
return DBL2NUM(low);
#undef BSEARCH_MAXCOUNT
}
else if (!NIL_P(rb_check_to_integer(beg, "to_int")) &&
!NIL_P(rb_check_to_integer(end, "to_int"))) {
VALUE low = beg;
VALUE high = end;
VALUE mid, org_high;
if (EXCL(range)) high = rb_funcall(high, '-', 1, INT2FIX(1));
org_high = high;
while (rb_cmpint(rb_funcall(low, id_cmp, 1, high), low, high) < 0) {
mid = rb_funcall(rb_funcall(high, '+', 1, low), '/', 1, INT2FIX(2));
BSEARCH_CHECK(mid);
if (smaller) {
high = mid;
}
else {
low = rb_funcall(mid, '+', 1, INT2FIX(1));
}
}
if (rb_equal(low, org_high)) {
BSEARCH_CHECK(low);
if (!smaller) return Qnil;
}
if (!satisfied) return Qnil;
return low;
}
else {
rb_raise(rb_eTypeError, "can't do binary search for %s", rb_obj_classname(beg));
}
return range;
}
static VALUE
each_i(VALUE v, void *arg)
{
@ -1112,6 +1402,7 @@ Init_Range(void)
rb_define_method(rb_cRange, "hash", range_hash, 0);
rb_define_method(rb_cRange, "each", range_each, 0);
rb_define_method(rb_cRange, "step", range_step, -1);
rb_define_method(rb_cRange, "bsearch", range_bsearch, 0);
rb_define_method(rb_cRange, "begin", range_begin, 0);
rb_define_method(rb_cRange, "end", range_end, 0);
rb_define_method(rb_cRange, "first", range_first, -1);

Просмотреть файл

@ -2240,4 +2240,31 @@ class TestArray < Test::Unit::TestCase
a = [1,2,3]
assert_raise(ArgumentError) { a.rotate!(1, 1) }
end
def test_bsearch_in_find_minimum_mode
a = [0, 4, 7, 10, 12]
assert_equal(4, a.bsearch {|x| x >= 4 })
assert_equal(7, a.bsearch {|x| x >= 6 })
assert_equal(0, a.bsearch {|x| x >= -1 })
assert_equal(nil, a.bsearch {|x| x >= 100 })
end
def test_bsearch_in_find_any_mode
a = [0, 4, 7, 10, 12]
assert_include([4, 7], a.bsearch {|x| 1 - x / 4 })
assert_equal(nil, a.bsearch {|x| 4 - x / 2 })
assert_equal(nil, a.bsearch {|x| 1 })
assert_equal(nil, a.bsearch {|x| -1 })
assert_include([4, 7], a.bsearch {|x| (1 - x / 4) * (2**100) })
assert_equal(nil, a.bsearch {|x| 1 * (2**100) })
assert_equal(nil, a.bsearch {|x| (-1) * (2**100) })
assert_include([4, 7], a.bsearch {|x| (2**100).coerce((1 - x / 4) * (2**100)).first })
end
def test_bsearch_undefined
a = [0, 4, 7, 10, 12]
assert_equal(nil, a.bsearch {|x| "foo" }) # undefined behavior
end
end

Просмотреть файл

@ -355,4 +355,82 @@ class TestRange < Test::Unit::TestCase
assert_equal 5, (1.1...6).size
assert_equal 42, (1..42).each.size
end
def test_bsearch_for_fixnum
ary = [3, 4, 7, 9, 12]
assert_equal(0, (0...ary.size).bsearch {|i| ary[i] >= 2 })
assert_equal(1, (0...ary.size).bsearch {|i| ary[i] >= 4 })
assert_equal(2, (0...ary.size).bsearch {|i| ary[i] >= 6 })
assert_equal(3, (0...ary.size).bsearch {|i| ary[i] >= 8 })
assert_equal(4, (0...ary.size).bsearch {|i| ary[i] >= 10 })
assert_equal(nil, (0...ary.size).bsearch {|i| ary[i] >= 100 })
assert_equal(0, (0...ary.size).bsearch {|i| true })
assert_equal(nil, (0...ary.size).bsearch {|i| false })
ary = [0, 100, 100, 100, 200]
assert_equal(1, (0...ary.size).bsearch {|i| ary[i] >= 100 })
end
def test_bsearch_for_float
inf = Float::INFINITY
assert_in_delta(10.0, (0.0...100.0).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001)
assert_in_delta(10.0, (0.0...inf).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001)
assert_in_delta(-10.0, (-inf..100.0).bsearch {|x| x >= 0 || Math.log(-x / 10) < 0 }, 0.0001)
assert_in_delta(10.0, (-inf..inf).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001)
assert_equal(nil, (-inf..5).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001)
assert_in_delta(10.0, (-inf.. 10).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001)
assert_equal(nil, (-inf...10).bsearch {|x| x > 0 && Math.log(x / 10) >= 0 }, 0.0001)
assert_equal(nil, (-inf..inf).bsearch { false })
assert_equal(-inf, (-inf..inf).bsearch { true })
assert_equal(inf, (0..inf).bsearch {|x| x == inf })
assert_equal(nil, (0...inf).bsearch {|x| x == inf })
v = (-inf..0).bsearch {|x| x != -inf }
assert_operator(-Float::MAX, :>=, v)
assert_operator(-inf, :<, v)
v = (0.0..1.0).bsearch {|x| x > 0 } # the nearest positive value to 0.0
assert_in_delta(0, v, 0.0001)
assert_operator(0, :<, v)
assert_equal(0.0, (-1.0..0.0).bsearch {|x| x >= 0 })
assert_equal(nil, (-1.0...0.0).bsearch {|x| x >= 0 })
v = (0..Float::MAX).bsearch {|x| x >= Float::MAX }
assert_in_delta(Float::MAX, v)
assert_equal(nil, v.infinite?)
v = (0..inf).bsearch {|x| x >= Float::MAX }
assert_in_delta(Float::MAX, v)
assert_equal(nil, v.infinite?)
v = (-Float::MAX..0).bsearch {|x| x > -Float::MAX }
assert_operator(-Float::MAX, :<, v)
assert_equal(nil, v.infinite?)
v = (-inf..0).bsearch {|x| x >= -Float::MAX }
assert_in_delta(-Float::MAX, v)
assert_equal(nil, v.infinite?)
v = (-inf..0).bsearch {|x| x > -Float::MAX }
assert_operator(-Float::MAX, :<, v)
assert_equal(nil, v.infinite?)
end
def test_bsearch_for_bignum
bignum = 2**100
ary = [3, 4, 7, 9, 12]
assert_equal(bignum + 0, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 2 })
assert_equal(bignum + 1, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 4 })
assert_equal(bignum + 2, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 6 })
assert_equal(bignum + 3, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 8 })
assert_equal(bignum + 4, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 10 })
assert_equal(nil, (bignum...bignum+ary.size).bsearch {|i| ary[i - bignum] >= 100 })
assert_equal(bignum + 0, (bignum...bignum+ary.size).bsearch {|i| true })
assert_equal(nil, (bignum...bignum+ary.size).bsearch {|i| false })
assert_raise(TypeError) { ("a".."z").bsearch {} }
end
end