* re.c (rb_memsearch_ss): simple shift search.

* re.c (rb_memsearch_qs): quick search.

* re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string.

* re.c (rb_memsearch_qs_utf8_hash): hash functions for above.

* re.c (rb_memsearch): use above functions.

* string.c (rb_str_index): give enc to rb_memsearch.

* include/ruby/intern.h (rb_memsearch): move to encoding.h.

* include/ruby/encoding.h (rb_memsearch): move from intern.h.

* common.mk (PREP): add dependency.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15792 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2008-03-17 19:04:29 +00:00
Родитель 89afc5431b
Коммит e58adeae0f
7 изменённых файлов: 160 добавлений и 34 удалений

Просмотреть файл

@ -1,3 +1,23 @@
Tue Mar 18 04:00:27 2008 NARUSE, Yui <naruse@ruby-lang.org>
* re.c (rb_memsearch_ss): simple shift search.
* re.c (rb_memsearch_qs): quick search.
* re.c (rb_memsearch_qs_utf8): quick search for UTF-8 string.
* re.c (rb_memsearch_qs_utf8_hash): hash functions for above.
* re.c (rb_memsearch): use above functions.
* string.c (rb_str_index): give enc to rb_memsearch.
* include/ruby/intern.h (rb_memsearch): move to encoding.h.
* include/ruby/encoding.h (rb_memsearch): move from intern.h.
* common.mk (PREP): add dependency.
Mon Mar 17 22:23:54 2008 Yusuke Endoh <mame@tsg.ne.jp>
* array.c (rb_ary_take, rb_ary_take_while, rb_ary_drop,

Просмотреть файл

@ -110,6 +110,8 @@ all: $(MKFILES) $(PREP) encdb transdb $(RBCONFIG) $(LIBRUBY) encs
@$(MINIRUBY) $(srcdir)/ext/extmk.rb --make="$(MAKE)" $(EXTMK_ARGS)
prog: $(PROGRAM) $(WPROGRAM)
$(PREP): $(MKFILES)
miniruby$(EXEEXT): config.status $(NORMALMAINOBJ) $(MINIOBJS) $(COMMONOBJS) $(DMYEXT) $(ARCHFILE)
GORUBY = go$(RUBY_INSTALL_NAME)

Просмотреть файл

@ -176,5 +176,6 @@ int rb_usascii_encindex(void);
VALUE rb_enc_default_external(void);
void rb_enc_set_default_external(VALUE encoding);
VALUE rb_locale_charmap(VALUE klass);
long rb_memsearch(const void*,long,const void*,long,rb_encoding*);
#endif /* RUBY_ENCODING_H */

Просмотреть файл

@ -466,7 +466,6 @@ double rb_genrand_real(void);
/* re.c */
#define rb_memcmp memcmp
int rb_memcicmp(const void*,const void*,long);
long rb_memsearch(const void*,long,const void*,long);
VALUE rb_reg_nth_defined(int, VALUE);
VALUE rb_reg_nth_match(int, VALUE);
VALUE rb_reg_last_match(VALUE);

162
re.c
Просмотреть файл

@ -95,41 +95,145 @@ rb_memcmp(const void *p1, const void *p2, long len)
return memcmp(p1, p2, len);
}
long
rb_memsearch(const void *x0, long m, const void *y0, long n)
static inline long
rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
const unsigned char *x = x0, *y = y0;
const unsigned char *s, *e;
long i;
int d;
unsigned long hx, hy;
const unsigned char *x = xs, *xe = xs + m;
const unsigned char *y = ys, *ye = ys + n;
#ifndef VALUE_MAX
# if SIZEOF_VALUE == 8
# define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
# elif SIZEOF_VALUE == 4
# define VALUE_MAX 0xFFFFFFFFUL
# endif
#endif
VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
#define KR_REHASH(a, b, h) (((h) << 1) - (((unsigned long)(a))<<d) + (b))
if (m > SIZEOF_VALUE)
rb_bug("!!too long pattern string!!");
if (m > n) return -1;
s = y; e = s + n - m;
/* Preprocessing */
/* computes d = 2^(m-1) with
the left-shift operator */
d = sizeof(hx) * CHAR_BIT - 1;
if (d > m) d = m;
if (n == m) {
return memcmp(x, s, m) == 0 ? 0 : -1;
}
/* Prepare hash value */
for (hy = hx = i = 0; i < d; ++i) {
hx = KR_REHASH(0, x[i], hx);
hy = KR_REHASH(0, s[i], hy);
/* Prepare hash value */
for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
hx <<= CHAR_BIT;
hy <<= CHAR_BIT;
hx |= *x;
hy |= *y;
}
/* Searching */
while (hx != hy || memcmp(x, s, m)) {
if (s >= e) return -1;
hy = KR_REHASH(*s, *(s+d), hy);
s++;
while (hx != hy) {
if (y == ye)
return -1;
hy <<= CHAR_BIT;
hy |= *y;
hy &= mask;
y++;
}
return y - ys - m;
}
static inline long
rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
const unsigned char *x = xs, *xe = xs + m;
const unsigned char *y = ys, *ye = ys + n;
VALUE i, qstable[256];
/* Preprocessing */
for (i = 0; i < 256; ++i)
qstable[i] = m + 1;
for (; x < xe; ++x)
qstable[*x] = xe - x;
/* Searching */
for (; y < ye; y += *(qstable + y[m])) {
if (*xs == *y && memcmp(xs, y, m) == 0)
return y - ys;
}
return -1;
}
static inline unsigned int
rb_memsearch_qs_utf8_hash(const unsigned char *x)
{
register const unsigned int mix = 8353;
register unsigned int h = *x;
if (h < 0xC0) {
return h + 256;
}
else if (h < 0xE0) {
h *= mix;
h += x[1];
}
else if (h < 0xF0) {
h *= mix;
h += x[1];
h *= mix;
h += x[2];
}
else if (h < 0xF5) {
h *= mix;
h += x[1];
h *= mix;
h += x[2];
h *= mix;
h += x[3];
}
else {
return h + 256;
}
return (unsigned char)h;
}
static inline long
rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
const unsigned char *x = xs, *xe = xs + m;
const unsigned char *y = ys, *ye = ys + n;
VALUE i, qstable[512];
/* Preprocessing */
for (i = 0; i < 512; ++i) {
qstable[i] = m + 1;
}
for (; x < xe; ++x) {
qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
}
/* Searching */
for (; y < ye; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
if (*xs == *y && memcmp(xs, y, m) == 0)
return y - ys;
}
return -1;
}
long
rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
{
const unsigned char *x = x0, *y = y0;
if (m > n) return -1;
else if (m == n) {
return memcmp(x0, y0, m) == 0 ? 0 : -1;
}
else if (m < 1) {
return 0;
}
else if (m == 1) {
const unsigned char *ys = y, *ye = ys + n;
for (; y < ye; ++y) {
if (*x == *y)
return y - ys;
}
return -1;
}
else if (m <= SIZEOF_VALUE) {
return rb_memsearch_ss(x0, m, y0, n);
}
else if (enc == rb_utf8_encoding()){
return rb_memsearch_qs_utf8(x0, m, y0, n);
}
else {
return rb_memsearch_qs(x0, m, y0, n);
}
return s-y;
}
#define REG_LITERAL FL_USER5

Просмотреть файл

@ -2088,7 +2088,7 @@ rb_str_index(VALUE str, VALUE sub, long offset)
len = RSTRING_LEN(str) - offset;
for (;;) {
char *t;
pos = rb_memsearch(sptr, slen, s, len);
pos = rb_memsearch(sptr, slen, s, len, enc);
if (pos < 0) return pos;
t = rb_enc_right_char_head(s, s+pos, enc);
if (t == s + pos) break;

Просмотреть файл

@ -1,7 +1,7 @@
#define RUBY_VERSION "1.9.0"
#define RUBY_RELEASE_DATE "2008-03-17"
#define RUBY_RELEASE_DATE "2008-03-18"
#define RUBY_VERSION_CODE 190
#define RUBY_RELEASE_CODE 20080317
#define RUBY_RELEASE_CODE 20080318
#define RUBY_PATCHLEVEL 0
#define RUBY_VERSION_MAJOR 1
@ -9,7 +9,7 @@
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2008
#define RUBY_RELEASE_MONTH 3
#define RUBY_RELEASE_DAY 17
#define RUBY_RELEASE_DAY 18
#ifdef RUBY_EXTERN
RUBY_EXTERN const char ruby_version[];