зеркало из https://github.com/github/ruby.git
case-folding.rb: perfect hash for case unfolding3
* enc/unicode/case-folding.rb (lookup_hash): make perfect hash to lookup case unfolding table 3. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46272 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
8f59867651
Коммит
7e67b39679
|
@ -1,4 +1,7 @@
|
|||
Sat May 31 08:58:12 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
Sat May 31 08:58:22 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* enc/unicode/case-folding.rb (lookup_hash): make perfect hash to
|
||||
lookup case unfolding table 3.
|
||||
|
||||
* enc/unicode/case-folding.rb (lookup_hash): make perfect hash to
|
||||
lookup case unfolding table 2.
|
||||
|
|
|
@ -128,6 +128,15 @@ code2_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
||||
{
|
||||
if (x[0] != y[0]) return 0;
|
||||
if (x[1] != y[1]) return 0;
|
||||
if (x[2] != y[2]) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#include "enc/unicode/casefold.h"
|
||||
|
||||
#include "enc/unicode/name2ctype.h"
|
||||
|
@ -211,45 +220,12 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
|
|||
}
|
||||
|
||||
|
||||
static int
|
||||
code3_cmp(st_data_t x0, st_data_t y0)
|
||||
{
|
||||
const OnigCodePoint *x = (const OnigCodePoint *)x0;
|
||||
const OnigCodePoint *y = (const OnigCodePoint *)y0;
|
||||
if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static st_index_t
|
||||
code3_hash(st_data_t x0)
|
||||
{
|
||||
const OnigCodePoint *x = (const OnigCodePoint *)x0;
|
||||
return (st_index_t )(x[0] + x[1] + x[2]);
|
||||
}
|
||||
|
||||
static const struct st_hash_type type_code3_hash = {
|
||||
code3_cmp,
|
||||
code3_hash,
|
||||
};
|
||||
|
||||
|
||||
static st_table* Unfold3Table;
|
||||
static int CaseFoldInited = 0;
|
||||
|
||||
static int init_case_fold_table(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
Unfold3Table = st_init_table_with_size(&type_code3_hash, UNFOLD3_TABLE_SIZE);
|
||||
if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY;
|
||||
|
||||
for (i = 0; i < numberof(CaseUnfold_13); i++) {
|
||||
const CaseUnfold_13_Type *p3 = &CaseUnfold_13[i];
|
||||
st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
|
||||
}
|
||||
|
||||
CaseFoldInited = 1;
|
||||
THREAD_ATOMIC_END;
|
||||
return 0;
|
||||
|
@ -258,17 +234,7 @@ static int init_case_fold_table(void)
|
|||
#define onigenc_unicode_fold_lookup onigenc_unicode_CaseFold_11_lookup
|
||||
#define onigenc_unicode_unfold1_lookup onigenc_unicode_CaseUnfold_11_lookup
|
||||
#define onigenc_unicode_unfold2_lookup onigenc_unicode_CaseUnfold_12_lookup
|
||||
|
||||
|
||||
static inline const CodePointList2 *
|
||||
onigenc_unicode_unfold3_lookup(const OnigCodePoint *code)
|
||||
{
|
||||
st_data_t to;
|
||||
if (onig_st_lookup(Unfold3Table, (st_data_t )code, &to) != 0) {
|
||||
return (const CodePointList2 *)to;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#define onigenc_unicode_unfold3_lookup onigenc_unicode_CaseUnfold_13_lookup
|
||||
|
||||
extern int
|
||||
onigenc_unicode_mbc_case_fold(OnigEncoding enc,
|
||||
|
|
|
@ -163,11 +163,8 @@ class CaseFolding
|
|||
|
||||
# CaseUnfold_13
|
||||
name = "CaseUnfold_13"
|
||||
print_table(dest, name, name=>unfold[2])
|
||||
|
||||
# table sizes
|
||||
unfold3_table_size = unfold[2].size
|
||||
dest.printf("#define UNFOLD3_TABLE_SIZE\t%d\n", (unfold3_table_size * 1.7))
|
||||
data = print_table(dest, name, name=>unfold[2])
|
||||
dest.print lookup_hash(name, "CodePointList2", data)
|
||||
end
|
||||
|
||||
def self.load(*args)
|
||||
|
|
|
@ -5176,4 +5176,97 @@ static const CaseUnfold_13_Type CaseUnfold_13_Table[] = {
|
|||
{{0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7}}},
|
||||
};
|
||||
|
||||
#define UNFOLD3_TABLE_SIZE 23
|
||||
/* C code produced by gperf version 3.0.4 */
|
||||
/* Command-line: gperf -7 -k1,2,3,4,5,6,7,8,9 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_13_hash -N onigenc_unicode_CaseUnfold_13_lookup */
|
||||
|
||||
/* maximum key range = 20, duplicates = 0 */
|
||||
|
||||
#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__cplusplus) || defined(__GNUC_STDC_INLINE__)
|
||||
inline
|
||||
#elif defined(__GNUC__)
|
||||
__inline
|
||||
#endif
|
||||
/*ARGSUSED*/
|
||||
static unsigned int
|
||||
onigenc_unicode_CaseUnfold_13_hash(const OnigCodePoint *codes)
|
||||
{
|
||||
static const unsigned char asso_values[] =
|
||||
{
|
||||
7, 4, 47, 47, 47, 47, 1, 1, 2, 47,
|
||||
47, 47, 47, 47, 47, 47, 47, 47, 47, 1,
|
||||
47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
|
||||
47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
|
||||
47, 47, 47, 47, 47, 47, 47, 47, 47, 11,
|
||||
47, 47, 47, 47, 47, 10, 47, 2, 47, 47,
|
||||
47, 47, 47, 47, 47, 47, 1, 47, 47, 1,
|
||||
47, 47, 47, 9, 47, 47, 47, 47, 47, 47,
|
||||
47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
|
||||
47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
|
||||
47, 47, 1, 47, 47, 2, 47, 47, 1, 47,
|
||||
47, 47, 47, 47, 47, 47, 47, 47, 47, 47,
|
||||
47, 47, 47, 47, 47, 47, 47, 47
|
||||
};
|
||||
return asso_values[bits_at(codes, 8)] + asso_values[bits_at(codes, 7)] + asso_values[bits_at(codes, 6)] + asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
static const CodePointList2 *
|
||||
onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
|
||||
{
|
||||
enum
|
||||
{
|
||||
MIN_CODE_VALUE = 0x66,
|
||||
MAX_CODE_VALUE = 0x3c9,
|
||||
TOTAL_KEYWORDS = 14,
|
||||
MIN_WORD_LENGTH = 9,
|
||||
MAX_WORD_LENGTH = 9,
|
||||
MIN_HASH_VALUE = 27,
|
||||
MAX_HASH_VALUE = 46
|
||||
};
|
||||
|
||||
static const short wordlist[] =
|
||||
{
|
||||
-1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1,
|
||||
/*0x03c5,0x0313,0x0342*/ 12,
|
||||
/*0x03c5,0x0308,0x0342*/ 9,
|
||||
/*0x03b9,0x0308,0x0342*/ 6,
|
||||
/*0x03c5,0x0313,0x0301*/ 11,
|
||||
/*0x03c5,0x0308,0x0301*/ 8,
|
||||
/*0x03b9,0x0308,0x0301*/ 5,
|
||||
/*0x03c5,0x0313,0x0300*/ 10,
|
||||
/*0x03c5,0x0308,0x0300*/ 7,
|
||||
/*0x03b9,0x0308,0x0300*/ 4,
|
||||
/*0x03c9,0x0342,0x03b9*/ 13,
|
||||
/*0x03b7,0x0342,0x03b9*/ 3,
|
||||
/*0x03b1,0x0342,0x03b9*/ 2,
|
||||
-1, -1, -1, -1, -1, -1,
|
||||
/*0x0066,0x0066,0x006c*/ 1,
|
||||
/*0x0066,0x0066,0x0069*/ 0
|
||||
};
|
||||
|
||||
if (codes[0] <= MAX_CODE_VALUE && codes[0] >= MIN_CODE_VALUE &&
|
||||
codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE &&
|
||||
codes[2] <= MAX_CODE_VALUE && codes[2] >= MIN_CODE_VALUE)
|
||||
{
|
||||
register int key = onigenc_unicode_CaseUnfold_13_hash(codes);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= 0)
|
||||
{
|
||||
register short s = wordlist[key];
|
||||
|
||||
if (s >= 0 && code3_equal(codes, CaseUnfold_13_Table[s].from))
|
||||
return &CaseUnfold_13_Table[s].to;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче