зеркало из https://github.com/github/ruby.git
257 строки
7.1 KiB
C
257 строки
7.1 KiB
C
/**********************************************************************
|
|
|
|
reggnu.c - Oniguruma (regular expression library)
|
|
|
|
Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp)
|
|
|
|
**********************************************************************/
|
|
#include "regint.h"
|
|
|
|
#ifndef ONIGGNU_H /* name changes from oniggnu.h to regex.h in ruby. */
|
|
#include "oniggnu.h"
|
|
#endif
|
|
|
|
#if defined(RUBY_PLATFORM) || defined(RUBY)
|
|
#ifndef ONIG_RUBY_M17N
|
|
#define USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef NULL
|
|
#define NULL ((void* )0)
|
|
#endif
|
|
|
|
extern void
|
|
re_free_registers(OnigRegion* r)
|
|
{
|
|
/* 0: don't free self */
|
|
onig_region_free(r, 0);
|
|
}
|
|
|
|
extern int
|
|
re_adjust_startpos(regex_t* reg, const char* string, int size,
|
|
int startpos, int range)
|
|
{
|
|
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
|
|
UChar *p;
|
|
UChar *s = (UChar* )string + startpos;
|
|
|
|
if (range > 0) {
|
|
p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
|
|
}
|
|
else {
|
|
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
|
|
}
|
|
return p - (UChar* )string;
|
|
}
|
|
|
|
return startpos;
|
|
}
|
|
|
|
extern int
|
|
re_match(regex_t* reg, const char* str, int size, int pos,
|
|
struct re_registers* regs)
|
|
{
|
|
return onig_match(reg, (UChar* )str, (UChar* )(str + size),
|
|
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
|
|
}
|
|
|
|
extern int
|
|
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
|
|
struct re_registers* regs)
|
|
{
|
|
return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
|
|
(UChar* )(string + startpos),
|
|
(UChar* )(string + startpos + range),
|
|
regs, ONIG_OPTION_NONE);
|
|
}
|
|
|
|
extern int
|
|
re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
|
{
|
|
int r;
|
|
OnigErrorInfo einfo;
|
|
|
|
r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
|
|
if (r != 0) {
|
|
if (IS_NOT_NULL(ebuf))
|
|
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
extern int
|
|
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
|
{
|
|
int r;
|
|
OnigErrorInfo einfo;
|
|
OnigEncoding enc;
|
|
|
|
/* I think encoding and options should be arguments of this function.
|
|
But this is adapted to present re.c. (2002/11/29)
|
|
*/
|
|
enc = OnigEncDefaultCharEncoding;
|
|
|
|
r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
|
|
reg->options, enc, OnigDefaultSyntax, &einfo);
|
|
if (r != 0) {
|
|
if (IS_NOT_NULL(ebuf))
|
|
(void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
extern void
|
|
re_free_pattern(regex_t* reg)
|
|
{
|
|
onig_free(reg);
|
|
}
|
|
|
|
extern int
|
|
re_alloc_pattern(regex_t** reg)
|
|
{
|
|
return onig_alloc_init(reg, ONIG_OPTION_DEFAULT, OnigEncDefaultCharEncoding,
|
|
OnigDefaultSyntax);
|
|
}
|
|
|
|
extern void
|
|
re_set_casetable(const char* table)
|
|
{
|
|
onigenc_set_default_caseconv_table((UChar* )table);
|
|
}
|
|
|
|
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
|
static const unsigned char mbctab_ascii[] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
};
|
|
|
|
static const unsigned char mbctab_euc[] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
|
};
|
|
|
|
static const unsigned char mbctab_sjis[] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
|
|
};
|
|
|
|
static const unsigned char mbctab_utf8[] = {
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0,
|
|
};
|
|
|
|
const unsigned char *re_mbctab = mbctab_ascii;
|
|
#endif
|
|
|
|
extern void
|
|
#ifdef ONIG_RUBY_M17N
|
|
re_mbcinit(OnigEncoding enc)
|
|
#else
|
|
re_mbcinit(int mb_code)
|
|
#endif
|
|
{
|
|
#ifdef ONIG_RUBY_M17N
|
|
|
|
onigenc_set_default_encoding(enc);
|
|
|
|
#else
|
|
|
|
OnigEncoding enc;
|
|
|
|
switch (mb_code) {
|
|
case MBCTYPE_ASCII:
|
|
enc = ONIG_ENCODING_ASCII;
|
|
break;
|
|
case MBCTYPE_EUC:
|
|
enc = ONIG_ENCODING_EUC_JP;
|
|
break;
|
|
case MBCTYPE_SJIS:
|
|
enc = ONIG_ENCODING_SJIS;
|
|
break;
|
|
case MBCTYPE_UTF8:
|
|
enc = ONIG_ENCODING_UTF8;
|
|
break;
|
|
default:
|
|
return ;
|
|
break;
|
|
}
|
|
|
|
onigenc_set_default_encoding(enc);
|
|
#endif
|
|
|
|
#ifdef USE_COMPATIBILITY_FOR_RUBY_EXTENSION_LIBRARY
|
|
switch (mb_code) {
|
|
case MBCTYPE_ASCII:
|
|
re_mbctab = mbctab_ascii;
|
|
break;
|
|
case MBCTYPE_EUC:
|
|
re_mbctab = mbctab_euc;
|
|
break;
|
|
case MBCTYPE_SJIS:
|
|
re_mbctab = mbctab_sjis;
|
|
break;
|
|
case MBCTYPE_UTF8:
|
|
re_mbctab = mbctab_utf8;
|
|
break;
|
|
}
|
|
#endif
|
|
}
|