зеркало из https://github.com/github/ruby.git
Merge Onigmo 6.0.0
* https://github.com/k-takata/Onigmo/blob/Onigmo-6.0.0/HISTORY
* fix for ruby 2.4: https://github.com/k-takata/Onigmo/pull/78
* suppress warning: https://github.com/k-takata/Onigmo/pull/79
* include/ruby/oniguruma.h: include onigmo.h.
* template/encdb.h.tmpl: ignore the duplicate definition of EUC-CN in enc/euc_kr.c. It is defined in enc/gb2312.c with a CRuby macro.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57045 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
42a677c895
Коммит
2873edeafb
2
NEWS
2
NEWS
|
@ -138,6 +138,8 @@ with all sufficient information, see the ChangeLog file or Redmine
|
|||
* meta character \X matches Unicode 9.0 characters with some workarounds
|
||||
for UTR #51 Unicode Emoji, Version 4.0 emoji zwj sequences.
|
||||
|
||||
* Update Onigmo 6.0.0.
|
||||
|
||||
* Regexp/String: Updated Unicode version from 8.0.0 to 9.0.0 [Feature #12513]
|
||||
|
||||
* RubyVM::Env
|
||||
|
|
|
@ -29,9 +29,12 @@
|
|||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
#include "encindex.h"
|
||||
#ifdef RUBY
|
||||
# include "encindex.h"
|
||||
#endif
|
||||
|
||||
#ifndef ENCINDEX_ASCII
|
||||
#define ENCINDEX_ASCII 0
|
||||
# define ENCINDEX_ASCII 0
|
||||
#endif
|
||||
|
||||
OnigEncodingDefine(ascii, ASCII) = {
|
||||
|
@ -51,9 +54,9 @@ OnigEncodingDefine(ascii, ASCII) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
ENCINDEX_ASCII,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("BINARY", "ASCII-8BIT")
|
||||
ENC_REPLICATE("IBM437", "ASCII-8BIT")
|
||||
|
|
|
@ -300,9 +300,9 @@ OnigEncodingDefine(big5, BIG5) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
big5_left_adjust_char_head,
|
||||
big5_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -335,9 +335,9 @@ OnigEncodingDefine(big5_hkscs, BIG5_HKSCS) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
big5_left_adjust_char_head,
|
||||
big5_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("Big5-HKSCS:2008", "Big5-HKSCS")
|
||||
|
||||
|
@ -370,7 +370,7 @@ OnigEncodingDefine(big5_uao, BIG5_UAO) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
big5_left_adjust_char_head,
|
||||
big5_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
|
|
|
@ -211,9 +211,9 @@ OnigEncodingDefine(cp949, CP949) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
cp949_left_adjust_char_head,
|
||||
cp949_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
/*
|
||||
* Name: CP949
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
#include "regenc.h"
|
||||
|
||||
|
||||
#define emacsmule_islead(c) ((UChar )(c) < 0x9e)
|
||||
|
@ -334,9 +334,9 @@ OnigEncodingDefine(emacs_mule, Emacs_Mule) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
|
||||
ENC_REPLICATE("stateless-ISO-2022-JP", "Emacs-Mule")
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
#include "regenc.h"
|
||||
|
||||
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
|
||||
|
||||
|
@ -576,9 +576,9 @@ OnigEncodingDefine(euc_jp, EUC_JP) = {
|
|||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
/*
|
||||
* Name: EUC-JP
|
||||
|
|
27
enc/euc_kr.c
27
enc/euc_kr.c
|
@ -188,8 +188,33 @@ OnigEncodingDefine(euc_kr, EUC_KR) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
euckr_left_adjust_char_head,
|
||||
euckr_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("eucKR", "EUC-KR")
|
||||
|
||||
#ifndef RUBY
|
||||
/* Same with OnigEncodingEUC_KR except the name */
|
||||
OnigEncodingDefine(euc_cn, EUC_CN) = {
|
||||
euckr_mbc_enc_len,
|
||||
"EUC-CN", /* name */
|
||||
2, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
euckr_mbc_to_code,
|
||||
onigenc_mb2_code_to_mbclen,
|
||||
euckr_code_to_mbc,
|
||||
euckr_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
euckr_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
euckr_left_adjust_char_head,
|
||||
euckr_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
};
|
||||
#endif /* RUBY */
|
||||
|
|
|
@ -221,8 +221,8 @@ OnigEncodingDefine(euc_tw, EUC_TW) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
euctw_left_adjust_char_head,
|
||||
euctw_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("eucTW", "EUC-TW")
|
||||
|
|
|
@ -597,8 +597,7 @@ OnigEncodingDefine(gb18030, GB18030) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
gb18030_left_adjust_char_head,
|
||||
gb18030_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
|
||||
|
|
|
@ -211,9 +211,9 @@ OnigEncodingDefine(gbk, GBK) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
gbk_left_adjust_char_head,
|
||||
gbk_is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
/*
|
||||
* Name: GBK
|
||||
|
|
|
@ -256,45 +256,46 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code += 0x20;
|
||||
}
|
||||
else if (code==0xAA || code==0xBA || code==0xB5 || code==0xFF) ;
|
||||
else if ((EncISO_8859_1_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
else if (code == 0xAA || code == 0xBA || code == 0xB5 || code == 0xFF)
|
||||
;
|
||||
else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
|
||||
|
@ -314,8 +315,8 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-1", "ISO-8859-1")
|
||||
|
|
|
@ -215,9 +215,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -225,48 +225,49 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if (code==0xBD || code==0xFF) ;
|
||||
else if (code == 0xBD || code == 0xFF)
|
||||
;
|
||||
else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_10_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncISO_8859_10_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code>=0xA0 && code<=0xBF)
|
||||
if (code >= 0xA0 && code <= 0xBF)
|
||||
code -= 0x10;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_10, ISO_8859_10) = {
|
||||
|
@ -286,8 +287,8 @@ OnigEncodingDefine(iso_8859_10, ISO_8859_10) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-10", "ISO-8859-10")
|
||||
|
|
|
@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-11", "ISO-8859-11")
|
||||
|
||||
|
|
|
@ -208,9 +208,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -218,38 +218,39 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_13_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if (code==0xB5) ;
|
||||
else if ((EncISO_8859_13_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
else if (code == 0xB5)
|
||||
;
|
||||
else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code==0xB8 || code==0xBA || code==0xBF) {
|
||||
if (code == 0xB8 || code == 0xBA || code == 0xBF) {
|
||||
code -= 0x10;
|
||||
}
|
||||
else {
|
||||
|
@ -257,11 +258,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
|||
}
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_13, ISO_8859_13) = {
|
||||
|
@ -281,8 +282,8 @@ OnigEncodingDefine(iso_8859_13, ISO_8859_13) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-13", "ISO-8859-13")
|
||||
|
|
|
@ -217,9 +217,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -227,58 +227,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
/* else if (code==0xAA || code==0xBA) ; */
|
||||
/* else if (code == 0xAA || code == 0xBA) ; */
|
||||
else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_14_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncISO_8859_14_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if(code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE)
|
||||
if (code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE)
|
||||
code -= 0x1;
|
||||
else if(code == 0xAB)
|
||||
else if (code == 0xAB)
|
||||
code -= 0x5;
|
||||
else if(code == 0xFF)
|
||||
else if (code == 0xFF)
|
||||
code -= 0x50;
|
||||
else if(code == 0xB9)
|
||||
else if (code == 0xB9)
|
||||
code -= 0x2;
|
||||
else if(code == 0xBF)
|
||||
else if (code == 0xBF)
|
||||
code -= 0x4;
|
||||
else if(code == 0xB8 || code == 0xBA || code == 0xBC)
|
||||
else if (code == 0xB8 || code == 0xBA || code == 0xBC)
|
||||
code -= 0x10;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_14, ISO_8859_14) = {
|
||||
|
@ -298,8 +298,8 @@ OnigEncodingDefine(iso_8859_14, ISO_8859_14) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-14", "ISO-8859-14")
|
||||
|
|
|
@ -211,9 +211,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -221,54 +221,55 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if (code==0xAA || code==0xBA || code==0xB5) ;
|
||||
else if (code == 0xAA || code == 0xBA || code == 0xB5)
|
||||
;
|
||||
else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_15_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncISO_8859_15_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code==0xA8)
|
||||
if (code == 0xA8)
|
||||
code -= 2;
|
||||
else if (code==0xB8)
|
||||
else if (code == 0xB8)
|
||||
code -= 4;
|
||||
else if (code==0xBD)
|
||||
else if (code == 0xBD)
|
||||
code -= 1;
|
||||
else if (code==0xFF)
|
||||
else if (code == 0xFF)
|
||||
code -= 0x41;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_15, ISO_8859_15) = {
|
||||
|
@ -288,8 +289,8 @@ OnigEncodingDefine(iso_8859_15, ISO_8859_15) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-15", "ISO-8859-15")
|
||||
|
|
|
@ -213,9 +213,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -223,57 +223,57 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_16_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncISO_8859_16_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code==0xA2 || code==0xBD)
|
||||
if (code == 0xA2 || code == 0xBD)
|
||||
code--;
|
||||
else if (code==0xB3 || code==0xBA || code==0xBF)
|
||||
else if (code == 0xB3 || code == 0xBA || code == 0xBF)
|
||||
code -= 0x10;
|
||||
else if (code==0xA8 || code==0xAE)
|
||||
else if (code == 0xA8 || code == 0xAE)
|
||||
code -= 0x02;
|
||||
else if (code==0xB9)
|
||||
else if (code == 0xB9)
|
||||
code -= 0x07;
|
||||
else if (code==0xB8)
|
||||
else if (code == 0xB8)
|
||||
code -= 0x04;
|
||||
else if (code==0xFF)
|
||||
else if (code == 0xFF)
|
||||
code -= 0x41;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_16, ISO_8859_16) = {
|
||||
|
@ -293,8 +293,8 @@ OnigEncodingDefine(iso_8859_16, ISO_8859_16) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-16", "ISO-8859-16")
|
||||
|
|
|
@ -221,50 +221,50 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_2_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncISO_8859_2_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
if (code>=0xB1 && code<=0xBF){
|
||||
else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
if (code >= 0xB1 && code <= 0xBF) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code -= 0x10;
|
||||
}
|
||||
else{
|
||||
else {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code -= 0x20;
|
||||
}
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_2, ISO_8859_2) = {
|
||||
|
@ -284,8 +284,8 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-2", "ISO-8859-2")
|
||||
|
|
|
@ -223,45 +223,46 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
#define DOTLESS_i (0xB9)
|
||||
#define I_WITH_DOT_ABOVE (0xA9)
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if (code==0xB5) ;
|
||||
else if (code == 0xB5)
|
||||
;
|
||||
else if ((EncISO_8859_3_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code=='I')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
if (code == 'I')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
else
|
||||
code = ENC_ISO_8859_3_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncISO_8859_3_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code=='i')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
else if (code==DOTLESS_i)
|
||||
if (code == 'i')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
else if (code == DOTLESS_i)
|
||||
code = 'I';
|
||||
else if (code>=0xB0 && code<=0xBF ) {
|
||||
else if (code >= 0xB0 && code <= 0xBF) {
|
||||
code -= 0x10;
|
||||
}
|
||||
else {
|
||||
|
@ -269,11 +270,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
|||
}
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_3, ISO_8859_3) = {
|
||||
|
@ -293,8 +294,8 @@ OnigEncodingDefine(iso_8859_3, ISO_8859_3) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-3", "ISO-8859-3")
|
||||
|
|
|
@ -232,31 +232,32 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
|||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncISO_8859_4_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_4_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if (code==0xA2) ;
|
||||
else if (code == 0xA2)
|
||||
;
|
||||
else if ((EncISO_8859_4_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code>=0xA0&&code<=0xBF) {
|
||||
if (code==0xBF)
|
||||
if (code >= 0xA0 && code <= 0xBF) {
|
||||
if (code == 0xBF)
|
||||
code -= 0x02;
|
||||
else
|
||||
code -= 0x10;
|
||||
|
@ -265,11 +266,11 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
|||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_4, ISO_8859_4) = {
|
||||
|
@ -289,8 +290,8 @@ OnigEncodingDefine(iso_8859_4, ISO_8859_4) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-4", "ISO-8859-4")
|
||||
|
|
|
@ -210,35 +210,35 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
/*
 * case_map (ISO-8859-5): byte-by-byte case conversion driven by
 * EncISO_8859_5_CtypeTable.
 *
 * NOTE(review): this span is a rendered diff. Statements that the commit
 * reformatted appear twice (old spelling first, then the re-spaced one),
 * separated by `|` / `||||` residue lines.
 *
 * flagP      in/out; requested ONIGENC_CASE_* operations. ONIGENC_CASE_MODIFIED
 *            is OR-ed in whenever a byte is changed, and the final flag value
 *            is stored back through flagP before returning.
 * pp         in/out; read cursor, advanced by one byte per loop iteration.
 * end        input limit; the loop stops when *pp reaches it.
 * to/to_end  write cursor and its limit; the loop stops when to reaches to_end.
 * enc        not referenced in the body.
 *
 * Returns the number of bytes written (to - to_start).
 */
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
/* uppercase byte while DOWNCASE or FOLD is requested: lower it via the encoding's macro */
if ((EncISO_8859_5_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_5_TO_LOWER_CASE(code);
|
||||
}
|
||||
/* lowercase byte while UPCASE is requested */
else if ((EncISO_8859_5_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
/* bytes 0xF1..0xFF are upcased by subtracting 0x50; every other lowercase byte by subtracting 0x20 */
if (0xF1<=code && code<=0xFF)
|
||||
if (0xF1 <= code && code <= 0xFF)
|
||||
code -= 0x50;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
/* the XOR below clears TITLECASE (it is set when this runs), so the toggle happens at most once per call */
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
/* publish the updated flags (including MODIFIED) to the caller */
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_5, ISO_8859_5) = {
|
||||
|
@ -258,8 +258,8 @@ OnigEncodingDefine(iso_8859_5, ISO_8859_5) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-5", "ISO-8859-5")
|
||||
|
|
|
@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-6", "ISO-8859-6")
|
||||
|
||||
|
|
|
@ -206,58 +206,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
/*
 * case_map (ISO-8859-7): byte-by-byte case conversion driven by
 * EncISO_8859_7_CtypeTable, with hand-written exceptions for a few bytes.
 *
 * NOTE(review): this span is a rendered diff. Statements that the commit
 * reformatted appear twice (old spelling first, then the re-spaced one),
 * separated by `|` / `||||` residue lines.
 *
 * flagP      in/out; requested ONIGENC_CASE_* operations. ONIGENC_CASE_MODIFIED
 *            is OR-ed in whenever a byte is changed, and the final flag value
 *            is stored back through flagP before returning.
 * pp         in/out; read cursor, advanced by one byte per loop iteration.
 * end        input limit; the loop stops when *pp reaches it.
 * to/to_end  write cursor and its limit; the loop stops when to reaches to_end.
 * enc        not referenced in the body.
 *
 * Returns the number of bytes written (to - to_start).
 */
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
/* 0xF2 is handled by hand: UPCASE maps it to 0xD3, FOLD maps it to 0xF3, otherwise it is copied unchanged */
/* NOTE(review): presumably final sigma -> capital sigma / small sigma; confirm against the ISO-8859-7 chart */
if (code==0xF2) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == 0xF2) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = 0xD3;
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = 0xF3;
|
||||
}
|
||||
}
|
||||
/* uppercase byte while DOWNCASE or FOLD is requested: lower it via the encoding's macro */
else if ((EncISO_8859_7_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_ISO_8859_7_TO_LOWER_CASE(code);
|
||||
}
|
||||
/* empty statement on purpose: 0xC0 and 0xE0 never reach the upcasing branch below and are copied as-is */
else if (code==0xC0 || code==0xE0)
|
||||
;
|
||||
else if (code == 0xC0 || code == 0xE0)
|
||||
;
|
||||
/* lowercase byte while UPCASE is requested; the offset to subtract depends on the byte */
else if ((EncISO_8859_7_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
/* 0xDC -> 0xB6 */
if (code==0xDC) {
|
||||
code-=0x26;
|
||||
if (code == 0xDC) {
|
||||
code -= 0x26;
|
||||
}
|
||||
/* 0xDD..0xDF -> 0xB8..0xBA */
else if (code>=0xDD && code<=0xDF) {
|
||||
code-=0x25;
|
||||
else if (code >= 0xDD && code <= 0xDF) {
|
||||
code -= 0x25;
|
||||
}
|
||||
/* 0xFC -> 0xBC */
else if (code==0xFC) {
|
||||
code-=0x40;
|
||||
else if (code == 0xFC) {
|
||||
code -= 0x40;
|
||||
}
|
||||
/* 0xFD -> 0xBE, 0xFE -> 0xBF */
else if (code==0xFD || code==0xFE) {
|
||||
code-=0x3F;
|
||||
else if (code == 0xFD || code == 0xFE) {
|
||||
code -= 0x3F;
|
||||
}
|
||||
/* every remaining lowercase byte is upcased by subtracting 0x20 */
else {
|
||||
code-=0x20;
|
||||
code -= 0x20;
|
||||
}
|
||||
}
|
||||
*to++ = code;
|
||||
/* the XOR below clears TITLECASE (it is set when this runs), so the toggle happens at most once per call */
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
/* publish the updated flags (including MODIFIED) to the caller */
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_7, ISO_8859_7) = {
|
||||
|
@ -277,8 +277,8 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-7", "ISO-8859-7")
|
||||
|
|
|
@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-8", "ISO-8859-8")
|
||||
|
||||
|
|
|
@ -204,9 +204,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -216,53 +216,54 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
#define DOTLESS_i (0xFD)
|
||||
#define I_WITH_DOT_ABOVE (0xDD)
|
||||
/*
 * case_map (ISO-8859-9): byte-by-byte case conversion driven by
 * EncISO_8859_9_CtypeTable, with hand-written handling for SHARP_s, a set of
 * bytes that are always copied unchanged, and the dotted/dotless i pairs
 * (DOTLESS_i, I_WITH_DOT_ABOVE) selected by ONIGENC_CASE_FOLD_TURKISH_AZERI.
 *
 * NOTE(review): this span is a rendered diff. Statements that the commit
 * reformatted appear twice (old spelling first, then the re-spaced one),
 * separated by `|` / `||||` residue lines.
 *
 * flagP      in/out; requested ONIGENC_CASE_* operations. ONIGENC_CASE_MODIFIED
 *            is OR-ed in whenever a byte is changed, and the final flag value
 *            is stored back through flagP before returning.
 * pp         in/out; read cursor, advanced by one byte per loop iteration.
 * end        input limit; the loop stops when *pp reaches it.
 * to/to_end  write cursor and its limit; the loop stops when to reaches to_end.
 * enc        not referenced in the body.
 *
 * Returns the number of bytes written (to - to_start); this can exceed the
 * number of bytes read because SHARP_s may expand to two bytes.
 */
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
/* SHARP_s expands to two bytes: the first is written here, the second by the shared `*to++ = code` below */
/* NOTE(review): the loop guard only guarantees one free output slot; confirm callers leave headroom past to_end */
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
/* second byte is 's' when TITLECASE is set (giving "Ss"), otherwise 'S' (giving "SS") */
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
/* FOLD without UPCASE: emit "ss" */
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
/* empty statement on purpose: these four bytes skip both table-driven branches and are copied as-is */
else if (code==0xAA || code==0xB5 || code==0xBA || code==0xFF) ;
|
||||
else if (code == 0xAA || code == 0xB5 || code == 0xBA || code == 0xFF)
|
||||
;
|
||||
/* uppercase byte while DOWNCASE or FOLD is requested */
else if ((EncISO_8859_9_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
/* 'I' lowers to DOTLESS_i when the Turkish/Azeri flag is set, to plain 'i' otherwise */
if (code=='I')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
if (code == 'I')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
else
|
||||
code = ENC_ISO_8859_9_TO_LOWER_CASE(code);
|
||||
}
|
||||
/* lowercase byte while UPCASE is requested */
else if ((EncISO_8859_9_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
/* 'i' raises to I_WITH_DOT_ABOVE when the Turkish/Azeri flag is set, to plain 'I' otherwise */
if (code=='i')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
else if (code==DOTLESS_i)
|
||||
if (code == 'i')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
/* DOTLESS_i always raises to plain 'I', regardless of the Turkish/Azeri flag */
else if (code == DOTLESS_i)
|
||||
code = 'I';
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
/* the XOR below clears TITLECASE (it is set when this runs), so the toggle happens at most once per call */
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
/* publish the updated flags (including MODIFIED) to the caller */
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(iso_8859_9, ISO_8859_9) = {
|
||||
|
@ -282,8 +283,8 @@ OnigEncodingDefine(iso_8859_9, ISO_8859_9) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("ISO8859-9", "ISO-8859-9")
|
||||
|
|
|
@ -214,9 +214,8 @@ OnigEncodingDefine(koi8_r, KOI8_R) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("CP878", "KOI8-R")
|
||||
|
||||
|
|
|
@ -218,7 +218,7 @@ OnigEncodingDefine(koi8_u, KOI8_U) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
};
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
mktable.c
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -31,7 +31,10 @@
|
|||
#include <stdio.h>
|
||||
#include <locale.h>
|
||||
|
||||
#ifndef __USE_ISOC99
|
||||
#define __USE_ISOC99
|
||||
#endif
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "regenc.h"
|
||||
|
@ -1108,11 +1111,13 @@ static int exec(FILE* fp, ENC_INFO* einfo)
|
|||
#define NCOL 8
|
||||
|
||||
int c, val, enc;
|
||||
int r;
|
||||
|
||||
enc = einfo->num;
|
||||
|
||||
fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
|
||||
einfo->name);
|
||||
r = fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
|
||||
einfo->name);
|
||||
if (r < 0) return -1;
|
||||
|
||||
for (c = 0; c < 256; c++) {
|
||||
val = 0;
|
||||
|
@ -1131,20 +1136,33 @@ static int exec(FILE* fp, ENC_INFO* einfo)
|
|||
if (IsWord (enc, c)) val |= BIT_CTYPE_WORD;
|
||||
if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII;
|
||||
|
||||
if (c % NCOL == 0) fputs(" ", fp);
|
||||
fprintf(fp, "0x%04x", val);
|
||||
if (c != 255) fputs(",", fp);
|
||||
if (c % NCOL == 0) {
|
||||
r = fputs(" ", fp);
|
||||
if (r < 0) return -1;
|
||||
}
|
||||
r = fprintf(fp, "0x%04x", val);
|
||||
if (r < 0) return -1;
|
||||
|
||||
if (c != 255) {
|
||||
r = fputs(",", fp);
|
||||
if (r < 0) return -1;
|
||||
}
|
||||
if (c != 0 && c % NCOL == (NCOL-1))
|
||||
fputs("\n", fp);
|
||||
r = fputs("\n", fp);
|
||||
else
|
||||
fputs(" ", fp);
|
||||
r = fputs(" ", fp);
|
||||
|
||||
if (r < 0) return -1;
|
||||
}
|
||||
fprintf(fp, "};\n");
|
||||
r = fprintf(fp, "};\n");
|
||||
if (r < 0) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
|
||||
{
|
||||
int r;
|
||||
int i;
|
||||
FILE* fp = stdout;
|
||||
|
||||
|
@ -1155,7 +1173,11 @@ extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
|
|||
/* setlocale(LC_ALL, "fr_FR.iso88591"); */
|
||||
|
||||
for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) {
|
||||
exec(fp, &Info[i]);
|
||||
r = exec(fp, &Info[i]);
|
||||
if (r < 0) {
|
||||
fprintf(stderr, "FAIL exec(): %d\n", r);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
#include "regenc.h"
|
||||
|
||||
static const int EncLen_SJIS[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
|
@ -563,9 +563,9 @@ OnigEncodingDefine(shift_jis, Shift_JIS) = {
|
|||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
/*
|
||||
* Name: Shift_JIS
|
||||
|
|
248
enc/unicode.c
248
enc/unicode.c
|
@ -139,17 +139,17 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
|||
|
||||
/* macros related to ONIGENC_CASE flags */
|
||||
/* defined here because not used in other files */
|
||||
#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_IS_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL)
|
||||
#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE | ONIGENC_CASE_IS_TITLECASE | ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL)
|
||||
|
||||
/* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */
|
||||
#define SpecialsLengthOffset 25 /* needs to be higher than the 22 bits used for Unicode codepoints */
|
||||
#define SpecialsLengthExtract(n) ((n)>>SpecialsLengthOffset)
|
||||
#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1))
|
||||
#define SpecialsLengthEncode(n) ((n)<<SpecialsLengthOffset)
|
||||
#define SpecialsLengthExtract(n) ((n) >> SpecialsLengthOffset)
|
||||
#define SpecialsCodepointExtract(n) ((n) & ((1 << SpecialsLengthOffset) - 1))
|
||||
#define SpecialsLengthEncode(n) ((n) << SpecialsLengthOffset)
|
||||
|
||||
#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexShift)
|
||||
#define OnigSpecialIndexEncode(n) ((n)<<OnigSpecialIndexShift)
|
||||
#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
|
||||
#define OnigSpecialIndexMask (((1 << OnigSpecialIndexWidth) - 1) << OnigSpecialIndexShift)
|
||||
#define OnigSpecialIndexEncode(n) ((n) << OnigSpecialIndexShift)
|
||||
#define OnigSpecialIndexDecode(n) (((n) & OnigSpecialIndexMask) >> OnigSpecialIndexShift)
|
||||
|
||||
/* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
|
||||
#define U ONIGENC_CASE_UPCASE
|
||||
|
@ -660,128 +660,130 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
|
|||
OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
int codepoint_length;
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
int codepoint_length;
|
||||
|
||||
to_end -= CASE_MAPPING_SLACK;
|
||||
/* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
|
||||
* ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
|
||||
flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;
|
||||
to_end -= CASE_MAPPING_SLACK;
|
||||
/* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
|
||||
* ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
|
||||
flags |= (flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) << ONIGENC_CASE_SPECIAL_OFFSET;
|
||||
|
||||
while (*pp<end && to<=to_end) {
|
||||
codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
|
||||
if (codepoint_length < 0)
|
||||
return codepoint_length; /* encoding invalid */
|
||||
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
|
||||
*pp += codepoint_length;
|
||||
while (*pp < end && to <= to_end) {
|
||||
codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
|
||||
if (codepoint_length < 0)
|
||||
return codepoint_length; /* encoding invalid */
|
||||
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
|
||||
*pp += codepoint_length;
|
||||
|
||||
if (code<='z') { /* ASCII comes first */
|
||||
if (code>='a' && code<='z') {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
MODIFIED;
|
||||
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='i')
|
||||
code = I_WITH_DOT_ABOVE;
|
||||
else
|
||||
code += 'A'-'a';
|
||||
}
|
||||
}
|
||||
else if (code>='A' && code<='Z') {
|
||||
if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
|
||||
MODIFIED;
|
||||
if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='I')
|
||||
code = DOTLESS_i;
|
||||
else
|
||||
code += 'a'-'A';
|
||||
}
|
||||
}
|
||||
if (code <= 'z') { /* ASCII comes first */
|
||||
if (code >= 'a' && code <= 'z') {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
MODIFIED;
|
||||
if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i')
|
||||
code = I_WITH_DOT_ABOVE;
|
||||
else
|
||||
code += 'A' - 'a';
|
||||
}
|
||||
else if (!(flags&ONIGENC_CASE_ASCII_ONLY) && code>=0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
|
||||
const CodePointList3 *folded;
|
||||
|
||||
if (code==I_WITH_DOT_ABOVE) {
|
||||
if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
|
||||
MODIFIED;
|
||||
code = 'i';
|
||||
if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
code = DOT_ABOVE;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (code==DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
|
||||
if (flags&ONIGENC_CASE_UPCASE)
|
||||
MODIFIED, code = 'I';
|
||||
}
|
||||
else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
|
||||
if ((flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
|
||||
&& (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
|
||||
/* already Titlecase, no changes needed */
|
||||
}
|
||||
else if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
|
||||
const OnigCodePoint *next;
|
||||
int count;
|
||||
|
||||
MODIFIED;
|
||||
if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) { /* special */
|
||||
const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
|
||||
|
||||
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
|
||||
if ((flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))
|
||||
== (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
|
||||
goto SpecialsCopy;
|
||||
else /* swapCASE not needed */
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) { /* Titlecase available */
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
|
||||
goto SpecialsCopy;
|
||||
else /* Titlecase not needed */
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) {
|
||||
if (!(flags&ONIGENC_CASE_DOWN_SPECIAL))
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
/* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
|
||||
SpecialsCopy:
|
||||
count = SpecialsLengthExtract(*SpecialsStart);
|
||||
next = SpecialsStart;
|
||||
code = SpecialsCodepointExtract(*next++);
|
||||
}
|
||||
else { /* no specials */
|
||||
count = OnigCodePointCount(folded->n);
|
||||
next = folded->code;
|
||||
code = *next++;
|
||||
}
|
||||
if (count==1)
|
||||
;
|
||||
else if (count==2) {
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
code = *next;
|
||||
}
|
||||
else { /* count == 3 */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */
|
||||
&& flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
|
||||
MODIFIED;
|
||||
code = folded->code[(flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) ? 1 : 0];
|
||||
}
|
||||
}
|
||||
else if (code >= 'A' && code <= 'Z') {
|
||||
if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
|
||||
MODIFIED;
|
||||
if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'I')
|
||||
code = DOTLESS_i;
|
||||
else
|
||||
code += 'a' - 'A';
|
||||
}
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
/* switch from titlecase to lowercase for capitalize */
|
||||
if (flags & ONIGENC_CASE_TITLECASE)
|
||||
flags ^= (ONIGENC_CASE_UPCASE |ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE|
|
||||
ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL);
|
||||
}
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
else if (!(flags & ONIGENC_CASE_ASCII_ONLY) && code >= 0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
|
||||
const CodePointList3 *folded;
|
||||
|
||||
if (code == I_WITH_DOT_ABOVE) {
|
||||
if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
|
||||
MODIFIED;
|
||||
code = 'i';
|
||||
if (!(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
code = DOT_ABOVE;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (code == DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
MODIFIED;
|
||||
code = 'I';
|
||||
}
|
||||
}
|
||||
else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
|
||||
if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
|
||||
&& (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
|
||||
/* already Titlecase, no changes needed */
|
||||
}
|
||||
else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
|
||||
const OnigCodePoint *next;
|
||||
int count;
|
||||
|
||||
MODIFIED;
|
||||
if (flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_SPECIALS) { /* special */
|
||||
const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
|
||||
|
||||
if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
|
||||
if ((flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE))
|
||||
== (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
|
||||
goto SpecialsCopy;
|
||||
else /* swapCASE not needed */
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) { /* Titlecase available */
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
|
||||
goto SpecialsCopy;
|
||||
else /* Titlecase not needed */
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_DOWN_SPECIAL) {
|
||||
if (!(flags & ONIGENC_CASE_DOWN_SPECIAL))
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
/* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
|
||||
SpecialsCopy:
|
||||
count = SpecialsLengthExtract(*SpecialsStart);
|
||||
next = SpecialsStart;
|
||||
code = SpecialsCodepointExtract(*next++);
|
||||
}
|
||||
else { /* no specials */
|
||||
count = OnigCodePointCount(folded->n);
|
||||
next = folded->code;
|
||||
code = *next++;
|
||||
}
|
||||
if (count == 1)
|
||||
;
|
||||
else if (count == 2) {
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
code = *next;
|
||||
}
|
||||
else { /* count == 3 */
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
|
||||
code = *next;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */
|
||||
&& flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
|
||||
MODIFIED;
|
||||
code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 1 : 0];
|
||||
}
|
||||
}
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
/* switch from titlecase to lowercase for capitalize */
|
||||
if (flags & ONIGENC_CASE_TITLECASE)
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE |
|
||||
ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
|
|
@ -1,7 +1,10 @@
|
|||
#include "regenc.h"
|
||||
#include "encindex.h"
|
||||
#ifdef RUBY
|
||||
# include "encindex.h"
|
||||
#endif
|
||||
|
||||
#ifndef ENCINDEX_US_ASCII
|
||||
#define ENCINDEX_US_ASCII 0
|
||||
# define ENCINDEX_US_ASCII 0
|
||||
#endif
|
||||
|
||||
static int
|
||||
|
@ -29,9 +32,9 @@ OnigEncodingDefine(us_ascii, US_ASCII) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
ENCINDEX_US_ASCII,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_single_byte_ascii_only_case_map,
|
||||
};
|
||||
ENC_ALIAS("ASCII", "US-ASCII")
|
||||
ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII")
|
||||
|
|
|
@ -249,8 +249,8 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = {
|
|||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf16be_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
onigenc_unicode_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
onigenc_unicode_case_map,
|
||||
};
|
||||
ENC_ALIAS("UCS-2BE", "UTF-16BE")
|
||||
|
|
|
@ -242,7 +242,7 @@ OnigEncodingDefine(utf_16le, UTF_16LE) = {
|
|||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf16le_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
onigenc_unicode_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
onigenc_unicode_case_map,
|
||||
};
|
||||
|
|
|
@ -187,9 +187,8 @@ OnigEncodingDefine(utf_32be, UTF_32BE) = {
|
|||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf32be_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
onigenc_unicode_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
onigenc_unicode_case_map,
|
||||
};
|
||||
ENC_ALIAS("UCS-4BE", "UTF-32BE")
|
||||
|
||||
|
|
|
@ -187,8 +187,8 @@ OnigEncodingDefine(utf_32le, UTF_32LE) = {
|
|||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf32le_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
onigenc_unicode_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
onigenc_unicode_case_map,
|
||||
};
|
||||
ENC_ALIAS("UCS-4LE", "UTF-32LE")
|
||||
|
|
14
enc/utf_8.c
14
enc/utf_8.c
|
@ -28,17 +28,20 @@
|
|||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
#include "encindex.h"
|
||||
#ifdef RUBY
|
||||
# include "encindex.h"
|
||||
#endif
|
||||
|
||||
#ifndef ENCINDEX_UTF_8
|
||||
#define ENCINDEX_UTF_8 0
|
||||
# define ENCINDEX_UTF_8 0
|
||||
#endif
|
||||
|
||||
#define USE_INVALID_CODE_SCHEME
|
||||
|
||||
#ifdef USE_INVALID_CODE_SCHEME
|
||||
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
|
||||
#define INVALID_CODE_FE 0xfffffffe
|
||||
#define INVALID_CODE_FF 0xffffffff
|
||||
# define INVALID_CODE_FE 0xfffffffe
|
||||
# define INVALID_CODE_FF 0xffffffff
|
||||
#endif
|
||||
#define VALID_CODE_LIMIT 0x0010ffff
|
||||
|
||||
|
@ -428,9 +431,9 @@ OnigEncodingDefine(utf_8, UTF_8) = {
|
|||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
onigenc_unicode_case_map,
|
||||
ENCINDEX_UTF_8,
|
||||
ONIGENC_FLAG_UNICODE,
|
||||
onigenc_unicode_case_map,
|
||||
};
|
||||
ENC_ALIAS("CP65001", "UTF-8")
|
||||
|
||||
|
@ -444,4 +447,3 @@ ENC_ALIAS("CP65001", "UTF-8")
|
|||
ENC_REPLICATE("UTF8-MAC", "UTF-8")
|
||||
ENC_ALIAS("UTF-8-MAC", "UTF8-MAC")
|
||||
ENC_ALIAS("UTF-8-HFS", "UTF8-MAC") /* Emacs 23.2 */
|
||||
|
||||
|
|
|
@ -191,40 +191,41 @@ cp1250_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncCP1250_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_CP1250_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if (code==0xB5) ;
|
||||
else if (code == 0xB5)
|
||||
;
|
||||
else if ((EncCP1250_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code==0xB9)
|
||||
if (code == 0xB9)
|
||||
code = 0xA5;
|
||||
else if (code==0xBE)
|
||||
else if (code == 0xBE)
|
||||
code = 0xBC;
|
||||
else if (code >= 0x8A && code <= 0xBF && code!=0xB9)
|
||||
code -= 0x10;
|
||||
|
@ -232,11 +233,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
|||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(windows_1250, Windows_1250) = {
|
||||
|
@ -256,9 +257,9 @@ OnigEncodingDefine(windows_1250, Windows_1250) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
/*
|
||||
* Name: windows-1250
|
||||
|
|
|
@ -181,49 +181,50 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if ((EncCP1251_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_CP1251_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if (code==0xB5) ;
|
||||
else if (code == 0xB5)
|
||||
;
|
||||
else if ((EncCP1251_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if ((0x61<=code && code<=0x7A) || (0xE0<=code && code<=0xFF))
|
||||
if ((0x61 <= code && code <= 0x7A) || (0xE0 <= code && code <= 0xFF))
|
||||
code -= 0x20;
|
||||
else if (code==0xA2 || code==0xB3 || code==0xBE)
|
||||
else if (code == 0xA2 || code == 0xB3 || code == 0xBE)
|
||||
code -= 0x01;
|
||||
else if (code==0x83)
|
||||
else if (code == 0x83)
|
||||
code = 0x81;
|
||||
else if (code==0xBC)
|
||||
else if (code == 0xBC)
|
||||
code = 0xA3;
|
||||
else if (code==0xB4)
|
||||
else if (code == 0xB4)
|
||||
code = 0xA5;
|
||||
else
|
||||
code -= 0x10;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(windows_1251, Windows_1251) = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"Windows-1251", /* name */
|
||||
"Windows-1251",/* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
|
@ -238,9 +239,9 @@ OnigEncodingDefine(windows_1251, Windows_1251) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
/*
|
||||
* Name: windows-1251
|
||||
|
|
|
@ -190,42 +190,43 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
|||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncCP1252_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_CP1252_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if (code==0x83 || code==0xAA || code==0xBA || code==0xB5) ;
|
||||
else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5)
|
||||
;
|
||||
else if ((EncCP1252_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code==0x9A || code==0x9C || code==0x9E)
|
||||
if (code == 0x9A || code == 0x9C || code == 0x9E)
|
||||
code -= 0x10;
|
||||
else if (code==0xFF)
|
||||
else if (code == 0xFF)
|
||||
code -= 0x60;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(windows_1252, Windows_1252) = {
|
||||
|
@ -245,9 +246,9 @@ OnigEncodingDefine(windows_1252, Windows_1252) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
/*
|
||||
* Name: windows-1252
|
||||
|
|
|
@ -214,62 +214,63 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
}
|
||||
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==0xF2) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == 0xF2) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = 0xD3;
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = 0xF3;
|
||||
}
|
||||
}
|
||||
else if (code==0xB5) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
else if (code == 0xB5) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = 0xCC;
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = 0xEC;
|
||||
}
|
||||
}
|
||||
else if (code==0xC0 || code==0xE0 || code==0xB6) ;
|
||||
else if (code == 0xC0 || code == 0xE0 || code == 0xB6)
|
||||
;
|
||||
else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code = ENC_CP1253_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code==0xDC)
|
||||
if (code == 0xDC)
|
||||
code = 0xA2;
|
||||
else if (code>=0xDD && code<=0xDF)
|
||||
else if (code >= 0xDD && code <= 0xDF)
|
||||
code -= 0x25;
|
||||
else if (code==0xFC)
|
||||
else if (code == 0xFC)
|
||||
code = 0xBC;
|
||||
else if (code==0xFD || code==0xFE)
|
||||
else if (code == 0xFD || code == 0xFE)
|
||||
code -= 0x3F;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(windows_1253, Windows_1253) = {
|
||||
|
@ -289,8 +290,8 @@ OnigEncodingDefine(windows_1253, Windows_1253) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("CP1253", "Windows-1253")
|
||||
|
|
|
@ -212,9 +212,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -232,49 +232,50 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
|||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if ((EncCP1254_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code=='I')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
if (code == 'I')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
else
|
||||
code = ENC_CP1254_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if (code==0x83 || code==0xAA || code==0xBA || code==0xB5) ;
|
||||
else if ((EncCP1254_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5)
|
||||
;
|
||||
else if ((EncCP1254_CtypeTable[code] & BIT_CTYPE_LOWER)
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code=='i')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
else if (code==DOTLESS_i)
|
||||
if (code == 'i')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
else if (code == DOTLESS_i)
|
||||
code = 'I';
|
||||
else if (code==0x9A || code==0x9C || code==0x9E)
|
||||
else if (code == 0x9A || code == 0x9C || code == 0x9E)
|
||||
code -= 0x10;
|
||||
else if (code==0xFF)
|
||||
else if (code == 0xFF)
|
||||
code -= 0x60;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(windows_1254, Windows_1254) = {
|
||||
|
@ -294,8 +295,8 @@ OnigEncodingDefine(windows_1254, Windows_1254) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
ENC_ALIAS("CP1254", "Windows-1254")
|
||||
|
|
|
@ -216,9 +216,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
|
|||
|
||||
static int
|
||||
get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[],
|
||||
OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
return onigenc_get_case_fold_codes_by_str_with_map(
|
||||
numberof(CaseFoldMap), CaseFoldMap, 1,
|
||||
|
@ -228,55 +228,56 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
#define DOTLESS_i (0xB9)
|
||||
#define I_WITH_DOT_ABOVE (0xA9)
|
||||
static int
|
||||
case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
if (code==SHARP_s) {
|
||||
if (flags&ONIGENC_CASE_UPCASE) {
|
||||
if (code == SHARP_s) {
|
||||
if (flags & ONIGENC_CASE_UPCASE) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 'S';
|
||||
code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
|
||||
}
|
||||
else if (flags&ONIGENC_CASE_FOLD) {
|
||||
else if (flags & ONIGENC_CASE_FOLD) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
*to++ = 's';
|
||||
code = 's';
|
||||
}
|
||||
}
|
||||
else if (code==0xB5) ;
|
||||
else if (code == 0xB5)
|
||||
;
|
||||
else if ((EncCP1252_CtypeTable[code] & BIT_CTYPE_UPPER)
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
|
||||
&& (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code=='I')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
if (code == 'I')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
|
||||
else
|
||||
code = ENC_CP1252_TO_LOWER_CASE(code);
|
||||
}
|
||||
else if ((EncCP1252_CtypeTable[code]&BIT_CTYPE_LOWER)
|
||||
&& (flags&ONIGENC_CASE_UPCASE)) {
|
||||
&& (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
if (code=='i')
|
||||
code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
else if (code==DOTLESS_i)
|
||||
if (code == 'i')
|
||||
code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
|
||||
else if (code == DOTLESS_i)
|
||||
code = 'I';
|
||||
else if (code>=0xB0 && code<=0xBF )
|
||||
else if (code >= 0xB0 && code <= 0xBF)
|
||||
code -= 0x10;
|
||||
else
|
||||
code -= 0x20;
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
OnigEncodingDefine(windows_1257, Windows_1257) = {
|
||||
|
@ -296,9 +297,8 @@ OnigEncodingDefine(windows_1257, Windows_1257) = {
|
|||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
case_map,
|
||||
};
|
||||
|
||||
ENC_ALIAS("CP1257", "Windows-1257")
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
|
||||
OnigEncodingDefine(windows_31j, Windows_31J) = {
|
||||
mbc_enc_len,
|
||||
"Windows-31J", /* name */
|
||||
"Windows-31J", /* name */
|
||||
2, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
|
@ -48,9 +48,9 @@ OnigEncodingDefine(windows_31j, Windows_31J) = {
|
|||
get_ctype_code_range,
|
||||
left_adjust_char_head,
|
||||
is_allowed_reverse_match,
|
||||
onigenc_ascii_only_case_map,
|
||||
0,
|
||||
ONIGENC_FLAG_NONE,
|
||||
onigenc_ascii_only_case_map,
|
||||
};
|
||||
/*
|
||||
* Name: Windows-31J
|
||||
|
|
|
@ -0,0 +1,934 @@
|
|||
#ifndef ONIGMO_H
|
||||
#define ONIGMO_H
|
||||
/**********************************************************************
|
||||
onigmo.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
# if 0
|
||||
} /* satisfy cc-mode */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define ONIGMO_VERSION_MAJOR 6
|
||||
#define ONIGMO_VERSION_MINOR 0
|
||||
#define ONIGMO_VERSION_TEENY 0
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
# ifdef RUBY_EXTERN
|
||||
# define ONIG_EXTERN RUBY_EXTERN
|
||||
# else
|
||||
# if defined(_WIN32) && !defined(__GNUC__)
|
||||
# if defined(EXPORT) || defined(RUBY_EXPORT)
|
||||
# define ONIG_EXTERN extern __declspec(dllexport)
|
||||
# else
|
||||
# define ONIG_EXTERN extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
# define ONIG_EXTERN extern
|
||||
#endif
|
||||
|
||||
#ifndef RUBY
|
||||
# ifndef RUBY_SYMBOL_EXPORT_BEGIN
|
||||
# define RUBY_SYMBOL_EXPORT_BEGIN
|
||||
# define RUBY_SYMBOL_EXPORT_END
|
||||
# endif
|
||||
#endif
|
||||
|
||||
RUBY_SYMBOL_EXPORT_BEGIN
|
||||
|
||||
#include <stddef.h> /* for size_t */
|
||||
|
||||
/* PART: character encoding */
|
||||
|
||||
#ifndef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
# define UChar OnigUChar
|
||||
#endif
|
||||
|
||||
typedef unsigned char OnigUChar;
|
||||
typedef unsigned int OnigCodePoint;
|
||||
typedef unsigned int OnigCtype;
|
||||
typedef size_t OnigDistance;
|
||||
typedef ptrdiff_t OnigPosition;
|
||||
|
||||
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
|
||||
|
||||
/*
|
||||
* Onig casefold/case mapping flags and related definitions
|
||||
*
|
||||
* Subfields (starting with 0 at LSB):
|
||||
* 0-2: Code point count in casefold.h
|
||||
* 3-12: Index into SpecialCaseMapping array in casefold.h
|
||||
* 13-22: Case folding/mapping flags
|
||||
*/
|
||||
typedef unsigned int OnigCaseFoldType; /* case fold flag */
|
||||
|
||||
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
|
||||
|
||||
/* bits for actual code point count; 3 bits is more than enough, currently only 2 used */
|
||||
#define OnigCodePointMaskWidth 3
|
||||
#define OnigCodePointMask ((1<<OnigCodePointMaskWidth)-1)
|
||||
#define OnigCodePointCount(n) ((n)&OnigCodePointMask)
|
||||
#define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask)
|
||||
|
||||
/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */
|
||||
/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */
|
||||
|
||||
/* bits for index into table with separate titlecase mappings */
|
||||
/* 10 bits provide 1024 values */
|
||||
#define OnigSpecialIndexShift 3
|
||||
#define OnigSpecialIndexWidth 10
|
||||
|
||||
#define ONIGENC_CASE_UPCASE (1<<13) /* has/needs uppercase mapping */
|
||||
#define ONIGENC_CASE_DOWNCASE (1<<14) /* has/needs lowercase mapping */
|
||||
#define ONIGENC_CASE_TITLECASE (1<<15) /* has/needs (special) titlecase mapping */
|
||||
#define ONIGENC_CASE_SPECIAL_OFFSET 3 /* offset in bits from ONIGENC_CASE to ONIGENC_CASE_SPECIAL */
|
||||
#define ONIGENC_CASE_UP_SPECIAL (1<<16) /* has special upcase mapping */
|
||||
#define ONIGENC_CASE_DOWN_SPECIAL (1<<17) /* has special downcase mapping */
|
||||
#define ONIGENC_CASE_MODIFIED (1<<18) /* data has been modified */
|
||||
#define ONIGENC_CASE_FOLD (1<<19) /* has/needs case folding */
|
||||
|
||||
#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
|
||||
|
||||
#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) /* needs Lithuanian-specific mapping */
|
||||
#define ONIGENC_CASE_ASCII_ONLY (1<<22) /* only modify ASCII range */
|
||||
#define ONIGENC_CASE_IS_TITLECASE (1<<23) /* character itself is already titlecase */
|
||||
|
||||
#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
|
||||
|
||||
#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
|
||||
#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
|
||||
|
||||
|
||||
#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
|
||||
/* 13 => Unicode:0x1ffc */
|
||||
|
||||
/* code range */
|
||||
/* Reads element 0 of a code range table as int. `range` is parenthesized so
 * that pointer expressions such as `tbl + 1` index the intended element. */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )(range)[0])
|
||||
/* Element (i*2 + 1) of a code range table: the start of the i-th range.
 * `range` is parenthesized so pointer expressions expand correctly. */
#define ONIGENC_CODE_RANGE_FROM(range,i) (range)[((i)*2) + 1]
|
||||
/* Element (i*2 + 2) of a code range table: the end of the i-th range.
 * `range` is parenthesized so pointer expressions expand correctly. */
#define ONIGENC_CODE_RANGE_TO(range,i) (range)[((i)*2) + 2]
|
||||
|
||||
/* One case-fold result: a byte length plus a short sequence of code points.
 * The code[] array holds at most ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN entries. */
typedef struct {
|
||||
int byte_len; /* byte length of the original (argument) character(s) */
|
||||
int code_len; /* number of code points; presumably the count of valid entries in code[] -- confirm */
|
||||
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
|
||||
} OnigCaseFoldCodeItem;
|
||||
|
||||
/* Six OnigCodePoint slots, one per named meta character.
 * NOTE(review): judging by the field names these are the code points a syntax
 * uses for escape, any-char and the repetition operators -- confirm against
 * the code that reads this table. */
typedef struct {
|
||||
OnigCodePoint esc;
|
||||
OnigCodePoint anychar;
|
||||
OnigCodePoint anytime;
|
||||
OnigCodePoint zero_or_one_time;
|
||||
OnigCodePoint one_or_more_time;
|
||||
OnigCodePoint anychar_anytime;
|
||||
} OnigMetaCharTableType;
|
||||
|
||||
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
|
||||
|
||||
/* Per-encoding descriptor: a name, min/max encoded length, a table of
 * callbacks, a Ruby encoding index and a flags word. Every callback receives
 * the descriptor itself as its `enc` argument.
 * Field order matters: the encoding tables visible elsewhere in this file list
 * their members positionally (..., case_map, encoding index, flags), which
 * appears to initialize this struct -- do not reorder fields without updating
 * those tables. */
typedef struct OnigEncodingTypeST {
|
||||
int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
|
||||
const char* name;
|
||||
int max_enc_len;
|
||||
int min_enc_len;
|
||||
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
|
||||
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
|
||||
int (*code_to_mbclen)(OnigCodePoint code, const struct OnigEncodingTypeST* enc);
|
||||
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, const struct OnigEncodingTypeST* enc);
|
||||
int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, const struct OnigEncodingTypeST* enc);
|
||||
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, const struct OnigEncodingTypeST* enc);
|
||||
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], const struct OnigEncodingTypeST* enc);
|
||||
/* Unlike the other callbacks, this one takes `enc` as its first parameter. */
int (*property_name_to_ctype)(const struct OnigEncodingTypeST* enc, const OnigUChar* p, const OnigUChar* end);
|
||||
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, const struct OnigEncodingTypeST* enc);
|
||||
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], const struct OnigEncodingTypeST* enc);
|
||||
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
|
||||
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
|
||||
/* Case mapping callback. The implementations visible in this file consume
 * input through *pp up to `end`, write into `to` (bounded by `to_end`), store
 * the updated flags back through flagP and return the number of bytes written. */
int (*case_map)(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
|
||||
int ruby_encoding_index;
|
||||
unsigned int flags;
|
||||
} OnigEncodingType;
|
||||
|
||||
typedef const OnigEncodingType* OnigEncoding;
|
||||
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingASCII;
|
||||
#ifndef RUBY
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_1;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_2;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_3;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_4;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_5;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_6;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_7;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_8;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_9;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_10;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_11;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_13;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_14;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_15;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_16;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_8;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_16BE;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_16LE;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_32BE;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_32LE;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_JP;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_TW;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_KR;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_CN;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingShift_JIS;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_31J;
|
||||
/* ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8; */
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8_R;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8_U;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1250;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1251;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1252;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1253;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1254;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1257;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingBIG5;
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingGB18030;
|
||||
#endif /* RUBY */
|
||||
|
||||
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
|
||||
#ifndef RUBY
|
||||
# define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
|
||||
# define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
|
||||
# define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
|
||||
# define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
|
||||
# define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
|
||||
# define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
|
||||
# define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
|
||||
# define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
|
||||
# define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
|
||||
# define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
|
||||
# define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
|
||||
# define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
|
||||
# define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
|
||||
# define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
|
||||
# define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
|
||||
# define ONIG_ENCODING_UTF_8 (&OnigEncodingUTF_8)
|
||||
# define ONIG_ENCODING_UTF_16BE (&OnigEncodingUTF_16BE)
|
||||
# define ONIG_ENCODING_UTF_16LE (&OnigEncodingUTF_16LE)
|
||||
# define ONIG_ENCODING_UTF_32BE (&OnigEncodingUTF_32BE)
|
||||
# define ONIG_ENCODING_UTF_32LE (&OnigEncodingUTF_32LE)
|
||||
# define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
|
||||
# define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
|
||||
# define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
|
||||
# define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
|
||||
# define ONIG_ENCODING_SHIFT_JIS (&OnigEncodingShift_JIS)
|
||||
# define ONIG_ENCODING_WINDOWS_31J (&OnigEncodingWindows_31J)
|
||||
/* # define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) */
|
||||
# define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
|
||||
# define ONIG_ENCODING_KOI8_U (&OnigEncodingKOI8_U)
|
||||
# define ONIG_ENCODING_WINDOWS_1250 (&OnigEncodingWindows_1250)
|
||||
# define ONIG_ENCODING_WINDOWS_1251 (&OnigEncodingWindows_1251)
|
||||
# define ONIG_ENCODING_WINDOWS_1252 (&OnigEncodingWindows_1252)
|
||||
# define ONIG_ENCODING_WINDOWS_1253 (&OnigEncodingWindows_1253)
|
||||
# define ONIG_ENCODING_WINDOWS_1254 (&OnigEncodingWindows_1254)
|
||||
# define ONIG_ENCODING_WINDOWS_1257 (&OnigEncodingWindows_1257)
|
||||
# define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
|
||||
# define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
|
||||
|
||||
/* old names */
|
||||
# define ONIG_ENCODING_SJIS ONIG_ENCODING_SHIFT_JIS
|
||||
# define ONIG_ENCODING_CP932 ONIG_ENCODING_WINDOWS_31J
|
||||
# define ONIG_ENCODING_CP1250 ONIG_ENCODING_WINDOWS_1250
|
||||
# define ONIG_ENCODING_CP1251 ONIG_ENCODING_WINDOWS_1251
|
||||
# define ONIG_ENCODING_CP1252 ONIG_ENCODING_WINDOWS_1252
|
||||
# define ONIG_ENCODING_CP1253 ONIG_ENCODING_WINDOWS_1253
|
||||
# define ONIG_ENCODING_CP1254 ONIG_ENCODING_WINDOWS_1254
|
||||
# define ONIG_ENCODING_CP1257 ONIG_ENCODING_WINDOWS_1257
|
||||
# define ONIG_ENCODING_UTF8 ONIG_ENCODING_UTF_8
|
||||
# define ONIG_ENCODING_UTF16_BE ONIG_ENCODING_UTF_16BE
|
||||
# define ONIG_ENCODING_UTF16_LE ONIG_ENCODING_UTF_16LE
|
||||
# define ONIG_ENCODING_UTF32_BE ONIG_ENCODING_UTF_32BE
|
||||
# define ONIG_ENCODING_UTF32_LE ONIG_ENCODING_UTF_32LE
|
||||
#endif /* RUBY */
|
||||
|
||||
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
|
||||
|
||||
/* this declaration needs to be here because it is used in string.c in Ruby */
|
||||
ONIG_EXTERN
|
||||
int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
|
||||
|
||||
|
||||
/* work size */
|
||||
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
|
||||
#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
|
||||
/* 18: 6(max-byte) * 3(case-fold chars) */
|
||||
|
||||
/* character types */
|
||||
#define ONIGENC_CTYPE_NEWLINE 0
|
||||
#define ONIGENC_CTYPE_ALPHA 1
|
||||
#define ONIGENC_CTYPE_BLANK 2
|
||||
#define ONIGENC_CTYPE_CNTRL 3
|
||||
#define ONIGENC_CTYPE_DIGIT 4
|
||||
#define ONIGENC_CTYPE_GRAPH 5
|
||||
#define ONIGENC_CTYPE_LOWER 6
|
||||
#define ONIGENC_CTYPE_PRINT 7
|
||||
#define ONIGENC_CTYPE_PUNCT 8
|
||||
#define ONIGENC_CTYPE_SPACE 9
|
||||
#define ONIGENC_CTYPE_UPPER 10
|
||||
#define ONIGENC_CTYPE_XDIGIT 11
|
||||
#define ONIGENC_CTYPE_WORD 12
|
||||
#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
|
||||
#define ONIGENC_CTYPE_ASCII 14
|
||||
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
|
||||
|
||||
/* flags */
|
||||
#define ONIGENC_FLAG_NONE 0U
|
||||
#define ONIGENC_FLAG_UNICODE 1U
|
||||
|
||||
#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
|
||||
|
||||
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
|
||||
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
|
||||
#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
|
||||
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
|
||||
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
|
||||
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
|
||||
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
|
||||
#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
|
||||
onigenc_ascii_is_code_ctype( \
|
||||
ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)
|
||||
#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
|
||||
|
||||
|
||||
#define ONIGENC_NAME(enc) ((enc)->name)
|
||||
|
||||
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
|
||||
(enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
(enc)->is_allowed_reverse_match(s,end,enc)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
|
||||
(enc)->left_adjust_char_head(start, s, end, enc)
|
||||
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
|
||||
(enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
|
||||
(enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,end,n) \
|
||||
onigenc_step_back((enc),(start),(s),(end),(n))
|
||||
|
||||
/*
 * Helpers for a character-length result packed into a single int.
 * Three disjoint value ranges are used:
 *   r > 0   : "char found";  r itself is the length      (CHARFOUND)
 *   r == -1 : "invalid"                                   (INVALID)
 *   r < -1  : "need more";   the count n is stored as -1-n (NEEDMORE)
 * The CONSTRUCT_* macros build such a value, the *_P macros test which
 * range a value is in, and the *_LEN macros recover the stored number.
 * NOTE(review): presumably this is the return convention of the
 * precise_mbc_enc_len encoding callback and n counts bytes -- confirm.
 */
#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
|
||||
#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
|
||||
#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
|
||||
|
||||
#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
|
||||
#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
|
||||
|
||||
/* NEEDMORE(n) maps n to -1-n, so any n >= 1 lands strictly below -1. */
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
|
||||
#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
|
||||
/* Inverse of ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE: -1-(-1-n) == n. */
#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
|
||||
|
||||
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
|
||||
|
||||
ONIG_EXTERN
|
||||
int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
|
||||
|
||||
#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
|
||||
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
||||
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
|
||||
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
|
||||
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
|
||||
(enc)->property_name_to_ctype(enc,p,end)
|
||||
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
|
||||
|
||||
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
|
||||
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
|
||||
#define ONIGENC_IS_CODE_PRINT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
|
||||
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
|
||||
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
|
||||
#define ONIGENC_IS_CODE_LOWER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
|
||||
#define ONIGENC_IS_CODE_UPPER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
|
||||
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
|
||||
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
|
||||
#define ONIGENC_IS_CODE_SPACE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
|
||||
#define ONIGENC_IS_CODE_BLANK(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
|
||||
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
|
||||
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
|
||||
#define ONIGENC_IS_CODE_WORD(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
|
||||
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
|
||||
(enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
|
||||
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_step_back(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n);
|
||||
|
||||
|
||||
/* encoding API */
|
||||
ONIG_EXTERN
|
||||
int onigenc_init(void);
|
||||
ONIG_EXTERN
|
||||
int onigenc_set_default_encoding(OnigEncoding enc);
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onigenc_get_default_encoding(void);
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev);
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen(OnigEncoding enc, const OnigUChar* p, const OnigUChar* end);
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen_null(OnigEncoding enc, const OnigUChar* p);
|
||||
ONIG_EXTERN
|
||||
int onigenc_str_bytelen_null(OnigEncoding enc, const OnigUChar* p);
|
||||
|
||||
|
||||
|
||||
/* PART: regular expression */
|
||||
|
||||
/* config parameters */
|
||||
#define ONIG_NREGION 10
|
||||
#define ONIG_MAX_CAPTURE_GROUP_NUM 32767
|
||||
#define ONIG_MAX_BACKREF_NUM 1000
|
||||
#define ONIG_MAX_REPEAT_NUM 100000
|
||||
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
|
||||
/* constants */
|
||||
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
|
||||
|
||||
typedef unsigned int OnigOptionType;
|
||||
|
||||
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
|
||||
|
||||
/* options */
|
||||
#define ONIG_OPTION_NONE 0U
|
||||
#define ONIG_OPTION_IGNORECASE 1U
|
||||
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
|
||||
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
|
||||
#define ONIG_OPTION_DOTALL ONIG_OPTION_MULTILINE
|
||||
#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
|
||||
#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
|
||||
#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
|
||||
#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
|
||||
#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
|
||||
#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
|
||||
/* options (search time) */
|
||||
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
|
||||
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
|
||||
#define ONIG_OPTION_NOTBOS (ONIG_OPTION_NOTEOL << 1)
|
||||
#define ONIG_OPTION_NOTEOS (ONIG_OPTION_NOTBOS << 1)
|
||||
/* options (ctype range) */
|
||||
#define ONIG_OPTION_ASCII_RANGE (ONIG_OPTION_NOTEOS << 1)
|
||||
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE (ONIG_OPTION_ASCII_RANGE << 1)
|
||||
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
|
||||
/* options (newline) */
|
||||
#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit */
|
||||
|
||||
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
|
||||
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
|
||||
#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
|
||||
|
||||
/* syntax */
|
||||
/*
 * Description of one regular-expression syntax.  Instances are passed by
 * const pointer to the compile functions (e.g. onig_new) and stored in the
 * compiled regex.
 */
typedef struct {
|
||||
unsigned int op; /* presumably a mask of ONIG_SYN_OP_* bits -- confirm in regsyntax.c */
|
||||
unsigned int op2; /* presumably a mask of ONIG_SYN_OP2_* bits -- confirm */
|
||||
unsigned int behavior; /* presumably a mask of the ONIG_SYN_* "behavior" bits -- confirm */
|
||||
OnigOptionType options; /* default option */
|
||||
OnigMetaCharTableType meta_char_table; /* type is declared outside this section of the header */
|
||||
} OnigSyntaxType;
|
||||
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58_NG;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPython;
|
||||
|
||||
/* predefined syntaxes (see regsyntax.c) */
|
||||
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
|
||||
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
|
||||
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
|
||||
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
|
||||
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
|
||||
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
|
||||
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
|
||||
#define ONIG_SYNTAX_PERL58 (&OnigSyntaxPerl58)
|
||||
#define ONIG_SYNTAX_PERL58_NG (&OnigSyntaxPerl58_NG)
|
||||
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
|
||||
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
|
||||
#define ONIG_SYNTAX_PYTHON (&OnigSyntaxPython)
|
||||
|
||||
/* default syntax */
|
||||
ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
|
||||
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
|
||||
|
||||
/* syntax (operators) */
|
||||
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
|
||||
#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
|
||||
#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
|
||||
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
|
||||
#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
|
||||
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
|
||||
#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
|
||||
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
|
||||
#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
|
||||
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
|
||||
#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
|
||||
#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
|
||||
#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
|
||||
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
|
||||
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
|
||||
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
|
||||
#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
|
||||
#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
|
||||
#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
|
||||
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<, \> */
|
||||
#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
|
||||
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
|
||||
#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
|
||||
#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
|
||||
#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
|
||||
#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
|
||||
#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
|
||||
#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
|
||||
#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
|
||||
#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
|
||||
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
|
||||
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{OOO} */
|
||||
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
|
||||
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
|
||||
#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsxadlu), (?-imsx), (?^imsxalu) */
|
||||
#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imxadu), (?-imx) */
|
||||
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
|
||||
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
|
||||
#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
|
||||
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
|
||||
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
|
||||
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
|
||||
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
|
||||
#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
|
||||
#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
|
||||
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
|
||||
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
|
||||
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
|
||||
/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
|
||||
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
|
||||
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK (1U<<21) /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER (1U<<22) /* \X */
|
||||
#define ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE (1U<<23) /* \v, \V -- Perl */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE (1U<<24) /* \h, \H -- Perl */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<25) /* \K */
|
||||
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF (1U<<26) /* \g{name}, \g{n} */
|
||||
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL (1U<<27) /* (?&name), (?n), (?R), (?0) */
|
||||
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
|
||||
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
|
||||
#define ONIG_SYN_OP2_OPTION_JAVA (1U<<31) /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
|
||||
|
||||
/* syntax (behavior) */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
|
||||
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
|
||||
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
|
||||
#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
|
||||
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
|
||||
#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
|
||||
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
|
||||
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
|
||||
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL (1U<<10) /* (?<x>)(?<x>)(?&x) */
|
||||
#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP (1U<<11) /* (?<x>)(?<x>)\k<x> */
|
||||
|
||||
/* syntax (behavior) in char class [...] */
|
||||
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
|
||||
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
|
||||
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
|
||||
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
|
||||
/* syntax (behavior) warning */
|
||||
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
|
||||
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
|
||||
#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
|
||||
|
||||
/* meta character specifiers (onig_set_meta_char()) */
|
||||
#define ONIG_META_CHAR_ESCAPE 0
|
||||
#define ONIG_META_CHAR_ANYCHAR 1
|
||||
#define ONIG_META_CHAR_ANYTIME 2
|
||||
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
|
||||
#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
|
||||
#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
|
||||
|
||||
#define ONIG_INEFFECTIVE_META_CHAR 0
|
||||
|
||||
/* error codes */
|
||||
/* Non-zero when ecode lies in the half-open range (-1000, -100].
 * Note that the argument is evaluated twice, so it must be free of side effects. */
#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
|
||||
/* normal return */
|
||||
#define ONIG_NORMAL 0
|
||||
#define ONIG_MISMATCH -1
|
||||
#define ONIG_NO_SUPPORT_CONFIG -2
|
||||
|
||||
/* internal error */
|
||||
#define ONIGERR_MEMORY -5
|
||||
#define ONIGERR_TYPE_BUG -6
|
||||
#define ONIGERR_PARSER_BUG -11
|
||||
#define ONIGERR_STACK_BUG -12
|
||||
#define ONIGERR_UNDEFINED_BYTECODE -13
|
||||
#define ONIGERR_UNEXPECTED_BYTECODE -14
|
||||
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
|
||||
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
|
||||
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
|
||||
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
|
||||
/* general error */
|
||||
#define ONIGERR_INVALID_ARGUMENT -30
|
||||
/* syntax error */
|
||||
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
|
||||
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
|
||||
#define ONIGERR_EMPTY_CHAR_CLASS -102
|
||||
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
|
||||
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
|
||||
#define ONIGERR_END_PATTERN_AT_META -105
|
||||
#define ONIGERR_END_PATTERN_AT_CONTROL -106
|
||||
#define ONIGERR_META_CODE_SYNTAX -108
|
||||
#define ONIGERR_CONTROL_CODE_SYNTAX -109
|
||||
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
|
||||
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
|
||||
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
|
||||
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
|
||||
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
|
||||
#define ONIGERR_NESTED_REPEAT_OPERATOR -115
|
||||
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
|
||||
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
|
||||
#define ONIGERR_END_PATTERN_IN_GROUP -118
|
||||
#define ONIGERR_UNDEFINED_GROUP_OPTION -119
|
||||
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
|
||||
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
|
||||
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
|
||||
#define ONIGERR_INVALID_CONDITION_PATTERN -124
|
||||
/* values error (syntax error) */
|
||||
#define ONIGERR_TOO_BIG_NUMBER -200
|
||||
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
|
||||
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
|
||||
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
|
||||
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
|
||||
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
|
||||
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
|
||||
#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
|
||||
#define ONIGERR_INVALID_BACKREF -208
|
||||
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
|
||||
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS -210
|
||||
#define ONIGERR_TOO_SHORT_DIGITS -211
|
||||
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
|
||||
#define ONIGERR_EMPTY_GROUP_NAME -214
|
||||
#define ONIGERR_INVALID_GROUP_NAME -215
|
||||
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
|
||||
#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
|
||||
#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
|
||||
#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
|
||||
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
|
||||
#define ONIGERR_NEVER_ENDING_RECURSION -221
|
||||
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
|
||||
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
|
||||
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
|
||||
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
|
||||
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
|
||||
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
|
||||
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
|
||||
|
||||
/* errors related to thread */
|
||||
/* #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 */
|
||||
|
||||
|
||||
/* must be smaller than BIT_STATUS_BITS_NUM (sizeof(unsigned int) * 8, i.e. the number of bits in an unsigned int) */
|
||||
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
|
||||
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
|
||||
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
|
||||
|
||||
/*
 * One node of the capture history tree; the root of such a tree is
 * reachable through the history_root member of struct re_registers.
 */
typedef struct OnigCaptureTreeNodeStruct {
|
||||
int group; /* group number */
|
||||
OnigPosition beg; /* presumably the start position of the captured span -- confirm */
|
||||
OnigPosition end; /* presumably the end position of the captured span -- confirm */
|
||||
int allocated; /* presumably the capacity of childs[] -- confirm */
|
||||
int num_childs; /* presumably the number of entries in use in childs[] -- confirm */
|
||||
struct OnigCaptureTreeNodeStruct** childs; /* array of pointers to child nodes */
|
||||
} OnigCaptureTreeNode;
|
||||
|
||||
/* match result region type */
|
||||
/* Also available under the name OnigRegion (typedef further down in this header). */
struct re_registers {
|
||||
int allocated; /* presumably the capacity of beg[] / end[] -- confirm */
|
||||
int num_regs; /* presumably the number of registers in use -- confirm */
|
||||
OnigPosition* beg; /* per-register begin positions (NOTE(review): unset entries presumably hold ONIG_REGION_NOTPOS) */
|
||||
OnigPosition* end; /* per-register end positions */
|
||||
/* extended */
|
||||
OnigCaptureTreeNode* history_root; /* capture history tree root */
|
||||
};
|
||||
|
||||
/* capture tree traverse */
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
|
||||
( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
|
||||
|
||||
|
||||
#define ONIG_REGION_NOTPOS -1
|
||||
|
||||
typedef struct re_registers OnigRegion;
|
||||
|
||||
/*
 * Extra error information filled in through the einfo out-parameter of the
 * compile functions (onig_new, onig_new_without_alloc, onig_new_deluxe).
 */
typedef struct {
|
||||
OnigEncoding enc; /* encoding that par..par_end should be read in (presumably the pattern's) -- confirm */
|
||||
OnigUChar* par; /* presumably the start of the pattern fragment the error refers to -- confirm */
|
||||
OnigUChar* par_end; /* presumably the end of that fragment -- confirm */
|
||||
} OnigErrorInfo;
|
||||
|
||||
/* Lower/upper bound pair; the compiled regex keeps an array of these in its
 * repeat_range member. */
typedef struct {
|
||||
int lower; /* lower bound of the repeat count */
|
||||
int upper; /* upper bound of the repeat count */
|
||||
} OnigRepeatRange;
|
||||
|
||||
typedef void (*OnigWarnFunc)(const char* s);
|
||||
extern void onig_null_warn(const char* s);
|
||||
#define ONIG_NULL_WARN onig_null_warn
|
||||
|
||||
#define ONIG_CHAR_TABLE_SIZE 256
|
||||
|
||||
/*
 * Compiled regular expression object.  Exposed as OnigRegexType, through the
 * pointer typedef OnigRegex, and -- unless ONIG_ESCAPE_REGEX_T_COLLISION is
 * defined -- also as regex_t.
 */
typedef struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
unsigned char* p; /* compiled pattern */
|
||||
unsigned int used; /* used space for p */
|
||||
unsigned int alloc; /* allocated space for p */
|
||||
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
|
||||
int num_comb_exp_check; /* combination explosion check */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
unsigned int bt_mem_end; /* need backtrack flag */
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc; /* presumably the allocated length of repeat_range[] -- confirm */
|
||||
|
||||
OnigOptionType options; /* ONIG_OPTION_* bits */
|
||||
|
||||
OnigRepeatRange* repeat_range; /* array of lower/upper repeat bounds */
|
||||
|
||||
OnigEncoding enc; /* encoding of this regex */
|
||||
const OnigSyntaxType* syntax; /* syntax description of this regex */
|
||||
void* name_table; /* opaque here; presumably the named-group table -- confirm */
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
int optimize; /* optimize flag */
|
||||
int threshold_len; /* search str-length for apply optimize */
|
||||
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
|
||||
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
|
||||
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
|
||||
int sub_anchor; /* start-anchor for exact or map */
|
||||
unsigned char *exact; /* presumably the start of the exact string used for searching -- confirm */
|
||||
unsigned char *exact_end; /* presumably the end of that exact string -- confirm */
|
||||
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
|
||||
int *int_map; /* BM skip for exact_len > 255 */
|
||||
int *int_map_backward; /* BM skip for backward search */
|
||||
OnigDistance dmin; /* min-distance of exact or map */
|
||||
OnigDistance dmax; /* max-distance of exact or map */
|
||||
|
||||
/* regex_t link chain */
|
||||
struct re_pattern_buffer* chain; /* escape compile-conflict */
|
||||
} OnigRegexType;
|
||||
|
||||
typedef OnigRegexType* OnigRegex;
|
||||
|
||||
#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
|
||||
typedef OnigRegexType regex_t;
|
||||
#endif
|
||||
|
||||
|
||||
/* Bundle of compile parameters; passed as the ci argument of onig_new_deluxe(). */
typedef struct {
|
||||
int num_of_elements; /* NOTE(review): presumably the number of members below that are filled in -- confirm */
|
||||
OnigEncoding pattern_enc; /* presumably the encoding of the pattern text -- confirm */
|
||||
OnigEncoding target_enc; /* presumably the encoding of the strings to be matched -- confirm */
|
||||
const OnigSyntaxType* syntax; /* syntax description to compile with */
|
||||
OnigOptionType option; /* ONIG_OPTION_* bits */
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
} OnigCompileInfo;
|
||||
|
||||
/* Oniguruma Native API */
|
||||
ONIG_EXTERN
|
||||
int onig_initialize(OnigEncoding encodings[], int n);
|
||||
ONIG_EXTERN
|
||||
int onig_init(void);
|
||||
ONIG_EXTERN
|
||||
int onig_error_code_to_str(OnigUChar* s, OnigPosition err_code, ...);
|
||||
ONIG_EXTERN
|
||||
void onig_set_warn_func(OnigWarnFunc f);
|
||||
ONIG_EXTERN
|
||||
void onig_set_verb_warn_func(OnigWarnFunc f);
|
||||
ONIG_EXTERN
|
||||
int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo);
|
||||
ONIG_EXTERN
|
||||
int onig_reg_init(OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax);
|
||||
ONIG_EXTERN
|
||||
int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo);
|
||||
ONIG_EXTERN
|
||||
int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
|
||||
ONIG_EXTERN
|
||||
void onig_free(OnigRegex);
|
||||
ONIG_EXTERN
|
||||
void onig_free_body(OnigRegex);
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg);
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search_gpos(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option);
|
||||
ONIG_EXTERN
|
||||
OnigRegion* onig_region_new(void);
|
||||
ONIG_EXTERN
|
||||
void onig_region_init(OnigRegion* region);
|
||||
ONIG_EXTERN
|
||||
void onig_region_free(OnigRegion* region, int free_self);
|
||||
ONIG_EXTERN
|
||||
void onig_region_copy(OnigRegion* to, const OnigRegion* from);
|
||||
ONIG_EXTERN
|
||||
void onig_region_clear(OnigRegion* region);
|
||||
ONIG_EXTERN
|
||||
int onig_region_resize(OnigRegion* region, int n);
|
||||
ONIG_EXTERN
|
||||
int onig_region_set(OnigRegion* region, int at, int beg, int end);
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums);
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_backref_number(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, const OnigRegion *region);
|
||||
ONIG_EXTERN
|
||||
int onig_foreach_name(OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg);
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_names(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_captures(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_capture_histories(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region);
|
||||
ONIG_EXTERN
|
||||
int onig_capture_tree_traverse(OnigRegion* region, int at, int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*), void* arg);
|
||||
ONIG_EXTERN
|
||||
int onig_noname_group_capture_is_active(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onig_get_encoding(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_options(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
OnigCaseFoldType onig_get_case_fold_flag(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
const OnigSyntaxType* onig_get_syntax(const OnigRegexType *reg);
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_syntax(const OnigSyntaxType* syntax);
|
||||
ONIG_EXTERN
|
||||
void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from);
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op(const OnigSyntaxType* syntax);
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op2(const OnigSyntaxType* syntax);
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_behavior(const OnigSyntaxType* syntax);
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_syntax_options(const OnigSyntaxType* syntax);
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op);
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2);
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior);
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options);
|
||||
ONIG_EXTERN
|
||||
int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code);
|
||||
ONIG_EXTERN
|
||||
void onig_copy_encoding(OnigEncodingType *to, OnigEncoding from);
|
||||
ONIG_EXTERN
|
||||
OnigCaseFoldType onig_get_default_case_fold_flag(void);
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag);
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_match_stack_limit_size(void);
|
||||
ONIG_EXTERN
|
||||
int onig_set_match_stack_limit_size(unsigned int size);
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_parse_depth_limit(void);
|
||||
ONIG_EXTERN
|
||||
int onig_set_parse_depth_limit(unsigned int depth);
|
||||
ONIG_EXTERN
|
||||
int onig_end(void);
|
||||
ONIG_EXTERN
|
||||
const char* onig_version(void);
|
||||
ONIG_EXTERN
|
||||
const char* onig_copyright(void);
|
||||
|
||||
RUBY_SYMBOL_EXPORT_END
|
||||
|
||||
#ifdef __cplusplus
|
||||
# if 0
|
||||
{ /* satisfy cc-mode */
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ONIGMO_H */
|
|
@ -1,880 +1,8 @@
|
|||
#ifndef ONIGURUMA_H
|
||||
#define ONIGURUMA_H
|
||||
/**********************************************************************
|
||||
oniguruma.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#if 0
|
||||
} /* satisfy cc-mode */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "onigmo.h"
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 5
|
||||
#define ONIGURUMA_VERSION_MINOR 15
|
||||
#define ONIGURUMA_VERSION_TEENY 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
# define HAVE_PROTOTYPES 1
|
||||
# endif
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
|
||||
#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDARG_H
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
# define P_(args) args
|
||||
#else
|
||||
# define P_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef PV_
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
# define PV_(args) args
|
||||
#else
|
||||
# define PV_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#ifdef RUBY_EXTERN
|
||||
#define ONIG_EXTERN RUBY_EXTERN
|
||||
#else
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#if defined(EXPORT) || defined(RUBY_EXPORT)
|
||||
#define ONIG_EXTERN extern __declspec(dllexport)
|
||||
#else
|
||||
#define ONIG_EXTERN extern __declspec(dllimport)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#define ONIG_EXTERN extern
|
||||
#endif
|
||||
|
||||
RUBY_SYMBOL_EXPORT_BEGIN
|
||||
|
||||
#include <stddef.h> /* for size_t */
|
||||
|
||||
/* PART: character encoding */
|
||||
|
||||
#ifndef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#define UChar OnigUChar
|
||||
#endif
|
||||
|
||||
typedef unsigned char OnigUChar;
|
||||
typedef unsigned int OnigCodePoint;
|
||||
typedef unsigned int OnigCtype;
|
||||
typedef size_t OnigDistance;
|
||||
typedef ptrdiff_t OnigPosition;
|
||||
|
||||
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
|
||||
|
||||
/*
|
||||
* Onig casefold/case mapping flags and related definitions
|
||||
*
|
||||
* Subfields (starting with 0 at LSB):
|
||||
* 0-2: Code point count in casefold.h
|
||||
* 3-12: Index into SpecialCaseMapping array in casefold.h
|
||||
* 13-23: Case folding/mapping flags
|
||||
*/
|
||||
typedef unsigned int OnigCaseFoldType; /* case fold flag */
|
||||
|
||||
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
|
||||
|
||||
/* bits for actual code point count; 3 bits is more than enough, currently only 2 used */
|
||||
#define OnigCodePointMaskWidth 3
|
||||
#define OnigCodePointMask ((1<<OnigCodePointMaskWidth)-1)
|
||||
#define OnigCodePointCount(n) ((n)&OnigCodePointMask)
|
||||
#define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask)
|
||||
|
||||
/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */
|
||||
/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */
|
||||
|
||||
/* bits for index into table with separate titlecase mappings */
|
||||
/* 10 bits provide 1024 values */
|
||||
#define OnigSpecialIndexShift 3
|
||||
#define OnigSpecialIndexWidth 10
|
||||
|
||||
#define ONIGENC_CASE_UPCASE (1<<13) /* has/needs uppercase mapping */
|
||||
#define ONIGENC_CASE_DOWNCASE (1<<14) /* has/needs lowercase mapping */
|
||||
#define ONIGENC_CASE_TITLECASE (1<<15) /* has/needs (special) titlecase mapping */
|
||||
#define ONIGENC_CASE_SPECIAL_OFFSET 3 /* offset in bits from ONIGENC_CASE_{UP,DOWN}CASE to ONIGENC_CASE_{UP,DOWN}_SPECIAL */
|
||||
#define ONIGENC_CASE_UP_SPECIAL (1<<16) /* has special upcase mapping */
|
||||
#define ONIGENC_CASE_DOWN_SPECIAL (1<<17) /* has special downcase mapping */
|
||||
#define ONIGENC_CASE_MODIFIED (1<<18) /* data has been modified */
|
||||
#define ONIGENC_CASE_FOLD (1<<19) /* has/needs case folding */
|
||||
|
||||
#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
|
||||
|
||||
#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) /* needs Lithuanian-specific mapping */
|
||||
#define ONIGENC_CASE_ASCII_ONLY (1<<22) /* only modify ASCII range */
|
||||
#define ONIGENC_CASE_IS_TITLECASE (1<<23) /* character itself is already titlecase */
|
||||
|
||||
#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
|
||||
|
||||
#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
|
||||
#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
|
||||
|
||||
|
||||
#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
|
||||
/* 13 => Unicode:0x1ffc */
|
||||
|
||||
/* code range */
|
||||
/* Accessors for a code-range array: element 0 is the pair count, followed by (from, to) pairs. Arguments and results are fully parenthesized so that expressions such as `p + 1` can be passed safely. */
#define ONIGENC_CODE_RANGE_NUM(range) ((int )(range)[0])
|
||||
#define ONIGENC_CODE_RANGE_FROM(range,i) ((range)[((i)*2) + 1])
|
||||
#define ONIGENC_CODE_RANGE_TO(range,i) ((range)[((i)*2) + 2])
|
||||
|
||||
/* Element type of the acs[] array passed to an encoding's get_case_fold_codes_by_str callback. */
typedef struct {
|
||||
int byte_len; /* byte length of the original (argument) character(s) */
|
||||
int code_len; /* number of code points; presumably the count of used entries in code[] -- confirm */
|
||||
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; /* room for at most ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN code points */
|
||||
} OnigCaseFoldCodeItem;
|
||||
|
||||
/* Table of meta-character code points; embedded in OnigSyntaxType as meta_char_table.
   Members are declared in the same order as the ONIG_META_CHAR_* specifiers taken by onig_set_meta_char(). */
typedef struct {
|
||||
OnigCodePoint esc; /* cf. ONIG_META_CHAR_ESCAPE */
|
||||
OnigCodePoint anychar; /* cf. ONIG_META_CHAR_ANYCHAR */
|
||||
OnigCodePoint anytime; /* cf. ONIG_META_CHAR_ANYTIME */
|
||||
OnigCodePoint zero_or_one_time; /* cf. ONIG_META_CHAR_ZERO_OR_ONE_TIME */
|
||||
OnigCodePoint one_or_more_time; /* cf. ONIG_META_CHAR_ONE_OR_MORE_TIME */
|
||||
OnigCodePoint anychar_anytime; /* cf. ONIG_META_CHAR_ANYCHAR_ANYTIME */
|
||||
} OnigMetaCharTableType;
|
||||
|
||||
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
|
||||
|
||||
/* Per-encoding descriptor: a name, length limits and a table of callbacks.
   Every callback receives the descriptor itself as its `enc` argument; callers normally
   go through the ONIGENC_* accessor macros named beside each member rather than
   dereferencing the pointers directly. */
typedef struct OnigEncodingTypeST {
|
||||
int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_PRECISE_MBC_ENC_LEN */
|
||||
const char* name; /* read via ONIGENC_NAME */
|
||||
int max_enc_len; /* read via ONIGENC_MBC_MAXLEN */
|
||||
int min_enc_len; /* read via ONIGENC_MBC_MINLEN */
|
||||
int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_IS_MBC_NEWLINE */
|
||||
OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_MBC_TO_CODE */
|
||||
int (*code_to_mbclen)(OnigCodePoint code, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_CODE_TO_MBCLEN */
|
||||
int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_CODE_TO_MBC */
|
||||
int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_MBC_CASE_FOLD */
|
||||
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_APPLY_ALL_CASE_FOLD */
|
||||
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], const struct OnigEncodingTypeST* enc); /* called via ONIGENC_GET_CASE_FOLD_CODES_BY_STR */
|
||||
int (*property_name_to_ctype)(const struct OnigEncodingTypeST* enc, const OnigUChar* p, const OnigUChar* end); /* called via ONIGENC_PROPERTY_NAME_TO_CTYPE; note that enc comes first here */
|
||||
int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_IS_CODE_CTYPE */
|
||||
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], const struct OnigEncodingTypeST* enc); /* called via ONIGENC_GET_CTYPE_CODE_RANGE */
|
||||
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_LEFT_ADJUST_CHAR_HEAD */
|
||||
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc); /* called via ONIGENC_IS_ALLOWED_REVERSE_MATCH */
|
||||
int ruby_encoding_index; /* NOTE(review): presumably the ENCINDEX_* index of this encoding inside Ruby -- confirm */
|
||||
unsigned int flags; /* ONIGENC_FLAG_* bits; ONIGENC_IS_UNICODE tests ONIGENC_FLAG_UNICODE */
|
||||
int (*case_map)(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc); /* same signature as onigenc_ascii_only_case_map() */
|
||||
} OnigEncodingType;
|
||||
|
||||
typedef const OnigEncodingType* OnigEncoding;
|
||||
|
||||
ONIG_EXTERN const OnigEncodingType OnigEncodingASCII;
|
||||
|
||||
#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
|
||||
|
||||
#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
|
||||
|
||||
/* this declaration needs to be here because it is used in string.c */
|
||||
ONIG_EXTERN int onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP,
|
||||
const OnigUChar** pp, const OnigUChar* end,
|
||||
OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc));
|
||||
|
||||
|
||||
/* work size */
|
||||
#define ONIGENC_CODE_TO_MBC_MAXLEN 7
|
||||
#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
|
||||
/* 18: 6(max-byte) * 3(case-fold chars) */
|
||||
|
||||
/* character types */
|
||||
#define ONIGENC_CTYPE_NEWLINE 0
|
||||
#define ONIGENC_CTYPE_ALPHA 1
|
||||
#define ONIGENC_CTYPE_BLANK 2
|
||||
#define ONIGENC_CTYPE_CNTRL 3
|
||||
#define ONIGENC_CTYPE_DIGIT 4
|
||||
#define ONIGENC_CTYPE_GRAPH 5
|
||||
#define ONIGENC_CTYPE_LOWER 6
|
||||
#define ONIGENC_CTYPE_PRINT 7
|
||||
#define ONIGENC_CTYPE_PUNCT 8
|
||||
#define ONIGENC_CTYPE_SPACE 9
|
||||
#define ONIGENC_CTYPE_UPPER 10
|
||||
#define ONIGENC_CTYPE_XDIGIT 11
|
||||
#define ONIGENC_CTYPE_WORD 12
|
||||
#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
|
||||
#define ONIGENC_CTYPE_ASCII 14
|
||||
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
|
||||
|
||||
/* flags */
|
||||
#define ONIGENC_FLAG_NONE 0U
|
||||
#define ONIGENC_FLAG_UNICODE 1U
|
||||
|
||||
#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
|
||||
|
||||
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
|
||||
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
|
||||
#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
|
||||
#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
|
||||
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
|
||||
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
|
||||
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
|
||||
#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
|
||||
onigenc_ascii_is_code_ctype( \
|
||||
ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)
|
||||
#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
|
||||
|
||||
|
||||
#define ONIGENC_NAME(enc) ((enc)->name)
|
||||
|
||||
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
|
||||
(enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
|
||||
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
|
||||
(enc)->is_allowed_reverse_match(s,end,enc)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
|
||||
(enc)->left_adjust_char_head(start, s, end, enc)
|
||||
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
|
||||
(enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
|
||||
(enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
|
||||
#define ONIGENC_STEP_BACK(enc,start,s,end,n) \
|
||||
onigenc_step_back((enc),(start),(s),(end),(n))
|
||||
|
||||
#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
|
||||
#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
|
||||
#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
|
||||
|
||||
#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
|
||||
#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
|
||||
|
||||
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
|
||||
#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
|
||||
#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
|
||||
|
||||
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
|
||||
|
||||
ONIG_EXTERN
|
||||
int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc));
|
||||
|
||||
#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
|
||||
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
|
||||
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
|
||||
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
|
||||
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
|
||||
#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
|
||||
#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
|
||||
#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
|
||||
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
|
||||
(enc)->property_name_to_ctype(enc,p,end)
|
||||
|
||||
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
|
||||
|
||||
#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
|
||||
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
|
||||
#define ONIGENC_IS_CODE_PRINT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
|
||||
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
|
||||
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
|
||||
#define ONIGENC_IS_CODE_LOWER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
|
||||
#define ONIGENC_IS_CODE_UPPER(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
|
||||
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
|
||||
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
|
||||
#define ONIGENC_IS_CODE_SPACE(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
|
||||
#define ONIGENC_IS_CODE_BLANK(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
|
||||
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
|
||||
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
|
||||
#define ONIGENC_IS_CODE_WORD(enc,code) \
|
||||
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
|
||||
|
||||
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
|
||||
(enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
|
||||
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n));
|
||||
|
||||
|
||||
/* encoding API */
|
||||
ONIG_EXTERN
|
||||
int onigenc_init P_((void));
|
||||
ONIG_EXTERN
|
||||
int onigenc_set_default_encoding P_((OnigEncoding enc));
|
||||
PUREFUNC(ONIG_EXTERN OnigEncoding onigenc_get_default_encoding P_((void)));
|
||||
PUREFUNC(ONIG_EXTERN void onigenc_set_default_caseconv_table P_((const OnigUChar* table)));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
|
||||
ONIG_EXTERN
|
||||
int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
ONIG_EXTERN
|
||||
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
|
||||
|
||||
|
||||
/* PART: regular expression */
|
||||
|
||||
/* config parameters */
|
||||
#define ONIG_NREGION 10
|
||||
#define ONIG_MAX_BACKREF_NUM 1000
|
||||
#define ONIG_MAX_CAPTURE_GROUP_NUM 32767
|
||||
#define ONIG_MAX_REPEAT_NUM 100000
|
||||
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
|
||||
/* constants */
|
||||
#define ONIG_MAX_ERROR_MESSAGE_LEN 90
|
||||
|
||||
typedef unsigned int OnigOptionType;
|
||||
|
||||
#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
|
||||
|
||||
/* options */
|
||||
#define ONIG_OPTION_NONE 0U
|
||||
#define ONIG_OPTION_IGNORECASE 1U
|
||||
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
|
||||
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
|
||||
#define ONIG_OPTION_DOTALL ONIG_OPTION_MULTILINE
|
||||
#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
|
||||
#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
|
||||
#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
|
||||
#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
|
||||
#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
|
||||
#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
|
||||
/* options (search time) */
|
||||
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
|
||||
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
|
||||
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
|
||||
/* options (ctype range) */
|
||||
#define ONIG_OPTION_ASCII_RANGE (ONIG_OPTION_POSIX_REGION << 1)
|
||||
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE (ONIG_OPTION_ASCII_RANGE << 1)
|
||||
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
|
||||
/* options (newline) */
|
||||
#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
|
||||
#define ONIG_OPTION_NOTBOS (ONIG_OPTION_NEWLINE_CRLF << 1)
|
||||
#define ONIG_OPTION_NOTEOS (ONIG_OPTION_NOTBOS << 1)
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_NOTEOS /* limit */
|
||||
|
||||
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
|
||||
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
|
||||
#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
|
||||
|
||||
/* syntax */
|
||||
/* Description of one regular-expression syntax; the predefined instances are the
   OnigSyntax* objects (OnigSyntaxRuby, OnigSyntaxPerl, ...) declared in this header. */
typedef struct {
|
||||
unsigned int op; /* cf. onig_get_syntax_op()/onig_set_syntax_op(); presumably a mask of ONIG_SYN_OP_* bits -- confirm */
|
||||
unsigned int op2; /* cf. onig_get_syntax_op2()/onig_set_syntax_op2(); presumably a mask of ONIG_SYN_OP2_* bits -- confirm */
|
||||
unsigned int behavior; /* cf. onig_get_syntax_behavior()/onig_set_syntax_behavior(); presumably the other ONIG_SYN_* behavior bits -- confirm */
|
||||
OnigOptionType options; /* default option */
|
||||
OnigMetaCharTableType meta_char_table; /* meta-character code points for this syntax */
|
||||
} OnigSyntaxType;
|
||||
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58_NG;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby;
|
||||
ONIG_EXTERN const OnigSyntaxType OnigSyntaxPython;
|
||||
|
||||
/* predefined syntaxes (see regsyntax.c) */
|
||||
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
|
||||
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
|
||||
#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
|
||||
#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
|
||||
#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
|
||||
#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
|
||||
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
|
||||
#define ONIG_SYNTAX_PERL58 (&OnigSyntaxPerl58)
|
||||
#define ONIG_SYNTAX_PERL58_NG (&OnigSyntaxPerl58_NG)
|
||||
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
|
||||
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
|
||||
#define ONIG_SYNTAX_PYTHON (&OnigSyntaxPython)
|
||||
|
||||
/* default syntax */
|
||||
ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
|
||||
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
|
||||
|
||||
/* syntax (operators) */
|
||||
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
|
||||
#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
|
||||
#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
|
||||
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
|
||||
#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
|
||||
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
|
||||
#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
|
||||
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
|
||||
#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
|
||||
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
|
||||
#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
|
||||
#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
|
||||
#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
|
||||
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
|
||||
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
|
||||
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
|
||||
#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
|
||||
#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
|
||||
#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
|
||||
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<, \> */
|
||||
#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
|
||||
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
|
||||
#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
|
||||
#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
|
||||
#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
|
||||
#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
|
||||
#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
|
||||
#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
|
||||
#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
|
||||
#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
|
||||
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
|
||||
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{OOO} */ /* NOTIMPL */
|
||||
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
|
||||
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
|
||||
#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsxadlu), (?-imsx), (?^imsxalu) */
|
||||
#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imxadu), (?-imx) */
|
||||
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
|
||||
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
|
||||
#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
|
||||
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
|
||||
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
|
||||
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
|
||||
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
|
||||
#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
|
||||
#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
|
||||
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
|
||||
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
|
||||
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
|
||||
/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
|
||||
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
|
||||
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK (1U<<21) /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER (1U<<22) /* \X as (?>\P{M}\p{M}*) */
|
||||
#define ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE (1U<<23) /* \v, \V -- Perl */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE (1U<<24) /* \h, \H -- Perl */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<25) /* \K */
|
||||
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF (1U<<26) /* \g{name}, \g{n} */
|
||||
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL (1U<<27) /* (?&name), (?n), (?R), (?0) */
|
||||
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
|
||||
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
|
||||
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
|
||||
#define ONIG_SYN_OP2_OPTION_JAVA (1U<<31) /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
|
||||
|
||||
/* syntax (behavior) */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
|
||||
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
|
||||
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
|
||||
#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
|
||||
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
|
||||
#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
|
||||
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
|
||||
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
|
||||
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
|
||||
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL (1U<<10) /* (?<x>)(?<x>)(?&x) */
|
||||
|
||||
/* syntax (behavior) in char class [...] */
|
||||
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
|
||||
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
|
||||
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
|
||||
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
|
||||
/* syntax (behavior) warning */
|
||||
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
|
||||
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
|
||||
#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
|
||||
|
||||
/* meta character specifiers (onig_set_meta_char()) */
|
||||
#define ONIG_META_CHAR_ESCAPE 0
|
||||
#define ONIG_META_CHAR_ANYCHAR 1
|
||||
#define ONIG_META_CHAR_ANYTIME 2
|
||||
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
|
||||
#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
|
||||
#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
|
||||
|
||||
#define ONIG_INEFFECTIVE_META_CHAR 0
|
||||
|
||||
/* error codes */
|
||||
#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
|
||||
/* normal return */
|
||||
#define ONIG_NORMAL 0
|
||||
#define ONIG_MISMATCH -1
|
||||
#define ONIG_NO_SUPPORT_CONFIG -2
|
||||
|
||||
/* internal error */
|
||||
#define ONIGERR_MEMORY -5
|
||||
#define ONIGERR_TYPE_BUG -6
|
||||
#define ONIGERR_PARSER_BUG -11
|
||||
#define ONIGERR_STACK_BUG -12
|
||||
#define ONIGERR_UNDEFINED_BYTECODE -13
|
||||
#define ONIGERR_UNEXPECTED_BYTECODE -14
|
||||
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
|
||||
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
|
||||
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
|
||||
/* general error */
|
||||
#define ONIGERR_INVALID_ARGUMENT -30
|
||||
/* syntax error */
|
||||
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
|
||||
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
|
||||
#define ONIGERR_EMPTY_CHAR_CLASS -102
|
||||
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
|
||||
#define ONIGERR_END_PATTERN_AT_ESCAPE -104
|
||||
#define ONIGERR_END_PATTERN_AT_META -105
|
||||
#define ONIGERR_END_PATTERN_AT_CONTROL -106
|
||||
#define ONIGERR_META_CODE_SYNTAX -108
|
||||
#define ONIGERR_CONTROL_CODE_SYNTAX -109
|
||||
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
|
||||
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
|
||||
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
|
||||
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
|
||||
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
|
||||
#define ONIGERR_NESTED_REPEAT_OPERATOR -115
|
||||
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
|
||||
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
|
||||
#define ONIGERR_END_PATTERN_IN_GROUP -118
|
||||
#define ONIGERR_UNDEFINED_GROUP_OPTION -119
|
||||
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
|
||||
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
|
||||
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
|
||||
#define ONIGERR_INVALID_CONDITION_PATTERN -124
|
||||
/* values error (syntax error) */
|
||||
#define ONIGERR_TOO_BIG_NUMBER -200
|
||||
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
|
||||
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
|
||||
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
|
||||
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
|
||||
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
|
||||
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
|
||||
#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
|
||||
#define ONIGERR_INVALID_BACKREF -208
|
||||
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
|
||||
#define ONIGERR_TOO_SHORT_DIGITS -210
|
||||
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
|
||||
#define ONIGERR_EMPTY_GROUP_NAME -214
|
||||
#define ONIGERR_INVALID_GROUP_NAME -215
|
||||
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
|
||||
#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
|
||||
#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
|
||||
#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
|
||||
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
|
||||
#define ONIGERR_NEVER_ENDING_RECURSION -221
|
||||
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
|
||||
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
|
||||
#define ONIGERR_TOO_MANY_CAPTURE_GROUPS -224
|
||||
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
|
||||
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
|
||||
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
|
||||
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
|
||||
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
|
||||
|
||||
/* errors related to thread */
|
||||
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
|
||||
|
||||
|
||||
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
|
||||
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
|
||||
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
|
||||
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
|
||||
|
||||
typedef struct OnigCaptureTreeNodeStruct {
|
||||
int group; /* group number */
|
||||
OnigPosition beg;
|
||||
OnigPosition end;
|
||||
int allocated;
|
||||
int num_childs;
|
||||
struct OnigCaptureTreeNodeStruct** childs;
|
||||
} OnigCaptureTreeNode;
|
||||
|
||||
/* match result region type */
|
||||
struct re_registers {
|
||||
int allocated;
|
||||
int num_regs;
|
||||
OnigPosition* beg;
|
||||
OnigPosition* end;
|
||||
/* extended */
|
||||
OnigCaptureTreeNode* history_root; /* capture history tree root */
|
||||
};
|
||||
|
||||
/* capture tree traverse */
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
|
||||
#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
|
||||
( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
|
||||
|
||||
|
||||
#define ONIG_REGION_NOTPOS -1
|
||||
|
||||
typedef struct re_registers OnigRegion;
|
||||
|
||||
typedef struct {
|
||||
OnigEncoding enc;
|
||||
OnigUChar* par;
|
||||
OnigUChar* par_end;
|
||||
} OnigErrorInfo;
|
||||
|
||||
typedef struct {
|
||||
int lower;
|
||||
int upper;
|
||||
} OnigRepeatRange;
|
||||
|
||||
typedef void (*OnigWarnFunc) P_((const char* s));
|
||||
extern void onig_null_warn P_((const char* s));
|
||||
#define ONIG_NULL_WARN onig_null_warn
|
||||
|
||||
#define ONIG_CHAR_TABLE_SIZE 256
|
||||
|
||||
/* regex_t state */
|
||||
#define ONIG_STATE_NORMAL 0
|
||||
#define ONIG_STATE_SEARCHING 1
|
||||
#define ONIG_STATE_COMPILING -1
|
||||
#define ONIG_STATE_MODIFY -2
|
||||
|
||||
#define ONIG_STATE(reg) \
|
||||
((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
|
||||
|
||||
typedef struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
unsigned char* p; /* compiled pattern */
|
||||
unsigned int used; /* used space for p */
|
||||
unsigned int alloc; /* allocated space for p */
|
||||
|
||||
int state; /* normal, searching, compiling */
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
|
||||
int num_comb_exp_check; /* combination explosion check */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
unsigned int bt_mem_end; /* need backtrack flag */
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc;
|
||||
|
||||
OnigOptionType options;
|
||||
|
||||
OnigRepeatRange* repeat_range;
|
||||
|
||||
OnigEncoding enc;
|
||||
const OnigSyntaxType* syntax;
|
||||
void* name_table;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
int optimize; /* optimize flag */
|
||||
int threshold_len; /* search str-length for apply optimize */
|
||||
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
|
||||
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
|
||||
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
|
||||
int sub_anchor; /* start-anchor for exact or map */
|
||||
unsigned char *exact;
|
||||
unsigned char *exact_end;
|
||||
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
|
||||
int *int_map; /* BM skip for exact_len > 255 */
|
||||
int *int_map_backward; /* BM skip for backward search */
|
||||
OnigDistance dmin; /* min-distance of exact or map */
|
||||
OnigDistance dmax; /* max-distance of exact or map */
|
||||
|
||||
/* regex_t link chain */
|
||||
struct re_pattern_buffer* chain; /* escape compile-conflict */
|
||||
} OnigRegexType;
|
||||
|
||||
typedef OnigRegexType* OnigRegex;
|
||||
|
||||
#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
|
||||
typedef OnigRegexType regex_t;
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct {
|
||||
int num_of_elements;
|
||||
OnigEncoding pattern_enc;
|
||||
OnigEncoding target_enc;
|
||||
const OnigSyntaxType* syntax;
|
||||
OnigOptionType option;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
} OnigCompileInfo;
|
||||
|
||||
/* Oniguruma Native API */
|
||||
ONIG_EXTERN
|
||||
int onig_init P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_error_code_to_str PV_((OnigUChar* s, OnigPosition err_code, ...));
|
||||
ONIG_EXTERN
|
||||
void onig_set_warn_func P_((OnigWarnFunc f));
|
||||
ONIG_EXTERN
|
||||
void onig_set_verb_warn_func P_((OnigWarnFunc f));
|
||||
ONIG_EXTERN
|
||||
int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
void onig_free P_((OnigRegex));
|
||||
ONIG_EXTERN
|
||||
void onig_free_body P_((OnigRegex));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_search_gpos P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigPosition onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
OnigRegion* onig_region_new P_((void));
|
||||
ONIG_EXTERN
|
||||
void onig_region_init P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
void onig_region_free P_((OnigRegion* region, int free_self));
|
||||
ONIG_EXTERN
|
||||
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
|
||||
ONIG_EXTERN
|
||||
void onig_region_clear P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
int onig_region_resize P_((OnigRegion* region, int n));
|
||||
ONIG_EXTERN
|
||||
int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
|
||||
ONIG_EXTERN
|
||||
int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
|
||||
ONIG_EXTERN
|
||||
int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_names P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_captures P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
int onig_number_of_capture_histories P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
|
||||
ONIG_EXTERN
|
||||
int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*), void* arg));
|
||||
ONIG_EXTERN
|
||||
int onig_noname_group_capture_is_active P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onig_get_encoding P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_options P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
const OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_syntax P_((const OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
void onig_copy_syntax P_((OnigSyntaxType* to, const OnigSyntaxType* from));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
|
||||
ONIG_EXTERN
|
||||
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
|
||||
ONIG_EXTERN
|
||||
int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
|
||||
ONIG_EXTERN
|
||||
void onig_copy_encoding P_((OnigEncodingType *to, OnigEncoding from));
|
||||
ONIG_EXTERN
|
||||
OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_match_stack_limit_size P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_match_stack_limit_size P_((unsigned int size));
|
||||
ONIG_EXTERN
|
||||
int onig_end P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_version P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_copyright P_((void));
|
||||
|
||||
RUBY_SYMBOL_EXPORT_END
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if 0
|
||||
{ /* satisfy cc-mode */
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#define ONIGURUMA_VERSION_MAJOR ONIGMO_VERSION_MAJOR
|
||||
#define ONIGURUMA_VERSION_MINOR ONIGMO_VERSION_MINOR
|
||||
#define ONIGURUMA_VERSION_TEENY ONIGMO_VERSION_TEENY
|
||||
#endif /* ONIGURUMA_H */
|
||||
|
|
3
re.c
3
re.c
|
@ -847,7 +847,7 @@ onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_e
|
|||
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
||||
if (r) goto err;
|
||||
|
||||
r = onig_compile(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
|
||||
r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
|
||||
if (r) {
|
||||
err:
|
||||
onig_free(*reg);
|
||||
|
@ -3908,7 +3908,6 @@ Init_Regexp(void)
|
|||
{
|
||||
rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
|
||||
|
||||
onigenc_set_default_caseconv_table((UChar*)casetable);
|
||||
onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
|
||||
onig_set_warn_func(re_warn);
|
||||
onig_set_verb_warn_func(re_warn);
|
||||
|
|
583
regcomp.c
583
regcomp.c
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
67
regenc.c
67
regenc.c
|
@ -3,7 +3,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -364,12 +364,14 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
|
|||
};
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
extern void
|
||||
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
|
||||
{
|
||||
/* nothing */
|
||||
/* obsoleted. */
|
||||
}
|
||||
#endif
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
|
||||
|
@ -631,8 +633,10 @@ onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding e
|
|||
extern int
|
||||
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
|
||||
{
|
||||
#ifdef RUBY
|
||||
if (code > 0xff)
|
||||
rb_raise(rb_eRangeError, "%u out of char range", code);
|
||||
#endif
|
||||
*buf = (UChar )(code & 0xff);
|
||||
return 1;
|
||||
}
|
||||
|
@ -892,6 +896,7 @@ onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
|
|||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* Property management */
|
||||
static int
|
||||
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
|
||||
|
@ -944,68 +949,64 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
|
|||
(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
extern int
|
||||
onigenc_property_list_init(int (*f)(void))
|
||||
{
|
||||
int r;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
r = f();
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
return r;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_ascii_only_case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,
|
||||
OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)
|
||||
onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,
|
||||
OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
int codepoint_length;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
|
||||
if (codepoint_length < 0)
|
||||
return codepoint_length; /* encoding invalid */
|
||||
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
|
||||
*pp += codepoint_length;
|
||||
|
||||
if (code>='a' && code<='z' && (flags&ONIGENC_CASE_UPCASE))
|
||||
flags |= ONIGENC_CASE_MODIFIED, code += 'A'-'a';
|
||||
else if (code>='A' && code<='Z' && (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)))
|
||||
flags |= ONIGENC_CASE_MODIFIED, code += 'a'-'A';
|
||||
if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code += 'A' - 'a';
|
||||
} else if (code >= 'A' && code <= 'Z' &&
|
||||
(flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code += 'a' - 'A';
|
||||
}
|
||||
to += ONIGENC_CODE_TO_MBC(enc, code, to);
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_single_byte_ascii_only_case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
|
||||
const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
|
||||
const struct OnigEncodingTypeST* enc)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
OnigUChar *to_start = to;
|
||||
OnigCaseFoldType flags = *flagP;
|
||||
|
||||
while (*pp<end && to<to_end) {
|
||||
while (*pp < end && to < to_end) {
|
||||
code = *(*pp)++;
|
||||
|
||||
if (code>='a' && code<='z' && (flags&ONIGENC_CASE_UPCASE))
|
||||
flags |= ONIGENC_CASE_MODIFIED, code += 'A'-'a';
|
||||
else if (code>='A' && code<='Z' && (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)))
|
||||
flags |= ONIGENC_CASE_MODIFIED, code += 'a'-'A';
|
||||
if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code += 'A' - 'a';
|
||||
} else if (code >= 'A' && code <= 'Z' &&
|
||||
(flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
|
||||
flags |= ONIGENC_CASE_MODIFIED;
|
||||
code += 'a' - 'A';
|
||||
}
|
||||
*to++ = code;
|
||||
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
|
||||
flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
|
||||
flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
|
||||
}
|
||||
*flagP = flags;
|
||||
return (int)(to-to_start);
|
||||
return (int )(to - to_start);
|
||||
}
|
||||
|
|
136
regenc.h
136
regenc.h
|
@ -1,11 +1,11 @@
|
|||
#ifndef ONIGURUMA_REGENC_H
|
||||
#define ONIGURUMA_REGENC_H
|
||||
#ifndef ONIGMO_REGENC_H
|
||||
#define ONIGMO_REGENC_H
|
||||
/**********************************************************************
|
||||
regenc.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -30,18 +30,32 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef REGINT_H
|
||||
#ifndef RUBY_EXTERN
|
||||
#include "ruby/config.h"
|
||||
#include "ruby/defines.h"
|
||||
#endif
|
||||
#if !defined(RUBY) && (defined(RUBY_EXPORT) || defined(ONIG_ENC_REGISTER))
|
||||
# define RUBY
|
||||
#endif
|
||||
#ifdef RUBY
|
||||
# ifndef ONIGMO_REGINT_H
|
||||
# ifndef RUBY_EXTERN
|
||||
# include "ruby/config.h"
|
||||
# include "ruby/defines.h"
|
||||
# endif
|
||||
# endif
|
||||
#else /* RUBY */
|
||||
# ifndef PACKAGE
|
||||
/* PACKAGE is defined in config.h */
|
||||
# include "config.h"
|
||||
# endif
|
||||
#endif /* RUBY */
|
||||
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
# undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
|
||||
#include "ruby/oniguruma.h"
|
||||
#ifdef RUBY
|
||||
# include "ruby/onigmo.h"
|
||||
#else
|
||||
# include "onigmo.h"
|
||||
#endif
|
||||
|
||||
RUBY_SYMBOL_EXPORT_BEGIN
|
||||
|
||||
|
@ -52,23 +66,23 @@ typedef struct {
|
|||
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL ((void* )0)
|
||||
# define NULL ((void* )0)
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE 1
|
||||
# define TRUE 1
|
||||
#endif
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
# define FALSE 0
|
||||
#endif
|
||||
|
||||
#ifndef ARG_UNUSED
|
||||
#if defined(__GNUC__)
|
||||
# if defined(__GNUC__)
|
||||
# define ARG_UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
# else
|
||||
# define ARG_UNUSED
|
||||
#endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
|
||||
|
@ -111,7 +125,7 @@ typedef struct {
|
|||
{(short int )(sizeof(name) - 1), (name), (ctype)}
|
||||
|
||||
#ifndef numberof
|
||||
#define numberof(array) (int )(sizeof(array) / sizeof((array)[0]))
|
||||
# define numberof(array) (int )(sizeof(array) / sizeof((array)[0]))
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -125,48 +139,48 @@ typedef struct {
|
|||
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
|
||||
|
||||
/* for encoding system implementation (internal) */
|
||||
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
|
||||
ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc));
|
||||
ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc)));
|
||||
PUREFUNC(ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc)));
|
||||
ONIG_EXTERN int onigenc_single_byte_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
|
||||
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
|
||||
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
|
||||
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
|
||||
|
||||
|
||||
/* methods for single byte encoding */
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p, const UChar* e, OnigEncoding enc)));
|
||||
PUREFUNC(ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end, OnigEncoding enc)));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf, OnigEncoding enc));
|
||||
CONSTFUNC(ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc)));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc)));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc)));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc)));
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc);
|
||||
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc);
|
||||
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
|
||||
|
||||
/* methods for multi byte encoding */
|
||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
CONSTFUNC(ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end);
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower);
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
|
||||
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end);
|
||||
ONIG_EXTERN int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end);
|
||||
ONIG_EXTERN int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
|
||||
ONIG_EXTERN int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
|
||||
|
||||
ONIG_EXTERN int onigenc_unicode_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
|
||||
ONIG_EXTERN int onigenc_unicode_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
|
||||
|
||||
|
||||
/* in enc/unicode.c */
|
||||
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
|
||||
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
|
||||
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
|
||||
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
|
||||
ONIG_EXTERN int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
|
||||
ONIG_EXTERN int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]);
|
||||
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
|
||||
ONIG_EXTERN int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold);
|
||||
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
|
||||
|
||||
|
||||
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
|
@ -182,14 +196,14 @@ ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
|
|||
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
|
||||
|
||||
ONIG_EXTERN int
|
||||
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
|
||||
onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
|
||||
ONIG_EXTERN int
|
||||
onigenc_with_ascii_strnicmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
|
||||
onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
|
||||
ONIG_EXTERN UChar*
|
||||
onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
|
||||
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n);
|
||||
|
||||
/* defined in regexec.c, but used in enc/xxx.c */
|
||||
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
|
||||
extern int onig_is_in_code_range(const UChar* p, OnigCodePoint code);
|
||||
|
||||
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
|
||||
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
|
||||
|
@ -212,9 +226,9 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
|
|||
|
||||
#ifdef ONIG_ENC_REGISTER
|
||||
extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
|
||||
#define OnigEncodingName(n) encoding_##n
|
||||
#define OnigEncodingDeclare(n) static const OnigEncodingType OnigEncodingName(n)
|
||||
#define OnigEncodingDefine(f,n) \
|
||||
# define OnigEncodingName(n) encoding_##n
|
||||
# define OnigEncodingDeclare(n) static const OnigEncodingType OnigEncodingName(n)
|
||||
# define OnigEncodingDefine(f,n) \
|
||||
OnigEncodingDeclare(n); \
|
||||
void Init_##f(void) { \
|
||||
ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
|
||||
|
@ -222,9 +236,9 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
|
|||
} \
|
||||
OnigEncodingDeclare(n)
|
||||
#else
|
||||
#define OnigEncodingName(n) OnigEncoding##n
|
||||
#define OnigEncodingDeclare(n) const OnigEncodingType OnigEncodingName(n)
|
||||
#define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
|
||||
# define OnigEncodingName(n) OnigEncoding##n
|
||||
# define OnigEncodingDeclare(n) const OnigEncodingType OnigEncodingName(n)
|
||||
# define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
|
||||
#endif
|
||||
|
||||
/* macros for define replica encoding and encoding alias */
|
||||
|
@ -234,4 +248,4 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
|
|||
|
||||
RUBY_SYMBOL_EXPORT_END
|
||||
|
||||
#endif /* ONIGURUMA_REGENC_H */
|
||||
#endif /* ONIGMO_REGENC_H */
|
||||
|
|
65
regerror.c
65
regerror.c
|
@ -3,7 +3,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -31,13 +31,7 @@
|
|||
#include "regint.h"
|
||||
#include <stdio.h> /* for vsnprintf() */
|
||||
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
#include <stdarg.h>
|
||||
#define va_init_list(a,b) va_start(a,b)
|
||||
#else
|
||||
#include <varargs.h>
|
||||
#define va_init_list(a,b) va_start(a)
|
||||
#endif
|
||||
|
||||
extern UChar*
|
||||
onig_error_code_to_format(OnigPosition code)
|
||||
|
@ -65,6 +59,8 @@ onig_error_code_to_format(OnigPosition code)
|
|||
p = "unexpected bytecode (bug)"; break;
|
||||
case ONIGERR_MATCH_STACK_LIMIT_OVER:
|
||||
p = "match-stack limit over"; break;
|
||||
case ONIGERR_PARSE_DEPTH_LIMIT_OVER:
|
||||
p = "parse depth limit over"; break;
|
||||
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
|
||||
p = "default multibyte-encoding is not set"; break;
|
||||
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
|
||||
|
@ -179,8 +175,6 @@ onig_error_code_to_format(OnigPosition code)
|
|||
p = "not supported encoding combination"; break;
|
||||
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
|
||||
p = "invalid combination of options"; break;
|
||||
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
|
||||
p = "over thread pass limit count"; break;
|
||||
|
||||
default:
|
||||
p = "undefined error code"; break;
|
||||
|
@ -191,12 +185,12 @@ onig_error_code_to_format(OnigPosition code)
|
|||
|
||||
static void sprint_byte(char* s, unsigned int v)
|
||||
{
|
||||
sprintf(s, "%02x", (v & 0377));
|
||||
xsnprintf(s, 3, "%02x", (v & 0377));
|
||||
}
|
||||
|
||||
static void sprint_byte_with_x(char* s, unsigned int v)
|
||||
{
|
||||
sprintf(s, "\\x%02x", (v & 0377));
|
||||
xsnprintf(s, 5, "\\x%02x", (v & 0377));
|
||||
}
|
||||
|
||||
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
|
||||
|
@ -252,14 +246,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
|
|||
#define MAX_ERROR_PAR_LEN 30
|
||||
|
||||
extern int
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_error_code_to_str(UChar* s, OnigPosition code, ...)
|
||||
#else
|
||||
onig_error_code_to_str(s, code, va_alist)
|
||||
UChar* s;
|
||||
OnigPosition code;
|
||||
va_dcl
|
||||
#endif
|
||||
{
|
||||
UChar *p, *q;
|
||||
OnigErrorInfo* einfo;
|
||||
|
@ -268,7 +255,7 @@ onig_error_code_to_str(s, code, va_alist)
|
|||
UChar parbuf[MAX_ERROR_PAR_LEN];
|
||||
va_list vargs;
|
||||
|
||||
va_init_list(vargs, code);
|
||||
va_start(vargs, code);
|
||||
|
||||
switch (code) {
|
||||
case ONIGERR_UNDEFINED_NAME_REFERENCE:
|
||||
|
@ -337,26 +324,17 @@ onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
|
|||
need = (pat_end - pat) * 4 + 4;
|
||||
|
||||
if (n + need < (size_t )bufsize) {
|
||||
strcat((char* )buf, ": /");
|
||||
xstrcat((char* )buf, ": /", bufsize);
|
||||
s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
|
||||
|
||||
p = pat;
|
||||
while (p < pat_end) {
|
||||
if (*p == '\\') {
|
||||
*s++ = *p++;
|
||||
len = enclen(enc, p, pat_end);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else if (*p == '/') {
|
||||
*s++ = (unsigned char )'\\';
|
||||
*s++ = *p++;
|
||||
}
|
||||
else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
|
||||
if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
|
||||
len = enclen(enc, p, pat_end);
|
||||
if (ONIGENC_MBC_MINLEN(enc) == 1) {
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else { /* for UTF16 */
|
||||
else { /* for UTF16/32 */
|
||||
int blen;
|
||||
|
||||
while (len-- > 0) {
|
||||
|
@ -367,6 +345,15 @@ onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (*p == '\\') {
|
||||
*s++ = *p++;
|
||||
len = enclen(enc, p, pat_end);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else if (*p == '/') {
|
||||
*s++ = (unsigned char )'\\';
|
||||
*s++ = *p++;
|
||||
}
|
||||
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
|
||||
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
|
||||
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
|
||||
|
@ -384,25 +371,15 @@ onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
|
|||
}
|
||||
}
|
||||
|
||||
#if 0 /* unused */
|
||||
void
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
|
||||
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
|
||||
#else
|
||||
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
|
||||
UChar buf[];
|
||||
int bufsize;
|
||||
OnigEncoding enc;
|
||||
UChar* pat;
|
||||
UChar* pat_end;
|
||||
const UChar *fmt;
|
||||
va_dcl
|
||||
#endif
|
||||
{
|
||||
va_list args;
|
||||
va_init_list(args, fmt);
|
||||
va_start(args, fmt);
|
||||
onig_vsnprintf_with_pattern(buf, bufsize, enc,
|
||||
pat, pat_end, fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
885
regexec.c
885
regexec.c
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
410
regint.h
410
regint.h
|
@ -1,11 +1,11 @@
|
|||
#ifndef ONIGURUMA_REGINT_H
|
||||
#define ONIGURUMA_REGINT_H
|
||||
#ifndef ONIGMO_REGINT_H
|
||||
#define ONIGMO_REGINT_H
|
||||
/**********************************************************************
|
||||
regint.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -35,6 +35,7 @@
|
|||
/* #define ONIG_DEBUG_COMPILE */
|
||||
/* #define ONIG_DEBUG_SEARCH */
|
||||
/* #define ONIG_DEBUG_MATCH */
|
||||
/* #define ONIG_DEBUG_MEMLEAK */
|
||||
/* #define ONIG_DONT_OPTIMIZE */
|
||||
|
||||
/* for byte-code statistical data. */
|
||||
|
@ -42,25 +43,25 @@
|
|||
|
||||
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
|
||||
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
|
||||
defined(ONIG_DEBUG_STATISTICS)
|
||||
#ifndef ONIG_DEBUG
|
||||
#define ONIG_DEBUG
|
||||
#endif
|
||||
defined(ONIG_DEBUG_STATISTICS) || defined(ONIG_DEBUG_MEMLEAK)
|
||||
# ifndef ONIG_DEBUG
|
||||
# define ONIG_DEBUG
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef UNALIGNED_WORD_ACCESS
|
||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
|
||||
defined(__powerpc64__) || \
|
||||
defined(__mc68020__)
|
||||
#define UNALIGNED_WORD_ACCESS 1
|
||||
#else
|
||||
#define UNALIGNED_WORD_ACCESS 0
|
||||
#endif
|
||||
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
|
||||
defined(__powerpc64__) || \
|
||||
defined(__mc68020__)
|
||||
# define UNALIGNED_WORD_ACCESS 1
|
||||
# else
|
||||
# define UNALIGNED_WORD_ACCESS 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if UNALIGNED_WORD_ACCESS
|
||||
#define PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
# define PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#endif
|
||||
|
||||
/* config */
|
||||
|
@ -73,213 +74,163 @@
|
|||
#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
|
||||
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
|
||||
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
/* #define USE_RECOMPILE_API */
|
||||
/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
#define USE_NO_INVALID_QUANTIFIER
|
||||
|
||||
/* internal config */
|
||||
#define USE_PARSE_TREE_NODE_RECYCLE
|
||||
#define USE_OP_PUSH_OR_JUMP_EXACT
|
||||
/* #define USE_OP_PUSH_OR_JUMP_EXACT */
|
||||
#define USE_QTFR_PEEK_NEXT
|
||||
#define USE_ST_LIBRARY
|
||||
#define USE_SHARED_CCLASS_TABLE
|
||||
#define USE_SUNDAY_QUICK_SEARCH
|
||||
|
||||
#define INIT_MATCH_STACK_SIZE 160
|
||||
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
|
||||
#define DEFAULT_PARSE_DEPTH_LIMIT 4096
|
||||
|
||||
#define OPT_EXACT_MAXLEN 24
|
||||
|
||||
/* check config */
|
||||
#if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP)
|
||||
#if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL)
|
||||
#error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined.
|
||||
#endif
|
||||
# if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL)
|
||||
# error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined.
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define ARG_UNUSED __attribute__ ((unused))
|
||||
# define ARG_UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
# define ARG_UNUSED
|
||||
# define ARG_UNUSED
|
||||
#endif
|
||||
|
||||
#ifndef RUBY_DEFINES_H
|
||||
#include "ruby/ruby.h"
|
||||
#undef xmalloc
|
||||
#undef xrealloc
|
||||
#undef xcalloc
|
||||
#undef xfree
|
||||
#if !defined(RUBY) && defined(RUBY_EXPORT)
|
||||
# define RUBY
|
||||
#endif
|
||||
#ifdef RUBY
|
||||
# ifndef RUBY_DEFINES_H
|
||||
# include "ruby/ruby.h"
|
||||
# undef xmalloc
|
||||
# undef xrealloc
|
||||
# undef xcalloc
|
||||
# undef xfree
|
||||
# endif
|
||||
#else /* RUBY */
|
||||
# include "config.h"
|
||||
# if SIZEOF_LONG_LONG > 0
|
||||
# define LONG_LONG long long
|
||||
# endif
|
||||
#endif /* RUBY */
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
/* */
|
||||
/* escape other system UChar definition */
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
# undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
|
||||
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
|
||||
#undef USE_CAPTURE_HISTORY
|
||||
#ifdef RUBY
|
||||
# undef USE_CAPTURE_HISTORY
|
||||
#else
|
||||
# define USE_CAPTURE_HISTORY
|
||||
#endif
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_POSIX_API_REGION_OPTION /* needed for POSIX API support */
|
||||
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
|
||||
|
||||
/* multithread config */
|
||||
/* #define USE_MULTI_THREAD_SYSTEM */
|
||||
/* #define USE_DEFAULT_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#if defined(USE_MULTI_THREAD_SYSTEM) \
|
||||
&& defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
extern CRITICAL_SECTION gOnigMutex;
|
||||
#define THREAD_SYSTEM_INIT InitializeCriticalSection(&gOnigMutex)
|
||||
#define THREAD_SYSTEM_END DeleteCriticalSection(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_START EnterCriticalSection(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_END LeaveCriticalSection(&gOnigMutex)
|
||||
#define THREAD_PASS Sleep(0)
|
||||
#else /* _WIN32 */
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
extern pthread_mutex_t gOnigMutex;
|
||||
#define THREAD_SYSTEM_INIT pthread_mutex_init(&gOnigMutex, NULL)
|
||||
#define THREAD_SYSTEM_END pthread_mutex_destroy(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_START pthread_mutex_lock(&gOnigMutex)
|
||||
#define THREAD_ATOMIC_END pthread_mutex_unlock(&gOnigMutex)
|
||||
#define THREAD_PASS sched_yield()
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#else /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#ifndef THREAD_SYSTEM_INIT
|
||||
#define THREAD_SYSTEM_INIT /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_SYSTEM_END
|
||||
#define THREAD_SYSTEM_END /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_ATOMIC_START
|
||||
#define THREAD_ATOMIC_START /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_ATOMIC_END
|
||||
#define THREAD_ATOMIC_END /* depend on thread system */
|
||||
#endif
|
||||
#ifndef THREAD_PASS
|
||||
#define THREAD_PASS /* depend on thread system */
|
||||
#endif
|
||||
|
||||
#endif /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#ifndef xmalloc
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
#define xcalloc calloc
|
||||
#define xfree free
|
||||
# define xmalloc malloc
|
||||
# define xrealloc realloc
|
||||
# define xcalloc calloc
|
||||
# define xfree free
|
||||
#endif
|
||||
|
||||
#ifdef RUBY
|
||||
|
||||
#define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints()
|
||||
#define onig_st_init_table st_init_table
|
||||
#define onig_st_init_table_with_size st_init_table_with_size
|
||||
#define onig_st_init_numtable st_init_numtable
|
||||
#define onig_st_init_numtable_with_size st_init_numtable_with_size
|
||||
#define onig_st_init_strtable st_init_strtable
|
||||
#define onig_st_init_strtable_with_size st_init_strtable_with_size
|
||||
#define onig_st_delete st_delete
|
||||
#define onig_st_delete_safe st_delete_safe
|
||||
#define onig_st_insert st_insert
|
||||
#define onig_st_lookup st_lookup
|
||||
#define onig_st_foreach st_foreach
|
||||
#define onig_st_add_direct st_add_direct
|
||||
#define onig_st_free_table st_free_table
|
||||
#define onig_st_cleanup_safe st_cleanup_safe
|
||||
#define onig_st_copy st_copy
|
||||
#define onig_st_nothing_key_clone st_nothing_key_clone
|
||||
#define onig_st_nothing_key_free st_nothing_key_free
|
||||
#define onig_st_is_member st_is_member
|
||||
# define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints()
|
||||
# define onig_st_init_table st_init_table
|
||||
# define onig_st_init_table_with_size st_init_table_with_size
|
||||
# define onig_st_init_numtable st_init_numtable
|
||||
# define onig_st_init_numtable_with_size st_init_numtable_with_size
|
||||
# define onig_st_init_strtable st_init_strtable
|
||||
# define onig_st_init_strtable_with_size st_init_strtable_with_size
|
||||
# define onig_st_delete st_delete
|
||||
# define onig_st_delete_safe st_delete_safe
|
||||
# define onig_st_insert st_insert
|
||||
# define onig_st_lookup st_lookup
|
||||
# define onig_st_foreach st_foreach
|
||||
# define onig_st_add_direct st_add_direct
|
||||
# define onig_st_free_table st_free_table
|
||||
# define onig_st_cleanup_safe st_cleanup_safe
|
||||
# define onig_st_copy st_copy
|
||||
# define onig_st_nothing_key_clone st_nothing_key_clone
|
||||
# define onig_st_nothing_key_free st_nothing_key_free
|
||||
# define onig_st_is_member st_is_member
|
||||
|
||||
#define USE_UPPER_CASE_TABLE
|
||||
#else
|
||||
# define USE_UPPER_CASE_TABLE
|
||||
#else /* RUBY */
|
||||
|
||||
#define CHECK_INTERRUPT_IN_MATCH_AT
|
||||
# define CHECK_INTERRUPT_IN_MATCH_AT
|
||||
|
||||
#define st_init_table onig_st_init_table
|
||||
#define st_init_table_with_size onig_st_init_table_with_size
|
||||
#define st_init_numtable onig_st_init_numtable
|
||||
#define st_init_numtable_with_size onig_st_init_numtable_with_size
|
||||
#define st_init_strtable onig_st_init_strtable
|
||||
#define st_init_strtable_with_size onig_st_init_strtable_with_size
|
||||
#define st_delete onig_st_delete
|
||||
#define st_delete_safe onig_st_delete_safe
|
||||
#define st_insert onig_st_insert
|
||||
#define st_lookup onig_st_lookup
|
||||
#define st_foreach onig_st_foreach
|
||||
#define st_add_direct onig_st_add_direct
|
||||
#define st_free_table onig_st_free_table
|
||||
#define st_cleanup_safe onig_st_cleanup_safe
|
||||
#define st_copy onig_st_copy
|
||||
#define st_nothing_key_clone onig_st_nothing_key_clone
|
||||
#define st_nothing_key_free onig_st_nothing_key_free
|
||||
# define st_init_table onig_st_init_table
|
||||
# define st_init_table_with_size onig_st_init_table_with_size
|
||||
# define st_init_numtable onig_st_init_numtable
|
||||
# define st_init_numtable_with_size onig_st_init_numtable_with_size
|
||||
# define st_init_strtable onig_st_init_strtable
|
||||
# define st_init_strtable_with_size onig_st_init_strtable_with_size
|
||||
# define st_delete onig_st_delete
|
||||
# define st_delete_safe onig_st_delete_safe
|
||||
# define st_insert onig_st_insert
|
||||
# define st_lookup onig_st_lookup
|
||||
# define st_foreach onig_st_foreach
|
||||
# define st_add_direct onig_st_add_direct
|
||||
# define st_free_table onig_st_free_table
|
||||
# define st_cleanup_safe onig_st_cleanup_safe
|
||||
# define st_copy onig_st_copy
|
||||
# define st_nothing_key_clone onig_st_nothing_key_clone
|
||||
# define st_nothing_key_free onig_st_nothing_key_free
|
||||
/* */
|
||||
#define onig_st_is_member st_is_member
|
||||
# define onig_st_is_member st_is_member
|
||||
|
||||
#endif
|
||||
#endif /* RUBY */
|
||||
|
||||
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
|
||||
#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
|
||||
|
||||
#define THREAD_PASS_LIMIT_COUNT 8
|
||||
#define xmemset memset
|
||||
#define xmemcpy memcpy
|
||||
#define xmemmove memmove
|
||||
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#define xalloca _alloca
|
||||
#define xvsnprintf _vsnprintf
|
||||
# define xalloca _alloca
|
||||
# define xvsnprintf(buf,size,fmt,args) _vsnprintf_s(buf,size,_TRUNCATE,fmt,args)
|
||||
# define xsnprintf sprintf_s
|
||||
# define xstrcat(dest,src,size) strcat_s(dest,size,src)
|
||||
#else
|
||||
#define xalloca alloca
|
||||
#define xvsnprintf vsnprintf
|
||||
# define xalloca alloca
|
||||
# define xvsnprintf vsnprintf
|
||||
# define xsnprintf snprintf
|
||||
# define xstrcat(dest,src,size) strcat(dest,src)
|
||||
#endif
|
||||
|
||||
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
|
||||
# define _CRTDBG_MAP_ALLOC
|
||||
# include <malloc.h>
|
||||
# include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
|
||||
#define ONIG_STATE_INC(reg) (reg)->state++
|
||||
#define ONIG_STATE_DEC(reg) (reg)->state--
|
||||
|
||||
#define ONIG_STATE_INC_THREAD(reg) do {\
|
||||
THREAD_ATOMIC_START;\
|
||||
(reg)->state++;\
|
||||
THREAD_ATOMIC_END;\
|
||||
} while(0)
|
||||
#define ONIG_STATE_DEC_THREAD(reg) do {\
|
||||
THREAD_ATOMIC_START;\
|
||||
(reg)->state--;\
|
||||
THREAD_ATOMIC_END;\
|
||||
} while(0)
|
||||
#else
|
||||
#define ONIG_STATE_INC(reg) /* Nothing */
|
||||
#define ONIG_STATE_DEC(reg) /* Nothing */
|
||||
#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
|
||||
#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
|
||||
#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__))
|
||||
#include <alloca.h>
|
||||
# include <alloca.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
# include <string.h>
|
||||
#else
|
||||
# include <strings.h>
|
||||
#endif
|
||||
#include <string.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
# include <sys/types.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
|
@ -290,12 +241,10 @@ extern pthread_mutex_t gOnigMutex;
|
|||
# include <inttypes.h>
|
||||
#endif
|
||||
|
||||
#ifdef STDC_HEADERS
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h> /* for alloca() */
|
||||
# include <malloc.h> /* for alloca() */
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
@ -303,28 +252,32 @@ extern pthread_mutex_t gOnigMutex;
|
|||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1300)
|
||||
#ifndef _INTPTR_T_DEFINED
|
||||
#define _INTPTR_T_DEFINED
|
||||
# if defined(_MSC_VER) && (_MSC_VER < 1300)
|
||||
# ifndef _INTPTR_T_DEFINED
|
||||
# define _INTPTR_T_DEFINED
|
||||
typedef int intptr_t;
|
||||
#endif
|
||||
#ifndef _UINTPTR_T_DEFINED
|
||||
#define _UINTPTR_T_DEFINED
|
||||
# endif
|
||||
# ifndef _UINTPTR_T_DEFINED
|
||||
# define _UINTPTR_T_DEFINED
|
||||
typedef unsigned int uintptr_t;
|
||||
#endif
|
||||
#endif
|
||||
# endif
|
||||
# endif
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#ifndef PRIdPTR
|
||||
#ifdef _WIN64
|
||||
#define PRIdPTR "I64d"
|
||||
#define PRIuPTR "I64u"
|
||||
#define PRIxPTR "I64x"
|
||||
#else
|
||||
#define PRIdPTR "ld"
|
||||
#define PRIuPTR "lu"
|
||||
#define PRIxPTR "lx"
|
||||
# ifdef _WIN64
|
||||
# define PRIdPTR "I64d"
|
||||
# define PRIuPTR "I64u"
|
||||
# define PRIxPTR "I64x"
|
||||
# else
|
||||
# define PRIdPTR "ld"
|
||||
# define PRIuPTR "lu"
|
||||
# define PRIxPTR "lx"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef PRIdPTRDIFF
|
||||
# define PRIdPTRDIFF PRIdPTR
|
||||
#endif
|
||||
|
||||
#include "regenc.h"
|
||||
|
@ -332,10 +285,10 @@ typedef unsigned int uintptr_t;
|
|||
RUBY_SYMBOL_EXPORT_BEGIN
|
||||
|
||||
#ifdef MIN
|
||||
#undef MIN
|
||||
# undef MIN
|
||||
#endif
|
||||
#ifdef MAX
|
||||
#undef MAX
|
||||
# undef MAX
|
||||
#endif
|
||||
#define MIN(a,b) (((a)>(b))?(b):(a))
|
||||
#define MAX(a,b) (((a)<(b))?(b):(a))
|
||||
|
@ -350,28 +303,28 @@ RUBY_SYMBOL_EXPORT_BEGIN
|
|||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
|
||||
#define PLATFORM_GET_INC(val,p,type) do{\
|
||||
# define PLATFORM_GET_INC(val,p,type) do{\
|
||||
val = *(type* )p;\
|
||||
(p) += sizeof(type);\
|
||||
} while(0)
|
||||
|
||||
#else
|
||||
|
||||
#define PLATFORM_GET_INC(val,p,type) do{\
|
||||
# define PLATFORM_GET_INC(val,p,type) do{\
|
||||
xmemcpy(&val, (p), sizeof(type));\
|
||||
(p) += sizeof(type);\
|
||||
} while(0)
|
||||
|
||||
/* sizeof(OnigCodePoint) */
|
||||
#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
|
||||
# define WORD_ALIGNMENT_SIZE SIZEOF_LONG
|
||||
|
||||
#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
|
||||
# define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
|
||||
(pad_size) = WORD_ALIGNMENT_SIZE \
|
||||
- ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
|
||||
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
|
||||
} while (0)
|
||||
|
||||
#define ALIGNMENT_RIGHT(addr) do {\
|
||||
# define ALIGNMENT_RIGHT(addr) do {\
|
||||
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
|
||||
(addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
|
||||
} while (0)
|
||||
|
@ -435,7 +388,6 @@ typedef unsigned int BitStatusType;
|
|||
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
|
||||
#define IS_NOTBOS(option) ((option) & ONIG_OPTION_NOTBOS)
|
||||
#define IS_NOTEOS(option) ((option) & ONIG_OPTION_NOTEOS)
|
||||
#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
|
||||
#define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE)
|
||||
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
|
||||
#define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE)
|
||||
|
@ -618,7 +570,6 @@ enum OpCode {
|
|||
OP_CCLASS_NOT,
|
||||
OP_CCLASS_MB_NOT,
|
||||
OP_CCLASS_MIX_NOT,
|
||||
OP_CCLASS_NODE, /* pointer to CClassNode node */
|
||||
|
||||
OP_ANYCHAR, /* "." */
|
||||
OP_ANYCHAR_ML, /* "." multi-line */
|
||||
|
@ -781,10 +732,10 @@ typedef void* PointerType;
|
|||
#define SIZE_OP_CONDITION (SIZE_OPCODE + SIZE_MEMNUM + SIZE_RELADDR)
|
||||
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
# define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
# define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
# define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
|
||||
# define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
|
||||
#endif
|
||||
|
||||
#define MC_ESC(syn) (syn)->meta_char_table.esc
|
||||
|
@ -832,13 +783,10 @@ typedef void* PointerType;
|
|||
|
||||
/* cclass node */
|
||||
#define FLAG_NCCLASS_NOT (1<<0)
|
||||
#define FLAG_NCCLASS_SHARE (1<<1)
|
||||
|
||||
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
|
||||
#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
|
||||
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
|
||||
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
|
||||
#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
|
||||
|
||||
typedef struct {
|
||||
int type;
|
||||
|
@ -936,60 +884,44 @@ typedef struct {
|
|||
extern OnigOpInfoType OnigOpInfo[];
|
||||
|
||||
|
||||
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc));
|
||||
extern void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc);
|
||||
|
||||
#ifdef ONIG_DEBUG_STATISTICS
|
||||
extern void onig_statistics_init P_((void));
|
||||
extern void onig_print_statistics P_((FILE* f));
|
||||
#endif
|
||||
# ifdef ONIG_DEBUG_STATISTICS
|
||||
extern void onig_statistics_init(void);
|
||||
extern void onig_print_statistics(FILE* f);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
extern UChar* onig_error_code_to_format P_((OnigPosition code));
|
||||
extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
|
||||
extern int onig_bbuf_init P_((BBuf* buf, OnigDistance size));
|
||||
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline));
|
||||
extern void onig_chain_reduce P_((regex_t* reg));
|
||||
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
|
||||
extern void onig_transfer P_((regex_t* to, regex_t* from));
|
||||
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
|
||||
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
|
||||
extern UChar* onig_error_code_to_format(OnigPosition code);
|
||||
extern void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
|
||||
extern void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...);
|
||||
extern int onig_bbuf_init(BBuf* buf, OnigDistance size);
|
||||
extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo);
|
||||
#ifdef RUBY
|
||||
extern int onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline);
|
||||
#endif
|
||||
extern void onig_transfer(regex_t* to, regex_t* from);
|
||||
extern int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc);
|
||||
extern int onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc);
|
||||
|
||||
/* strend hash */
|
||||
typedef void hash_table_type;
|
||||
#ifdef RUBY
|
||||
#include "ruby/st.h"
|
||||
typedef st_data_t hash_data_type;
|
||||
# include "ruby/st.h"
|
||||
#else
|
||||
#include "st.h"
|
||||
typedef uintptr_t hash_data_type;
|
||||
# include "st.h"
|
||||
#endif
|
||||
typedef st_data_t hash_data_type;
|
||||
|
||||
extern hash_table_type* onig_st_init_strend_table_with_size P_((st_index_t size));
|
||||
extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
|
||||
extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
|
||||
extern hash_table_type* onig_st_init_strend_table_with_size(st_index_t size);
|
||||
extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value);
|
||||
extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value);
|
||||
|
||||
/* encoding property management */
|
||||
#define PROPERTY_LIST_ADD_PROP(Name, CR) \
|
||||
r = onigenc_property_list_add_property((UChar* )Name, CR,\
|
||||
&PropertyNameTable, &PropertyList, &PropertyListNum,\
|
||||
&PropertyListSize);\
|
||||
if (r != 0) goto end
|
||||
|
||||
#define PROPERTY_LIST_INIT_CHECK \
|
||||
if (PropertyInited == 0) {\
|
||||
int r = onigenc_property_list_init(init_property_list);\
|
||||
if (r != 0) return r;\
|
||||
}
|
||||
|
||||
extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
|
||||
|
||||
typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
|
||||
|
||||
extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
|
||||
|
||||
extern size_t onig_memsize P_((const regex_t *reg));
|
||||
extern size_t onig_region_memsize P_((const struct re_registers *regs));
|
||||
#ifdef RUBY
|
||||
extern size_t onig_memsize(const regex_t *reg);
|
||||
extern size_t onig_region_memsize(const struct re_registers *regs);
|
||||
#endif
|
||||
|
||||
RUBY_SYMBOL_EXPORT_END
|
||||
|
||||
#endif /* ONIGURUMA_REGINT_H */
|
||||
#endif /* ONIGMO_REGINT_H */
|
||||
|
|
1095
regparse.c
1095
regparse.c
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
56
regparse.h
56
regparse.h
|
@ -1,11 +1,11 @@
|
|||
#ifndef ONIGURUMA_REGPARSE_H
|
||||
#define ONIGURUMA_REGPARSE_H
|
||||
#ifndef ONIGMO_REGPARSE_H
|
||||
#define ONIGMO_REGPARSE_H
|
||||
/**********************************************************************
|
||||
regparse.h - Onigmo (Oniguruma-mod) (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -317,9 +317,12 @@ typedef struct {
|
|||
int curr_max_regnum;
|
||||
int has_recursion;
|
||||
#endif
|
||||
unsigned int parse_depth;
|
||||
int warnings_flag;
|
||||
#ifdef RUBY
|
||||
const char* sourcefile;
|
||||
int sourceline;
|
||||
#endif
|
||||
} ScanEnv;
|
||||
|
||||
|
||||
|
@ -332,36 +335,35 @@ typedef struct {
|
|||
int new_val;
|
||||
} GroupNumRemap;
|
||||
|
||||
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
|
||||
extern int onig_renumber_name_table(regex_t* reg, GroupNumRemap* map);
|
||||
#endif
|
||||
|
||||
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
|
||||
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
|
||||
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
||||
extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
|
||||
extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
|
||||
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
|
||||
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
extern Node* onig_node_new_enclose P_((int type));
|
||||
extern Node* onig_node_new_anchor P_((int type));
|
||||
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
|
||||
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
||||
extern Node* onig_node_list_add P_((Node* list, Node* x));
|
||||
extern Node* onig_node_new_alt P_((Node* left, Node* right));
|
||||
extern void onig_node_str_clear P_((Node* node));
|
||||
extern int onig_free_node_list P_((void));
|
||||
extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
||||
extern int onig_free_shared_cclass_table P_((void));
|
||||
extern int onig_strncmp(const UChar* s1, const UChar* s2, int n);
|
||||
extern void onig_strcpy(UChar* dest, const UChar* src, const UChar* end);
|
||||
extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode, UChar* arg, UChar* arg_end);
|
||||
extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc);
|
||||
extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode);
|
||||
extern void onig_node_conv_to_str_node(Node* node, int raw);
|
||||
extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end);
|
||||
extern int onig_node_str_set(Node* node, const UChar* s, const UChar* end);
|
||||
extern void onig_node_free(Node* node);
|
||||
extern Node* onig_node_new_enclose(int type);
|
||||
extern Node* onig_node_new_anchor(int type);
|
||||
extern Node* onig_node_new_str(const UChar* s, const UChar* end);
|
||||
extern Node* onig_node_new_list(Node* left, Node* right);
|
||||
extern Node* onig_node_list_add(Node* list, Node* x);
|
||||
extern Node* onig_node_new_alt(Node* left, Node* right);
|
||||
extern void onig_node_str_clear(Node* node);
|
||||
extern int onig_names_free(regex_t* reg);
|
||||
extern int onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env);
|
||||
extern int onig_free_shared_cclass_table(void);
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
#ifdef USE_NAMED_GROUP
|
||||
# ifdef USE_NAMED_GROUP
|
||||
extern int onig_print_names(FILE*, regex_t*);
|
||||
#endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
RUBY_SYMBOL_EXPORT_END
|
||||
|
||||
#endif /* ONIGURUMA_REGPARSE_H */
|
||||
#endif /* ONIGMO_REGPARSE_H */
|
||||
|
|
15
regsyntax.c
15
regsyntax.c
|
@ -3,7 +3,7 @@
|
|||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
|
||||
* Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -229,7 +229,7 @@ const OnigSyntaxType OnigSyntaxPerl = {
|
|||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
ONIG_SYN_OP_ESC_O_BRACE_OCTAL | ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
|
@ -248,7 +248,8 @@ const OnigSyntaxType OnigSyntaxPerl = {
|
|||
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF )
|
||||
, ( SYN_GNU_REGEX_BV |
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL )
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL |
|
||||
ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP )
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_CAPTURE_GROUP )
|
||||
,
|
||||
{
|
||||
|
@ -332,25 +333,25 @@ onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
|
|||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_op(OnigSyntaxType* syntax)
|
||||
onig_get_syntax_op(const OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->op;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_op2(OnigSyntaxType* syntax)
|
||||
onig_get_syntax_op2(const OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->op2;
|
||||
}
|
||||
|
||||
extern unsigned int
|
||||
onig_get_syntax_behavior(OnigSyntaxType* syntax)
|
||||
onig_get_syntax_behavior(const OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->behavior;
|
||||
}
|
||||
|
||||
extern OnigOptionType
|
||||
onig_get_syntax_options(OnigSyntaxType* syntax)
|
||||
onig_get_syntax_options(const OnigSyntaxType* syntax)
|
||||
{
|
||||
return syntax->options;
|
||||
}
|
||||
|
|
|
@ -41,7 +41,8 @@ encdirs.each do |encdir|
|
|||
open(File.join(encdir,fn)) do |f|
|
||||
name = nil
|
||||
f.each_line do |line|
|
||||
if (/^OnigEncodingDefine/ =~ line)..(/"(.*?)"/ =~ line)
|
||||
if (/^#ifndef RUBY/ =~ line)..(/^#endif/ =~ line)
|
||||
elsif (/^OnigEncodingDefine/ =~ line)..(/"(.*?)"/ =~ line)
|
||||
if $1
|
||||
if name
|
||||
lines << %[ENC_SET_BASE("#$1", "#{name}");]
|
||||
|
|
|
@ -20,7 +20,7 @@ end
|
|||
|
||||
$unicode_version = File.basename(ARGV[0])[/\A[.\d]+\z/]
|
||||
|
||||
POSIX_NAMES = %w[NEWLINE Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word Alnum ASCII]
|
||||
POSIX_NAMES = %w[NEWLINE Alpha Blank Cntrl Digit Graph Lower Print XPosixPunct Space Upper XDigit Word Alnum ASCII Punct]
|
||||
|
||||
def pair_codepoints(codepoints)
|
||||
|
||||
|
@ -115,6 +115,7 @@ def define_posix_props(data)
|
|||
data['Upper'] = data['Uppercase']
|
||||
data['Lower'] = data['Lowercase']
|
||||
data['Punct'] = data['Punctuation']
|
||||
data['XPosixPunct'] = data['Punctuation'] + [0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e]
|
||||
data['Digit'] = data['Decimal_Number']
|
||||
data['XDigit'] = (0x0030..0x0039).to_a + (0x0041..0x0046).to_a +
|
||||
(0x0061..0x0066).to_a
|
||||
|
@ -260,7 +261,11 @@ $const_cache = {}
|
|||
# given property, group of paired codepoints, and a human-friendly name for
|
||||
# the group
|
||||
def make_const(prop, data, name)
|
||||
puts "\n/* '#{prop}': #{name} */"
|
||||
if name.empty?
|
||||
puts "\n/* '#{prop}' */"
|
||||
else
|
||||
puts "\n/* '#{prop}': #{name} */"
|
||||
end
|
||||
if origprop = $const_cache.key(data)
|
||||
puts "#define CR_#{prop} CR_#{origprop}"
|
||||
else
|
||||
|
@ -387,7 +392,13 @@ props.concat parse_scripts(data, categories)
|
|||
aliases = parse_aliases(data)
|
||||
define_posix_props(data)
|
||||
POSIX_NAMES.each do |name|
|
||||
make_const(name, data[name], "[[:#{name}:]]")
|
||||
if name == 'XPosixPunct'
|
||||
make_const(name, data[name], "[[:Punct:]]")
|
||||
elsif name == 'Punct'
|
||||
make_const(name, data[name], "")
|
||||
else
|
||||
make_const(name, data[name], "[[:#{name}:]]")
|
||||
end
|
||||
end
|
||||
output.ifdef :USE_UNICODE_PROPERTIES
|
||||
props.each do |name|
|
||||
|
|
Загрузка…
Ссылка в новой задаче