зеркало из https://github.com/github/ruby.git
* include/ruby/oniguruma.h: Additional flag for characters that are titlecase.
* enc/unicode/case-folding.rb, casefold.h: Using above flag in data.
* enc/unicode.c: Marking capitalized character as unmodified if it is already titlecase.
* test/ruby/enc/test_case_mapping.rb: Tests for above functionality.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54229 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
Родитель
bf99f85941
Коммит
2f455ceca4
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
|||
Tue Mar 22 21:08:30 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
|
||||
|
||||
* include/ruby/oniguruma.h: Additional flag for characters that are titlecase.
|
||||
|
||||
* enc/unicode/case-folding.rb, casefold.h: Using above flag in data.
|
||||
|
||||
* enc/unicode.c: Marking capitalized character as unmodified if it is
|
||||
already titlecase.
|
||||
|
||||
* test/ruby/enc/test_case_mapping.rb: Tests for above functionality.
|
||||
|
||||
Tue Mar 22 14:18:59 2016 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||
|
||||
* parse.y (lambda_body, parser_yylex): warn mismatched indentation
|
||||
|
|
|
@ -158,6 +158,7 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
|||
#define ST ONIGENC_CASE_TITLECASE
|
||||
#define SU ONIGENC_CASE_UP_SPECIAL
|
||||
#define SL ONIGENC_CASE_DOWN_SPECIAL
|
||||
#define IT ONIGENC_CASE_IS_TITLECASE
|
||||
#define I(n) OnigSpecialIndexEncode(n)
|
||||
#define L(n) SpecialsLengthEncode(n)
|
||||
|
||||
|
@ -169,6 +170,7 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
|
|||
#undef ST
|
||||
#undef SU
|
||||
#undef SL
|
||||
#undef IT
|
||||
#undef I
|
||||
#undef L
|
||||
|
||||
|
@ -718,8 +720,11 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
|
|||
OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
|
||||
|
||||
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) {
|
||||
if (flags&ONIGENC_CASE_TITLECASE)
|
||||
if (flags&ONIGENC_CASE_TITLECASE) {
|
||||
if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE)
|
||||
flags ^= ONIGENC_CASE_MODIFIED;
|
||||
goto SpecialsCopy;
|
||||
}
|
||||
else
|
||||
SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
|
||||
}
|
||||
|
|
|
@ -190,7 +190,7 @@ class CaseFolding
|
|||
end
|
||||
|
||||
class MapItem
|
||||
attr_accessor :upper, :lower, :title
|
||||
attr_accessor :upper, :lower, :title, :code
|
||||
|
||||
def initialize(code, upper, lower, title)
|
||||
@code = code
|
||||
|
@ -242,15 +242,19 @@ class CaseMapping
|
|||
specials = []
|
||||
unless item.upper == item.title
|
||||
specials << item.title
|
||||
flags += "|ST"
|
||||
flags += '|ST'
|
||||
if item.code == item.title
|
||||
flags += '|IT'
|
||||
end
|
||||
|
||||
end
|
||||
unless item.lower.nil? or item.lower==from or item.lower==to
|
||||
specials << item.lower
|
||||
flags += "|SL"
|
||||
flags += '|SL'
|
||||
end
|
||||
unless item.upper.nil? or item.upper==from or item.upper==to
|
||||
specials << item.upper
|
||||
flags += "|SU"
|
||||
flags += '|SU'
|
||||
end
|
||||
if specials.first
|
||||
flags += "|I(#{@specials_length})"
|
||||
|
|
|
@ -161,11 +161,11 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x01b8, {1|F|D, {0x01b9}}},
|
||||
{0x01bc, {1|F|D, {0x01bd}}},
|
||||
{0x01c4, {1|F|D|ST|I(8), {0x01c6}}},
|
||||
{0x01c5, {1|F|D|ST|SU|I(9), {0x01c6}}},
|
||||
{0x01c5, {1|F|D|ST|IT|SU|I(9), {0x01c6}}},
|
||||
{0x01c7, {1|F|D|ST|I(11), {0x01c9}}},
|
||||
{0x01c8, {1|F|D|ST|SU|I(12), {0x01c9}}},
|
||||
{0x01c8, {1|F|D|ST|IT|SU|I(12), {0x01c9}}},
|
||||
{0x01ca, {1|F|D|ST|I(14), {0x01cc}}},
|
||||
{0x01cb, {1|F|D|ST|SU|I(15), {0x01cc}}},
|
||||
{0x01cb, {1|F|D|ST|IT|SU|I(15), {0x01cc}}},
|
||||
{0x01cd, {1|F|D, {0x01ce}}},
|
||||
{0x01cf, {1|F|D, {0x01d0}}},
|
||||
{0x01d1, {1|F|D, {0x01d2}}},
|
||||
|
@ -185,7 +185,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x01ee, {1|F|D, {0x01ef}}},
|
||||
{0x01f0, {2|F|SU|I(17), {0x006a, 0x030c}}},
|
||||
{0x01f1, {1|F|D|ST|I(19), {0x01f3}}},
|
||||
{0x01f2, {1|F|D|ST|SU|I(20), {0x01f3}}},
|
||||
{0x01f2, {1|F|D|ST|IT|SU|I(20), {0x01f3}}},
|
||||
{0x01f4, {1|F|D, {0x01f5}}},
|
||||
{0x01f6, {1|F|D, {0x0195}}},
|
||||
{0x01f7, {1|F|D, {0x01bf}}},
|
||||
|
@ -723,14 +723,14 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x1f85, {2|F|ST|SU|I(79), {0x1f05, 0x03b9}}},
|
||||
{0x1f86, {2|F|ST|SU|I(82), {0x1f06, 0x03b9}}},
|
||||
{0x1f87, {2|F|ST|SU|I(85), {0x1f07, 0x03b9}}},
|
||||
{0x1f88, {2|F|ST|SL|SU|I(88), {0x1f00, 0x03b9}}},
|
||||
{0x1f89, {2|F|ST|SL|SU|I(92), {0x1f01, 0x03b9}}},
|
||||
{0x1f8a, {2|F|ST|SL|SU|I(96), {0x1f02, 0x03b9}}},
|
||||
{0x1f8b, {2|F|ST|SL|SU|I(100), {0x1f03, 0x03b9}}},
|
||||
{0x1f8c, {2|F|ST|SL|SU|I(104), {0x1f04, 0x03b9}}},
|
||||
{0x1f8d, {2|F|ST|SL|SU|I(108), {0x1f05, 0x03b9}}},
|
||||
{0x1f8e, {2|F|ST|SL|SU|I(112), {0x1f06, 0x03b9}}},
|
||||
{0x1f8f, {2|F|ST|SL|SU|I(116), {0x1f07, 0x03b9}}},
|
||||
{0x1f88, {2|F|ST|IT|SL|SU|I(88), {0x1f00, 0x03b9}}},
|
||||
{0x1f89, {2|F|ST|IT|SL|SU|I(92), {0x1f01, 0x03b9}}},
|
||||
{0x1f8a, {2|F|ST|IT|SL|SU|I(96), {0x1f02, 0x03b9}}},
|
||||
{0x1f8b, {2|F|ST|IT|SL|SU|I(100), {0x1f03, 0x03b9}}},
|
||||
{0x1f8c, {2|F|ST|IT|SL|SU|I(104), {0x1f04, 0x03b9}}},
|
||||
{0x1f8d, {2|F|ST|IT|SL|SU|I(108), {0x1f05, 0x03b9}}},
|
||||
{0x1f8e, {2|F|ST|IT|SL|SU|I(112), {0x1f06, 0x03b9}}},
|
||||
{0x1f8f, {2|F|ST|IT|SL|SU|I(116), {0x1f07, 0x03b9}}},
|
||||
{0x1f90, {2|F|ST|SU|I(120), {0x1f20, 0x03b9}}},
|
||||
{0x1f91, {2|F|ST|SU|I(123), {0x1f21, 0x03b9}}},
|
||||
{0x1f92, {2|F|ST|SU|I(126), {0x1f22, 0x03b9}}},
|
||||
|
@ -739,14 +739,14 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x1f95, {2|F|ST|SU|I(135), {0x1f25, 0x03b9}}},
|
||||
{0x1f96, {2|F|ST|SU|I(138), {0x1f26, 0x03b9}}},
|
||||
{0x1f97, {2|F|ST|SU|I(141), {0x1f27, 0x03b9}}},
|
||||
{0x1f98, {2|F|ST|SL|SU|I(144), {0x1f20, 0x03b9}}},
|
||||
{0x1f99, {2|F|ST|SL|SU|I(148), {0x1f21, 0x03b9}}},
|
||||
{0x1f9a, {2|F|ST|SL|SU|I(152), {0x1f22, 0x03b9}}},
|
||||
{0x1f9b, {2|F|ST|SL|SU|I(156), {0x1f23, 0x03b9}}},
|
||||
{0x1f9c, {2|F|ST|SL|SU|I(160), {0x1f24, 0x03b9}}},
|
||||
{0x1f9d, {2|F|ST|SL|SU|I(164), {0x1f25, 0x03b9}}},
|
||||
{0x1f9e, {2|F|ST|SL|SU|I(168), {0x1f26, 0x03b9}}},
|
||||
{0x1f9f, {2|F|ST|SL|SU|I(172), {0x1f27, 0x03b9}}},
|
||||
{0x1f98, {2|F|ST|IT|SL|SU|I(144), {0x1f20, 0x03b9}}},
|
||||
{0x1f99, {2|F|ST|IT|SL|SU|I(148), {0x1f21, 0x03b9}}},
|
||||
{0x1f9a, {2|F|ST|IT|SL|SU|I(152), {0x1f22, 0x03b9}}},
|
||||
{0x1f9b, {2|F|ST|IT|SL|SU|I(156), {0x1f23, 0x03b9}}},
|
||||
{0x1f9c, {2|F|ST|IT|SL|SU|I(160), {0x1f24, 0x03b9}}},
|
||||
{0x1f9d, {2|F|ST|IT|SL|SU|I(164), {0x1f25, 0x03b9}}},
|
||||
{0x1f9e, {2|F|ST|IT|SL|SU|I(168), {0x1f26, 0x03b9}}},
|
||||
{0x1f9f, {2|F|ST|IT|SL|SU|I(172), {0x1f27, 0x03b9}}},
|
||||
{0x1fa0, {2|F|ST|SU|I(176), {0x1f60, 0x03b9}}},
|
||||
{0x1fa1, {2|F|ST|SU|I(179), {0x1f61, 0x03b9}}},
|
||||
{0x1fa2, {2|F|ST|SU|I(182), {0x1f62, 0x03b9}}},
|
||||
|
@ -755,14 +755,14 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x1fa5, {2|F|ST|SU|I(191), {0x1f65, 0x03b9}}},
|
||||
{0x1fa6, {2|F|ST|SU|I(194), {0x1f66, 0x03b9}}},
|
||||
{0x1fa7, {2|F|ST|SU|I(197), {0x1f67, 0x03b9}}},
|
||||
{0x1fa8, {2|F|ST|SL|SU|I(200), {0x1f60, 0x03b9}}},
|
||||
{0x1fa9, {2|F|ST|SL|SU|I(204), {0x1f61, 0x03b9}}},
|
||||
{0x1faa, {2|F|ST|SL|SU|I(208), {0x1f62, 0x03b9}}},
|
||||
{0x1fab, {2|F|ST|SL|SU|I(212), {0x1f63, 0x03b9}}},
|
||||
{0x1fac, {2|F|ST|SL|SU|I(216), {0x1f64, 0x03b9}}},
|
||||
{0x1fad, {2|F|ST|SL|SU|I(220), {0x1f65, 0x03b9}}},
|
||||
{0x1fae, {2|F|ST|SL|SU|I(224), {0x1f66, 0x03b9}}},
|
||||
{0x1faf, {2|F|ST|SL|SU|I(228), {0x1f67, 0x03b9}}},
|
||||
{0x1fa8, {2|F|ST|IT|SL|SU|I(200), {0x1f60, 0x03b9}}},
|
||||
{0x1fa9, {2|F|ST|IT|SL|SU|I(204), {0x1f61, 0x03b9}}},
|
||||
{0x1faa, {2|F|ST|IT|SL|SU|I(208), {0x1f62, 0x03b9}}},
|
||||
{0x1fab, {2|F|ST|IT|SL|SU|I(212), {0x1f63, 0x03b9}}},
|
||||
{0x1fac, {2|F|ST|IT|SL|SU|I(216), {0x1f64, 0x03b9}}},
|
||||
{0x1fad, {2|F|ST|IT|SL|SU|I(220), {0x1f65, 0x03b9}}},
|
||||
{0x1fae, {2|F|ST|IT|SL|SU|I(224), {0x1f66, 0x03b9}}},
|
||||
{0x1faf, {2|F|ST|IT|SL|SU|I(228), {0x1f67, 0x03b9}}},
|
||||
{0x1fb2, {2|F|ST|SU|I(232), {0x1f70, 0x03b9}}},
|
||||
{0x1fb3, {2|F|ST|SU|I(236), {0x03b1, 0x03b9}}},
|
||||
{0x1fb4, {2|F|ST|SU|I(239), {0x03ac, 0x03b9}}},
|
||||
|
@ -772,7 +772,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x1fb9, {1|F|D, {0x1fb1}}},
|
||||
{0x1fba, {1|F|D, {0x1f70}}},
|
||||
{0x1fbb, {1|F|D, {0x1f71}}},
|
||||
{0x1fbc, {2|F|ST|SL|SU|I(251), {0x03b1, 0x03b9}}},
|
||||
{0x1fbc, {2|F|ST|IT|SL|SU|I(251), {0x03b1, 0x03b9}}},
|
||||
{0x1fbe, {1|F|SU|I(255), {0x03b9}}},
|
||||
{0x1fc2, {2|F|ST|SU|I(256), {0x1f74, 0x03b9}}},
|
||||
{0x1fc3, {2|F|ST|SU|I(260), {0x03b7, 0x03b9}}},
|
||||
|
@ -783,7 +783,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x1fc9, {1|F|D, {0x1f73}}},
|
||||
{0x1fca, {1|F|D, {0x1f74}}},
|
||||
{0x1fcb, {1|F|D, {0x1f75}}},
|
||||
{0x1fcc, {2|F|ST|SL|SU|I(275), {0x03b7, 0x03b9}}},
|
||||
{0x1fcc, {2|F|ST|IT|SL|SU|I(275), {0x03b7, 0x03b9}}},
|
||||
{0x1fd2, {3|F|SU|I(279), {0x03b9, 0x0308, 0x0300}}},
|
||||
{0x1fd3, {3|F|SU|I(282), {0x03b9, 0x0308, 0x0301}}},
|
||||
{0x1fd6, {2|F|SU|I(285), {0x03b9, 0x0342}}},
|
||||
|
@ -811,7 +811,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
|
|||
{0x1ff9, {1|F|D, {0x1f79}}},
|
||||
{0x1ffa, {1|F|D, {0x1f7c}}},
|
||||
{0x1ffb, {1|F|D, {0x1f7d}}},
|
||||
{0x1ffc, {2|F|ST|SL|SU|I(322), {0x03c9, 0x03b9}}},
|
||||
{0x1ffc, {2|F|ST|IT|SL|SU|I(322), {0x03c9, 0x03b9}}},
|
||||
{0x2126, {1|F|D, {0x03c9}}},
|
||||
{0x212a, {1|F|D, {0x006b}}},
|
||||
{0x212b, {1|F|D, {0x00e5}}},
|
||||
|
|
|
@ -152,6 +152,7 @@ ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
|
|||
#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
|
||||
#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) /* needs Lithuanian-specific mapping */
|
||||
#define ONIGENC_CASE_ASCII_ONLY (1<<22) /* only modify ASCII range */
|
||||
#define ONIGENC_CASE_IS_TITLECASE (1<<23) /* character itself is already titlecase */
|
||||
#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
|
||||
|
||||
#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
|
||||
|
|
|
@ -78,6 +78,12 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
|
|||
assert_equal 'ᎠᎡᎢᎣᎤᎥᎦᎧᎨᎩ', "\uab70\uab71\uab72\uab73\uab74\uab75\uab76\uab77\uab78\uab79".downcase(:fold)
|
||||
end
|
||||
|
||||
def test_titlecase
|
||||
check_downcase_properties 'ǳ ǆ ǉ ǌ', 'ǲ ǅ ǈ ǋ', :lithuanian
|
||||
check_upcase_properties 'Ǳ Ǆ Ǉ Ǌ', 'ǲ ǅ ǈ ǋ', :lithuanian
|
||||
check_capitalize_properties 'ǲ', 'Ǳ', :lithuanian
|
||||
end
|
||||
|
||||
def test_ascii_option
|
||||
check_downcase_properties 'yukihiro matsumoto (matz)', 'Yukihiro MATSUMOTO (MATZ)', :ascii
|
||||
check_upcase_properties 'YUKIHIRO MATSUMOTO (MATZ)', 'yukihiro matsumoto (matz)', :ascii
|
||||
|
|
Загрузка…
Ссылка в новой задаче