Bug 1405993 - Part 3: Update in-tree ICU to release 60.1. rs=Waldo

--HG--
extra : rebase_source : 468a4fc2e1fa7215b1224d998024a7121a05af62
This commit is contained in:
André Bargull 2017-11-01 11:56:15 -07:00
Родитель db02e51a31
Коммит 3296f48ec9
1202 изменённых файлов: 133567 добавлений и 36786 удалений

3
config/external/icu/common/sources.mozbuild поставляемый
Просмотреть файл

@ -4,6 +4,7 @@ SOURCES += [
'/intl/icu/source/common/bmpset.cpp', '/intl/icu/source/common/bmpset.cpp',
'/intl/icu/source/common/brkeng.cpp', '/intl/icu/source/common/brkeng.cpp',
'/intl/icu/source/common/brkiter.cpp', '/intl/icu/source/common/brkiter.cpp',
'/intl/icu/source/common/bytesinkutil.cpp',
'/intl/icu/source/common/bytestream.cpp', '/intl/icu/source/common/bytestream.cpp',
'/intl/icu/source/common/bytestrie.cpp', '/intl/icu/source/common/bytestrie.cpp',
'/intl/icu/source/common/bytestriebuilder.cpp', '/intl/icu/source/common/bytestriebuilder.cpp',
@ -47,6 +48,7 @@ SOURCES += [
'/intl/icu/source/common/punycode.cpp', '/intl/icu/source/common/punycode.cpp',
'/intl/icu/source/common/putil.cpp', '/intl/icu/source/common/putil.cpp',
'/intl/icu/source/common/rbbi.cpp', '/intl/icu/source/common/rbbi.cpp',
'/intl/icu/source/common/rbbi_cache.cpp',
'/intl/icu/source/common/rbbidata.cpp', '/intl/icu/source/common/rbbidata.cpp',
'/intl/icu/source/common/rbbinode.cpp', '/intl/icu/source/common/rbbinode.cpp',
'/intl/icu/source/common/rbbirb.cpp', '/intl/icu/source/common/rbbirb.cpp',
@ -225,6 +227,7 @@ EXPORTS.unicode += [
'/intl/icu/source/common/unicode/simpleformatter.h', '/intl/icu/source/common/unicode/simpleformatter.h',
'/intl/icu/source/common/unicode/std_string.h', '/intl/icu/source/common/unicode/std_string.h',
'/intl/icu/source/common/unicode/strenum.h', '/intl/icu/source/common/unicode/strenum.h',
'/intl/icu/source/common/unicode/stringoptions.h',
'/intl/icu/source/common/unicode/stringpiece.h', '/intl/icu/source/common/unicode/stringpiece.h',
'/intl/icu/source/common/unicode/stringtriebuilder.h', '/intl/icu/source/common/unicode/stringtriebuilder.h',
'/intl/icu/source/common/unicode/symtable.h', '/intl/icu/source/common/unicode/symtable.h',

Двоичные данные
config/external/icu/data/icudt59l.dat → config/external/icu/data/icudt60l.dat поставляемый

Двоичный файл не отображается.

20
config/external/icu/i18n/sources.mozbuild поставляемый
Просмотреть файл

@ -95,7 +95,25 @@ SOURCES += [
'/intl/icu/source/i18n/nfrule.cpp', '/intl/icu/source/i18n/nfrule.cpp',
'/intl/icu/source/i18n/nfsubs.cpp', '/intl/icu/source/i18n/nfsubs.cpp',
'/intl/icu/source/i18n/nortrans.cpp', '/intl/icu/source/i18n/nortrans.cpp',
'/intl/icu/source/i18n/nounit.cpp',
'/intl/icu/source/i18n/nultrans.cpp', '/intl/icu/source/i18n/nultrans.cpp',
'/intl/icu/source/i18n/number_affixutils.cpp',
'/intl/icu/source/i18n/number_compact.cpp',
'/intl/icu/source/i18n/number_decimalquantity.cpp',
'/intl/icu/source/i18n/number_decimfmtprops.cpp',
'/intl/icu/source/i18n/number_fluent.cpp',
'/intl/icu/source/i18n/number_formatimpl.cpp',
'/intl/icu/source/i18n/number_grouping.cpp',
'/intl/icu/source/i18n/number_integerwidth.cpp',
'/intl/icu/source/i18n/number_longnames.cpp',
'/intl/icu/source/i18n/number_modifiers.cpp',
'/intl/icu/source/i18n/number_notation.cpp',
'/intl/icu/source/i18n/number_padding.cpp',
'/intl/icu/source/i18n/number_patternmodifier.cpp',
'/intl/icu/source/i18n/number_patternstring.cpp',
'/intl/icu/source/i18n/number_rounding.cpp',
'/intl/icu/source/i18n/number_scientific.cpp',
'/intl/icu/source/i18n/number_stringbuilder.cpp',
'/intl/icu/source/i18n/numfmt.cpp', '/intl/icu/source/i18n/numfmt.cpp',
'/intl/icu/source/i18n/numsys.cpp', '/intl/icu/source/i18n/numsys.cpp',
'/intl/icu/source/i18n/olsontz.cpp', '/intl/icu/source/i18n/olsontz.cpp',
@ -227,6 +245,8 @@ EXPORTS.unicode += [
'/intl/icu/source/i18n/unicode/measunit.h', '/intl/icu/source/i18n/unicode/measunit.h',
'/intl/icu/source/i18n/unicode/measure.h', '/intl/icu/source/i18n/unicode/measure.h',
'/intl/icu/source/i18n/unicode/msgfmt.h', '/intl/icu/source/i18n/unicode/msgfmt.h',
'/intl/icu/source/i18n/unicode/nounit.h',
'/intl/icu/source/i18n/unicode/numberformatter.h',
'/intl/icu/source/i18n/unicode/numfmt.h', '/intl/icu/source/i18n/unicode/numfmt.h',
'/intl/icu/source/i18n/unicode/numsys.h', '/intl/icu/source/i18n/unicode/numsys.h',
'/intl/icu/source/i18n/unicode/plurfmt.h', '/intl/icu/source/i18n/unicode/plurfmt.h',

Просмотреть файл

@ -1,10 +1,10 @@
Path: icu4c Path: icu4c
URL: https://ssl.icu-project.org/repos/icu/tags/release-59-1/icu4c URL: https://ssl.icu-project.org/repos/icu/tags/release-60-1/icu4c
Relative URL: ^/tags/release-59-1/icu4c Relative URL: ^/tags/release-60-1/icu4c
Repository Root: https://ssl.icu-project.org/repos/icu Repository Root: https://ssl.icu-project.org/repos/icu
Repository UUID: 251d0590-4201-4cf1-90de-194747b24ca1 Repository UUID: 251d0590-4201-4cf1-90de-194747b24ca1
Node Kind: directory Node Kind: directory
Last Changed Author: yoshito Last Changed Author: yoshito
Last Changed Rev: 40047 Last Changed Rev: 40662
Last Changed Date: 2017-04-13 09:55:03 +0000 (Thu, 13 Apr 2017) Last Changed Date: 2017-10-31 15:14:15 +0000 (Tue, 31 Oct 2017)

Просмотреть файл

@ -194,7 +194,7 @@ EXPAND_ONLY_PREDEF = YES
SEARCH_INCLUDES = YES SEARCH_INCLUDES = YES
INCLUDE_PATH = INCLUDE_PATH =
INCLUDE_FILE_PATTERNS = INCLUDE_FILE_PATTERNS =
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW=\ "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE=override U_FINAL=final UCONFIG_ENABLE_PLUGINS=1 U_CHAR16_IS_TYPEDEF=0 U_CPLUSPLUS_VERSION=11 U_HAVE_RVALUE_REFERENCES=1 U_WCHAR_IS_UTF16 PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV_FPTR= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW=\ "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE= U_FINAL=final UCONFIG_ENABLE_PLUGINS=1 U_CHAR16_IS_TYPEDEF=0 U_CPLUSPLUS_VERSION=11 U_WCHAR_IS_UTF16 U_NOEXCEPT=
EXPAND_AS_DEFINED = EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES SKIP_FUNCTION_MACROS = YES
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------

Просмотреть файл

@ -34,9 +34,11 @@ subdir = .
@LAYOUTEX_TRUE@LAYOUTEX = layoutex @LAYOUTEX_TRUE@LAYOUTEX = layoutex
@ICUIO_TRUE@ICUIO = io @ICUIO_TRUE@ICUIO = io
@EXTRAS_TRUE@EXTRA = extra @EXTRAS_TRUE@EXTRA = extra
@TESTS_TRUE@TEST = test # need tools for tests
@TOOLS_TRUE@@TESTS_TRUE@TEST = test
@SAMPLES_TRUE@SAMPLE = samples @SAMPLES_TRUE@SAMPLE = samples
@TOOLS_TRUE@TOOLS = tools @TOOLS_TRUE@TOOLS = tools
@TOOLS_TRUE@DATASUBDIR = data
## pkgconfig setup. Always have uc and i18n. Others are optional. ## pkgconfig setup. Always have uc and i18n. Others are optional.
ALL_PKGCONFIG_SUFFIX=uc i18n ALL_PKGCONFIG_SUFFIX=uc i18n
@ -58,7 +60,7 @@ INSTALLED_BUILT_FILES = $(top_builddir)/config/Makefile.inc $(top_builddir)/conf
LOCAL_BUILT_FILES = icudefs.mk config/icucross.mk config/icucross.inc LOCAL_BUILT_FILES = icudefs.mk config/icucross.mk config/icucross.inc
DOCDIRS = common i18n DOCDIRS = common i18n
SUBDIRS = stubdata common i18n $(LAYOUTEX) $(ICUIO) $(TOOLS) data $(EXTRA) $(SAMPLE) $(TEST) SUBDIRS = stubdata common i18n $(LAYOUTEX) $(ICUIO) $(TOOLS) $(DATASUBDIR) $(EXTRA) $(SAMPLE) $(TEST)
SECTION = 1 SECTION = 1
@ -85,7 +87,7 @@ all: all-local all-recursive
install: install-recursive install-local install: install-recursive install-local
clean: clean-recursive-with-twist clean-local clean: clean-recursive-with-twist clean-local
distclean : distclean-recursive distclean-local distclean : distclean-recursive distclean-local
dist: dist-recursive dist-local dist: dist-recursive
check: all check-recursive check: all check-recursive
check-recursive: all check-recursive: all
xcheck: all xcheck-recursive xcheck: all xcheck-recursive

Просмотреть файл

@ -89,7 +89,7 @@ ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_
resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \ resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucurr.o \ ucurr.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \ messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
bytestream.o stringpiece.o \ bytestream.o stringpiece.o bytesinkutil.o \
stringtriebuilder.o bytestriebuilder.o \ stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \ bytestrie.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \ ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
@ -104,7 +104,7 @@ patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwr
uscript.o uscript_props.o usc_impl.o unames.o \ uscript.o uscript_props.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \ utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \ uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \ rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o rbbi_cache.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \ serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o uts46.o punycode.o \ uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \ util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \

Просмотреть файл

@ -28,7 +28,7 @@ U_NAMESPACE_BEGIN
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) : BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
list(parentList), listLength(parentListLength) { list(parentList), listLength(parentListLength) {
uprv_memset(asciiBytes, 0, sizeof(asciiBytes)); uprv_memset(latin1Contains, 0, sizeof(latin1Contains));
uprv_memset(table7FF, 0, sizeof(table7FF)); uprv_memset(table7FF, 0, sizeof(table7FF));
uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits)); uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
@ -45,14 +45,16 @@ BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1); list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
} }
list4kStarts[0x11]=listLength-1; list4kStarts[0x11]=listLength-1;
containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);
initBits(); initBits();
overrideIllegal(); overrideIllegal();
} }
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) : BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
containsFFFD(otherBMPSet.containsFFFD),
list(newParentList), listLength(newParentListLength) { list(newParentList), listLength(newParentListLength) {
uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes)); uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));
uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF)); uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits)); uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts)); uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
@ -120,7 +122,7 @@ void BMPSet::initBits() {
UChar32 start, limit; UChar32 start, limit;
int32_t listIndex=0; int32_t listIndex=0;
// Set asciiBytes[]. // Set latin1Contains[].
do { do {
start=list[listIndex++]; start=list[listIndex++];
if(listIndex<listLength) { if(listIndex<listLength) {
@ -128,13 +130,30 @@ void BMPSet::initBits() {
} else { } else {
limit=0x110000; limit=0x110000;
} }
if(start>=0x80) { if(start>=0x100) {
break; break;
} }
do { do {
asciiBytes[start++]=1; latin1Contains[start++]=1;
} while(start<limit && start<0x80); } while(start<limit && start<0x100);
} while(limit<=0x80); } while(limit<=0x100);
// Find the first range overlapping with (or after) 80..FF again,
// to include them in table7FF as well.
for(listIndex=0;;) {
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
if(limit>0x80) {
if(start<0x80) {
start=0x80;
}
break;
}
}
// Set table7FF[]. // Set table7FF[].
while(start<0x800) { while(start<0x800) {
@ -204,19 +223,14 @@ void BMPSet::initBits() {
* for faster validity checking at runtime. * for faster validity checking at runtime.
* No need to set 0 values where they were reset to 0 in the constructor * No need to set 0 values where they were reset to 0 in the constructor
* and not modified by initBits(). * and not modified by initBits().
* (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF) * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
* Need to set 0 values for surrogates D800..DFFF. * Need to set 0 values for surrogates D800..DFFF.
*/ */
void BMPSet::overrideIllegal() { void BMPSet::overrideIllegal() {
uint32_t bits, mask; uint32_t bits, mask;
int32_t i; int32_t i;
if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) { if(containsFFFD) {
// contains(FFFD)==TRUE
for(i=0x80; i<0xc0; ++i) {
asciiBytes[i]=1;
}
bits=3; // Lead bytes 0xC0 and 0xC1. bits=3; // Lead bytes 0xC0 and 0xC1.
for(i=0; i<64; ++i) { for(i=0; i<64; ++i) {
table7FF[i]|=bits; table7FF[i]|=bits;
@ -233,7 +247,6 @@ void BMPSet::overrideIllegal() {
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits; bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
} }
} else { } else {
// contains(FFFD)==FALSE
mask=~(0x10001<<0xd); // Lead byte 0xED. mask=~(0x10001<<0xd); // Lead byte 0xED.
for(i=32; i<64; ++i) { // Second half of 4k block. for(i=32; i<64; ++i) { // Second half of 4k block.
bmpBlockBits[i]&=mask; bmpBlockBits[i]&=mask;
@ -277,8 +290,8 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
UBool UBool
BMPSet::contains(UChar32 c) const { BMPSet::contains(UChar32 c) const {
if((uint32_t)c<=0x7f) { if((uint32_t)c<=0xff) {
return (UBool)asciiBytes[c]; return (UBool)latin1Contains[c];
} else if((uint32_t)c<=0x7ff) { } else if((uint32_t)c<=0x7ff) {
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0); return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) { } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
@ -314,8 +327,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
// span // span
do { do {
c=*s; c=*s;
if(c<=0x7f) { if(c<=0xff) {
if(!asciiBytes[c]) { if(!latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -354,8 +367,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
// span not // span not
do { do {
c=*s; c=*s;
if(c<=0x7f) { if(c<=0xff) {
if(asciiBytes[c]) { if(latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -403,8 +416,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
// span // span
for(;;) { for(;;) {
c=*(--limit); c=*(--limit);
if(c<=0x7f) { if(c<=0xff) {
if(!asciiBytes[c]) { if(!latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -446,8 +459,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
// span not // span not
for(;;) { for(;;) {
c=*(--limit); c=*(--limit);
if(c<=0x7f) { if(c<=0xff) {
if(asciiBytes[c]) { if(latin1Contains[c]) {
break; break;
} }
} else if(c<=0x7ff) { } else if(c<=0x7ff) {
@ -497,22 +510,22 @@ const uint8_t *
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
const uint8_t *limit=s+length; const uint8_t *limit=s+length;
uint8_t b=*s; uint8_t b=*s;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
// Initial all-ASCII span. // Initial all-ASCII span.
if(spanCondition) { if(spanCondition) {
do { do {
if(!asciiBytes[b] || ++s==limit) { if(!latin1Contains[b] || ++s==limit) {
return s; return s;
} }
b=*s; b=*s;
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} else { } else {
do { do {
if(asciiBytes[b] || ++s==limit) { if(latin1Contains[b] || ++s==limit) {
return s; return s;
} }
b=*s; b=*s;
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} }
length=(int32_t)(limit-s); length=(int32_t)(limit-s);
} }
@ -540,20 +553,20 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
// single trail byte, check for preceding 3- or 4-byte lead byte // single trail byte, check for preceding 3- or 4-byte lead byte
if(length>=2 && (b=*(limit-2))>=0xe0) { if(length>=2 && (b=*(limit-2))>=0xe0) {
limit-=2; limit-=2;
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
limit0=limit; limit0=limit;
} }
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) { } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
// 4-byte lead byte with only two trail bytes // 4-byte lead byte with only two trail bytes
limit-=3; limit-=3;
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
limit0=limit; limit0=limit;
} }
} }
} else { } else {
// lead byte with no trail bytes // lead byte with no trail bytes
--limit; --limit;
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
limit0=limit; limit0=limit;
} }
} }
@ -563,26 +576,26 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
while(s<limit) { while(s<limit) {
b=*s; b=*s;
if(b<0xc0) { if(U8_IS_SINGLE(b)) {
// ASCII; or trail bytes with the result of contains(FFFD). // ASCII
if(spanCondition) { if(spanCondition) {
do { do {
if(!asciiBytes[b]) { if(!latin1Contains[b]) {
return s; return s;
} else if(++s==limit) { } else if(++s==limit) {
return limit0; return limit0;
} }
b=*s; b=*s;
} while(b<0xc0); } while(U8_IS_SINGLE(b));
} else { } else {
do { do {
if(asciiBytes[b]) { if(latin1Contains[b]) {
return s; return s;
} else if(++s==limit) { } else if(++s==limit) {
return limit0; return limit0;
} }
b=*s; b=*s;
} while(b<0xc0); } while(U8_IS_SINGLE(b));
} }
} }
++s; // Advance past the lead byte. ++s; // Advance past the lead byte.
@ -619,7 +632,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3; UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
if( ( (0x10000<=c && c<=0x10ffff) ? if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) : containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
asciiBytes[0x80] containsFFFD
) != spanCondition ) != spanCondition
) { ) {
return s-1; return s-1;
@ -627,8 +640,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
s+=3; s+=3;
continue; continue;
} }
} else /* 0xc0<=b<0xe0 */ { } else {
if( /* handle U+0000..U+07FF inline */ if( /* handle U+0000..U+07FF inline */
b>=0xc0 &&
(t1=(uint8_t)(*s-0x80)) <= 0x3f (t1=(uint8_t)(*s-0x80)) <= 0x3f
) { ) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) { if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
@ -642,7 +656,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
// Give an illegal sequence the same value as the result of contains(FFFD). // Give an illegal sequence the same value as the result of contains(FFFD).
// Handle each byte of an illegal sequence separately to simplify the code; // Handle each byte of an illegal sequence separately to simplify the code;
// no need to optimize error handling. // no need to optimize error handling.
if(asciiBytes[0x80]!=spanCondition) { if(containsFFFD!=spanCondition) {
return s-1; return s-1;
} }
} }
@ -667,26 +681,26 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
do { do {
b=s[--length]; b=s[--length];
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
// ASCII sub-span // ASCII sub-span
if(spanCondition) { if(spanCondition) {
do { do {
if(!asciiBytes[b]) { if(!latin1Contains[b]) {
return length+1; return length+1;
} else if(length==0) { } else if(length==0) {
return 0; return 0;
} }
b=s[--length]; b=s[--length];
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} else { } else {
do { do {
if(asciiBytes[b]) { if(latin1Contains[b]) {
return length+1; return length+1;
} else if(length==0) { } else if(length==0) {
return 0; return 0;
} }
b=s[--length]; b=s[--length];
} while((int8_t)b>=0); } while(U8_IS_SINGLE(b));
} }
} }

Просмотреть файл

@ -28,11 +28,12 @@ U_NAMESPACE_BEGIN
* Helper class for frozen UnicodeSets, implements contains() and span() * Helper class for frozen UnicodeSets, implements contains() and span()
* optimized for BMP code points. Structured to be UTF-8-friendly. * optimized for BMP code points. Structured to be UTF-8-friendly.
* *
* ASCII: Look up bytes. * Latin-1: Look up bytes.
* 2-byte characters: Bits organized vertically. * 2-byte characters: Bits organized vertically.
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
* with mixed for illegal ranges. * with mixed for illegal ranges.
* Supplementary characters: Call contains() on the parent set. * Supplementary characters: Binary search over
* the supplementary part of the parent set's inversion list.
*/ */
class BMPSet : public UMemory { class BMPSet : public UMemory {
public: public:
@ -96,12 +97,12 @@ private:
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const; inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
/* /*
* One byte per ASCII character, or trail byte in lead position. * One byte 0 or 1 per Latin-1 character.
* 0 or 1 for ASCII characters.
* The value for trail bytes is the result of contains(FFFD)
* for faster validity checking at runtime.
*/ */
UBool asciiBytes[0xc0]; UBool latin1Contains[0x100];
/* TRUE if contains(U+FFFD). */
UBool containsFFFD;
/* /*
* One bit per code point from U+0000..U+07FF. * One bit per code point from U+0000..U+07FF.

Просмотреть файл

@ -11,9 +11,6 @@
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "unicode/chariter.h" #include "unicode/chariter.h"
@ -24,6 +21,10 @@
#include "unicode/uscript.h" #include "unicode/uscript.h"
#include "unicode/ucharstrie.h" #include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h" #include "unicode/bytestrie.h"
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
#include "charstr.h" #include "charstr.h"
#include "dictionarydata.h" #include "dictionarydata.h"
#include "mutex.h" #include "mutex.h"
@ -80,23 +81,15 @@ UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
int32_t int32_t
UnhandledEngine::findBreaks( UText *text, UnhandledEngine::findBreaks( UText *text,
int32_t startPos, int32_t /* startPos */,
int32_t endPos, int32_t endPos,
UBool reverse, int32_t breakType,
int32_t breakType, UVector32 &/*foundBreaks*/ ) const {
UStack &/*foundBreaks*/ ) const {
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) { if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
UChar32 c = utext_current32(text); UChar32 c = utext_current32(text);
if (reverse) { while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_previous32(text); c = utext_current32(text);
}
}
else {
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
} }
} }
return 0; return 0;

Просмотреть файл

@ -19,6 +19,7 @@ U_NAMESPACE_BEGIN
class UnicodeSet; class UnicodeSet;
class UStack; class UStack;
class UVector32;
class DictionaryMatcher; class DictionaryMatcher;
/******************************************************************* /*******************************************************************
@ -67,18 +68,15 @@ class LanguageBreakEngine : public UMemory {
* is capable of handling. * is capable of handling.
* @param startPos The start of the run within the supplied text. * @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text. * @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1. * @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any * @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found. * @return The number of breaks found.
*/ */
virtual int32_t findBreaks( UText *text, virtual int32_t findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const = 0; UVector32 &foundBreaks ) const = 0;
}; };
@ -192,8 +190,6 @@ class UnhandledEngine : public LanguageBreakEngine {
* is capable of handling. * is capable of handling.
* @param startPos The start of the run within the supplied text. * @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text. * @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1. * @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any * @param foundBreaks An allocated C array of the breaks found, if any
* @return The number of breaks found. * @return The number of breaks found.
@ -201,9 +197,8 @@ class UnhandledEngine : public LanguageBreakEngine {
virtual int32_t findBreaks( UText *text, virtual int32_t findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
/** /**
* <p>Tell the engine to handle a particular character and break type.</p> * <p>Tell the engine to handle a particular character and break type.</p>

Просмотреть файл

@ -195,7 +195,7 @@ BreakIterator::getAvailableLocales(int32_t& count)
// ------------------------------------------ // ------------------------------------------
// //
// Default constructor and destructor // Constructors, destructor and assignment operator
// //
//------------------------------------------- //-------------------------------------------
@ -204,6 +204,19 @@ BreakIterator::BreakIterator()
*validLocale = *actualLocale = 0; *validLocale = *actualLocale = 0;
} }
/**
 * Copy constructor: copies the UObject base and both locale ID strings.
 *
 * Each copy is bounded by the size of the destination buffer and is then
 * explicitly NUL-terminated, because a strncpy-style bounded copy writes no
 * terminator when the source fills the whole buffer.
 * NOTE(review): assumes uprv_strncpy has strncpy semantics and that
 * actualLocale/validLocale are char arrays (so sizeof yields their capacity)
 * -- confirm against the class declaration.
 *
 * @param other The break iterator whose locale IDs are copied.
 */
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
    actualLocale[sizeof(actualLocale) - 1] = 0;
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    validLocale[sizeof(validLocale) - 1] = 0;
}
/**
 * Assignment operator: copies both locale ID strings from another iterator.
 *
 * Self-assignment is skipped, so a buffer is never copied onto itself.
 * Each copy is bounded by the size of the destination buffer and is then
 * explicitly NUL-terminated, because a strncpy-style bounded copy writes no
 * terminator when the source fills the whole buffer.
 * NOTE(review): assumes uprv_strncpy has strncpy semantics and that
 * actualLocale/validLocale are char arrays (so sizeof yields their capacity)
 * -- confirm against the class declaration.
 *
 * @param other The break iterator whose locale IDs are copied.
 * @return *this
 */
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this != &other) {
        uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
        actualLocale[sizeof(actualLocale) - 1] = 0;
        uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
        validLocale[sizeof(validLocale) - 1] = 0;
    }
    return *this;
}
BreakIterator::~BreakIterator() BreakIterator::~BreakIterator()
{ {
} }
@ -265,7 +278,7 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {}
// defined in ucln_cmn.h // defined in ucln_cmn.h
U_NAMESPACE_END U_NAMESPACE_END
static icu::UInitOnce gInitOnce; static icu::UInitOnce gInitOnceBrkiter;
static icu::ICULocaleService* gService = NULL; static icu::ICULocaleService* gService = NULL;
@ -280,7 +293,7 @@ static UBool U_CALLCONV breakiterator_cleanup(void) {
delete gService; delete gService;
gService = NULL; gService = NULL;
} }
gInitOnce.reset(); gInitOnceBrkiter.reset();
#endif #endif
return TRUE; return TRUE;
} }
@ -296,7 +309,7 @@ initService(void) {
static ICULocaleService* static ICULocaleService*
getService(void) getService(void)
{ {
umtx_initOnce(gInitOnce, &initService); umtx_initOnce(gInitOnceBrkiter, &initService);
return gService; return gService;
} }
@ -306,7 +319,7 @@ getService(void)
static inline UBool static inline UBool
hasService(void) hasService(void)
{ {
return !gInitOnce.isReset() && getService() != NULL; return !gInitOnceBrkiter.isReset() && getService() != NULL;
} }
// ------------------------------------- // -------------------------------------

Просмотреть файл

@ -0,0 +1,123 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// bytesinkutil.cpp
// created: 2017sep14 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
/**
 * Records that (length) source bytes were mapped to the valid UTF-16 string
 * (s16, s16Length): converts that string to UTF-8, appends it to the sink
 * chunk by chunk, and, if edits is not null, adds one replacement of
 * (length) units by the total number of UTF-8 bytes written.
 *
 * @param length    Number of original bytes that were replaced.
 * @param s16       Replacement text; must be valid UTF-16 because the
 *                  "unsafe" decoding macro is used on it.
 * @param s16Length Number of code units in s16.
 * @param sink      Receives the UTF-8 output.
 * @param edits     Optional edit log; may be nullptr.
 * @param errorCode In/out error code. Set to U_INDEX_OUTOFBOUNDS_ERROR if
 *                  the total UTF-8 length would exceed INT32_MAX.
 * @return FALSE if errorCode indicated a failure on entry or was set here,
 *         otherwise TRUE.
 */
UBool
ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    // Scratch space offered to the sink via GetAppendBuffer().
    char scratch[200];
    int32_t totalBytes = 0;
    int32_t srcIndex = 0;
    while (srcIndex < s16Length) {
        // Capacity hint for everything that is still to be converted,
        // clamped so that the multiplication cannot overflow int32_t.
        const int32_t remaining = s16Length - srcIndex;
        int32_t capacityHint;
        if (remaining < (INT32_MAX / 3)) {
            capacityHint = remaining * 3;  // max 3 UTF-8 bytes per UTF-16 code unit
        } else if (remaining < (INT32_MAX / 2)) {
            capacityHint = remaining * 2;
        } else {
            capacityHint = INT32_MAX;
        }
        int32_t capacity;
        char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, capacityHint,
                                            scratch, UPRV_LENGTHOF(scratch), &capacity);
        // The loop below tests the limit before writing a whole code point,
        // so keep U8_MAX_LENGTH - 1 bytes in reserve for the last one.
        const int32_t fillLimit = capacity - (U8_MAX_LENGTH - 1);
        int32_t chunkBytes = 0;
        while (srcIndex < s16Length && chunkBytes < fillLimit) {
            UChar32 c;
            U16_NEXT_UNSAFE(s16, srcIndex, c);
            U8_APPEND_UNSAFE(buffer, chunkBytes, c);
        }
        // Refuse to let the running total overflow int32_t.
        if (chunkBytes > (INT32_MAX - totalBytes)) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return FALSE;
        }
        sink.Append(buffer, chunkBytes);
        totalBytes += chunkBytes;
    }
    if (edits != nullptr) {
        edits->addReplace(length, totalBytes);
    }
    return TRUE;
}
/**
 * Pointer-range overload: the bytes at [s, limit[ were mapped to the valid
 * UTF-16 string (s16, s16Length). Computes the byte count and forwards to
 * the length-based appendChange().
 *
 * @param s         Start of the original bytes.
 * @param limit     End (exclusive) of the original bytes.
 * @param s16       Replacement text, valid UTF-16.
 * @param s16Length Number of code units in s16.
 * @param sink      Receives the UTF-8 output.
 * @param edits     Optional edit log; may be nullptr.
 * @param errorCode In/out error code. Set to U_INDEX_OUTOFBOUNDS_ERROR if
 *                  limit - s does not fit into an int32_t.
 * @return FALSE on failure, otherwise the result of the forwarded call.
 */
UBool
ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
                           const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const auto oldLength = limit - s;
    if (oldLength <= INT32_MAX) {
        return appendChange((int32_t)oldLength, s16, s16Length, sink, edits, errorCode);
    }
    // The range is too long to be described with an int32_t length.
    errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    return FALSE;
}
void
ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
char s8[U8_MAX_LENGTH];
int32_t s8Length = 0;
U8_APPEND_UNSAFE(s8, s8Length, c);
if (edits != nullptr) {
edits->addReplace(length, s8Length);
}
sink.Append(s8, s8Length);
}
namespace {

// Byte builders for a two-byte UTF-8 sequence.
// See unicode/utf8.h U8_APPEND_UNSAFE().

/** Lead byte: 0xc0 combined with the bits of c above the low six. */
inline uint8_t getTwoByteLead(UChar32 c) {
    return static_cast<uint8_t>(0xc0 | (c >> 6));
}

/** Trail byte: 0x80 combined with the low six bits of c. */
inline uint8_t getTwoByteTrail(UChar32 c) {
    return static_cast<uint8_t>(0x80 | (c & 0x3f));
}

}  // namespace
/**
 * Appends the two-byte UTF-8 encoding of c to the sink.
 *
 * @param c    Code point in the two-byte range U+0080..U+07FF (asserted).
 * @param sink Receives the two bytes.
 */
void
ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
    char pair[2];
    pair[0] = (char)getTwoByteLead(c);
    pair[1] = (char)getTwoByteTrail(c);
    sink.Append(pair, 2);
}
// Handles `length` source bytes at s that were left unchanged: records them in
// edits (if non-null) and copies them to sink unless U_OMIT_UNCHANGED_TEXT is
// set in options. A length of zero or less is a no-op. Returns FALSE only when
// errorCode already indicates a failure.
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    if (length <= 0) {
        return TRUE;
    }
    if (edits != nullptr) {
        edits->addUnchanged(length);
    }
    if ((options & U_OMIT_UNCHANGED_TEXT) != 0) {
        return TRUE;
    }
    sink.Append(reinterpret_cast<const char *>(s), length);
    return TRUE;
}
// Pointer-range overload: the unchanged bytes are given as [s, limit[. Fails
// with U_INDEX_OUTOFBOUNDS_ERROR when the range is longer than INT32_MAX;
// otherwise forwards to the length-based appendUnchanged().
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const auto byteCount = limit - s;
    if (byteCount <= INT32_MAX) {
        return appendUnchanged(s, static_cast<int32_t>(byteCount),
                               sink, options, edits, errorCode);
    }
    errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    return FALSE;
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,53 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// bytesinkutil.h
// created: 2017sep14 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
class ByteSink;
class Edits;
/**
 * Static-only helpers for appending UTF-8 output to a ByteSink while
 * optionally recording the corresponding changes in an Edits object.
 * The class cannot be instantiated.
 */
class U_COMMON_API ByteSinkUtil {
public:
ByteSinkUtil() = delete; // all static
/**
 * (length) bytes were mapped to valid (s16, s16Length).
 * Appends the UTF-8 form of (s16, s16Length) to sink and, if edits is not
 * nullptr, records the replacement there.
 * @return FALSE if errorCode indicates a failure on entry, or if the output
 *         length would exceed INT32_MAX (errorCode is then set to
 *         U_INDEX_OUTOFBOUNDS_ERROR); TRUE otherwise.
 */
static UBool appendChange(int32_t length,
const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode);
/**
 * The bytes at [s, limit[ were mapped to valid (s16, s16Length).
 * Same as the length-based overload, with the source length computed as
 * limit - s; sets U_INDEX_OUTOFBOUNDS_ERROR and returns FALSE if that
 * difference is greater than INT32_MAX.
 */
static UBool appendChange(const uint8_t *s, const uint8_t *limit,
const char16_t *s16, int32_t s16Length,
ByteSink &sink, Edits *edits, UErrorCode &errorCode);
/**
 * (length) bytes were mapped/changed to valid code point c.
 * The UTF-8 encoding of c is appended to sink; edits is optional and
 * defaults to nullptr (nothing is recorded in that case).
 */
static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);
/**
 * The few bytes at [src, nextSrc[ were mapped/changed to valid code point c.
 * Forwards to the length-based overload with nextSrc - src narrowed to int32_t.
 */
static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
ByteSink &sink, Edits *edits = nullptr) {
appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
}
/**
 * Append the two-byte character (U+0080..U+07FF).
 * Takes no Edits parameter, so nothing is recorded for this append.
 */
static void appendTwoBytes(UChar32 c, ByteSink &sink);
/**
 * (length) bytes at s were left unchanged.
 * For length > 0, records the unchanged span in edits (if not nullptr) and
 * appends the bytes to sink unless U_OMIT_UNCHANGED_TEXT is set in options.
 * @return FALSE if errorCode indicates a failure on entry; TRUE otherwise.
 */
static UBool appendUnchanged(const uint8_t *s, int32_t length,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
/**
 * The bytes at [s, limit[ were left unchanged.
 * Same as the length-based overload, with the length computed as limit - s;
 * sets U_INDEX_OUTOFBOUNDS_ERROR and returns FALSE if that difference is
 * greater than INT32_MAX.
 */
static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
ByteSink &sink, uint32_t options, Edits *edits,
UErrorCode &errorCode);
};
U_NAMESPACE_END

Просмотреть файл

@ -45,6 +45,12 @@ void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
if (n <= 0) { if (n <= 0) {
return; return;
} }
if (n > (INT32_MAX - appended_)) {
// TODO: Report as integer overflow, not merely buffer overflow.
appended_ = INT32_MAX;
overflowed_ = TRUE;
return;
}
appended_ += n; appended_ += n;
int32_t available = capacity_ - size_; int32_t available = capacity_ - size_;
if (n > available) { if (n > available) {

Просмотреть файл

@ -405,7 +405,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
//String[] finalResult = new String[result.size()]; //String[] finalResult = new String[result.size()];
UnicodeString *finalResult = NULL; UnicodeString *finalResult = NULL;
int32_t resultCount; int32_t resultCount;
if((resultCount = result.count())) { if((resultCount = result.count()) != 0) {
finalResult = new UnicodeString[resultCount]; finalResult = new UnicodeString[resultCount];
if (finalResult == 0) { if (finalResult == 0) {
status = U_MEMORY_ALLOCATION_ERROR; status = U_MEMORY_ALLOCATION_ERROR;

Просмотреть файл

@ -162,7 +162,6 @@ public:
* @param p simple pointer to an array of T items that is adopted * @param p simple pointer to an array of T items that is adopted
*/ */
explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {} explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move constructor, leaves src with isNull(). * Move constructor, leaves src with isNull().
* @param src source smart pointer * @param src source smart pointer
@ -170,14 +169,12 @@ public:
LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL; src.ptr=NULL;
} }
#endif
/** /**
* Destructor deletes the memory it owns. * Destructor deletes the memory it owns.
*/ */
~LocalMemory() { ~LocalMemory() {
uprv_free(LocalPointerBase<T>::ptr); uprv_free(LocalPointerBase<T>::ptr);
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move assignment operator, leaves src with isNull(). * Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -187,7 +184,6 @@ public:
LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT { LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
return moveFrom(src); return moveFrom(src);
} }
#endif
/** /**
* Move assignment, leaves src with isNull(). * Move assignment, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -312,6 +308,14 @@ public:
* Default constructor initializes with internal T[stackCapacity] buffer. * Default constructor initializes with internal T[stackCapacity] buffer.
*/ */
MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {} MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
/**
 * Automatically allocates the heap array if the argument is larger than the stack capacity.
 * Intended for use when an approximate capacity is known at compile time but the true
 * capacity is not known until runtime.
 * @param newCapacity requested capacity; values not larger than the current
 *                    (stack) capacity leave the object on its internal buffer
 */
MaybeStackArray(int32_t newCapacity) : MaybeStackArray() {
    // Delegating to the default constructor first puts the object in its
    // normal stack-backed state; grow only when that is too small.
    // NOTE(review): the result of resize() is not checked here, so callers
    // presumably have to verify the capacity themselves -- confirm.
    if (capacity < newCapacity) { resize(newCapacity); }
}
/** /**
* Destructor deletes the array (if owned). * Destructor deletes the array (if owned).
*/ */

Просмотреть файл

@ -20,6 +20,7 @@
</ItemGroup> </ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}</ProjectGuid> <ProjectGuid>{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}</ProjectGuid>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@ -109,7 +110,7 @@
<Culture>0x0409</Culture> <Culture>0x0409</Culture>
</ResourceCompile> </ResourceCompile>
<Link> <Link>
<OutputFile>..\..\bin\icuuc59.dll</OutputFile> <OutputFile>..\..\bin\icuuc60.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner> <SuppressStartupBanner>true</SuppressStartupBanner>
<ProgramDatabaseFile>.\..\..\lib\icuuc.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib\icuuc.pdb</ProgramDatabaseFile>
<EnableCOMDATFolding>true</EnableCOMDATFolding> <EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -151,7 +152,7 @@
<Culture>0x0409</Culture> <Culture>0x0409</Culture>
</ResourceCompile> </ResourceCompile>
<Link> <Link>
<OutputFile>..\..\bin\icuuc59d.dll</OutputFile> <OutputFile>..\..\bin\icuuc60d.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner> <SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\..\..\lib\icuucd.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib\icuucd.pdb</ProgramDatabaseFile>
@ -190,7 +191,7 @@
<Culture>0x0409</Culture> <Culture>0x0409</Culture>
</ResourceCompile> </ResourceCompile>
<Link> <Link>
<OutputFile>..\..\bin64\icuuc59.dll</OutputFile> <OutputFile>..\..\bin64\icuuc60.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner> <SuppressStartupBanner>true</SuppressStartupBanner>
<ProgramDatabaseFile>.\..\..\lib64\icuuc.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib64\icuuc.pdb</ProgramDatabaseFile>
<EnableCOMDATFolding>true</EnableCOMDATFolding> <EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -230,7 +231,7 @@
<Culture>0x0409</Culture> <Culture>0x0409</Culture>
</ResourceCompile> </ResourceCompile>
<Link> <Link>
<OutputFile>..\..\bin64\icuuc59d.dll</OutputFile> <OutputFile>..\..\bin64\icuuc60d.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner> <SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation> <GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\..\..\lib64\icuucd.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib64\icuucd.pdb</ProgramDatabaseFile>
@ -267,6 +268,8 @@
</ClCompile> </ClCompile>
<ClCompile Include="rbbitblb.cpp"> <ClCompile Include="rbbitblb.cpp">
</ClCompile> </ClCompile>
<ClCompile Include="rbbi_cache.cpp">
</ClCompile>
<ClCompile Include="dictionarydata.cpp" /> <ClCompile Include="dictionarydata.cpp" />
<ClCompile Include="ubrk.cpp" /> <ClCompile Include="ubrk.cpp" />
<ClCompile Include="ucol_swp.cpp"> <ClCompile Include="ucol_swp.cpp">
@ -444,6 +447,7 @@
</ClCompile> </ClCompile>
<ClCompile Include="usprep.cpp" /> <ClCompile Include="usprep.cpp" />
<ClCompile Include="appendable.cpp" /> <ClCompile Include="appendable.cpp" />
<ClCompile Include="bytesinkutil.cpp" />
<ClCompile Include="bytestream.cpp" /> <ClCompile Include="bytestream.cpp" />
<ClCompile Include="bytestrie.cpp" /> <ClCompile Include="bytestrie.cpp" />
<ClCompile Include="bytestriebuilder.cpp" /> <ClCompile Include="bytestriebuilder.cpp" />
@ -571,6 +575,7 @@
<ClInclude Include="rbbiscan.h" /> <ClInclude Include="rbbiscan.h" />
<ClInclude Include="rbbisetb.h" /> <ClInclude Include="rbbisetb.h" />
<ClInclude Include="rbbitblb.h" /> <ClInclude Include="rbbitblb.h" />
<ClInclude Include="rbbi_cache.h" />
<ClInclude Include="dictionarydata.h" /> <ClInclude Include="dictionarydata.h" />
<CustomBuild Include="unicode\ubrk.h"> <CustomBuild Include="unicode\ubrk.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
@ -1477,6 +1482,7 @@
</Command> </Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="bytesinkutil.h" />
<CustomBuild Include="unicode\bytestream.h"> <CustomBuild Include="unicode\bytestream.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command> </Command>

Просмотреть файл

@ -97,6 +97,9 @@
<ClCompile Include="rbbitblb.cpp"> <ClCompile Include="rbbitblb.cpp">
<Filter>break iteration</Filter> <Filter>break iteration</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="rbbi_cache.cpp">
<Filter>break iteration</Filter>
</ClCompile>
<ClCompile Include="ubrk.cpp"> <ClCompile Include="ubrk.cpp">
<Filter>break iteration</Filter> <Filter>break iteration</Filter>
</ClCompile> </ClCompile>
@ -460,6 +463,9 @@
<ClCompile Include="usprep.cpp"> <ClCompile Include="usprep.cpp">
<Filter>sprep</Filter> <Filter>sprep</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="bytesinkutil.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="bytestream.cpp"> <ClCompile Include="bytestream.cpp">
<Filter>strings</Filter> <Filter>strings</Filter>
</ClCompile> </ClCompile>
@ -636,6 +642,9 @@
<ClInclude Include="rbbitblb.h"> <ClInclude Include="rbbitblb.h">
<Filter>break iteration</Filter> <Filter>break iteration</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="rbbi_cache.h">
<Filter>break iteration</Filter>
</ClInclude>
<ClInclude Include="ubrkimpl.h"> <ClInclude Include="ubrkimpl.h">
<Filter>break iteration</Filter> <Filter>break iteration</Filter>
</ClInclude> </ClInclude>
@ -861,6 +870,9 @@
<ClInclude Include="sprpimpl.h"> <ClInclude Include="sprpimpl.h">
<Filter>sprep</Filter> <Filter>sprep</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="bytesinkutil.h">
<Filter>strings</Filter>
</ClInclude>
<ClInclude Include="charstr.h"> <ClInclude Include="charstr.h">
<Filter>strings</Filter> <Filter>strings</Filter>
</ClInclude> </ClInclude>

Просмотреть файл

@ -70,15 +70,17 @@
<LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental> <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup> <ItemDefinitionGroup>
<!-- Options that are common to *all* configurations -->
<Midl> <Midl>
<PreprocessorDefinitions>U_DISABLE_RENAMING=1;U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MkTypLibCompatible>true</MkTypLibCompatible> <MkTypLibCompatible>true</MkTypLibCompatible>
<SuppressStartupBanner>true</SuppressStartupBanner> <SuppressStartupBanner>true</SuppressStartupBanner>
</Midl> </Midl>
<ClCompile> <ClCompile>
<AdditionalIncludeDirectories>..\..\include;..\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>..\..\include;..\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<!-- U_DISABLE_RENAMING -->
<!-- U_HIDE_DRAFT_API & U_HIDE_DEPRECATED_API --> <!-- U_HIDE_DRAFT_API & U_HIDE_DEPRECATED_API -->
<PreprocessorDefinitions>U_DISABLE_RENAMING=1;U_PLATFORM_HAS_WINUWP_API=1;U_ATTRIBUTE_DEPRECATED=;_CRT_SECURE_NO_DEPRECATE;U_COMMON_IMPLEMENTATION;U_PLATFORM_USES_ONLY_WIN32_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>U_PLATFORM_HAS_WINUWP_API=1;U_ATTRIBUTE_DEPRECATED=;_CRT_SECURE_NO_DEPRECATE;U_COMMON_IMPLEMENTATION;U_PLATFORM_USES_ONLY_WIN32_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling> <StringPooling>true</StringPooling>
<ExceptionHandling> <ExceptionHandling>
</ExceptionHandling> </ExceptionHandling>
@ -93,7 +95,7 @@
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
</ClCompile> </ClCompile>
<ResourceCompile> <ResourceCompile>
<PreprocessorDefinitions>U_DISABLE_RENAMING=1;U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0409</Culture> <Culture>0x0409</Culture>
<AdditionalIncludeDirectories>../common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>../common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile> </ResourceCompile>
@ -108,6 +110,7 @@
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'"> <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<!-- Options that are common to all 'Release' configurations -->
<Midl> <Midl>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl> </Midl>
@ -120,9 +123,11 @@
</ResourceCompile> </ResourceCompile>
<Link> <Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding> <EnableCOMDATFolding>true</EnableCOMDATFolding>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'"> <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<!-- Options that are common to all 'Debug' configurations -->
<Midl> <Midl>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl> </Midl>
@ -137,8 +142,13 @@
<ResourceCompile> <ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ResourceCompile> </ResourceCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'"> <ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
<!-- Options that are common to all 32-bit configurations -->
<Midl> <Midl>
<TargetEnvironment>Win32</TargetEnvironment> <TargetEnvironment>Win32</TargetEnvironment>
</Midl> </Midl>
@ -147,6 +157,7 @@
</ClCompile> </ClCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'"> <ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
<!-- Options that are common to all 64-bit configurations -->
<Midl> <Midl>
<TargetEnvironment>X64</TargetEnvironment> <TargetEnvironment>X64</TargetEnvironment>
</Midl> </Midl>
@ -158,6 +169,7 @@
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='ARM'"> <ItemDefinitionGroup Condition="'$(Platform)'=='ARM'">
<!-- Options that are common to all ARM configurations -->
<Midl> <Midl>
<TargetEnvironment>ARM</TargetEnvironment> <TargetEnvironment>ARM</TargetEnvironment>
</Midl> </Midl>
@ -168,6 +180,7 @@
<TargetMachine>MachineARM</TargetMachine> <TargetMachine>MachineARM</TargetMachine>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<!-- Options that are specific to a particular configuration -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl> <Midl>
<TypeLibraryName>.\..\..\lib32uwp\icuuc.tlb</TypeLibraryName> <TypeLibraryName>.\..\..\lib32uwp\icuuc.tlb</TypeLibraryName>
@ -179,10 +192,9 @@
<ProgramDataBaseFileName>.\x86\ReleaseUWP/</ProgramDataBaseFileName> <ProgramDataBaseFileName>.\x86\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile> </ClCompile>
<Link> <Link>
<OutputFile>..\..\bin32uwp\icuuc.dll</OutputFile> <OutputFile>..\..\bin32uwp\icuuc60.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuuc.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib32uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuuc.lib</ImportLibrary> <ImportLibrary>..\..\lib32uwp\icuuc.lib</ImportLibrary>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@ -196,10 +208,9 @@
<ProgramDataBaseFileName>.\x86\DebugUWP/</ProgramDataBaseFileName> <ProgramDataBaseFileName>.\x86\DebugUWP/</ProgramDataBaseFileName>
</ClCompile> </ClCompile>
<Link> <Link>
<OutputFile>..\..\bin32uwp\icuucd.dll</OutputFile> <OutputFile>..\..\bin32uwp\icuuc60d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuucd.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib32uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuucd.lib</ImportLibrary> <ImportLibrary>..\..\lib32uwp\icuucd.lib</ImportLibrary>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@ -213,10 +224,9 @@
<ProgramDataBaseFileName>.\x64\ReleaseUWP/</ProgramDataBaseFileName> <ProgramDataBaseFileName>.\x64\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile> </ClCompile>
<Link> <Link>
<OutputFile>..\..\bin64uwp\icuuc.dll</OutputFile> <OutputFile>..\..\bin64uwp\icuuc60.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuuc.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib64uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuuc.lib</ImportLibrary> <ImportLibrary>..\..\lib64uwp\icuuc.lib</ImportLibrary>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -230,10 +240,9 @@
<ProgramDataBaseFileName>.\x64\DebugUWP/</ProgramDataBaseFileName> <ProgramDataBaseFileName>.\x64\DebugUWP/</ProgramDataBaseFileName>
</ClCompile> </ClCompile>
<Link> <Link>
<OutputFile>..\..\bin64uwp\icuucd.dll</OutputFile> <OutputFile>..\..\bin64uwp\icuuc60d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuucd.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\lib64uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuucd.lib</ImportLibrary> <ImportLibrary>..\..\lib64uwp\icuucd.lib</ImportLibrary>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
@ -247,10 +256,9 @@
<ProgramDataBaseFileName>.\ARM\ReleaseUWP/</ProgramDataBaseFileName> <ProgramDataBaseFileName>.\ARM\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile> </ClCompile>
<Link> <Link>
<OutputFile>..\..\binARMuwp\icuuc.dll</OutputFile> <OutputFile>..\..\binARMuwp\icuuc60.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuuc.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\libARMuwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuuc.lib</ImportLibrary> <ImportLibrary>..\..\libARMuwp\icuuc.lib</ImportLibrary>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
@ -264,10 +272,9 @@
<ProgramDataBaseFileName>.\ARM\DebugUWP/</ProgramDataBaseFileName> <ProgramDataBaseFileName>.\ARM\DebugUWP/</ProgramDataBaseFileName>
</ClCompile> </ClCompile>
<Link> <Link>
<OutputFile>..\..\binARMuwp\icuucd.dll</OutputFile> <OutputFile>..\..\binARMuwp\icuuc60d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuucd.pdb</ProgramDatabaseFile> <ProgramDatabaseFile>.\..\..\libARMuwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuucd.lib</ImportLibrary> <ImportLibrary>..\..\libARMuwp\icuucd.lib</ImportLibrary>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
@ -280,25 +287,19 @@
<ClCompile Include="ubidiwrt.cpp" /> <ClCompile Include="ubidiwrt.cpp" />
<ClCompile Include="uloc_keytype.cpp" /> <ClCompile Include="uloc_keytype.cpp" />
<ClCompile Include="ushape.cpp" /> <ClCompile Include="ushape.cpp" />
<ClCompile Include="brkeng.cpp"> <ClCompile Include="brkeng.cpp" />
</ClCompile> <ClCompile Include="brkiter.cpp" />
<ClCompile Include="brkiter.cpp">
</ClCompile>
<ClCompile Include="dictbe.cpp" /> <ClCompile Include="dictbe.cpp" />
<ClCompile Include="pluralmap.cpp" /> <ClCompile Include="pluralmap.cpp" />
<ClCompile Include="rbbi.cpp"> <ClCompile Include="rbbi.cpp" />
</ClCompile> <ClCompile Include="rbbidata.cpp" />
<ClCompile Include="rbbidata.cpp">
</ClCompile>
<ClCompile Include="rbbinode.cpp" /> <ClCompile Include="rbbinode.cpp" />
<ClCompile Include="rbbirb.cpp"> <ClCompile Include="rbbirb.cpp" />
</ClCompile>
<ClCompile Include="rbbiscan.cpp" /> <ClCompile Include="rbbiscan.cpp" />
<ClCompile Include="rbbisetb.cpp" /> <ClCompile Include="rbbisetb.cpp" />
<ClCompile Include="rbbistbl.cpp"> <ClCompile Include="rbbistbl.cpp" />
</ClCompile> <ClCompile Include="rbbitblb.cpp" />
<ClCompile Include="rbbitblb.cpp"> <ClCompile Include="rbbi_cache.cpp" />
</ClCompile>
<ClCompile Include="dictionarydata.cpp" /> <ClCompile Include="dictionarydata.cpp" />
<ClCompile Include="ubrk.cpp" /> <ClCompile Include="ubrk.cpp" />
<ClCompile Include="ucol_swp.cpp"> <ClCompile Include="ucol_swp.cpp">
@ -320,14 +321,12 @@
<ClCompile Include="uvectr64.cpp" /> <ClCompile Include="uvectr64.cpp" />
<ClCompile Include="errorcode.cpp" /> <ClCompile Include="errorcode.cpp" />
<ClCompile Include="icudataver.cpp" /> <ClCompile Include="icudataver.cpp" />
<ClCompile Include="locmap.cpp"> <ClCompile Include="locmap.cpp" />
</ClCompile>
<ClCompile Include="putil.cpp"> <ClCompile Include="putil.cpp">
<CompileAsWinRT>true</CompileAsWinRT> <CompileAsWinRT>true</CompileAsWinRT>
</ClCompile> </ClCompile>
<ClCompile Include="umath.cpp" /> <ClCompile Include="umath.cpp" />
<ClCompile Include="umutex.cpp"> <ClCompile Include="umutex.cpp" />
</ClCompile>
<ClCompile Include="utrace.cpp" /> <ClCompile Include="utrace.cpp" />
<ClCompile Include="utypes.cpp" /> <ClCompile Include="utypes.cpp" />
<ClCompile Include="wintz.cpp"> <ClCompile Include="wintz.cpp">
@ -335,15 +334,13 @@
</ClCompile> </ClCompile>
<ClCompile Include="ucnv.cpp" /> <ClCompile Include="ucnv.cpp" />
<ClCompile Include="ucnv2022.cpp" /> <ClCompile Include="ucnv2022.cpp" />
<ClCompile Include="ucnv_bld.cpp"> <ClCompile Include="ucnv_bld.cpp" />
</ClCompile>
<ClCompile Include="ucnv_cb.cpp" /> <ClCompile Include="ucnv_cb.cpp" />
<ClCompile Include="ucnv_cnv.cpp" /> <ClCompile Include="ucnv_cnv.cpp" />
<ClCompile Include="ucnv_ct.cpp" /> <ClCompile Include="ucnv_ct.cpp" />
<ClCompile Include="ucnv_err.cpp" /> <ClCompile Include="ucnv_err.cpp" />
<ClCompile Include="ucnv_ext.cpp" /> <ClCompile Include="ucnv_ext.cpp" />
<ClCompile Include="ucnv_io.cpp"> <ClCompile Include="ucnv_io.cpp" />
</ClCompile>
<ClCompile Include="ucnv_lmb.cpp" /> <ClCompile Include="ucnv_lmb.cpp" />
<ClCompile Include="ucnv_set.cpp" /> <ClCompile Include="ucnv_set.cpp" />
<ClCompile Include="ucnv_u16.cpp" /> <ClCompile Include="ucnv_u16.cpp" />
@ -357,19 +354,15 @@
<ClCompile Include="ucnvlat1.cpp" /> <ClCompile Include="ucnvlat1.cpp" />
<ClCompile Include="ucnvmbcs.cpp" /> <ClCompile Include="ucnvmbcs.cpp" />
<ClCompile Include="ucnvscsu.cpp" /> <ClCompile Include="ucnvscsu.cpp" />
<ClCompile Include="ucnvsel.cpp"> <ClCompile Include="ucnvsel.cpp" />
</ClCompile>
<ClCompile Include="cmemory.cpp" /> <ClCompile Include="cmemory.cpp" />
<ClCompile Include="ucln_cmn.cpp"> <ClCompile Include="ucln_cmn.cpp" />
</ClCompile>
<ClCompile Include="ucmndata.cpp" /> <ClCompile Include="ucmndata.cpp" />
<ClCompile Include="udata.cpp" /> <ClCompile Include="udata.cpp" />
<ClCompile Include="udatamem.cpp" /> <ClCompile Include="udatamem.cpp" />
<ClCompile Include="udataswp.cpp" /> <ClCompile Include="udataswp.cpp" />
<ClCompile Include="uinit.cpp"> <ClCompile Include="uinit.cpp" />
</ClCompile> <ClCompile Include="umapfile.cpp" />
<ClCompile Include="umapfile.cpp">
</ClCompile>
<ClCompile Include="uobject.cpp" /> <ClCompile Include="uobject.cpp" />
<ClCompile Include="dtintrv.cpp" /> <ClCompile Include="dtintrv.cpp" />
<ClCompile Include="parsepos.cpp" /> <ClCompile Include="parsepos.cpp" />
@ -379,19 +372,15 @@
<ClCompile Include="punycode.cpp" /> <ClCompile Include="punycode.cpp" />
<ClCompile Include="uidna.cpp" /> <ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" /> <ClCompile Include="uts46.cpp" />
<ClCompile Include="locavailable.cpp"> <ClCompile Include="locavailable.cpp" />
</ClCompile>
<ClCompile Include="locbased.cpp" /> <ClCompile Include="locbased.cpp" />
<ClCompile Include="locdispnames.cpp" /> <ClCompile Include="locdispnames.cpp" />
<ClCompile Include="locdspnm.cpp" /> <ClCompile Include="locdspnm.cpp" />
<ClCompile Include="locid.cpp"> <ClCompile Include="locid.cpp" />
</ClCompile>
<ClCompile Include="loclikely.cpp" /> <ClCompile Include="loclikely.cpp" />
<ClCompile Include="locresdata.cpp" /> <ClCompile Include="locresdata.cpp" />
<ClCompile Include="locutil.cpp"> <ClCompile Include="locutil.cpp" />
</ClCompile> <ClCompile Include="resbund.cpp" />
<ClCompile Include="resbund.cpp">
</ClCompile>
<ClCompile Include="resbund_cnv.cpp" /> <ClCompile Include="resbund_cnv.cpp" />
<ClCompile Include="ucat.cpp" /> <ClCompile Include="ucat.cpp" />
<ClCompile Include="uloc.cpp" /> <ClCompile Include="uloc.cpp" />
@ -401,27 +390,22 @@
<ClCompile Include="uresdata.cpp" /> <ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" /> <ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" /> <ClCompile Include="ucurr.cpp" />
<ClCompile Include="caniter.cpp"> <ClCompile Include="caniter.cpp" />
</ClCompile>
<ClCompile Include="filterednormalizer2.cpp" /> <ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" /> <ClCompile Include="loadednormalizer2impl.cpp" />
<ClCompile Include="normalizer2.cpp" /> <ClCompile Include="normalizer2.cpp" />
<ClCompile Include="normalizer2impl.cpp" /> <ClCompile Include="normalizer2impl.cpp" />
<ClCompile Include="normlzr.cpp"> <ClCompile Include="normlzr.cpp" />
</ClCompile>
<ClCompile Include="unorm.cpp" /> <ClCompile Include="unorm.cpp" />
<ClCompile Include="unormcmp.cpp" /> <ClCompile Include="unormcmp.cpp" />
<ClCompile Include="bmpset.cpp" /> <ClCompile Include="bmpset.cpp" />
<ClCompile Include="patternprops.cpp" /> <ClCompile Include="patternprops.cpp" />
<ClCompile Include="propname.cpp"> <ClCompile Include="propname.cpp" />
</ClCompile>
<ClCompile Include="ruleiter.cpp" /> <ClCompile Include="ruleiter.cpp" />
<ClCompile Include="ucase.cpp"> <ClCompile Include="ucase.cpp" />
</ClCompile>
<ClCompile Include="uchar.cpp" /> <ClCompile Include="uchar.cpp" />
<ClCompile Include="unames.cpp" /> <ClCompile Include="unames.cpp" />
<ClCompile Include="unifiedcache.cpp"> <ClCompile Include="unifiedcache.cpp" />
</ClCompile>
<ClCompile Include="unifilt.cpp" /> <ClCompile Include="unifilt.cpp" />
<ClCompile Include="unifunct.cpp" /> <ClCompile Include="unifunct.cpp" />
<ClCompile Include="uniset.cpp" /> <ClCompile Include="uniset.cpp" />
@ -436,22 +420,16 @@
<ClCompile Include="uset_props.cpp" /> <ClCompile Include="uset_props.cpp" />
<ClCompile Include="usetiter.cpp" /> <ClCompile Include="usetiter.cpp" />
<ClCompile Include="icuplug.cpp" /> <ClCompile Include="icuplug.cpp" />
<ClCompile Include="serv.cpp"> <ClCompile Include="serv.cpp" />
</ClCompile> <ClCompile Include="servlk.cpp" />
<ClCompile Include="servlk.cpp"> <ClCompile Include="servlkf.cpp" />
</ClCompile> <ClCompile Include="servls.cpp" />
<ClCompile Include="servlkf.cpp"> <ClCompile Include="servnotf.cpp" />
</ClCompile> <ClCompile Include="servrbf.cpp" />
<ClCompile Include="servls.cpp"> <ClCompile Include="servslkf.cpp" />
</ClCompile>
<ClCompile Include="servnotf.cpp">
</ClCompile>
<ClCompile Include="servrbf.cpp">
</ClCompile>
<ClCompile Include="servslkf.cpp">
</ClCompile>
<ClCompile Include="usprep.cpp" /> <ClCompile Include="usprep.cpp" />
<ClCompile Include="appendable.cpp" /> <ClCompile Include="appendable.cpp" />
<ClCompile Include="bytesinkutil.cpp" />
<ClCompile Include="bytestream.cpp" /> <ClCompile Include="bytestream.cpp" />
<ClCompile Include="bytestrie.cpp" /> <ClCompile Include="bytestrie.cpp" />
<ClCompile Include="bytestriebuilder.cpp" /> <ClCompile Include="bytestriebuilder.cpp" />
@ -494,8 +472,7 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Include="unicode\ubidi.h"> <CustomBuild Include="unicode\ubidi.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="localsvc.h" /> <ClInclude Include="localsvc.h" />
@ -529,6 +506,7 @@
<ClInclude Include="rbbiscan.h" /> <ClInclude Include="rbbiscan.h" />
<ClInclude Include="rbbisetb.h" /> <ClInclude Include="rbbisetb.h" />
<ClInclude Include="rbbitblb.h" /> <ClInclude Include="rbbitblb.h" />
<ClInclude Include="rbbi_cache.h" />
<ClInclude Include="dictionarydata.h" /> <ClInclude Include="dictionarydata.h" />
<CustomBuild Include="unicode\ubrk.h"> <CustomBuild Include="unicode\ubrk.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command> <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
@ -599,65 +577,54 @@
</CustomBuild> </CustomBuild>
<ClInclude Include="putilimp.h" /> <ClInclude Include="putilimp.h" />
<CustomBuild Include="unicode\std_string.h"> <CustomBuild Include="unicode\std_string.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="uassert.h" /> <ClInclude Include="uassert.h" />
<CustomBuild Include="unicode\uconfig.h"> <CustomBuild Include="unicode\uconfig.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\umachine.h"> <CustomBuild Include="unicode\umachine.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="umutex.h" /> <ClInclude Include="umutex.h" />
<ClInclude Include="uposixdefs.h" /> <ClInclude Include="uposixdefs.h" />
<CustomBuild Include="unicode\urename.h"> <CustomBuild Include="unicode\urename.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\utrace.h"> <CustomBuild Include="unicode\utrace.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="utracimp.h" /> <ClInclude Include="utracimp.h" />
<CustomBuild Include="unicode\utypes.h"> <CustomBuild Include="unicode\utypes.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uvernum.h"> <CustomBuild Include="unicode\uvernum.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uversion.h"> <CustomBuild Include="unicode\uversion.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="wintz.h" /> <ClInclude Include="wintz.h" />
<CustomBuild Include="unicode\ucnv.h"> <CustomBuild Include="unicode\ucnv.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ucnv_bld.h" /> <ClInclude Include="ucnv_bld.h" />
<CustomBuild Include="unicode\ucnv_cb.h"> <CustomBuild Include="unicode\ucnv_cb.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ucnv_cnv.h" /> <ClInclude Include="ucnv_cnv.h" />
<CustomBuild Include="unicode\ucnv_err.h"> <CustomBuild Include="unicode\ucnv_err.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ucnv_ext.h" /> <ClInclude Include="ucnv_ext.h" />
@ -665,19 +632,16 @@
<ClInclude Include="ucnv_io.h" /> <ClInclude Include="ucnv_io.h" />
<ClInclude Include="ucnvmbcs.h" /> <ClInclude Include="ucnvmbcs.h" />
<CustomBuild Include="unicode\ucnvsel.h"> <CustomBuild Include="unicode\ucnvsel.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="cmemory.h" /> <ClInclude Include="cmemory.h" />
<CustomBuild Include="unicode\localpointer.h"> <CustomBuild Include="unicode\localpointer.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uclean.h"> <CustomBuild Include="unicode\uclean.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ucln.h" /> <ClInclude Include="ucln.h" />
@ -685,99 +649,82 @@
<ClInclude Include="ucln_imp.h" /> <ClInclude Include="ucln_imp.h" />
<ClInclude Include="ucmndata.h" /> <ClInclude Include="ucmndata.h" />
<CustomBuild Include="unicode\udata.h"> <CustomBuild Include="unicode\udata.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="udatamem.h" /> <ClInclude Include="udatamem.h" />
<ClInclude Include="udataswp.h" /> <ClInclude Include="udataswp.h" />
<ClInclude Include="umapfile.h" /> <ClInclude Include="umapfile.h" />
<CustomBuild Include="unicode\uobject.h"> <CustomBuild Include="unicode\uobject.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\dtintrv.h"> <CustomBuild Include="unicode\dtintrv.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\parseerr.h"> <CustomBuild Include="unicode\parseerr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\parsepos.h"> <CustomBuild Include="unicode\parsepos.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\umisc.h"> <CustomBuild Include="unicode\umisc.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ustrfmt.h" /> <ClInclude Include="ustrfmt.h" />
<ClInclude Include="util.h" /> <ClInclude Include="util.h" />
<CustomBuild Include="unicode\idna.h"> <CustomBuild Include="unicode\idna.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="punycode.h" /> <ClInclude Include="punycode.h" />
<CustomBuild Include="unicode\uidna.h"> <CustomBuild Include="unicode\uidna.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="locbased.h" /> <ClInclude Include="locbased.h" />
<CustomBuild Include="unicode\locid.h"> <CustomBuild Include="unicode\locid.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="locutil.h" /> <ClInclude Include="locutil.h" />
<CustomBuild Include="unicode\resbund.h"> <CustomBuild Include="unicode\resbund.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="sharedobject.h" /> <ClInclude Include="sharedobject.h" />
<ClCompile Include="sharedobject.cpp" /> <ClCompile Include="sharedobject.cpp" />
<CustomBuild Include="unicode\locdspnm.h"> <CustomBuild Include="unicode\locdspnm.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\simpleformatter.h"> <CustomBuild Include="unicode\simpleformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\ucat.h"> <CustomBuild Include="unicode\ucat.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\udisplaycontext.h"> <CustomBuild Include="unicode\udisplaycontext.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uldnames.h"> <CustomBuild Include="unicode\uldnames.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uloc.h"> <CustomBuild Include="unicode\uloc.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ulocimp.h" /> <ClInclude Include="ulocimp.h" />
<CustomBuild Include="unicode\ures.h"> <CustomBuild Include="unicode\ures.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="unifiedcache.h" /> <ClInclude Include="unifiedcache.h" />
@ -786,36 +733,30 @@
<ClInclude Include="ureslocs.h" /> <ClInclude Include="ureslocs.h" />
<ClInclude Include="resource.h" /> <ClInclude Include="resource.h" />
<CustomBuild Include="unicode\ucurr.h"> <CustomBuild Include="unicode\ucurr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ucurrimp.h" /> <ClInclude Include="ucurrimp.h" />
<CustomBuild Include="unicode\caniter.h"> <CustomBuild Include="unicode\caniter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="norm2allmodes.h" /> <ClInclude Include="norm2allmodes.h" />
<CustomBuild Include="unicode\normalizer2.h"> <CustomBuild Include="unicode\normalizer2.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="normalizer2impl.h" /> <ClInclude Include="normalizer2impl.h" />
<CustomBuild Include="unicode\normlzr.h"> <CustomBuild Include="unicode\normlzr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\unorm.h"> <CustomBuild Include="unicode\unorm.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\unorm2.h"> <CustomBuild Include="unicode\unorm2.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="unormimp.h" /> <ClInclude Include="unormimp.h" />
@ -825,58 +766,48 @@
<ClInclude Include="propname.h" /> <ClInclude Include="propname.h" />
<ClInclude Include="ruleiter.h" /> <ClInclude Include="ruleiter.h" />
<CustomBuild Include="unicode\symtable.h"> <CustomBuild Include="unicode\symtable.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ucase.h" /> <ClInclude Include="ucase.h" />
<CustomBuild Include="unicode\uchar.h"> <CustomBuild Include="unicode\uchar.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\unifilt.h"> <CustomBuild Include="unicode\unifilt.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\unifunct.h"> <CustomBuild Include="unicode\unifunct.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\unimatch.h"> <CustomBuild Include="unicode\unimatch.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uniset.h"> <CustomBuild Include="unicode\uniset.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="unisetspan.h" /> <ClInclude Include="unisetspan.h" />
<ClInclude Include="uprops.h" /> <ClInclude Include="uprops.h" />
<ClInclude Include="usc_impl.h" /> <ClInclude Include="usc_impl.h" />
<CustomBuild Include="unicode\uscript.h"> <CustomBuild Include="unicode\uscript.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uset.h"> <CustomBuild Include="unicode\uset.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="uset_imp.h" /> <ClInclude Include="uset_imp.h" />
<CustomBuild Include="unicode\usetiter.h"> <CustomBuild Include="unicode\usetiter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\icuplug.h"> <CustomBuild Include="unicode\icuplug.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="icuplugimp.h" /> <ClInclude Include="icuplugimp.h" />
@ -885,33 +816,28 @@
<ClInclude Include="servnotf.h" /> <ClInclude Include="servnotf.h" />
<ClInclude Include="sprpimpl.h" /> <ClInclude Include="sprpimpl.h" />
<CustomBuild Include="unicode\usprep.h"> <CustomBuild Include="unicode\usprep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\appendable.h"> <CustomBuild Include="unicode\appendable.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="bytesinkutil.h" />
<CustomBuild Include="unicode\bytestream.h"> <CustomBuild Include="unicode\bytestream.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\bytestrie.h"> <CustomBuild Include="unicode\bytestrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\bytestriebuilder.h"> <CustomBuild Include="unicode\bytestriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\chariter.h"> <CustomBuild Include="unicode\chariter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="charstr.h" /> <ClInclude Include="charstr.h" />
@ -919,116 +845,94 @@
<ClInclude Include="cstr.h" /> <ClInclude Include="cstr.h" />
<ClInclude Include="cwchar.h" /> <ClInclude Include="cwchar.h" />
<CustomBuild Include="unicode\messagepattern.h"> <CustomBuild Include="unicode\messagepattern.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\rep.h"> <CustomBuild Include="unicode\rep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\schriter.h"> <CustomBuild Include="unicode\schriter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\stringpiece.h"> <CustomBuild Include="unicode\stringpiece.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\stringtriebuilder.h"> <CustomBuild Include="unicode\stringtriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\ucasemap.h"> <CustomBuild Include="unicode\ucasemap.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\ucharstrie.h"> <CustomBuild Include="unicode\ucharstrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\ucharstriebuilder.h"> <CustomBuild Include="unicode\ucharstriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\uchriter.h"> <CustomBuild Include="unicode\uchriter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="uinvchar.h" /> <ClInclude Include="uinvchar.h" />
<CustomBuild Include="unicode\uiter.h"> <CustomBuild Include="unicode\uiter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\unistr.h"> <CustomBuild Include="unicode\unistr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\urep.h"> <CustomBuild Include="unicode\urep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<ClInclude Include="ustr_cnv.h" /> <ClInclude Include="ustr_cnv.h" />
<ClInclude Include="ustr_imp.h" /> <ClInclude Include="ustr_imp.h" />
<CustomBuild Include="unicode\ustring.h"> <CustomBuild Include="unicode\ustring.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\ustringtrie.h"> <CustomBuild Include="unicode\ustringtrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\utext.h"> <CustomBuild Include="unicode\utext.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\utf.h"> <CustomBuild Include="unicode\utf.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\utf16.h"> <CustomBuild Include="unicode\utf16.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\utf32.h"> <CustomBuild Include="unicode\utf32.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\utf8.h"> <CustomBuild Include="unicode\utf8.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\utf_old.h"> <CustomBuild Include="unicode\utf_old.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\listformatter.h"> <CustomBuild Include="unicode\listformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
<CustomBuild Include="unicode\ulistformatter.h"> <CustomBuild Include="unicode\ulistformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode <Command>copy "%(FullPath)" ..\..\include\unicode</Command>
</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs> <Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild> </CustomBuild>
</ItemGroup> </ItemGroup>

Просмотреть файл

@ -46,9 +46,9 @@ int32_t
DictionaryBreakEngine::findBreaks( UText *text, DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
(void)startPos; // TODO: remove this param?
int32_t result = 0; int32_t result = 0;
// Find the span of characters included in the set. // Find the span of characters included in the set.
@ -60,34 +60,12 @@ DictionaryBreakEngine::findBreaks( UText *text,
int32_t rangeStart; int32_t rangeStart;
int32_t rangeEnd; int32_t rangeEnd;
UChar32 c = utext_current32(text); UChar32 c = utext_current32(text);
if (reverse) { while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
UBool isDict = fSet.contains(c); utext_next32(text); // TODO: recast loop for postincrement
while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) { c = utext_current32(text);
c = utext_previous32(text);
isDict = fSet.contains(c);
}
if (current < startPos) {
rangeStart = startPos;
} else {
rangeStart = current;
if (!isDict) {
utext_next32(text);
rangeStart = (int32_t)utext_getNativeIndex(text);
}
}
// rangeEnd = start + 1;
utext_setNativeIndex(text, start);
utext_next32(text);
rangeEnd = (int32_t)utext_getNativeIndex(text);
}
else {
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
} }
rangeStart = start;
rangeEnd = current;
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) { if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current); utext_setNativeIndex(text, current);
@ -248,7 +226,7 @@ int32_t
ThaiBreakEngine::divideUpDictionaryRange( UText *text, ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
utext_setNativeIndex(text, rangeStart); utext_setNativeIndex(text, rangeStart);
utext_moveIndex32(text, THAI_MIN_WORD_SPAN); utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
if (utext_getNativeIndex(text) >= rangeEnd) { if (utext_getNativeIndex(text) >= rangeEnd) {
@ -487,7 +465,7 @@ int32_t
LaoBreakEngine::divideUpDictionaryRange( UText *text, LaoBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) { if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words return 0; // Not enough characters for two words
} }
@ -680,7 +658,7 @@ int32_t
BurmeseBreakEngine::divideUpDictionaryRange( UText *text, BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) { if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words return 0; // Not enough characters for two words
} }
@ -885,7 +863,7 @@ int32_t
KhmerBreakEngine::divideUpDictionaryRange( UText *text, KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) { if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words return 0; // Not enough characters for two words
} }
@ -1110,9 +1088,9 @@ static inline uint32_t getKatakanaCost(int32_t wordLength){
return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength]; return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength];
} }
static inline bool isKatakana(uint16_t value) { static inline bool isKatakana(UChar32 value) {
return (value >= 0x30A1u && value <= 0x30FEu && value != 0x30FBu) || return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) ||
(value >= 0xFF66u && value <= 0xFF9fu); (value >= 0xFF66 && value <= 0xFF9f);
} }
@ -1128,14 +1106,14 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
* @param text A UText representing the text * @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters * @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters * @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0 * @param foundBreaks vector<int32> to receive the break positions
* @return The number of breaks found * @return The number of breaks found
*/ */
int32_t int32_t
CjkBreakEngine::divideUpDictionaryRange( UText *inText, CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const { UVector32 &foundBreaks ) const {
if (rangeStart >= rangeEnd) { if (rangeStart >= rangeEnd) {
return 0; return 0;
} }
@ -1405,6 +1383,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
prevCPPos = cpPos; prevCPPos = cpPos;
prevUTextPos = utextPos; prevUTextPos = utextPos;
} }
(void)prevCPPos; // suppress compiler warnings about unused variable
// inString goes out of scope // inString goes out of scope
// inputMap goes out of scope // inputMap goes out of scope

Просмотреть файл

@ -15,6 +15,7 @@
#include "unicode/utext.h" #include "unicode/utext.h"
#include "brkeng.h" #include "brkeng.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -84,21 +85,18 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
* *
* @param text A UText representing the text. The iterator is left at * @param text A UText representing the text. The iterator is left at
* the end of the run of characters which the engine is capable of handling * the end of the run of characters which the engine is capable of handling
* that starts from the first (or last) character in the range. * that starts from the first character in the range.
* @param startPos The start of the run within the supplied text. * @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text. * @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1. * @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any * @param foundBreaks vector of int32_t to receive the break positions
* @return The number of breaks found. * @return The number of breaks found.
*/ */
virtual int32_t findBreaks( UText *text, virtual int32_t findBreaks( UText *text,
int32_t startPos, int32_t startPos,
int32_t endPos, int32_t endPos,
UBool reverse,
int32_t breakType, int32_t breakType,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
protected: protected:
@ -128,7 +126,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const = 0; UVector32 &foundBreaks ) const = 0;
}; };
@ -185,7 +183,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -241,7 +239,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -297,7 +295,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -353,7 +351,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };
@ -417,7 +415,7 @@ class CjkBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text, virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart, int32_t rangeStart,
int32_t rangeEnd, int32_t rangeEnd,
UStack &foundBreaks ) const; UVector32 &foundBreaks ) const;
}; };

Просмотреть файл

@ -17,10 +17,10 @@ namespace {
const int32_t MAX_UNCHANGED_LENGTH = 0x1000; const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1; const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units. // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
// No length change. const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
const int32_t MAX_SHORT_WIDTH = 6; const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff; const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
const int32_t MAX_SHORT_CHANGE = 0x6fff; const int32_t MAX_SHORT_CHANGE = 0x6fff;
// 0111mmmmmmnnnnnn records a replacement of m text units with n. // 0111mmmmmmnnnnnn records a replacement of m text units with n.
@ -33,20 +33,85 @@ const int32_t LENGTH_IN_2TRAIL = 62;
} // namespace } // namespace
Edits::~Edits() { void Edits::releaseArray() U_NOEXCEPT {
if(array != stackArray) { if (array != stackArray) {
uprv_free(array); uprv_free(array);
} }
} }
void Edits::reset() { Edits &Edits::copyArray(const Edits &other) {
length = delta = 0; if (U_FAILURE(errorCode_)) {
length = delta = numChanges = 0;
return *this;
}
if (length > capacity) {
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
if (newArray == nullptr) {
length = delta = numChanges = 0;
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return *this;
}
releaseArray();
array = newArray;
capacity = length;
}
if (length > 0) {
uprv_memcpy(array, other.array, (size_t)length * 2);
}
return *this;
}
// Second half of move assignment: takes over (or copies) the edit units of src.
// Expects this->length and this->errorCode_ to already hold the values taken
// from src, as the move assignment operator does before calling this.
//
// - If errorCode_ is a failure, the counters are zeroed and nothing is moved.
// - If the units fit into the inline stackArray, they are copied there and
//   src is left untouched.
// - Otherwise this object adopts src's array pointer and capacity, and src is
//   pointed back at its own stackArray and reset.
//   NOTE(review): this presumes that length > STACK_CAPACITY implies src.array
//   is heap-allocated -- confirm against growArray().
Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
    if (U_FAILURE(errorCode_)) {
        length = delta = numChanges = 0;
        return *this;
    }
    // Drop whatever heap buffer this object owned before taking new contents.
    releaseArray();
    if (length <= STACK_CAPACITY) {
        // Small enough for the inline buffer: copy the units (2 bytes each).
        array = stackArray;
        capacity = STACK_CAPACITY;
        if (length > 0) {
            uprv_memcpy(array, src.array, (size_t)length * 2);
        }
    } else {
        // Adopt the source buffer and leave src empty on its own stack buffer.
        array = src.array;
        capacity = src.capacity;
        src.array = src.stackArray;
        src.capacity = STACK_CAPACITY;
        src.reset();
    }
    return *this;
}
// Copy assignment: copies the counters and the error code from other, then
// lets copyArray() duplicate the edit units (allocating a larger array if
// needed). If the copied error code is a failure, or the allocation fails,
// copyArray() leaves this object with zeroed counters.
//
// Self-assignment is a no-op. Without the guard, copyArray() would call
// uprv_memcpy() with identical source and destination buffers.
Edits &Edits::operator=(const Edits &other) {
    if (this == &other) { return *this; }  // self-assignment: nothing to copy
    length = other.length;
    delta = other.delta;
    numChanges = other.numChanges;
    errorCode_ = other.errorCode_;
    return copyArray(other);
}
// Move assignment: copies the counters and the error code from src, then lets
// moveArray() either adopt src's array or copy the units into the inline
// stack buffer.
//
// Self-move is a no-op. Without the guard, moveArray() calls releaseArray()
// before it reads src.array, so a self-move of an object with a heap buffer
// would free that buffer and reset the object to empty, and a self-move of an
// object using the stack buffer would copy the buffer onto itself.
Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
    if (this == &src) { return *this; }  // self-move: keep the current contents
    length = src.length;
    delta = src.delta;
    numChanges = src.numChanges;
    errorCode_ = src.errorCode_;
    return moveArray(src);
}
// Destructor: hands cleanup to releaseArray(), which frees the units array
// unless it is the inline stackArray.
Edits::~Edits() {
releaseArray();
}
void Edits::reset() U_NOEXCEPT {
length = delta = numChanges = 0;
errorCode_ = U_ZERO_ERROR;
} }
void Edits::addUnchanged(int32_t unchangedLength) { void Edits::addUnchanged(int32_t unchangedLength) {
if(U_FAILURE(errorCode) || unchangedLength == 0) { return; } if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
if(unchangedLength < 0) { if(unchangedLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR; errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return; return;
} }
// Merge into previous unchanged-text record, if any. // Merge into previous unchanged-text record, if any.
@ -72,38 +137,41 @@ void Edits::addUnchanged(int32_t unchangedLength) {
} }
void Edits::addReplace(int32_t oldLength, int32_t newLength) { void Edits::addReplace(int32_t oldLength, int32_t newLength) {
if(U_FAILURE(errorCode)) { return; } if(U_FAILURE(errorCode_)) { return; }
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
int32_t last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
setLastUnit(last + 1);
return;
}
append(oldLength << 12);
return;
}
if(oldLength < 0 || newLength < 0) { if(oldLength < 0 || newLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR; errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return; return;
} }
if (oldLength == 0 && newLength == 0) { if (oldLength == 0 && newLength == 0) {
return; return;
} }
++numChanges;
int32_t newDelta = newLength - oldLength; int32_t newDelta = newLength - oldLength;
if (newDelta != 0) { if (newDelta != 0) {
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) || if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) { (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
// Integer overflow or underflow. // Integer overflow or underflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return; return;
} }
delta += newDelta; delta += newDelta;
} }
if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
// Merge into previous same-lengths short-replacement record, if any.
int32_t u = (oldLength << 12) | (newLength << 9);
int32_t last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last & ~SHORT_CHANGE_NUM_MASK) == u &&
(last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
setLastUnit(last + 1);
return;
}
append(u);
return;
}
int32_t head = 0x7000; int32_t head = 0x7000;
if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) { if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
head |= oldLength << 6; head |= oldLength << 6;
@ -149,7 +217,7 @@ UBool Edits::growArray() {
} else if (capacity == INT32_MAX) { } else if (capacity == INT32_MAX) {
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
// with a result-string-buffer overflow. // with a result-string-buffer overflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE; return FALSE;
} else if (capacity >= (INT32_MAX / 2)) { } else if (capacity >= (INT32_MAX / 2)) {
newCapacity = INT32_MAX; newCapacity = INT32_MAX;
@ -158,18 +226,16 @@ UBool Edits::growArray() {
} }
// Grow by at least 5 units so that a maximal change record will fit. // Grow by at least 5 units so that a maximal change record will fit.
if ((newCapacity - capacity) < 5) { if ((newCapacity - capacity) < 5) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE; return FALSE;
} }
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2); uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
if (newArray == NULL) { if (newArray == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR; errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return FALSE; return FALSE;
} }
uprv_memcpy(newArray, array, (size_t)length * 2); uprv_memcpy(newArray, array, (size_t)length * 2);
if (array != stackArray) { releaseArray();
uprv_free(array);
}
array = newArray; array = newArray;
capacity = newCapacity; capacity = newCapacity;
return TRUE; return TRUE;
@ -177,27 +243,161 @@ UBool Edits::growArray() {
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) { UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
if (U_FAILURE(outErrorCode)) { return TRUE; } if (U_FAILURE(outErrorCode)) { return TRUE; }
if (U_SUCCESS(errorCode)) { return FALSE; } if (U_SUCCESS(errorCode_)) { return FALSE; }
outErrorCode = errorCode; outErrorCode = errorCode_;
return TRUE; return TRUE;
} }
UBool Edits::hasChanges() const { Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
if (delta != 0) { if (copyErrorTo(errorCode)) { return *this; }
return TRUE; // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
} // Parallel iteration over both Edits.
for (int32_t i = 0; i < length; ++i) { Iterator abIter = ab.getFineIterator();
if (array[i] > MAX_UNCHANGED) { Iterator bcIter = bc.getFineIterator();
return TRUE; UBool abHasNext = TRUE, bcHasNext = TRUE;
// Copy iterator state into local variables, so that we can modify and subdivide spans.
// ab old & new length, bc old & new length
int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
// When we have different-intermediate-length changes, we accumulate a larger change.
int32_t pending_aLength = 0, pending_cLength = 0;
for (;;) {
// At this point, for each of the two iterators:
// Either we are done with the locally cached current edit,
// and its intermediate-string length has been reset,
// or we will continue to work with a truncated remainder of this edit.
//
// If the current edit is done, and the iterator has not yet reached the end,
// then we fetch the next edit. This is true for at least one of the iterators.
//
// Normally it does not matter whether we fetch from ab and then bc or vice versa.
// However, the result is observably different when
// ab deletions meet bc insertions at the same intermediate-string index.
// Some users expect the bc insertions to come first, so we fetch from bc first.
if (bc_bLength == 0) {
if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
bc_bLength = bcIter.oldLength();
cLength = bcIter.newLength();
if (bc_bLength == 0) {
// insertion
if (ab_bLength == 0 || !abIter.hasChange()) {
addReplace(pending_aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_cLength += cLength;
}
continue;
}
}
// else see if the other iterator is done, too.
}
if (ab_bLength == 0) {
if (abHasNext && (abHasNext = abIter.next(errorCode))) {
aLength = abIter.oldLength();
ab_bLength = abIter.newLength();
if (ab_bLength == 0) {
// deletion
if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
addReplace(pending_aLength + aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_aLength += aLength;
}
continue;
}
} else if (bc_bLength == 0) {
// Both iterators are done at the same time:
// The intermediate-string lengths match.
break;
} else {
// The ab output string is shorter than the bc input string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
}
if (bc_bLength == 0) {
// The bc input string is shorter than the ab output string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
// Done fetching: ab_bLength > 0 && bc_bLength > 0
// The current state has two parts:
// - Past: We accumulate a longer ac edit in the "pending" variables.
// - Current: We have copies of the current ab/bc edits in local variables.
// At least one side is newly fetched.
// One side might be a truncated remainder of an edit we fetched earlier.
if (!abIter.hasChange() && !bcIter.hasChange()) {
// An unchanged span all the way from string a to string c.
if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
}
int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
addUnchanged(unchangedLength);
ab_bLength = aLength -= unchangedLength;
bc_bLength = cLength -= unchangedLength;
// At least one of the unchanged spans is now empty.
continue;
}
if (!abIter.hasChange() && bcIter.hasChange()) {
// Unchanged a->b but changed b->c.
if (ab_bLength >= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
aLength = ab_bLength -= bc_bLength;
bc_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else if (abIter.hasChange() && !bcIter.hasChange()) {
// Changed a->b and then unchanged b->c.
if (ab_bLength <= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
pending_aLength = pending_cLength = 0;
cLength = bc_bLength -= ab_bLength;
ab_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else { // both abIter.hasChange() && bcIter.hasChange()
if (ab_bLength == bc_bLength) {
// Changes on both sides up to the same position. Emit & reset.
addReplace(pending_aLength + aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
ab_bLength = bc_bLength = 0;
continue;
}
}
// Accumulate the a->c change, reset the shorter side,
// keep a remainder of the longer one.
pending_aLength += aLength;
pending_cLength += cLength;
if (ab_bLength < bc_bLength) {
bc_bLength -= ab_bLength;
cLength = ab_bLength = 0;
} else { // ab_bLength > bc_bLength
ab_bLength -= bc_bLength;
aLength = bc_bLength = 0;
} }
} }
return FALSE; if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
}
copyErrorTo(errorCode);
return *this;
} }
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) : Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0), array(a), index(0), length(len), remaining(0),
onlyChanges_(oc), coarse(crs), onlyChanges_(oc), coarse(crs),
changed(FALSE), oldLength_(0), newLength_(0), dir(0), changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {} srcIndex(0), replIndex(0), destIndex(0) {}
int32_t Edits::Iterator::readLength(int32_t head) { int32_t Edits::Iterator::readLength(int32_t head) {
@ -219,7 +419,7 @@ int32_t Edits::Iterator::readLength(int32_t head) {
} }
} }
void Edits::Iterator::updateIndexes() { void Edits::Iterator::updateNextIndexes() {
srcIndex += oldLength_; srcIndex += oldLength_;
if (changed) { if (changed) {
replIndex += newLength_; replIndex += newLength_;
@ -227,22 +427,52 @@ void Edits::Iterator::updateIndexes() {
destIndex += newLength_; destIndex += newLength_;
} }
// Backward-iteration counterpart of updateNextIndexes(): moves the string
// indexes back by the lengths of the current span.
// srcIndex retreats by oldLength_ and destIndex by newLength_; replIndex
// retreats by newLength_ only when the current span is a change.
void Edits::Iterator::updatePreviousIndexes() {
    destIndex -= newLength_;
    srcIndex -= oldLength_;
    if (changed) { replIndex -= newLength_; }
}
UBool Edits::Iterator::noNext() { UBool Edits::Iterator::noNext() {
// No change beyond the string. // No change before or beyond the string.
dir = 0;
changed = FALSE; changed = FALSE;
oldLength_ = newLength_ = 0; oldLength_ = newLength_ = 0;
return FALSE; return FALSE;
} }
UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) { UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
// Forward iteration: Update the string indexes to the limit of the current span,
// and post-increment-read array units to assemble a new span.
// Leaves the array index one after the last unit of that span.
if (U_FAILURE(errorCode)) { return FALSE; } if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows. // We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere. // It is also convenient for caller loops if we bail out when an error was set elsewhere.
updateIndexes(); if (dir > 0) {
if (remaining > 0) { updateNextIndexes();
// Fine-grained iterator: Continue a sequence of equal-length changes. } else {
--remaining; if (dir < 0) {
return TRUE; // Turn around from previous() to next().
// Post-increment-read the same span again.
if (remaining > 0) {
// Fine-grained iterator:
// Stay on the current one of a sequence of compressed changes.
++index; // next() rests on the index after the sequence unit.
dir = 1;
return TRUE;
}
}
dir = 1;
}
if (remaining >= 1) {
// Fine-grained iterator: Continue a sequence of compressed changes.
if (remaining > 1) {
--remaining;
return TRUE;
}
remaining = 0;
} }
if (index >= length) { if (index >= length) {
return noNext(); return noNext();
@ -258,7 +488,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
} }
newLength_ = oldLength_; newLength_ = oldLength_;
if (onlyChanges) { if (onlyChanges) {
updateIndexes(); updateNextIndexes();
if (index >= length) { if (index >= length) {
return noNext(); return noNext();
} }
@ -270,14 +500,19 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
} }
changed = TRUE; changed = TRUE;
if (u <= MAX_SHORT_CHANGE) { if (u <= MAX_SHORT_CHANGE) {
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
if (coarse) { if (coarse) {
int32_t w = u >> 12; oldLength_ = num * oldLen;
int32_t len = (u & 0xfff) + 1; newLength_ = num * newLen;
oldLength_ = newLength_ = len * w;
} else { } else {
// Split a sequence of equal-length changes that was compressed into one unit. // Split a sequence of changes that was compressed into one unit.
oldLength_ = newLength_ = u >> 12; oldLength_ = oldLen;
remaining = u & 0xfff; newLength_ = newLen;
if (num > 1) {
remaining = num; // This is the first of two or more changes.
}
return TRUE; return TRUE;
} }
} else { } else {
@ -292,55 +527,250 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
while (index < length && (u = array[index]) > MAX_UNCHANGED) { while (index < length && (u = array[index]) > MAX_UNCHANGED) {
++index; ++index;
if (u <= MAX_SHORT_CHANGE) { if (u <= MAX_SHORT_CHANGE) {
int32_t w = u >> 12; int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
int32_t len = (u & 0xfff) + 1; oldLength_ += (u >> 12) * num;
len = len * w; newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
oldLength_ += len;
newLength_ += len;
} else { } else {
U_ASSERT(u <= 0x7fff); U_ASSERT(u <= 0x7fff);
int32_t oldLen = readLength((u >> 6) & 0x3f); oldLength_ += readLength((u >> 6) & 0x3f);
int32_t newLen = readLength(u & 0x3f); newLength_ += readLength(u & 0x3f);
oldLength_ += oldLen;
newLength_ += newLen;
} }
} }
return TRUE; return TRUE;
} }
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) { UBool Edits::Iterator::previous(UErrorCode &errorCode) {
if (U_FAILURE(errorCode) || i < 0) { return FALSE; } // Backward iteration: Pre-decrement-read array units to assemble a new span,
if (i < srcIndex) { // then update the string indexes to the start of that span.
// Reset the iterator to the start. // Leaves the array index on the head unit of that span.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; if (U_FAILURE(errorCode)) { return FALSE; }
} else if (i < (srcIndex + oldLength_)) { // We have an errorCode in case we need to start guarding against integer overflows.
// The index is in the current span. // It is also convenient for caller loops if we bail out when an error was set elsewhere.
return TRUE; if (dir >= 0) {
} if (dir > 0) {
while (next(FALSE, errorCode)) { // Turn around from next() to previous().
if (i < (srcIndex + oldLength_)) { // Set the string indexes to the span limit and
// The index is in the current span. // pre-decrement-read the same span again.
return TRUE; if (remaining > 0) {
} // Fine-grained iterator:
if (remaining > 0) { // Stay on the current one of a sequence of compressed changes.
// Is the index in one of the remaining compressed edits? --index; // previous() rests on the sequence unit.
// srcIndex is the start of the current span, before the remaining ones. dir = -1;
int32_t len = (remaining + 1) * oldLength_;
if (i < (srcIndex + len)) {
int32_t n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
len = n * oldLength_;
srcIndex += len;
replIndex += len;
destIndex += len;
remaining -= n;
return TRUE; return TRUE;
} }
updateNextIndexes();
}
dir = -1;
}
if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of compressed changes.
int32_t u = array[index];
U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
++remaining;
updatePreviousIndexes();
return TRUE;
}
remaining = 0;
}
if (index <= 0) {
return noNext();
}
int32_t u = array[--index];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
changed = FALSE;
oldLength_ = u + 1;
while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
--index;
oldLength_ += u + 1;
}
newLength_ = oldLength_;
// No need to handle onlyChanges as long as previous() is called only from findIndex().
updatePreviousIndexes();
return TRUE;
}
changed = TRUE;
if (u <= MAX_SHORT_CHANGE) {
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
if (coarse) {
oldLength_ = num * oldLen;
newLength_ = num * newLen;
} else {
// Split a sequence of changes that was compressed into one unit.
oldLength_ = oldLen;
newLength_ = newLen;
if (num > 1) {
remaining = 1; // This is the last of two or more changes.
}
updatePreviousIndexes();
return TRUE;
}
} else {
if (u <= 0x7fff) {
// The change is encoded in u alone.
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
} else {
// Back up to the head of the change, read the lengths,
// and reset the index to the head again.
U_ASSERT(index > 0);
while ((u = array[--index]) > 0x7fff) {}
U_ASSERT(u > MAX_SHORT_CHANGE);
int32_t headIndex = index++;
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
index = headIndex;
}
if (!coarse) {
updatePreviousIndexes();
return TRUE;
}
}
// Combine adjacent changes.
while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
--index;
if (u <= MAX_SHORT_CHANGE) {
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
oldLength_ += (u >> 12) * num;
newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
} else if (u <= 0x7fff) {
// Read the lengths, and reset the index to the head again.
int32_t headIndex = index++;
oldLength_ += readLength((u >> 6) & 0x3f);
newLength_ += readLength(u & 0x3f);
index = headIndex;
}
}
updatePreviousIndexes();
return TRUE;
}
/**
 * Positions the iterator on the edit span that contains index i.
 *
 * @param i          Index to look for; interpreted against srcIndex/oldLength_
 *                   when findSource is true, against destIndex/newLength_ otherwise.
 * @param findSource TRUE to search by source index, FALSE by destination index.
 * @param errorCode  In/out error code; if it already holds a failure the
 *                   function returns -1 without moving the iterator.
 * @return -1 if errorCode is a failure or i is negative;
 *          0 if the iterator now rests on the span containing i;
 *          1 if next() ran out of spans before reaching i.
 */
int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode) || i < 0) { return -1; }
    int32_t spanStart, spanLength;
    if (findSource) {  // find source index
        spanStart = srcIndex;
        spanLength = oldLength_;
    } else {  // find destination index
        spanStart = destIndex;
        spanLength = newLength_;
    }
    if (i < spanStart) {
        // Only walk backwards when i is in the upper half of [0, spanStart);
        // otherwise fall through, reset, and search forward from the start.
        if (i >= (spanStart / 2)) {
            // Search backwards.
            for (;;) {
                UBool hasPrevious = previous(errorCode);
                U_ASSERT(hasPrevious);  // because i>=0 and the first span starts at 0
                (void)hasPrevious;  // avoid unused-variable warning
                spanStart = findSource ? srcIndex : destIndex;
                if (i >= spanStart) {
                    // The index is in the current span.
                    return 0;
                }
                if (remaining > 0) {
                    // Is the index in one of the remaining compressed edits?
                    // spanStart is the start of the current span, first of the remaining ones.
                    spanLength = findSource ? oldLength_ : newLength_;
                    int32_t u = array[index];
                    U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
                    int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
                    int32_t len = num * spanLength;
                    if (i >= (spanStart - len)) {
                        int32_t n = ((spanStart - i - 1) / spanLength) + 1;
                        // 1 <= n <= num
                        srcIndex -= n * oldLength_;
                        replIndex -= n * newLength_;
                        destIndex -= n * newLength_;
                        remaining += n;
                        return 0;
                    }
                    // Skip all of these edits at once.
                    srcIndex -= num * oldLength_;
                    replIndex -= num * newLength_;
                    destIndex -= num * newLength_;
                    remaining = 0;
                }
            }
        }
        // Reset the iterator to the start.
        dir = 0;
        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    } else if (i < (spanStart + spanLength)) {
        // The index is in the current span.
        return 0;
    }
    while (next(FALSE, errorCode)) {
        if (findSource) {
            spanStart = srcIndex;
            spanLength = oldLength_;
        } else {
            spanStart = destIndex;
            spanLength = newLength_;
        }
        if (i < (spanStart + spanLength)) {
            // The index is in the current span.
            return 0;
        }
        if (remaining > 1) {
            // Is the index in one of the remaining compressed edits?
            // spanStart is the start of the current span, first of the remaining ones.
            int32_t len = remaining * spanLength;
            if (i < (spanStart + len)) {
                int32_t n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
                srcIndex += n * oldLength_;
                replIndex += n * newLength_;
                destIndex += n * newLength_;
                remaining -= n;
                return 0;
            }
            // Make next() skip all of these edits at once.
            // Each side is scaled by its own per-edit length: len only covers
            // the side being searched, so it must not be assigned to both.
            oldLength_ *= remaining;
            newLength_ *= remaining;
            remaining = 0;
        }
    }
    // Ran out of spans: report "not found" with a positive value
    // (0 would mean the iterator rests on the span containing i).
    return 1;
}
/**
 * Maps a source index to a destination index using the span located by findIndex().
 *
 * @param i         Source index to map.
 * @param errorCode In/out error code, forwarded to findIndex().
 * @return 0 when findIndex() returns a negative value; destIndex when
 *         findIndex() returns a positive value or i equals the span's srcIndex;
 *         the end of the span when it is a change span; otherwise destIndex
 *         plus i's offset into the unchanged span.
 */
int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
    const int32_t found = findIndex(i, TRUE, errorCode);
    if (found < 0) {
        // An error occurred, or i lies before the string.
        return 0;
    }
    if (found == 0 && i != srcIndex) {
        // Strictly inside the located span: a change span maps to its end,
        // an unchanged span maps 1:1 by offset.
        return changed ? destIndex + newLength_
                       : destIndex + (i - srcIndex);
    }
    // At or past the string length, or exactly at the start of the located span.
    return destIndex;
}
/**
 * Maps a destination index to a source index using the span located by findIndex().
 *
 * @param i         Destination index to map.
 * @param errorCode In/out error code, forwarded to findIndex().
 * @return 0 when findIndex() returns a negative value; srcIndex when
 *         findIndex() returns a positive value or i equals the span's destIndex;
 *         the end of the span when it is a change span; otherwise srcIndex
 *         plus i's offset into the unchanged span.
 */
int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
    int32_t where = findIndex(i, FALSE, errorCode);
    if (where < 0) {
        // Error or before the string.
        return 0;
    }
    if (where > 0 || i == destIndex) {
        // At or after string length, or at start of the found span.
        return srcIndex;
    }
    if (changed) {
        // In a change span, map to its end.
        return srcIndex + oldLength_;
    } else {
        // In an unchanged span, offset within it.
        return srcIndex + (i - destIndex);
    }
}
U_NAMESPACE_END U_NAMESPACE_END

Просмотреть файл

@ -694,7 +694,7 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
} }
FilteredBreakIteratorBuilder * FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
if(U_FAILURE(status)) return NULL; if(U_FAILURE(status)) return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status); LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
return (U_SUCCESS(status))? ret.orphan(): NULL; return (U_SUCCESS(status))? ret.orphan(): NULL;

Просмотреть файл

@ -20,7 +20,9 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h" #include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/unorm.h" #include "unicode/unorm.h"
@ -85,6 +87,52 @@ FilteredNormalizer2::normalize(const UnicodeString &src,
return dest; return dest;
} }
/**
 * Public entry point: normalizes UTF-8 text span by span and writes it to sink.
 *
 * Does nothing if errorCode already holds a failure. Resets edits once up
 * front (unless U_EDITS_NO_RESET is set), then delegates to the span-loop
 * overload, starting with USET_SPAN_SIMPLE.
 */
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                                   Edits *edits, UErrorCode &errorCode) const {
    if (U_SUCCESS(errorCode)) {
        const bool callerKeepsEdits = (options & U_EDITS_NO_RESET) != 0;
        if (!callerKeepsEdits && edits != nullptr) {
            edits->reset();
        }
        // Force U_EDITS_NO_RESET so the per-span calls do not reset the edits again.
        const uint32_t spanOptions = options | U_EDITS_NO_RESET;
        normalizeUTF8(spanOptions, src.data(), src.length(), sink, edits,
                      USET_SPAN_SIMPLE, errorCode);
    }
}
/**
 * Span loop behind the public normalizeUTF8().
 *
 * Repeatedly asks the filter set for the next span and alternates the span
 * condition after every span. Spans taken with USET_SPAN_NOT_CONTAINED are
 * recorded as unchanged and copied through (unless U_OMIT_UNCHANGED_TEXT is
 * set); all other spans are passed to norm2.normalizeUTF8(). The loop stops
 * early if norm2 reports a failure.
 */
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
                                   ByteSink &sink, Edits *edits,
                                   USetSpanCondition spanCondition,
                                   UErrorCode &errorCode) const {
    const bool copyUnchanged = (options & U_OMIT_UNCHANGED_TEXT) == 0;
    while (length > 0) {
        const int32_t spanLength = set.spanUTF8(src, length, spanCondition);
        const bool passThrough = (spanCondition == USET_SPAN_NOT_CONTAINED);
        if (spanLength != 0) {
            if (passThrough) {
                if (edits != nullptr) {
                    edits->addUnchanged(spanLength);
                }
                if (copyUnchanged) {
                    sink.Append(src, spanLength);
                }
            } else {
                // Not norm2.normalizeSecondAndAppend() because we do not want
                // to modify the non-filter part of dest.
                norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode);
                if (U_FAILURE(errorCode)) {
                    break;
                }
            }
        }
        // Flip the condition for the next span and step past this one.
        spanCondition = passThrough ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
        src += spanLength;
        length -= spanLength;
    }
}
UnicodeString & UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
@ -196,6 +244,31 @@ FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode)
return TRUE; return TRUE;
} }
/**
 * Reports whether the UTF-8 text is normalized, span by span.
 *
 * Spans alternate between USET_SPAN_SIMPLE (first) and
 * USET_SPAN_NOT_CONTAINED. Only the USET_SPAN_SIMPLE spans are checked with
 * norm2.isNormalizedUTF8(); the others are skipped.
 *
 * @return FALSE if errorCode holds a failure on entry or after a check, or if
 *         norm2 reports a span as not normalized; TRUE otherwise.
 */
UBool
FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    const char *cursor = sp.data();
    int32_t bytesLeft = sp.length();
    bool checkSpan = true;  // true: USET_SPAN_SIMPLE span, handed to norm2
    while (bytesLeft > 0) {
        const USetSpanCondition condition =
            checkSpan ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
        const int32_t spanLength = set.spanUTF8(cursor, bytesLeft, condition);
        if (checkSpan) {
            const UBool spanOk =
                norm2.isNormalizedUTF8(StringPiece(cursor, spanLength), errorCode);
            if (!spanOk || U_FAILURE(errorCode)) {
                return FALSE;
            }
        }
        checkSpan = !checkSpan;
        cursor += spanLength;
        bytesLeft -= spanLength;
    }
    return TRUE;
}
UNormalizationCheckResult UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode); uprv_checkCanGetBuffer(s, errorCode);

Просмотреть файл

@ -33,6 +33,8 @@ class U_COMMON_API Hashtable : public UMemory {
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status); inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
public: public:
/** /**
* Construct a hashtable * Construct a hashtable
@ -41,6 +43,14 @@ public:
*/ */
Hashtable(UBool ignoreKeyCase, UErrorCode& status); Hashtable(UBool ignoreKeyCase, UErrorCode& status);
/**
* Construct a hashtable
* @param ignoreKeyCase If true, keys are case insensitive.
* @param size initial size allocation
* @param status Error code
*/
Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
/** /**
* Construct a hashtable * Construct a hashtable
* @param keyComp Comparator for comparing the keys * @param keyComp Comparator for comparing the keys
@ -76,9 +86,9 @@ public:
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status); int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
void* get(const UnicodeString& key) const; void* get(const UnicodeString& key) const;
int32_t geti(const UnicodeString& key) const; int32_t geti(const UnicodeString& key) const;
void* remove(const UnicodeString& key); void* remove(const UnicodeString& key);
int32_t removei(const UnicodeString& key); int32_t removei(const UnicodeString& key);
@ -92,9 +102,9 @@ public:
* @see uhash_nextElement * @see uhash_nextElement
*/ */
const UHashElement* nextElement(int32_t& pos) const; const UHashElement* nextElement(int32_t& pos) const;
UKeyComparator* setKeyComparator(UKeyComparator*keyComp); UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
UValueComparator* setValueComparator(UValueComparator* valueComp); UValueComparator* setValueComparator(UValueComparator* valueComp);
UBool equals(const Hashtable& that) const; UBool equals(const Hashtable& that) const;
@ -107,7 +117,7 @@ private:
* Implementation * Implementation
********************************************************************/ ********************************************************************/
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp, inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
UValueComparator *valueComp, UErrorCode& status) { UValueComparator *valueComp, UErrorCode& status) {
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return; return;
@ -119,10 +129,23 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
} }
} }
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
UValueComparator *valueComp, int32_t size, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
if (U_SUCCESS(status)) {
hash = &hashObj;
uhash_setKeyDeleter(hash, uprv_deleteUObject);
}
}
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
UErrorCode& status) : hash(0) { UErrorCode& status) : hash(0) {
init( uhash_hashUnicodeString, keyComp, valueComp, status); init( uhash_hashUnicodeString, keyComp, valueComp, status);
} }
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status) inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
: hash(0) : hash(0)
{ {
@ -134,6 +157,17 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
status); status);
} }
/**
 * Constructs a hashtable with an initial size.
 *
 * Uses the caseless UnicodeString hash and comparator when ignoreKeyCase is
 * set, the plain UnicodeString ones otherwise; no value comparator is installed.
 */
inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
 : hash(0)
{
    UHashFunction *keyHash = uhash_hashUnicodeString;
    UKeyComparator *keyComp = uhash_compareUnicodeString;
    if (ignoreKeyCase) {
        keyHash = uhash_hashCaselessUnicodeString;
        keyComp = uhash_compareCaselessUnicodeString;
    }
    initSize(keyHash, keyComp, NULL, size, status);
}
inline Hashtable::Hashtable(UErrorCode& status) inline Hashtable::Hashtable(UErrorCode& status)
: hash(0) : hash(0)
{ {
@ -200,7 +234,7 @@ inline void Hashtable::removeAll(void) {
inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){ inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
return uhash_setKeyComparator(hash, keyComp); return uhash_setKeyComparator(hash, keyComp);
} }
inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){ inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
return uhash_setValueComparator(hash, valueComp); return uhash_setValueComparator(hash, valueComp);
} }

Просмотреть файл

@ -63,7 +63,7 @@ ListFormatInternal(const ListFormatInternal &other) :
static Hashtable* listPatternHash = NULL; static Hashtable* listPatternHash = NULL;
static UMutex listFormatterMutex = U_MUTEX_INITIALIZER; static UMutex listFormatterMutex = U_MUTEX_INITIALIZER;
static const char *STANDARD_STYLE = "standard"; static const char STANDARD_STYLE[] = "standard";
U_CDECL_BEGIN U_CDECL_BEGIN
static UBool U_CALLCONV uprv_listformatter_cleanup() { static UBool U_CALLCONV uprv_listformatter_cleanup() {

Просмотреть файл

@ -62,7 +62,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d && pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 && pInfo->dataFormat[3]==0x32 &&
pInfo->formatVersion[0]==2 pInfo->formatVersion[0]==3
) { ) {
// Normalizer2Impl *me=(Normalizer2Impl *)context; // Normalizer2Impl *me=(Normalizer2Impl *)context;
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
@ -84,7 +84,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes; const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_MAYBE_YES) { if(indexesLength<=IX_MIN_LCCC_CP) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
return; return;
} }

Просмотреть файл

@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN
static icu::Locale* availableLocaleList = NULL; static icu::Locale* availableLocaleList = NULL;
static int32_t availableLocaleListCount; static int32_t availableLocaleListCount;
static icu::UInitOnce gInitOnce = U_INITONCE_INITIALIZER; static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER;
U_NAMESPACE_END U_NAMESPACE_END
@ -50,7 +50,7 @@ static UBool U_CALLCONV locale_available_cleanup(void)
availableLocaleList = NULL; availableLocaleList = NULL;
} }
availableLocaleListCount = 0; availableLocaleListCount = 0;
gInitOnce.reset(); gInitOnceLocale.reset();
return TRUE; return TRUE;
} }
@ -81,7 +81,7 @@ void U_CALLCONV locale_available_init() {
const Locale* U_EXPORT2 const Locale* U_EXPORT2
Locale::getAvailableLocales(int32_t& count) Locale::getAvailableLocales(int32_t& count)
{ {
umtx_initOnce(gInitOnce, &locale_available_init); umtx_initOnce(gInitOnceLocale, &locale_available_init);
count = availableLocaleListCount; count = availableLocaleListCount;
return availableLocaleList; return availableLocaleList;
} }

Просмотреть файл

@ -542,7 +542,7 @@ uloc_getDisplayName(const char *locale,
return 0; return 0;
} }
separator = (const UChar *)p0 + subLen; separator = (const UChar *)p0 + subLen;
sepLen = p1 - separator; sepLen = static_cast<int32_t>(p1 - separator);
} }
if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) { if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
@ -558,8 +558,8 @@ uloc_getDisplayName(const char *locale,
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
} }
sub0Pos=p0-pattern; sub0Pos = static_cast<int32_t>(p0-pattern);
sub1Pos=p1-pattern; sub1Pos = static_cast<int32_t>(p1-pattern);
if (sub1Pos < sub0Pos) { /* a very odd pattern */ if (sub1Pos < sub0Pos) { /* a very odd pattern */
int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t; int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
langi=1; langi=1;
@ -821,6 +821,8 @@ uloc_getDisplayKeywordValue( const char* locale,
/* get the keyword value */ /* get the keyword value */
keywordValue[0]=0; keywordValue[0]=0;
keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status); keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
if (*status == U_STRING_NOT_TERMINATED_WARNING)
*status = U_BUFFER_OVERFLOW_ERROR;
/* /*
* if the keyword is equal to currency .. then to get the display name * if the keyword is equal to currency .. then to get the display name

Просмотреть файл

@ -54,7 +54,7 @@ static int32_t ncat(char *buffer, uint32_t buflen, ...) {
*p = 0; *p = 0;
va_end(args); va_end(args);
return p - buffer; return static_cast<int32_t>(p - buffer);
} }
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -636,8 +636,9 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
const char* key; const char* key;
while ((key = e->next((int32_t *)0, status)) != NULL) { while ((key = e->next((int32_t *)0, status)) != NULL) {
value[0] = 0;
locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
if (U_FAILURE(status)) { if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
return result; return result;
} }
keyDisplayName(key, temp, TRUE); keyDisplayName(key, temp, TRUE);

Просмотреть файл

@ -511,7 +511,7 @@ parseTagString(
unknownLanguage); unknownLanguage);
*langLength = (int32_t)uprv_strlen(lang); *langLength = (int32_t)uprv_strlen(lang);
} }
else if (_isIDSeparator(*position)) { if (_isIDSeparator(*position)) {
++position; ++position;
} }
@ -1281,7 +1281,7 @@ uloc_minimizeSubtags(const char* localeID,
// Pairs of (language subtag, + or -) for finding out fast if common languages // Pairs of (language subtag, + or -) for finding out fast if common languages
// are LTR (minus) or RTL (plus). // are LTR (minus) or RTL (plus).
static const char* LANG_DIR_STRING = static const char LANG_DIR_STRING[] =
"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
// Implemented here because this calls uloc_addLikelySubtags(). // Implemented here because this calls uloc_addLikelySubtags().

Просмотреть файл

@ -190,7 +190,10 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG) ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG) ILCID_POSIX_SUBTABLE(bin) {
{0x66, "bin"},
{0x0466, "bin_NG"}
};
ILCID_POSIX_SUBTABLE(bn) { ILCID_POSIX_SUBTABLE(bn) {
{0x45, "bn"}, {0x45, "bn"},
@ -214,7 +217,13 @@ ILCID_POSIX_SUBTABLE(ca) {
}; };
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR) ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
// {numeric host ID, POSIX locale name} pairs for "chr".
// NOTE(review): presumably Windows LCIDs as looked up by uprv_convertToPosix() - confirm
// against the ILCID_POSIX_SUBTABLE definition.
// chr_Cher_US and chr_US deliberately share the same ID (0x045c).
ILCID_POSIX_SUBTABLE(chr) {
{0x05c, "chr"},
{0x7c5c, "chr_Cher"},
{0x045c, "chr_Cher_US"},
{0x045c, "chr_US"}
};
// ICU has chosen different names for these. // ICU has chosen different names for these.
ILCID_POSIX_SUBTABLE(ckb) { ILCID_POSIX_SUBTABLE(ckb) {
@ -263,10 +272,10 @@ ILCID_POSIX_SUBTABLE(en) {
{0x2C09, "en_TT"}, {0x2C09, "en_TT"},
{0x0409, "en_US"}, {0x0409, "en_US"},
{0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */ {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
{0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */ {0x2409, "en_029"},
{0x1c09, "en_ZA"}, {0x1c09, "en_ZA"},
{0x3009, "en_ZW"}, {0x3009, "en_ZW"},
{0x2409, "en_029"}, {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
@ -419,7 +428,12 @@ ILCID_POSIX_SUBTABLE(hsb) {
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU) ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM) ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
// {numeric host ID, POSIX locale name} pairs for "ibb": the bare language
// name and its ibb_NG regional form.
// NOTE(review): presumably Windows LCIDs - confirm against the ILCID_POSIX_SUBTABLE definition.
ILCID_POSIX_SUBTABLE(ibb) {
{0x69, "ibb"},
{0x0469, "ibb_NG"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID) ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG) ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN) ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
@ -458,13 +472,18 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */ ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
{0x60, "ks"}, {0x60, "ks"},
{0x0860, "ks_IN"}, /* Documentation doesn't mention script */
{0x0460, "ks_Arab_IN"}, {0x0460, "ks_Arab_IN"},
{0x0860, "ks_Deva_IN"} {0x0860, "ks_Deva_IN"}
}; };
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */ ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
// {numeric host ID, POSIX locale name} pairs for "la".
// la_001 and la_IT share the same ID (0x0476); la_IT is listed second and kept
// only for compatibility.
// NOTE(review): presumably Windows LCIDs - confirm against the ILCID_POSIX_SUBTABLE definition.
ILCID_POSIX_SUBTABLE(la) {
{0x76, "la"},
{0x0476, "la_001"},
{0x0476, "la_IT"} /*Left in for compatibility*/
};
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU) ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA) ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT) ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
@ -535,15 +554,19 @@ ILCID_POSIX_SUBTABLE(or_IN) {
{0x0448, "or_IN"}, {0x0448, "or_IN"},
}; };
ILCID_POSIX_SUBTABLE(pa) { ILCID_POSIX_SUBTABLE(pa) {
{0x46, "pa"}, {0x46, "pa"},
{0x0446, "pa_IN"}, {0x0446, "pa_IN"},
{0x0846, "pa_PK"}, {0x0846, "pa_Arab_PK"},
{0x0846, "pa_Arab_PK"} {0x0846, "pa_PK"}
};
// {numeric host ID, POSIX locale name} pairs for "pap".
// pap_029 and pap_AN share the same ID (0x0479); pap_AN is listed second and
// kept only for compatibility.
// NOTE(review): presumably Windows LCIDs - confirm against the ILCID_POSIX_SUBTABLE definition.
ILCID_POSIX_SUBTABLE(pap) {
{0x79, "pap"},
{0x0479, "pap_029"},
{0x0479, "pap_AN"} /*Left in for compatibility*/
};
ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL) ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF) ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
@ -619,9 +642,11 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
ILCID_POSIX_SUBTABLE(sd) { ILCID_POSIX_SUBTABLE(sd) {
{0x59, "sd"}, {0x59, "sd"},
{0x0459, "sd_IN"},
{0x0459, "sd_Deva_IN"}, {0x0459, "sd_Deva_IN"},
{0x0859, "sd_PK"} {0x0459, "sd_IN"},
{0x0859, "sd_Arab_PK"},
{0x0859, "sd_PK"},
{0x7c59, "sd_Arab"}
}; };
ILCID_POSIX_SUBTABLE(se) { ILCID_POSIX_SUBTABLE(se) {
@ -645,9 +670,8 @@ ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK) ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI) ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */ ILCID_POSIX_SUBTABLE(so) {
{0x77, "so"}, {0x77, "so"},
{0x0477, "so_ET"},
{0x0477, "so_SO"} {0x0477, "so_SO"}
}; };
@ -739,7 +763,12 @@ ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN) ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN) ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA) ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
// {numeric host ID, POSIX locale name} pairs for "yi": the bare language
// name and its yi_001 form.
// NOTE(review): presumably Windows LCIDs - confirm against the ILCID_POSIX_SUBTABLE definition.
ILCID_POSIX_SUBTABLE(yi) {
{0x003d, "yi"},
{0x043d, "yi_001"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
// Windows & ICU tend to different names for some of these // Windows & ICU tend to different names for some of these
@ -1103,7 +1132,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
} }
if (pPosixID) { if (pPosixID) {
int32_t resLen = uprv_strlen(pPosixID); int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity; int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
uprv_memcpy(posixID, pPosixID, copyLen); uprv_memcpy(posixID, pPosixID, copyLen);
if (resLen < posixIDCapacity) { if (resLen < posixIDCapacity) {
@ -1177,7 +1206,7 @@ uprv_convertToLCIDPlatform(const char* localeID)
char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
// this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
int32_t bcp47Len = uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus); (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
if (U_SUCCESS(myStatus)) if (U_SUCCESS(myStatus))
{ {
@ -1214,6 +1243,8 @@ uprv_convertToLCIDPlatform(const char* localeID)
} }
} }
} }
#else
(void)localeID; // Suppress unused variable warning.
#endif /* USE_WINDOWS_LCID_MAPPING_API */ #endif /* USE_WINDOWS_LCID_MAPPING_API */
// No found, or not implemented on platforms without native name->lcid conversion // No found, or not implemented on platforms without native name->lcid conversion

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -5,7 +5,7 @@
* Copyright (C) 2014, International Business Machines * Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved. * Corporation and others. All Rights Reserved.
******************************************************************************* *******************************************************************************
* loadednormalizer2impl.h * norm2allmodes.h
* *
* created on: 2014sep07 * created on: 2014sep07
* created by: Markus W. Scherer * created by: Markus W. Scherer
@ -18,7 +18,9 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h" #include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "cpputils.h" #include "cpputils.h"
#include "normalizer2impl.h" #include "normalizer2impl.h"
@ -210,8 +212,8 @@ private:
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
} }
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
}; };
@ -224,19 +226,35 @@ public:
private: private:
virtual void virtual void
normalize(const UChar *src, const UChar *limit, normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const { ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
} }
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
/**
 * Normalizes UTF-8 text by calling impl.composeUTF8() and writes the result to sink.
 *
 * Does nothing if errorCode already holds a failure. Resets edits first
 * unless U_EDITS_NO_RESET is set, and flushes the sink after composing.
 */
void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
              Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_SUCCESS(errorCode)) {
        const bool resetEdits = edits != nullptr && (options & U_EDITS_NO_RESET) == 0;
        if (resetEdits) {
            edits->reset();
        }
        const uint8_t *start = reinterpret_cast<const uint8_t *>(src.data());
        const uint8_t *limit = start + src.length();
        impl.composeUTF8(options, onlyContiguous, start, limit,
                         &sink, edits, errorCode);
        sink.Flush();
    }
}
virtual void virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle, UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const { ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
} }
virtual UBool virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return FALSE; return FALSE;
} }
@ -252,8 +270,16 @@ private:
} }
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
} }
/**
 * Reports whether the UTF-8 text is normalized by calling impl.composeUTF8()
 * with options 0 and no sink or edits.
 *
 * @return FALSE if errorCode already holds a failure, otherwise the result of composeUTF8().
 */
virtual UBool
isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
    if(U_FAILURE(errorCode)) { return FALSE; }
    const uint8_t *start = reinterpret_cast<const uint8_t *>(sp.data());
    const uint8_t *limit = start + sp.length();
    return impl.composeUTF8(0, onlyContiguous, start, limit, nullptr, nullptr, errorCode);
}
virtual UNormalizationCheckResult virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return UNORM_MAYBE; return UNORM_MAYBE;
} }
@ -267,21 +293,21 @@ private:
return qcResult; return qcResult;
} }
virtual const UChar * virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
} }
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
return impl.getCompQuickCheck(impl.getNorm16(c)); return impl.getCompQuickCheck(impl.getNorm16(c));
} }
virtual UBool hasBoundaryBefore(UChar32 c) const { virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryBefore(c); return impl.hasCompBoundaryBefore(c);
} }
virtual UBool hasBoundaryAfter(UChar32 c) const { virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); return impl.hasCompBoundaryAfter(c, onlyContiguous);
} }
virtual UBool isInert(UChar32 c) const { virtual UBool isInert(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); return impl.isCompInert(c, onlyContiguous);
} }
const UBool onlyContiguous; const UBool onlyContiguous;

Просмотреть файл

@ -20,7 +20,9 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h" #include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/unorm.h" #include "unicode/unorm.h"
#include "cstring.h" #include "cstring.h"
@ -42,6 +44,20 @@ U_NAMESPACE_BEGIN
Normalizer2::~Normalizer2() {} Normalizer2::~Normalizer2() {}
/**
 * Default UTF-8 normalization: converts src to a UnicodeString, normalizes
 * it with normalize(), and writes the result to sink as UTF-8.
 *
 * The options argument is ignored. Edits are not supported here: a non-null
 * edits pointer sets U_UNSUPPORTED_ERROR and nothing is written. Does nothing
 * if errorCode already holds a failure.
 */
void
Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink,
                           Edits *edits, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) { return; }
    if (edits == nullptr) {
        UnicodeString utf16Input = UnicodeString::fromUTF8(src);
        normalize(utf16Input, errorCode).toUTF8(sink);
    } else {
        errorCode = U_UNSUPPORTED_ERROR;
    }
}
UBool UBool
Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
return FALSE; return FALSE;
@ -57,6 +73,11 @@ Normalizer2::getCombiningClass(UChar32 /*c*/) const {
return 0; return 0;
} }
/**
 * Default UTF-8 check: converts s to a UnicodeString and asks isNormalized().
 *
 * @return FALSE if errorCode already holds a failure (no conversion is done),
 *         otherwise whether isNormalized() returned a non-zero value.
 */
UBool
Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const UBool normalized = isNormalized(UnicodeString::fromUTF8(s), errorCode);
    return normalized ? TRUE : FALSE;
}
// Normalizer2 implementation for the old UNORM_NONE. // Normalizer2 implementation for the old UNORM_NONE.
class NoopNormalizer2 : public Normalizer2 { class NoopNormalizer2 : public Normalizer2 {
virtual ~NoopNormalizer2(); virtual ~NoopNormalizer2();
@ -64,7 +85,7 @@ class NoopNormalizer2 : public Normalizer2 {
virtual UnicodeString & virtual UnicodeString &
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
UnicodeString &dest, UnicodeString &dest,
UErrorCode &errorCode) const { UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) { if(U_SUCCESS(errorCode)) {
if(&dest!=&src) { if(&dest!=&src) {
dest=src; dest=src;
@ -74,10 +95,27 @@ class NoopNormalizer2 : public Normalizer2 {
} }
return dest; return dest;
} }
/**
 * No-op normalization of UTF-8 text: the input is passed through as is.
 * If edits is non-null, it is reset (unless U_EDITS_NO_RESET is set in
 * options) and the whole input is recorded as one unchanged span.
 * The bytes are appended to sink unless U_OMIT_UNCHANGED_TEXT is set;
 * the sink is flushed either way. Does nothing on a prior failure.
 */
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
              Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_FAILURE(errorCode)) {
        return;
    }
    const int32_t srcLength = src.length();
    if (edits != nullptr) {
        const UBool keepEarlierEdits = (options & U_EDITS_NO_RESET) != 0;
        if (!keepEarlierEdits) {
            edits->reset();
        }
        edits->addUnchanged(srcLength);
    }
    const UBool omitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) != 0;
    if (!omitUnchanged) {
        sink.Append(src.data(), srcLength);
    }
    sink.Flush();
}
virtual UnicodeString & virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first, normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const { UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) { if(U_SUCCESS(errorCode)) {
if(&first!=&second) { if(&first!=&second) {
first.append(second); first.append(second);
@ -90,7 +128,7 @@ class NoopNormalizer2 : public Normalizer2 {
virtual UnicodeString & virtual UnicodeString &
append(UnicodeString &first, append(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const { UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) { if(U_SUCCESS(errorCode)) {
if(&first!=&second) { if(&first!=&second) {
first.append(second); first.append(second);
@ -101,25 +139,29 @@ class NoopNormalizer2 : public Normalizer2 {
return first; return first;
} }
virtual UBool virtual UBool
getDecomposition(UChar32, UnicodeString &) const { getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE {
return FALSE; return FALSE;
} }
// No need to override the default getRawDecomposition(). // No need to override the default getRawDecomposition().
virtual UBool virtual UBool
isNormalized(const UnicodeString &, UErrorCode &) const { isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE {
return TRUE; return U_SUCCESS(errorCode);
}
virtual UBool
isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE {
return U_SUCCESS(errorCode);
} }
virtual UNormalizationCheckResult virtual UNormalizationCheckResult
quickCheck(const UnicodeString &, UErrorCode &) const { quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE {
return UNORM_YES; return UNORM_YES;
} }
virtual int32_t virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE {
return s.length(); return s.length();
} }
virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; }
virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; }
virtual UBool isInert(UChar32) const { return TRUE; } virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; }
}; };
NoopNormalizer2::~NoopNormalizer2() {} NoopNormalizer2::~NoopNormalizer2() {}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -35,6 +35,11 @@ U_NAMESPACE_BEGIN
struct CanonIterData; struct CanonIterData;
class ByteSink;
class Edits;
class InitCanonIterData;
class LcccContext;
class U_COMMON_API Hangul { class U_COMMON_API Hangul {
public: public:
/* Korean Hangul and Jamo constants */ /* Korean Hangul and Jamo constants */
@ -63,9 +68,9 @@ public:
return HANGUL_BASE<=c && c<HANGUL_LIMIT; return HANGUL_BASE<=c && c<HANGUL_LIMIT;
} }
static inline UBool static inline UBool
isHangulWithoutJamoT(UChar c) { isHangulLV(UChar32 c) {
c-=HANGUL_BASE; c-=HANGUL_BASE;
return c<HANGUL_COUNT && c%JAMO_T_COUNT==0; return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
} }
static inline UBool isJamoL(UChar32 c) { static inline UBool isJamoL(UChar32 c) {
return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT; return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;
@ -73,6 +78,14 @@ public:
static inline UBool isJamoV(UChar32 c) { static inline UBool isJamoV(UChar32 c) {
return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT; return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;
} }
/**
 * Tests whether c is a trailing-consonant Jamo, i.e. strictly between
 * JAMO_T_BASE and JAMO_T_BASE+JAMO_T_COUNT.
 * JAMO_T_BASE itself (offset 0) is deliberately excluded.
 */
static inline UBool isJamoT(UChar32 c) {
    int32_t offset=c-JAMO_T_BASE;
    return offset>0 && offset<JAMO_T_COUNT;
}
/**
 * Tests whether c is any conjoining Jamo covered by this class:
 * within [JAMO_L_BASE..JAMO_L_END], within [JAMO_V_BASE..JAMO_V_END],
 * or above JAMO_T_BASE up to and including JAMO_T_END.
 */
static UBool isJamo(UChar32 c) {
    // Reject everything outside the overall L..T span first.
    if(c<JAMO_L_BASE || JAMO_T_END<c) {
        return FALSE;
    }
    return c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c;
}
/** /**
* Decomposes c, which must be a Hangul syllable, into buffer * Decomposes c, which must be a Hangul syllable, into buffer
@ -117,10 +130,13 @@ class Normalizer2Impl;
class U_COMMON_API ReorderingBuffer : public UMemory { class U_COMMON_API ReorderingBuffer : public UMemory {
public: public:
/** Constructs only; init() should be called. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) : ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
impl(ni), str(dest), impl(ni), str(dest),
start(NULL), reorderStart(NULL), limit(NULL), start(NULL), reorderStart(NULL), limit(NULL),
remainingCapacity(0), lastCC(0) {} remainingCapacity(0), lastCC(0) {}
/** Constructs, removes the string contents, and initializes for a small initial capacity. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
~ReorderingBuffer() { ~ReorderingBuffer() {
if(start!=NULL) { if(start!=NULL) {
str.releaseBuffer((int32_t)(limit-start)); str.releaseBuffer((int32_t)(limit-start));
@ -135,11 +151,7 @@ public:
uint8_t getLastCC() const { return lastCC; } uint8_t getLastCC() const { return lastCC; }
UBool equals(const UChar *start, const UChar *limit) const; UBool equals(const UChar *start, const UChar *limit) const;
UBool equals(const uint8_t *otherStart, const uint8_t *otherLimit) const;
// For Hangul composition, replacing the Leading consonant Jamo with the syllable.
void setLastChar(UChar c) {
*(limit-1)=c;
}
UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) { UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
return (c<=0xffff) ? return (c<=0xffff) ?
@ -218,6 +230,12 @@ private:
UChar *codePointStart, *codePointLimit; UChar *codePointStart, *codePointLimit;
}; };
/**
* Low-level implementation of the Unicode Normalization Algorithm.
* For the data structure and details see the documentation at the end of
* this normalizer2impl.h and in the design doc at
* http://site.icu-project.org/design/normalization/custom
*/
class U_COMMON_API Normalizer2Impl : public UObject { class U_COMMON_API Normalizer2Impl : public UObject {
public: public:
Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) {
@ -234,8 +252,6 @@ public:
// low-level properties ------------------------------------------------ *** // low-level properties ------------------------------------------------ ***
const UTrie2 *getNormTrie() const { return normTrie; }
UBool ensureCanonIterData(UErrorCode &errorCode) const; UBool ensureCanonIterData(UErrorCode &errorCode) const;
uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); } uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
@ -255,15 +271,22 @@ public:
uint8_t getCC(uint16_t norm16) const { uint8_t getCC(uint16_t norm16) const {
if(norm16>=MIN_NORMAL_MAYBE_YES) { if(norm16>=MIN_NORMAL_MAYBE_YES) {
return (uint8_t)norm16; return getCCFromNormalYesOrMaybe(norm16);
} }
if(norm16<minNoNo || limitNoNo<=norm16) { if(norm16<minNoNo || limitNoNo<=norm16) {
return 0; return 0;
} }
return getCCFromNoNo(norm16); return getCCFromNoNo(norm16);
} }
/**
 * Extracts the combining class from a norm16 value by dropping the low
 * OFFSET_SHIFT bits and keeping the next 8 bits.
 * The callers visible here only use it when norm16>=MIN_NORMAL_MAYBE_YES.
 */
static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
    return static_cast<uint8_t>(norm16 >> OFFSET_SHIFT);
}
static uint8_t getCCFromYesOrMaybe(uint16_t norm16) { static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
return norm16>=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0; return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
}
uint8_t getCCFromYesOrMaybeCP(UChar32 c) const {
if (c < minCompNoMaybeCP) { return 0; }
return getCCFromYesOrMaybe(getNorm16(c));
} }
/** /**
@ -272,10 +295,8 @@ public:
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/ */
uint16_t getFCD16(UChar32 c) const { uint16_t getFCD16(UChar32 c) const {
if(c<0) { if(c<minDecompNoCP) {
return 0; return 0;
} else if(c<0x180) {
return tccc180[c];
} else if(c<=0xffff) { } else if(c<=0xffff) {
if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; } if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
} }
@ -291,9 +312,7 @@ public:
*/ */
uint16_t nextFCD16(const UChar *&s, const UChar *limit) const { uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
UChar32 c=*s++; UChar32 c=*s++;
if(c<0x180) { if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
return tccc180[c];
} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
return 0; return 0;
} }
UChar c2; UChar c2;
@ -311,8 +330,8 @@ public:
*/ */
uint16_t previousFCD16(const UChar *start, const UChar *&s) const { uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
UChar32 c=*--s; UChar32 c=*--s;
if(c<0x180) { if(c<minDecompNoCP) {
return tccc180[c]; return 0;
} }
if(!U16_IS_TRAIL(c)) { if(!U16_IS_TRAIL(c)) {
if(!singleLeadMightHaveNonZeroFCD16(c)) { if(!singleLeadMightHaveNonZeroFCD16(c)) {
@ -328,8 +347,6 @@ public:
return getFCD16FromNormData(c); return getFCD16FromNormData(c);
} }
/** Returns the FCD data for U+0000<=c<U+0180. */
uint16_t getFCD16FromBelow180(UChar32 c) const { return tccc180[c]; }
/** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */ /** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */
UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const { UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
// 0<=lead<=0xffff // 0<=lead<=0xffff
@ -340,9 +357,6 @@ public:
/** Returns the FCD value from the regular normalization data. */ /** Returns the FCD value from the regular normalization data. */
uint16_t getFCD16FromNormData(UChar32 c) const; uint16_t getFCD16FromNormData(UChar32 c) const;
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
/** /**
* Gets the decomposition for one code point. * Gets the decomposition for one code point.
* @param c code point * @param c code point
@ -367,14 +381,25 @@ public:
UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const; UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
enum { enum {
MIN_CCC_LCCC_CP=0x300 // Fixed norm16 values.
}; MIN_YES_YES_WITH_CC=0xfe02,
JAMO_VT=0xfe00,
MIN_NORMAL_MAYBE_YES=0xfc00,
JAMO_L=2, // offset=1 hasCompBoundaryAfter=FALSE
INERT=1, // offset=0 hasCompBoundaryAfter=TRUE
// norm16 bit 0 is comp-boundary-after.
HAS_COMP_BOUNDARY_AFTER=1,
OFFSET_SHIFT=1,
// For algorithmic one-way mappings, norm16 bits 2..1 indicate the
// tccc (0, 1, >1) for quick FCC boundary-after tests.
DELTA_TCCC_0=0,
DELTA_TCCC_1=2,
DELTA_TCCC_GT_1=4,
DELTA_TCCC_MASK=6,
DELTA_SHIFT=3,
enum {
MIN_YES_YES_WITH_CC=0xff01,
JAMO_VT=0xff00,
MIN_NORMAL_MAYBE_YES=0xfe00,
JAMO_L=1,
MAX_DELTA=0x40 MAX_DELTA=0x40
}; };
@ -394,21 +419,32 @@ public:
IX_MIN_COMP_NO_MAYBE_CP, IX_MIN_COMP_NO_MAYBE_CP,
// Norm16 value thresholds for quick check combinations and types of extra data. // Norm16 value thresholds for quick check combinations and types of extra data.
IX_MIN_YES_NO, // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
/** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
IX_MIN_YES_NO,
/** Mappings are comp-normalized. */
IX_MIN_NO_NO, IX_MIN_NO_NO,
IX_LIMIT_NO_NO, IX_LIMIT_NO_NO,
IX_MIN_MAYBE_YES, IX_MIN_MAYBE_YES,
IX_MIN_YES_NO_MAPPINGS_ONLY, // Mappings only in [minYesNoMappingsOnly..minNoNo[. /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
IX_MIN_YES_NO_MAPPINGS_ONLY,
/** Mappings are not comp-normalized but have a comp boundary before. */
IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE,
/** Mappings do not have a comp boundary before. */
IX_MIN_NO_NO_COMP_NO_MAYBE_CC,
/** Mappings to the empty string. */
IX_MIN_NO_NO_EMPTY,
IX_RESERVED15, IX_MIN_LCCC_CP,
IX_RESERVED19,
IX_COUNT IX_COUNT
}; };
enum { enum {
MAPPING_HAS_CCC_LCCC_WORD=0x80, MAPPING_HAS_CCC_LCCC_WORD=0x80,
MAPPING_HAS_RAW_MAPPING=0x40, MAPPING_HAS_RAW_MAPPING=0x40,
MAPPING_NO_COMP_BOUNDARY_AFTER=0x20, // unused bit 0x20,
MAPPING_LENGTH_MASK=0x1f MAPPING_LENGTH_MASK=0x1f
}; };
@ -457,6 +493,12 @@ public:
UnicodeString &safeMiddle, UnicodeString &safeMiddle,
ReorderingBuffer &buffer, ReorderingBuffer &buffer,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
/** sink==nullptr: isNormalized() */
UBool composeUTF8(uint32_t options, UBool onlyContiguous,
const uint8_t *src, const uint8_t *limit,
ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;
const UChar *makeFCD(const UChar *src, const UChar *limit, const UChar *makeFCD(const UChar *src, const UChar *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const; ReorderingBuffer *buffer, UErrorCode &errorCode) const;
void makeFCDAndAppend(const UChar *src, const UChar *limit, void makeFCDAndAppend(const UChar *src, const UChar *limit,
@ -465,27 +507,42 @@ public:
ReorderingBuffer &buffer, ReorderingBuffer &buffer,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
UBool hasDecompBoundary(UChar32 c, UBool before) const; UBool hasDecompBoundaryBefore(UChar32 c) const;
UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
UBool hasDecompBoundaryAfter(UChar32 c) const;
UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); } UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
UBool hasCompBoundaryBefore(UChar32 c) const { UBool hasCompBoundaryBefore(UChar32 c) const {
return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c)); return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
}
/**
 * Looks up the norm16 value of c and reports whether it has a
 * composition boundary after it, per norm16HasCompBoundaryAfter().
 */
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
    uint16_t norm16=getNorm16(c);
    return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
}
UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
uint16_t norm16=getNorm16(c);
return isCompYesAndZeroCC(norm16) &&
(norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
(!onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff);
} }
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;
UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; } UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
UBool hasFCDBoundaryAfter(UChar32 c) const { UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
uint16_t fcd16=getFCD16(c);
return fcd16<=1 || (fcd16&0xff)==0;
}
UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; } UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
private: private:
friend class InitCanonIterData;
friend class LcccContext;
UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; } UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
static UBool isInert(uint16_t norm16) { return norm16==0; } static UBool isInert(uint16_t norm16) { return norm16==INERT; }
static UBool isJamoL(uint16_t norm16) { return norm16==1; } static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; }
static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; } static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; } uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
/** Returns TRUE if norm16 is exactly the minYesNo threshold value, which this class treats as the Hangul LV value. */
UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; }
/** Returns TRUE if norm16 is exactly the value computed by hangulLVT(). */
UBool isHangulLVT(uint16_t norm16) const {
    const uint16_t lvt=hangulLVT();
    return norm16==lvt;
}
UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; } UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
// UBool isCompYes(uint16_t norm16) const { // UBool isCompYes(uint16_t norm16) const {
// return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo; // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
@ -504,7 +561,7 @@ private:
/** /**
* A little faster and simpler than isDecompYesAndZeroCC() but does not include * A little faster and simpler than isDecompYesAndZeroCC() but does not include
* the MaybeYes which combine-forward and have ccc=0. * the MaybeYes which combine-forward and have ccc=0.
* (Standard Unicode 5.2 normalization does not have such characters.) * (Standard Unicode 10 normalization does not have such characters.)
*/ */
UBool isMostDecompYesAndZeroCC(uint16_t norm16) const { UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
@ -514,7 +571,7 @@ private:
// For use with isCompYes(). // For use with isCompYes().
// Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
// static uint8_t getCCFromYes(uint16_t norm16) { // static uint8_t getCCFromYes(uint16_t norm16) {
// return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0; // return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
// } // }
uint8_t getCCFromNoNo(uint16_t norm16) const { uint8_t getCCFromNoNo(uint16_t norm16) const {
const uint16_t *mapping=getMapping(norm16); const uint16_t *mapping=getMapping(norm16);
@ -525,30 +582,47 @@ private:
} }
} }
// requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC() // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
uint8_t getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const; uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const {
if(norm16<=minYesNo) {
return 0; // yesYes and Hangul LV have ccc=tccc=0
} else {
// For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
return (uint8_t)(*getMapping(norm16)>>8); // tccc from yesNo
}
}
uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const;
uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const;
// Requires algorithmic-NoNo. // Requires algorithmic-NoNo.
UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const { UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
return c+norm16-(minMaybeYes-MAX_DELTA-1); return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
}
UChar32 getAlgorithmicDelta(uint16_t norm16) const {
return (norm16>>DELTA_SHIFT)-centerNoNoDelta;
} }
// Requires minYesNo<norm16<limitNoNo. // Requires minYesNo<norm16<limitNoNo.
const uint16_t *getMapping(uint16_t norm16) const { return extraData+norm16; } const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const { const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) { if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
return NULL; return NULL;
} else if(norm16<minMaybeYes) { } else if(norm16<minMaybeYes) {
return extraData+norm16; // for yesYes; if Jamo L: harmless empty list return getMapping(norm16); // for yesYes; if Jamo L: harmless empty list
} else { } else {
return maybeYesCompositions+norm16-minMaybeYes; return maybeYesCompositions+norm16-minMaybeYes;
} }
} }
const uint16_t *getCompositionsListForComposite(uint16_t norm16) const { const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
const uint16_t *list=extraData+norm16; // composite has both mapping & compositions list // A composite has both mapping & compositions list.
const uint16_t *list=getMapping(norm16);
return list+ // mapping pointer return list+ // mapping pointer
1+ // +1 to skip the first unit with the mapping lenth 1+ // +1 to skip the first unit with the mapping length
(*list&MAPPING_LENGTH_MASK); // + mapping length (*list&MAPPING_LENGTH_MASK); // + mapping length
} }
/**
 * Returns the compositions list inside maybeYesCompositions for norm16.
 * Requires minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES.
 * The distance from minMaybeYes is shifted right by OFFSET_SHIFT to get
 * the array offset.
 */
const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const {
    int32_t offset=(norm16-minMaybeYes)>>OFFSET_SHIFT;
    return maybeYesCompositions+offset;
}
/** /**
* @param c code point must have compositions * @param c code point must have compositions
* @return compositions list pointer * @return compositions list pointer
@ -563,46 +637,78 @@ private:
UChar32 minNeedDataCP, UChar32 minNeedDataCP,
ReorderingBuffer *buffer, ReorderingBuffer *buffer,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
UBool decomposeShort(const UChar *src, const UChar *limit, const UChar *decomposeShort(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const; UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
UBool decompose(UChar32 c, uint16_t norm16, UBool decompose(UChar32 c, uint16_t norm16,
ReorderingBuffer &buffer, UErrorCode &errorCode) const; ReorderingBuffer &buffer, UErrorCode &errorCode) const;
const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
static int32_t combine(const uint16_t *list, UChar32 trail); static int32_t combine(const uint16_t *list, UChar32 trail);
void addComposites(const uint16_t *list, UnicodeSet &set) const; void addComposites(const uint16_t *list, UnicodeSet &set) const;
void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
UBool onlyContiguous) const; UBool onlyContiguous) const;
UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const; UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const; return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const; }
/**
 * Reports a composition boundary before a character, given its norm16:
 * TRUE for values below minNoNoCompNoMaybeCC, otherwise only if
 * isAlgorithmicNoNo(norm16) holds.
 */
UBool norm16HasCompBoundaryBefore(uint16_t norm16) const {
    if(norm16 < minNoNoCompNoMaybeCC) {
        return TRUE;
    }
    return isAlgorithmicNoNo(norm16) != 0;
}
UBool hasCompBoundaryBefore(const UChar *src, const UChar *limit) const;
UBool hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const;
UBool hasCompBoundaryAfter(const UChar *start, const UChar *p,
UBool onlyContiguous) const;
UBool hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
UBool onlyContiguous) const;
/**
 * Reports a composition boundary after a character, given its norm16.
 * Bit HAS_COMP_BOUNDARY_AFTER must be set; when onlyContiguous is true,
 * isTrailCC01ForCompBoundaryAfter(norm16) must hold as well.
 */
UBool norm16HasCompBoundaryAfter(uint16_t norm16, UBool onlyContiguous) const {
    if((norm16 & HAS_COMP_BOUNDARY_AFTER) == 0) {
        return FALSE;
    }
    return !onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16);
}
/**
 * For FCC: given a norm16 with HAS_COMP_BOUNDARY_AFTER set, is tccc<=1?
 * Inert values pass directly. Algorithmic decomposition values are
 * checked via their DELTA_TCCC bits. All other values are checked via
 * the first unit of their mapping (<=0x1ff).
 */
UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const {
    if(isInert(norm16)) {
        return TRUE;
    }
    if(isDecompNoAlgorithmic(norm16)) {
        return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
    }
    return *getMapping(norm16) <= 0x1ff;
}
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const;
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const;
const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const; const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const; const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
int32_t getCanonValue(UChar32 c) const; int32_t getCanonValue(UChar32 c) const;
const UnicodeSet &getCanonStartSet(int32_t n) const; const UnicodeSet &getCanonStartSet(int32_t n) const;
// UVersionInfo dataVersion; // UVersionInfo dataVersion;
// Code point thresholds for quick check codes. // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
UChar32 minDecompNoCP; UChar minDecompNoCP;
UChar32 minCompNoMaybeCP; UChar minCompNoMaybeCP;
UChar minLcccCP;
// Norm16 value thresholds for quick check combinations and types of extra data. // Norm16 value thresholds for quick check combinations and types of extra data.
uint16_t minYesNo; uint16_t minYesNo;
uint16_t minYesNoMappingsOnly; uint16_t minYesNoMappingsOnly;
uint16_t minNoNo; uint16_t minNoNo;
uint16_t minNoNoCompBoundaryBefore;
uint16_t minNoNoCompNoMaybeCC;
uint16_t minNoNoEmpty;
uint16_t limitNoNo; uint16_t limitNoNo;
uint16_t centerNoNoDelta;
uint16_t minMaybeYes; uint16_t minMaybeYes;
const UTrie2 *normTrie; const UTrie2 *normTrie;
const uint16_t *maybeYesCompositions; const uint16_t *maybeYesCompositions;
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
public: // CanonIterData is public to allow access from C callback functions.
UInitOnce fCanonIterDataInitOnce; UInitOnce fCanonIterDataInitOnce;
CanonIterData *fCanonIterData; CanonIterData *fCanonIterData;
}; };
@ -658,13 +764,14 @@ unorm_getFCD16(UChar32 c);
/** /**
* Format of Normalizer2 .nrm data files. * Format of Normalizer2 .nrm data files.
* Format version 2.0. * Format version 3.0.
* *
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms. * Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
* ICU ships with data files for standard Unicode Normalization Forms * ICU ships with data files for standard Unicode Normalization Forms
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm). * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
* Custom (application-specific) data can be built into additional .nrm files * Custom (application-specific) data can be built into additional .nrm files
* with the gennorm2 build tool. * with the gennorm2 build tool.
* ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
* *
* Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been * Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been
* cached already. Internally, Normalizer2Impl.load() reads the .nrm file. * cached already. Internally, Normalizer2Impl.load() reads the .nrm file.
@ -695,14 +802,20 @@ unorm_getFCD16(UChar32 c);
* with a decomposition mapping, that is, with NF*D_QC=No. * with a decomposition mapping, that is, with NF*D_QC=No.
* minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point * minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point
* with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward). * with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward).
* minLcccCP=indexes[IX_MIN_LCCC_CP] (index 18, new in formatVersion 3)
* is the lowest code point with lccc!=0.
* *
* The next five indexes are thresholds of 16-bit trie values for ranges of * The next eight indexes are thresholds of 16-bit trie values for ranges of
* values indicating multiple normalization properties. * values indicating multiple normalization properties.
* They are listed here in threshold order, not in the order they are stored in the indexes.
* minYesNo=indexes[IX_MIN_YES_NO]; * minYesNo=indexes[IX_MIN_YES_NO];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* minNoNo=indexes[IX_MIN_NO_NO]; * minNoNo=indexes[IX_MIN_NO_NO];
* minNoNoCompBoundaryBefore=indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
* minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
* minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY];
* limitNoNo=indexes[IX_LIMIT_NO_NO]; * limitNoNo=indexes[IX_LIMIT_NO_NO];
* minMaybeYes=indexes[IX_MIN_MAYBE_YES]; * minMaybeYes=indexes[IX_MIN_MAYBE_YES];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* See the normTrie description below and the design doc for details. * See the normTrie description below and the design doc for details.
* *
* UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h * UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h
@ -710,12 +823,14 @@ unorm_getFCD16(UChar32 c);
* The trie holds the main normalization data. Each code point is mapped to a 16-bit value. * The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
* Rather than using independent bits in the value (which would require more than 16 bits), * Rather than using independent bits in the value (which would require more than 16 bits),
* information is extracted primarily via range checks. * information is extracted primarily via range checks.
* Except, format version 3 uses bit 0 for hasCompBoundaryAfter().
* For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo * For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
* means that the character has NF*C_QC=Yes and NF*D_QC=No properties, * means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
* which means it has a two-way (round-trip) decomposition mapping. * which means it has a two-way (round-trip) decomposition mapping.
* Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData * Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
* pointing to mappings, compositions lists, or both. * pointing to mappings, compositions lists, or both.
* Value norm16==0 means that the character is normalization-inert, that is, * Value norm16==INERT (0 in versions 1 & 2, 1 in version 3)
* means that the character is normalization-inert, that is,
* it does not have a mapping, does not participate in composition, has a zero * it does not have a mapping, does not participate in composition, has a zero
* canonical combining class, and forms a boundary where text before it and after it * canonical combining class, and forms a boundary where text before it and after it
* can be normalized independently. * can be normalized independently.
@ -729,7 +844,7 @@ unorm_getFCD16(UChar32 c);
* The trie has a value for each lead surrogate code unit representing the "worst case" * The trie has a value for each lead surrogate code unit representing the "worst case"
* properties of the 1024 supplementary characters whose UTF-16 form starts with * properties of the 1024 supplementary characters whose UTF-16 form starts with
* the lead surrogate. If all of the 1024 supplementary characters are normalization-inert, * the lead surrogate. If all of the 1024 supplementary characters are normalization-inert,
* then their lead surrogate code unit has the trie value 0. * then their lead surrogate code unit has the trie value INERT.
* When the lead surrogate unit's value exceeds the quick check minimum during processing, * When the lead surrogate unit's value exceeds the quick check minimum during processing,
* the properties for the full supplementary code point need to be looked up. * the properties for the full supplementary code point need to be looked up.
* *
@ -738,6 +853,7 @@ unorm_getFCD16(UChar32 c);
* *
* There is only one byte offset for the end of these two arrays. * There is only one byte offset for the end of these two arrays.
* The split between them is given by the constant and variable mentioned above. * The split between them is given by the constant and variable mentioned above.
* In version 3, the difference must be shifted right by OFFSET_SHIFT.
* *
* The maybeYesCompositions array contains compositions lists for characters that * The maybeYesCompositions array contains compositions lists for characters that
* combine both forward (as starters in composition pairs) * combine both forward (as starters in composition pairs)
@ -754,6 +870,8 @@ unorm_getFCD16(UChar32 c);
* followed by only mappings for "NoNo" characters. * followed by only mappings for "NoNo" characters.
* (Referring to pairs of NFC/NFD quick check values.) * (Referring to pairs of NFC/NFD quick check values.)
* The norm16 values of those characters are directly indexes into the extraData array. * The norm16 values of those characters are directly indexes into the extraData array.
* In version 3, the norm16 values must be shifted right by OFFSET_SHIFT
* for accessing extraData.
* *
* The data structures for compositions lists and mappings are described in the design doc. * The data structures for compositions lists and mappings are described in the design doc.
* *
@ -784,6 +902,50 @@ unorm_getFCD16(UChar32 c);
* This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag. * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
* It is needed for the new (in ICU 49) composePair(), not for other normalization. * It is needed for the new (in ICU 49) composePair(), not for other normalization.
* - Addition of the smallFCD[] bit set. * - Addition of the smallFCD[] bit set.
*
* Changes from format version 2 to format version 3 (ICU 60) ------------------
*
* - norm16 bit 0 indicates hasCompBoundaryAfter(),
* except that for contiguous composition (FCC) the tccc must be checked as well.
* Data indexes and ccc values are shifted left by one (OFFSET_SHIFT).
* Thresholds like minNoNo are tested before shifting.
*
* - Algorithmic mapping deltas are shifted left by two more bits (total DELTA_SHIFT),
* to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater.
* See DELTA_TCCC_MASK etc.
* This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter().
* minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
*
* - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters,
* and ASCII characters are mapped algorithmically only to other ASCII characters.
* This helps with hasCompBoundaryBefore() and compose() fast paths.
* It is never necessary any more to loop for algorithmic mappings.
*
* - Addition of indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE],
* indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC], and indexes[IX_MIN_NO_NO_EMPTY],
* and separation of the noNo extraData into distinct ranges.
* With this, the noNo norm16 value indicates whether the mapping is
* compose-normalized, not normalized but hasCompBoundaryBefore(),
* not even that, or maps to an empty string.
* hasCompBoundaryBefore() can be determined solely from the norm16 value.
*
* - The norm16 value for Hangul LVT is now different from that for Hangul LV,
* so that hasCompBoundaryAfter() need not check for the syllable type.
* For Hangul LV, minYesNo continues to be used (no comp-boundary-after).
* For Hangul LVT, minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER is used.
* The extraData units at these indexes are set to firstUnit=2 and firstUnit=3, respectively,
* to simplify some code.
*
* - The extraData firstUnit bit 5 is no longer necessary
* (norm16 bit 0 used instead of firstUnit MAPPING_NO_COMP_BOUNDARY_AFTER),
* is reserved again, and always set to 0.
*
* - Addition of indexes[IX_MIN_LCCC_CP], the first code point where lccc!=0.
* This used to be hardcoded to U+0300, but in data like NFKC_Casefold it is lower:
* U+00AD Soft Hyphen maps to an empty string,
* which is artificially assigned "worst case" values lccc=1 and tccc=255.
*
* - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
*/ */
#endif /* !UCONFIG_NO_NORMALIZATION */ #endif /* !UCONFIG_NO_NORMALIZATION */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -676,8 +676,8 @@ extern U_IMPORT char *U_TZNAME[];
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
/* These platforms are likely to use Olson timezone IDs. */ /* These platforms are likely to use Olson timezone IDs. */
/* common targets of the symbolic link at TZDEFAULT are: /* common targets of the symbolic link at TZDEFAULT are:
* "/usr/share/zoneinfo/<olsonID>" default, older Linus distros, macOS to 10.12 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
* "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu, SuSe Linux * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
* "/usr/share/lib/zoneinfo/<olsonID>" Solaris * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
* "../usr/share/lib/zoneinfo/<olsonID>" Solaris * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
* "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
@ -949,30 +949,30 @@ static CharString *gSearchTZFileResult = NULL;
* This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
*/ */
static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
DIR* dirp = opendir(path); DIR* dirp = NULL;
DIR* subDirp = NULL;
struct dirent* dirEntry = NULL; struct dirent* dirEntry = NULL;
char* result = NULL; char* result = NULL;
UErrorCode status = U_ZERO_ERROR;
/* Save the current path */
CharString curpath(path, -1, status);
if (U_FAILURE(status)) {
goto cleanupAndReturn;
}
dirp = opendir(path);
if (dirp == NULL) { if (dirp == NULL) {
return result; goto cleanupAndReturn;
} }
if (gSearchTZFileResult == NULL) { if (gSearchTZFileResult == NULL) {
gSearchTZFileResult = new CharString; gSearchTZFileResult = new CharString;
if (gSearchTZFileResult == NULL) { if (gSearchTZFileResult == NULL) {
return NULL; goto cleanupAndReturn;
} }
ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
} }
/* Save the current path */
UErrorCode status = U_ZERO_ERROR;
CharString curpath(path, -1, status);
if (U_FAILURE(status)) {
return NULL;
}
/* Check each entry in the directory. */ /* Check each entry in the directory. */
while((dirEntry = readdir(dirp)) != NULL) { while((dirEntry = readdir(dirp)) != NULL) {
const char* dirName = dirEntry->d_name; const char* dirName = dirEntry->d_name;
@ -981,15 +981,16 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
CharString newpath(curpath, status); CharString newpath(curpath, status);
newpath.append(dirName, -1, status); newpath.append(dirName, -1, status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return NULL; break;
} }
DIR* subDirp = NULL;
if ((subDirp = opendir(newpath.data())) != NULL) { if ((subDirp = opendir(newpath.data())) != NULL) {
/* If this new path is a directory, make a recursive call with the newpath. */ /* If this new path is a directory, make a recursive call with the newpath. */
closedir(subDirp); closedir(subDirp);
newpath.append('/', status); newpath.append('/', status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return NULL; break;
} }
result = searchForTZFile(newpath.data(), tzInfo); result = searchForTZFile(newpath.data(), tzInfo);
/* /*
@ -1013,7 +1014,7 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
gSearchTZFileResult->clear(); gSearchTZFileResult->clear();
gSearchTZFileResult->append(zoneid, -1, status); gSearchTZFileResult->append(zoneid, -1, status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return NULL; break;
} }
result = gSearchTZFileResult->data(); result = gSearchTZFileResult->data();
/* Get out after the first one found. */ /* Get out after the first one found. */
@ -1022,7 +1023,11 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
} }
} }
} }
closedir(dirp);
cleanupAndReturn:
if (dirp) {
closedir(dirp);
}
return result; return result;
} }
#endif #endif
@ -1055,7 +1060,7 @@ uprv_getWindowsTimeZone()
hr = timezone->GetTimeZone(timezoneString.GetAddressOf()); hr = timezone->GetTimeZone(timezoneString.GetAddressOf());
if (SUCCEEDED(hr)) if (SUCCEEDED(hr))
{ {
int32_t length = wcslen(timezoneString.GetRawBuffer(NULL)); int32_t length = static_cast<int32_t>(wcslen(timezoneString.GetRawBuffer(NULL)));
char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char)); char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char));
if (asciiId != nullptr) if (asciiId != nullptr)
{ {
@ -1074,6 +1079,7 @@ uprv_getWindowsTimeZone()
U_CAPI const char* U_EXPORT2 U_CAPI const char* U_EXPORT2
uprv_tzname(int n) uprv_tzname(int n)
{ {
(void)n; // Avoid unreferenced parameter warning.
const char *tzid = NULL; const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API #if U_PLATFORM_USES_ONLY_WIN32_API
#if U_PLATFORM_HAS_WINUWP_API > 0 #if U_PLATFORM_HAS_WINUWP_API > 0
@ -1229,7 +1235,7 @@ UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
static CharString *gTimeZoneFilesDirectory = NULL; static CharString *gTimeZoneFilesDirectory = NULL;
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
static char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
static bool gCorrectedPOSIXLocaleHeapAllocated = false; static bool gCorrectedPOSIXLocaleHeapAllocated = false;
#endif #endif
@ -1252,7 +1258,7 @@ static UBool U_CALLCONV putil_cleanup(void)
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
uprv_free(gCorrectedPOSIXLocale); uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
gCorrectedPOSIXLocale = NULL; gCorrectedPOSIXLocale = NULL;
gCorrectedPOSIXLocaleHeapAllocated = false; gCorrectedPOSIXLocaleHeapAllocated = false;
} }
@ -1288,7 +1294,7 @@ u_setDataDirectory(const char *directory) {
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
{ {
char *p; char *p;
while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR; *p = U_FILE_SEP_CHAR;
} }
} }
@ -1446,7 +1452,7 @@ static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
gTimeZoneFilesDirectory->append(path, status); gTimeZoneFilesDirectory->append(path, status);
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
char *p = gTimeZoneFilesDirectory->data(); char *p = gTimeZoneFilesDirectory->data();
while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) { while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR; *p = U_FILE_SEP_CHAR;
} }
#endif #endif
@ -1810,6 +1816,8 @@ The leftmost codepage (.xxx) wins.
} }
// Now normalize the resulting name // Now normalize the resulting name
correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
/* TODO: Should we just exit on memory allocation failure? */
if (correctedPOSIXLocale) if (correctedPOSIXLocale)
{ {
int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
@ -2327,19 +2335,16 @@ u_getVersion(UVersionInfo versionArray) {
* icucfg.h dependent code * icucfg.h dependent code
*/ */
#if U_ENABLE_DYLOAD #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
#if HAVE_DLFCN_H #if HAVE_DLFCN_H
#ifdef __MVS__ #ifdef __MVS__
#ifndef __SUSV3 #ifndef __SUSV3
#define __SUSV3 1 #define __SUSV3 1
#endif #endif
#endif #endif
#include <dlfcn.h> #include <dlfcn.h>
#endif #endif /* HAVE_DLFCN_H */
U_INTERNAL void * U_EXPORT2 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) { uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2379,38 +2384,10 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return uret.fp; return uret.fp;
} }
#else #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
/* null (nonexistent) implementation. */ /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */
U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
if(U_FAILURE(*status)) return NULL;
*status = U_UNSUPPORTED_ERROR;
return NULL;
}
U_INTERNAL void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
if(U_FAILURE(*status)) return;
*status = U_UNSUPPORTED_ERROR;
return;
}
U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
if(U_SUCCESS(*status)) {
*status = U_UNSUPPORTED_ERROR;
}
return (UVoidFunction*)NULL;
}
#endif
#elif U_PLATFORM_USES_ONLY_WIN32_API
U_INTERNAL void * U_EXPORT2 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) { uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2437,7 +2414,6 @@ uprv_dl_close(void *lib, UErrorCode *status) {
return; return;
} }
U_INTERNAL UVoidFunction* U_EXPORT2 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
HMODULE handle = (HMODULE)lib; HMODULE handle = (HMODULE)lib;
@ -2459,10 +2435,9 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return addr; return addr;
} }
#else #else
/* No dynamic loading set. */ /* No dynamic loading, null (nonexistent) implementation. */
U_INTERNAL void * U_EXPORT2 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) { uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2480,7 +2455,6 @@ uprv_dl_close(void *lib, UErrorCode *status) {
return; return;
} }
U_INTERNAL UVoidFunction* U_EXPORT2 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
(void)lib; (void)lib;
@ -2491,7 +2465,7 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return (UVoidFunction*)NULL; return (UVoidFunction*)NULL;
} }
#endif /* U_ENABLE_DYLOAD */ #endif
/* /*
* Hey, Emacs, please set the following: * Hey, Emacs, please set the following:

Просмотреть файл

@ -72,15 +72,6 @@
typedef size_t uintptr_t; typedef size_t uintptr_t;
#endif #endif
/**
* \def U_HAVE_MSVC_2003_OR_EARLIER
* Flag for workaround of MSVC 2003 optimization bugs
* @internal
*/
#if !defined(U_HAVE_MSVC_2003_OR_EARLIER) && defined(_MSC_VER) && (_MSC_VER < 1400)
#define U_HAVE_MSVC_2003_OR_EARLIER
#endif
/*===========================================================================*/ /*===========================================================================*/
/** @{ Information about POSIX support */ /** @{ Information about POSIX support */
/*===========================================================================*/ /*===========================================================================*/
@ -120,15 +111,15 @@ typedef size_t uintptr_t;
/* Use the predefined value. */ /* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID #elif U_PLATFORM == U_PF_ANDROID
# define U_TIMEZONE timezone # define U_TIMEZONE timezone
#elif defined(__UCLIBC__)
// uClibc does not have __timezone or _timezone.
#elif defined(_NEWLIB_VERSION)
# define U_TIMEZONE _timezone
#elif defined(__GLIBC__)
// glibc
# define U_TIMEZONE __timezone
#elif U_PLATFORM_IS_LINUX_BASED #elif U_PLATFORM_IS_LINUX_BASED
# if defined(__UCLIBC__) // not defined
/* uClibc does not have __timezone or _timezone. */
# elif defined(_NEWLIB_VERSION)
# define U_TIMEZONE _timezone
# elif defined(__GLIBC__)
/* glibc */
# define U_TIMEZONE __timezone
# endif
#elif U_PLATFORM_USES_ONLY_WIN32_API #elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_TIMEZONE _timezone # define U_TIMEZONE _timezone
#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__) #elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
@ -214,7 +205,7 @@ typedef size_t uintptr_t;
/** /**
* \def U_HAVE_STD_ATOMICS * \def U_HAVE_STD_ATOMICS
* Defines whether the standard C++11 <atomic> is available. * Defines whether the standard C++11 <atomic> is available.
* ICU will use this when avialable, * ICU will use this when available,
* otherwise will fall back to compiler or platform specific alternatives. * otherwise will fall back to compiler or platform specific alternatives.
* @internal * @internal
*/ */
@ -239,7 +230,7 @@ typedef size_t uintptr_t;
/** /**
* \def U_HAVE_CLANG_ATOMICS * \def U_HAVE_CLANG_ATOMICS
* Defines whether Clang c11 style built-in atomics are avaialable. * Defines whether Clang c11 style built-in atomics are available.
* These are used in preference to gcc atomics when both are available. * These are used in preference to gcc atomics when both are available.
*/ */
#ifdef U_HAVE_CLANG_ATOMICS #ifdef U_HAVE_CLANG_ATOMICS
@ -277,7 +268,7 @@ typedef size_t uintptr_t;
/** /**
* Platform utilities isolates the platform dependencies of the * Platform utilities isolates the platform dependencies of the
* libarary. For each platform which this code is ported to, these * library. For each platform which this code is ported to, these
* functions may have to be re-implemented. * functions may have to be re-implemented.
*/ */
@ -425,7 +416,7 @@ U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
/** /**
* Please use uloc_getDefault() instead. * Please use uloc_getDefault() instead.
* Return the default locale ID string by querying ths system, or * Return the default locale ID string by querying the system, or
* zero if one cannot be found. * zero if one cannot be found.
* This function can call setlocale() on Unix platforms. Please read the * This function can call setlocale() on Unix platforms. Please read the
* platform documentation on setlocale() before calling this function. * platform documentation on setlocale() before calling this function.

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,630 @@
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// file: rbbi_cache.cpp
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/ubrk.h"
#include "unicode/rbbi.h"
#include "rbbi_cache.h"
#include "brkeng.h"
#include "cmemory.h"
#include "rbbidata.h"
#include "rbbirb.h"
#include "uassert.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/*
* DictionaryCache implementation
*/
/*
 * Construct an empty dictionary cache bound to the break iterator bi.
 *
 * bi      the break iterator stored in fBI and consulted when populating.
 * status  forwarded to the UVector32 constructor. It is additionally set to
 *         U_MEMORY_ALLOCATION_ERROR when the vector object itself could not
 *         be allocated and no earlier failure has been recorded.
 */
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
        fBI(bi), fBreaks(NULL), fPositionInCache(-1),
        fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
    fBreaks = new UVector32(status);
    if (fBreaks == NULL && U_SUCCESS(status)) {
        // reset() and the lookup functions dereference fBreaks without a
        // NULL check, so a failed allocation must be visible to the caller
        // through status instead of surfacing later as a NULL dereference.
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
/*
 * Release the vector of cached dictionary boundaries.
 */
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
    UVector32 *breaks = fBreaks;
    // Clear the member before deleting so it never holds a dangling pointer.
    fBreaks = NULL;
    delete breaks;
}
/*
 * Return the cache to its empty state: no stored boundaries, an empty
 * [fStart, fLimit) range, zeroed rule status indexes, and no current
 * position within the cache.
 */
void RuleBasedBreakIterator::DictionaryCache::reset() {
    fBreaks->removeAllElements();

    fLimit = 0;
    fStart = 0;

    fOtherRuleStatusIndex = 0;
    fFirstRuleStatusIndex = 0;

    // -1 marks "not positioned on any cached boundary".
    fPositionInCache = -1;
}
/*
 * Look up the cached dictionary boundary that follows fromPos.
 *
 * fromPos      text position to search from.
 * result       receives the following boundary; written only when TRUE is returned.
 * statusIndex  receives fOtherRuleStatusIndex; written only when TRUE is returned.
 *
 * Returns TRUE when a following boundary was found in the cache. Returns FALSE,
 * with fPositionInCache set to -1, when fromPos lies outside [fStart, fLimit)
 * or when no later boundary is stored.
 */
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    // Only positions inside [fStart, fLimit) are served by this cache.
    if (!(fromPos >= fStart && fromPos < fLimit)) {
        fPositionInCache = -1;
        return FALSE;
    }

    const int32_t breakCount = fBreaks->size();
    int32_t boundary = 0;

    // Sequential iteration: the cache is already positioned on fromPos,
    // so the answer is simply the next stored element.
    if (fPositionInCache >= 0 && fPositionInCache < breakCount
            && fBreaks->elementAti(fPositionInCache) == fromPos) {
        ++fPositionInCache;
        if (fPositionInCache >= breakCount) {
            fPositionInCache = -1;
            return FALSE;
        }
        boundary = fBreaks->elementAti(fPositionInCache);
        U_ASSERT(boundary > fromPos);
        *result = boundary;
        *statusIndex = fOtherRuleStatusIndex;
        return TRUE;
    }

    // Random access: scan from the front for the first boundary beyond fromPos.
    fPositionInCache = 0;
    while (fPositionInCache < breakCount) {
        boundary = fBreaks->elementAti(fPositionInCache);
        if (boundary > fromPos) {
            *result = boundary;
            *statusIndex = fOtherRuleStatusIndex;
            return TRUE;
        }
        ++fPositionInCache;
    }

    // Not expected to be reached: the range check above passed, yet no stored
    // boundary lies beyond fromPos.
    U_ASSERT(FALSE);
    fPositionInCache = -1;
    return FALSE;
}
/*
 * Look up the cached dictionary boundary that precedes fromPos.
 *
 * fromPos      text position to search from.
 * result       receives the preceding boundary; written only when TRUE is returned.
 * statusIndex  receives fFirstRuleStatusIndex when the boundary found equals fStart,
 *              otherwise fOtherRuleStatusIndex; written only when TRUE is returned.
 *
 * Returns TRUE when a preceding boundary was found in the cache. Returns FALSE,
 * with fPositionInCache set to -1, when fromPos lies outside (fStart, fLimit]
 * or when the cache is positioned on its first element.
 */
UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    // Only positions inside (fStart, fLimit] can have a cached predecessor.
    if (!(fromPos > fStart && fromPos <= fLimit)) {
        fPositionInCache = -1;
        return FALSE;
    }

    const int32_t breakCount = fBreaks->size();

    // Starting from the very end of the cached range: position on the last element.
    if (fromPos == fLimit) {
        fPositionInCache = breakCount - 1;
        if (fPositionInCache >= 0) {
            U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
        }
    }

    int32_t boundary;

    // Sequential iteration: already positioned on fromPos, step back one element.
    if (fPositionInCache > 0 && fPositionInCache < breakCount
            && fBreaks->elementAti(fPositionInCache) == fromPos) {
        --fPositionInCache;
        boundary = fBreaks->elementAti(fPositionInCache);
        U_ASSERT(boundary < fromPos);
        *result = boundary;
        if (boundary == fStart) {
            *statusIndex = fFirstRuleStatusIndex;
        } else {
            *statusIndex = fOtherRuleStatusIndex;
        }
        return TRUE;
    }

    // Positioned on the first element: nothing precedes it in this cache.
    if (fPositionInCache == 0) {
        fPositionInCache = -1;
        return FALSE;
    }

    // Random access: scan from the back for the last boundary before fromPos.
    fPositionInCache = breakCount - 1;
    while (fPositionInCache >= 0) {
        boundary = fBreaks->elementAti(fPositionInCache);
        if (boundary < fromPos) {
            *result = boundary;
            if (boundary == fStart) {
                *statusIndex = fFirstRuleStatusIndex;
            } else {
                *statusIndex = fOtherRuleStatusIndex;
            }
            return TRUE;
        }
        --fPositionInCache;
    }

    // Not expected to be reached: the range check above passed, yet no stored
    // boundary lies before fromPos.
    U_ASSERT(FALSE);
    fPositionInCache = -1;
    return FALSE;
}
/*
 * Fill the dictionary cache with the boundaries that the language break
 * engines find inside the text range [startPos, endPos).
 *
 * startPos         start of the rule-based segment to subdivide.
 * endPos           end of the rule-based segment to subdivide.
 * firstRuleStatus  stored as fFirstRuleStatusIndex.
 * otherRuleStatus  stored as fOtherRuleStatusIndex.
 *
 * Ranges of length <= 1 are ignored and leave the cache untouched. If no
 * engine reports a break, the cache is left reset (empty).
 */
void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
                                       int32_t firstRuleStatus, int32_t otherRuleStatus) {
    if ((endPos - startPos) <= 1) {
        return;
    }

    reset();
    fFirstRuleStatusIndex = firstRuleStatus;
    fOtherRuleStatusIndex = otherRuleStatus;

    int32_t     rangeStart = startPos;
    int32_t     rangeEnd = endPos;
    UErrorCode  status = U_ZERO_ERROR;
    int32_t     numBreaksFound = 0;
    UText      *text = fBI->fText;

    // Walk the text looking for spans of dictionary characters. For each span,
    // obtain the matching break engine and let it report breaks within the span.
    utext_setNativeIndex(text, rangeStart);
    UChar32  c = utext_current32(text);
    uint16_t trieValue = UTRIE2_GET16(fBI->fData->fTrie, c);

    while (U_SUCCESS(status)) {
        // Skip forward until a character whose trie value has bit 0x4000 set
        // (the bit this loop treats as the dictionary flag), or until the end
        // of the range.
        int32_t textPos;
        for (;;) {
            textPos = (int32_t)UTEXT_GETNATIVEINDEX(text);
            if (textPos >= rangeEnd || (trieValue & 0x4000) != 0) {
                break;
            }
            utext_next32(text);           // TODO:  cleaner loop structure.
            c = utext_current32(text);
            trieValue = UTRIE2_GET16(fBI->fData->fTrie, c);
        }
        if (textPos >= rangeEnd) {
            break;
        }

        // c is a dictionary character; fetch the language object that handles it.
        const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);

        // The engine appends any breaks it finds to fBreaks and leaves the text
        // position on the far side of its range, ready for the next search.
        if (lbe != NULL) {
            numBreaksFound += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
        }

        // Reload the loop variables for the next go-round.
        c = utext_current32(text);
        trieValue = UTRIE2_GET16(fBI->fData->fTrie, c);
    }

    // printf("foundBreakCount = %d\n", foundBreakCount);
    if (numBreaksFound <= 0) {
        // There were no language-based breaks, even though the segment contained
        // dictionary characters. Subsequent attempts to fetch boundaries from the
        // dictionary cache for this range will fail, and the calling code will
        // fall back to the rule based boundaries.
        return;
    }

    // Breaks were found: make sure the first and last entries are the original
    // start and end positions, and point the cache iteration at the first entry.
    U_ASSERT(numBreaksFound == fBreaks->size());
    if (startPos < fBreaks->elementAti(0)) {
        // The dictionary did not place a boundary at the start of the segment of text.
        // Add one now. This should not commonly happen, but it would be easy for
        // interactions of the rules for dictionary segments and the break engine
        // implementations to inadvertently cause it. Cover it here, just in case.
        fBreaks->insertElementAt(startPos, 0, status);
    }
    if (endPos > fBreaks->peeki()) {
        fBreaks->push(endPos, status);
    }
    fPositionInCache = 0;
    // Note: Dictionary matching may extend beyond the original limit.
    fStart = fBreaks->elementAti(0);
    fLimit = fBreaks->peeki();
}
/*
 *   BreakCache implementation
 */
/*
 * Construct a break cache bound to the break iterator bi.
 *
 * bi      the break iterator stored in fBI.
 * status  passed to the constructor of fSideBuffer.
 *
 * The cache is put into its initial state by calling reset() with its
 * default arguments.
 */
RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
        fBI(bi), fSideBuffer(status) {
    reset();
}
/*
 * Destructor. The body is empty; no explicit cleanup is performed here.
 */
RuleBasedBreakIterator::BreakCache::~BreakCache() {
}
/*
 * Reset the cache so that it holds exactly one boundary.
 *
 * pos         the single boundary position to keep; also becomes fTextIdx.
 * ruleStatus  rule status index stored for that boundary (narrowed to uint16_t).
 */
void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
    // Slot 0 holds the one remaining boundary and its status.
    fBoundaries[0] = pos;
    fStatuses[0] = static_cast<uint16_t>(ruleStatus);

    // Start, end and current buffer indexes all refer to slot 0.
    fBufIdx = 0;
    fEndBufIdx = 0;
    fStartBufIdx = 0;

    fTextIdx = pos;
}
/*
 * Push the cache's current position out to the owning break iterator:
 * copies fTextIdx and the current rule status index into fBI and clears
 * fBI->fDone.
 *
 * Returns the current text position (fTextIdx).
 */
int32_t RuleBasedBreakIterator::BreakCache::current() {
    const int32_t textPos = fTextIdx;

    fBI->fDone = FALSE;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
    fBI->fPosition = textPos;

    return textPos;
}
/*
 * Move to the boundary following startPos.
 *
 * startPos  text position to start from.
 * status    nothing is done if it already indicates a failure; it is also
 *           passed on to populateNear().
 *
 * The cache is first positioned at startPos, trying in order: already there,
 * seek() within the cached range, populateNear(). If that succeeds, a next()
 * is performed from that position; otherwise the function does nothing more.
 */
void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    if (startPos != fTextIdx && !seek(startPos) && !populateNear(startPos, status)) {
        // startPos could not be brought into the cache.
        return;
    }

    // startPos is in the cache. Do a next() from that position.
    // TODO: an awkward set of interactions with bi->fDone
    //       seek() does not clear it; it can't because of interactions with populateNear().
    //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
    //       So clear it here, for the case where seek() succeeded on an iterator
    //       that had previously run off the end.
    fBI->fDone = false;
    next();
}
/*
 * Move to the boundary preceding startPos.
 *
 * startPos  text position to start from.
 * status    nothing is done if it already indicates a failure; it is also
 *           passed on to populateNear() and previous().
 *
 * The cache is first positioned at or before startPos, trying in order:
 * already there, seek() within the cached range, populateNear(). If none
 * of these succeeds the function does nothing more.
 */
void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    if (startPos != fTextIdx && !seek(startPos) && !populateNear(startPos, status)) {
        // startPos could not be brought into the cache.
        return;
    }

    if (startPos == fTextIdx) {
        // Positioned exactly on startPos: step back one boundary.
        previous(status);
        return;
    }

    // seek() leaves the BreakCache positioned at the preceding boundary
    // if the requested position is between two boundaries.
    // current() pushes the BreakCache position out to the BreakIterator itself.
    U_ASSERT(startPos > fTextIdx);
    current();
}
/*
* Out-of-line code for BreakCache::next().
* Cache does not already contain the boundary
*/
void RuleBasedBreakIterator::BreakCache::nextOL() {
fBI->fDone = !populateFollowing();
fBI->fPosition = fTextIdx;
fBI->fRuleStatusIndex = fStatuses[fBufIdx];
return;
}
/*
 * Step the cache back by one boundary and publish the result to the
 * break iterator (fPosition, fRuleStatusIndex, fDone).
 *
 * status  nothing is done if it already indicates a failure; it is also
 *         passed on to populatePreceding().
 *
 * fBI->fDone is set when the buffer index did not change, i.e. no earlier
 * boundary became available.
 */
void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    const int32_t bufIdxBefore = fBufIdx;

    if (fBufIdx != fStartBufIdx) {
        // The cache already holds the preceding boundary; just move to it.
        fBufIdx = modChunkSize(fBufIdx - 1);
        fTextIdx = fBoundaries[fBufIdx];
    } else {
        // At start of cache. Prepend to it.
        populatePreceding(status);
    }

    fBI->fDone = (fBufIdx == bufIdxBefore);
    fBI->fPosition = fTextIdx;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
}
/*
 * Position the cache at pos, or at the closest cached boundary before pos,
 * without adding anything to the cache.
 *
 * pos  the requested text position.
 *
 * Returns FALSE, leaving the cache position unchanged, when pos lies outside
 * the range of boundaries currently cached. Otherwise returns TRUE with
 * fBufIdx / fTextIdx set to the boundary at pos, or to the boundary
 * immediately preceding pos when pos is not itself a cached boundary.
 */
UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
    const int32_t firstBoundary = fBoundaries[fStartBufIdx];
    const int32_t lastBoundary = fBoundaries[fEndBufIdx];

    if (pos < firstBoundary || pos > lastBoundary) {
        return FALSE;
    }

    if (pos == firstBoundary) {
        // Common case: seek(0), from BreakIterator::first()
        fBufIdx = fStartBufIdx;
        fTextIdx = fBoundaries[fBufIdx];
        return TRUE;
    }
    if (pos == lastBoundary) {
        fBufIdx = fEndBufIdx;
        fTextIdx = fBoundaries[fBufIdx];
        return TRUE;
    }

    // Binary search over the circular buffer for the first boundary > pos.
    int32_t lo = fStartBufIdx;
    int32_t hi = fEndBufIdx;
    while (lo != hi) {
        // When the live range wraps around the end of the buffer (lo > hi),
        // add CACHE_SIZE so the midpoint is computed on the unwrapped range.
        int32_t sum = lo + hi;
        if (lo > hi) {
            sum += CACHE_SIZE;
        }
        const int32_t mid = modChunkSize(sum / 2);
        if (fBoundaries[mid] > pos) {
            hi = mid;
        } else {
            lo = modChunkSize(mid + 1);
        }
    }
    U_ASSERT(fBoundaries[hi] > pos);

    // Settle on the element just before the first boundary beyond pos.
    fBufIdx = modChunkSize(hi - 1);
    fTextIdx = fBoundaries[fBufIdx];
    U_ASSERT(fTextIdx <= pos);
    return TRUE;
}
/*
 * Bring the requested position into the cache when it lies outside the
 * range of boundaries currently cached (see the assertion below).
 *
 * position  the requested text position.
 * status    FALSE is returned immediately if it already indicates a failure;
 *           it is also passed on to previous() and populatePreceding().
 *
 * Returns true once the cache has been extended and positioned at, or at the
 * boundary preceding, the requested position. Returns false if
 * populateFollowing() fails while extending the cache forward.
 */
UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return FALSE;
    }
    U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);

    // Find a boundary somewhere in the vicinity of the requested position.
    // Depending on the safe rules and the text data, it could be either before, at, or after
    // the requested position.


    // If the requested position is not near already cached positions, clear the existing cache,
    // find a near-by boundary and begin new cache contents there.
    // "Near" here means within 15 of either end of the cached range.

    if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
        // Defaults of 0 / 0 are used as the starting boundary when position <= 20.
        int32_t aBoundary = 0;
        int32_t ruleStatusIndex = 0;
        // TODO: check for position == length of text. Although may still need to back up to get rule status.
        if (position > 20) {
            int32_t backupPos = fBI->handlePrevious(position);
            // handleNext() starts from fBI->fPosition, so set it before the call.
            fBI->fPosition = backupPos;
            aBoundary = fBI->handleNext();   // Ignore dictionary, just finding a rule based boundary.
            ruleStatusIndex = fBI->fRuleStatusIndex;
        }
        reset(aBoundary, ruleStatusIndex);               // Reset cache to hold aBoundary as a single starting point.
    }

    // Fill in boundaries between existing cache content and the new requested position.

    if (fBoundaries[fEndBufIdx] < position) {
        // The last position in the cache precedes the requested position.
        // Add following position(s) to the cache.
        while (fBoundaries[fEndBufIdx] < position) {
            if (!populateFollowing()) {
                U_ASSERT(false);
                return false;
            }
        }
        fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
        fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
        while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
            previous(status);
        }
        return true;
    }

    if (fBoundaries[fStartBufIdx] > position) {
        // The first position in the cache is beyond the requested position.
        // back up more until we get a boundary <= the requested position.
        // NOTE(review): the result of populatePreceding() is ignored here, unlike
        // populateFollowing() above. If it can fail without moving the start of the
        // cache, this loop would not terminate -- confirm against its full definition.
        while (fBoundaries[fStartBufIdx] > position) {
            populatePreceding(status);
        }
        fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
        fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
        while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
            next();
        }
        if (fTextIdx > position) {
            // If position is not itself a boundary, the next() loop above will overshoot.
            // Back up one, leaving cache position at the boundary preceding the requested position.
            previous(status);
        }
        return true;
    }

    // Neither branch applied: after the reset above, the cache's single boundary
    // is expected to be the requested position itself.
    U_ASSERT(fTextIdx == position);
    return true;
}
/*
 * Append at least one boundary after the last boundary currently in the cache.
 *
 * The dictionary cache is consulted first. Otherwise the rule-based
 * handleNext() is used, and if the resulting segment contains dictionary
 * characters it is subdivided via the dictionary cache.
 *
 * Returns TRUE when a boundary was added, FALSE when handleNext() reports
 * UBRK_DONE for the first boundary requested.
 */
UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
    const int32_t lastCachedPos = fBoundaries[fEndBufIdx];
    const int32_t lastCachedStatusIdx = fStatuses[fEndBufIdx];
    int32_t nextPos = 0;
    int32_t nextStatusIdx = 0;

    // A boundary already held by the dictionary cache takes precedence.
    if (fBI->fDictionaryCache->following(lastCachedPos, &nextPos, &nextStatusIdx)) {
        addFollowing(nextPos, nextStatusIdx, UpdateCachePosition);
        return TRUE;
    }

    // Otherwise ask the rules, starting from the last cached boundary.
    fBI->fPosition = lastCachedPos;
    nextPos = fBI->handleNext();
    if (nextPos == UBRK_DONE) {
        return FALSE;
    }
    nextStatusIdx = fBI->fRuleStatusIndex;

    if (fBI->fDictionaryCharCount > 0) {
        // The text segment obtained from the rules includes dictionary characters.
        // Subdivide it, with subdivided results going into the dictionary cache.
        fBI->fDictionaryCache->populateDictionary(lastCachedPos, nextPos, lastCachedStatusIdx, nextStatusIdx);
        if (fBI->fDictionaryCache->following(lastCachedPos, &nextPos, &nextStatusIdx)) {
            addFollowing(nextPos, nextStatusIdx, UpdateCachePosition);
            return TRUE;
            // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
            //       But be careful with interactions with populateNear().
        }
    }

    // Either the rule based segment had no dictionary characters, or it had some
    // but the dictionary segmenter did not handle them (the return above was not
    // taken). Add the segment's end point to the cache.
    addFollowing(nextPos, nextStatusIdx, UpdateCachePosition);

    // Add several more non-dictionary boundaries now, to optimize straightforward
    // iteration: subsequent calls to BreakIterator::next() will take the fast
    // path, getting cached results.
    int lookaheadLeft = 6;
    while (lookaheadLeft > 0) {
        nextPos = fBI->handleNext();
        if (nextPos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
            break;
        }
        addFollowing(nextPos, fBI->fRuleStatusIndex, RetainCachePosition);
        --lookaheadLeft;
    }

    return TRUE;
}
// Add one or more boundaries to the cache preceding the first currently cached boundary,
// leaving the iteration position on the added boundary closest to the old cache start.
//
// @param status  ICU error code. Nothing is done if it already indicates a failure,
//                and nothing is added to the cache if collecting the boundaries fails.
// @return        TRUE if at least one boundary was added; FALSE if the cache already
//                starts at text position 0 or if status indicates a failure.
UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return FALSE;
    }

    int32_t fromPosition = fBoundaries[fStartBufIdx];
    if (fromPosition == 0) {
        return FALSE;
    }

    int32_t position = 0;
    int32_t positionStatusIdx = 0;

    // The dictionary cache may already hold the boundary immediately preceding fromPosition.
    if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
        return TRUE;
    }

    int32_t backupPosition = fromPosition;

    // Find a boundary somewhere preceding the first already-cached boundary.
    // Back up by a fixed step (30) each time round until the boundary found
    // lies strictly before fromPosition; the start of text (0) always qualifies.
    do {
        backupPosition = backupPosition - 30;
        if (backupPosition <= 0) {
            backupPosition = 0;
        } else {
            backupPosition = fBI->handlePrevious(backupPosition);
        }
        if (backupPosition == UBRK_DONE || backupPosition == 0) {
            position = 0;
            positionStatusIdx = 0;
        } else {
            fBI->fPosition = backupPosition;  // TODO: pass starting position in a clearer way.
            position = fBI->handleNext();
            positionStatusIdx = fBI->fRuleStatusIndex;
        }
    } while (position >= fromPosition);

    // Find boundaries between the one we just located and the first already-cached boundary.
    // Put them in a side buffer, as (position, status index) pairs, because we don't yet
    // know where they will fall in the circular cache buffer.
    fSideBuffer.removeAllElements();
    fSideBuffer.addElement(position, status);
    fSideBuffer.addElement(positionStatusIdx, status);

    do {
        int32_t prevPosition = fBI->fPosition = position;
        int32_t prevStatusIdx = positionStatusIdx;
        position = fBI->handleNext();
        positionStatusIdx = fBI->fRuleStatusIndex;
        if (position == UBRK_DONE) {
            break;
        }

        UBool segmentHandledByDictionary = FALSE;
        if (fBI->fDictionaryCharCount != 0) {
            // Segment from the rules includes dictionary characters.
            // Subdivide it, with subdivided results going into the dictionary cache.
            int32_t dictSegEndPosition = position;
            fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
            while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
                segmentHandledByDictionary = TRUE;
                U_ASSERT(position > prevPosition);
                if (position >= fromPosition) {
                    break;
                }
                U_ASSERT(position <= dictSegEndPosition);
                fSideBuffer.addElement(position, status);
                fSideBuffer.addElement(positionStatusIdx, status);
                prevPosition = position;
            }
            U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
        }

        if (!segmentHandledByDictionary && position < fromPosition) {
            fSideBuffer.addElement(position, status);
            fSideBuffer.addElement(positionStatusIdx, status);
        }
    } while (position < fromPosition);

    // If any append to the side buffer reported a failure through status, the buffer
    // may be missing boundaries. Moving such an incomplete run into the cache would
    // leave a gap between the copied boundaries and fromPosition, so give up instead.
    // The cache itself has not been modified at this point.
    if (U_FAILURE(status)) {
        return FALSE;
    }

    // Move boundaries from the side buffer to the main circular buffer.
    // Entries are popped in reverse order of insertion: status index first, then position.
    UBool success = FALSE;
    if (!fSideBuffer.isEmpty()) {
        // The most recently collected boundary is the one closest to fromPosition;
        // it becomes the new iteration position.
        positionStatusIdx = fSideBuffer.popi();
        position = fSideBuffer.popi();
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
        success = TRUE;
    }

    while (!fSideBuffer.isEmpty()) {
        positionStatusIdx = fSideBuffer.popi();
        position = fSideBuffer.popi();
        if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
            // No space in circular buffer to hold a new preceding result while
            // also retaining the current cache (iteration) position.
            // Bailing out is safe; the cache will refill again if needed.
            break;
        }
    }

    return success;
}
// Append a boundary (and its rule status index) after the last cached boundary.
//
// If the circular buffer is full, the oldest entries are discarded to make room.
// With UpdateCachePosition the iteration position moves to the new boundary;
// with RetainCachePosition it stays where it was, and the caller is responsible
// for not adding so many boundaries that the current position gets overwritten.
void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
    U_ASSERT(position > fBoundaries[fEndBufIdx]);
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);

    const int32_t slot = modChunkSize(fEndBufIdx + 1);
    if (slot == fStartBufIdx) {
        // Buffer is full: drop entries from the start to free the slot.
        fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
    }

    fEndBufIdx = slot;
    fBoundaries[slot] = position;
    fStatuses[slot] = ruleStatusIdx;

    if (update != UpdateCachePosition) {
        // Keeping the original cache position: verify that wrapping around the
        // buffer did not just overwrite it.
        U_ASSERT(slot != fBufIdx);
        return;
    }

    // Make the newly added boundary the current position.
    fBufIdx = slot;
    fTextIdx = position;
}
// Prepend a boundary (and its rule status index) before the first cached boundary.
//
// If the circular buffer is full, the last entry is discarded to make room, unless
// that entry is the current iteration position and the caller asked to retain it;
// in that case nothing is changed and false is returned. Returns true otherwise.
// With UpdateCachePosition the iteration position moves to the new boundary.
bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
    U_ASSERT(position < fBoundaries[fStartBufIdx]);
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);

    const int32_t slot = modChunkSize(fStartBufIdx - 1);
    if (slot == fEndBufIdx) {
        // The buffer is full; the new entry would take over the last slot.
        if (update == RetainCachePosition && fBufIdx == fEndBufIdx) {
            // Failure. The last slot holds the current iteration position, which
            // must be retained, so there is no room for the new boundary.
            // (Every other entry in the buffer already precedes the iteration position.)
            return false;
        }
        fEndBufIdx = modChunkSize(fEndBufIdx - 1);
    }

    fStartBufIdx = slot;
    fBoundaries[slot] = position;
    fStatuses[slot] = ruleStatusIdx;

    if (update == UpdateCachePosition) {
        fTextIdx = position;
        fBufIdx = slot;
    }
    return true;
}
// Debug aid: print the current iteration position, then every cached boundary
// from fStartBufIdx through fEndBufIdx (inclusive) with its buffer index.
// Compiles to an empty function unless RBBI_DEBUG is defined.
void RuleBasedBreakIterator::BreakCache::dumpCache() {
#ifdef RBBI_DEBUG
    RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
    int32_t idx = fStartBufIdx;
    for (;;) {
        RBBIDebugPrintf("%d %d\n", idx, fBoundaries[idx]);
        if (idx == fEndBufIdx) {
            break;
        }
        idx = modChunkSize(idx + 1);   // advance, wrapping around the circular buffer
    }
#endif
}
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_BREAK_ITERATION

Просмотреть файл

@ -0,0 +1,203 @@
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// file: rbbi_cache.h
//
#ifndef RBBI_CACHE_H
#define RBBI_CACHE_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/rbbi.h"
#include "unicode/uobject.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/* DictionaryCache stores the boundaries obtained from a run of dictionary characters.
* Dictionary boundaries are moved first to this cache, then from here
* to the main BreakCache, where they may inter-leave with non-dictionary
* boundaries. The public BreakIterator API always fetches directly
* from the main BreakCache, not from here.
*
* In common situations, the number of boundaries in a single dictionary run
* should be quite small, it will be terminated by punctuation, spaces,
* or any other non-dictionary characters. The main BreakCache may end
* up with boundaries from multiple dictionary based runs.
*
* The boundaries are stored in a simple ArrayList (vector), with the
* assumption that they will be accessed sequentially.
*/
class RuleBasedBreakIterator::DictionaryCache: public UMemory {
public:
// Construct a cache for the given break iterator. Problems are reported through status.
DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
~DictionaryCache();
// Reset the cache state. NOTE(review): presumably discards all cached boundaries;
// confirm against the definition in rbbi_cache.cpp.
void reset();
// Look up the cached dictionary boundary following fromPos.
// As used by BreakCache: a TRUE return means *pos and *statusIndex have been set to
// the boundary and its rule status index; after a FALSE return the callers keep
// using the values they passed in.
UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
// Look up the cached dictionary boundary preceding fromPos.
// Same calling convention as following().
UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
/**
* Populate the cache with the dictionary based boundaries within a region of text.
* @param startPos The start position of a range of text
* @param endPos The end position of a range of text
* @param firstRuleStatus The rule status index that applies to the break at startPos
* @param otherRuleStatus The rule status index that applies to boundaries other than startPos
* @internal
*/
void populateDictionary(int32_t startPos, int32_t endPos,
int32_t firstRuleStatus, int32_t otherRuleStatus);
// The break iterator this cache works for.
// NOTE(review): presumably the constructor's bi argument; confirm in rbbi_cache.cpp.
RuleBasedBreakIterator *fBI;
UVector32 *fBreaks; // A vector containing the boundaries.
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
// or preceding(). Optimizes sequential access.
int32_t fStart; // Text position of first boundary in cache.
int32_t fLimit; // Last boundary in cache. Which is the limit of the
// text segment being handled by the dictionary.
int32_t fFirstRuleStatusIndex; // Rule status info for first boundary.
int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries.
};
/*
* class BreakCache
*
* Cache of break boundary positions and rule status values.
* Break iterator API functions, next(), previous(), etc., will use cached results
* when possible, and otherwise cache new results as they are obtained.
*
* Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
*
* The cache is implemented as a single circular buffer.
*/
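/*
* The size of the circular cache buffer is BreakCache::CACHE_SIZE,
* declared in the private section of the class. It must be a power of two.
*/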
class RuleBasedBreakIterator::BreakCache: public UMemory {
public:
// Construct a cache for the given break iterator. Problems are reported through status.
BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
virtual ~BreakCache();
// Reset the cache. NOTE(review): presumably leaves it holding the single boundary
// (pos, ruleStatus); confirm against the definition in rbbi_cache.cpp.
void reset(int32_t pos = 0, int32_t ruleStatus = 0);
// Advance the iteration position to the next boundary.
// Fast path, inline: when the current position is not the last cached entry, step to
// the next buffer slot (wrapping around) and copy its boundary and rule status index
// into the break iterator. Otherwise defer to the out-of-line nextOL().
void next() { if (fBufIdx == fEndBufIdx) {
nextOL();
} else {
fBufIdx = modChunkSize(fBufIdx + 1);
fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
fBI->fRuleStatusIndex = fStatuses[fBufIdx];
}
};
// Out-of-line part of next(), used when the iteration position is on the last
// cached boundary.
void nextOL();
// Move the iteration position to the previous boundary.
void previous(UErrorCode &status);
// Move the iteration state to the position following the startPosition.
// Input position must be pinned to the input length.
void following(int32_t startPosition, UErrorCode &status);
// NOTE(review): presumably the counterpart of following(), moving to the boundary
// preceding startPosition; confirm against the definition in rbbi_cache.cpp.
void preceding(int32_t startPosition, UErrorCode &status);
/*
* Update the state of the public BreakIterator (fBI) to reflect the
* current state of the break iterator cache (this).
*/
int32_t current();
/**
* Add boundaries to the cache near the specified position.
* The given position need not be a boundary itself.
* The input position must be within the range of the text, and
* on a code point boundary.
* If the requested position is a break boundary, leave the iteration
* position on it.
* If the requested position is not a boundary, leave the iteration
* position on the preceding boundary and include both the
* preceding and following boundaries in the cache.
* Additional boundaries, either preceding or following, may be added
* to the cache as a side effect.
*
* Return FALSE if the operation failed.
*/
UBool populateNear(int32_t position, UErrorCode &status);
/**
* Add boundary(s) to the cache following the current last boundary.
* Return FALSE if at the end of the text, and no more boundaries can be added.
* Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
*/
UBool populateFollowing();
/**
* Add one or more boundaries to the cache preceding the first currently cached boundary.
* Leave the iteration position on the first added boundary.
* Return false if no boundaries could be added (if at the start of the text.)
*/
UBool populatePreceding(UErrorCode &status);
// Tells addFollowing() / addPreceding() whether the iteration position should stay
// where it is or move to the boundary being added.
enum UpdatePositionValues {
RetainCachePosition = 0,
UpdateCachePosition = 1
};
/*
* Add the boundary following the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
*/
void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
/*
* Add the boundary preceding the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
* Returns false, adding nothing, if the buffer is full and the only slot that could
* be reclaimed holds the current position while that position is to be retained.
*/
bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
/**
* Set the cache position to the specified position, or, if the position
* falls between two cached boundaries, to the preceding boundary.
* Fails if the requested position is outside of the range of boundaries currently held by the cache.
* The startPosition must be on a code point boundary.
*
* Return TRUE if successful, FALSE if the specified position is after
* the last cached boundary or before the first.
*/
UBool seek(int32_t startPosition);
// Debug aid: prints the cache contents when RBBI_DEBUG is defined; otherwise does nothing.
void dumpCache();
private:
// Wrap a buffer index into the range [0, CACHE_SIZE) with a bit mask.
// This is why CACHE_SIZE has to be a power of two (see the static_assert below).
static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); };
// Number of slots in the circular buffer.
static constexpr int32_t CACHE_SIZE = 128;
static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
// The break iterator whose fPosition / fRuleStatusIndex this cache reads and updates.
RuleBasedBreakIterator *fBI;
// Buffer index of the first cached boundary (the one at the lowest text position).
int32_t fStartBufIdx;
int32_t fEndBufIdx; // inclusive
// Text position of the current iteration position; kept in step with fBufIdx.
int32_t fTextIdx;
// Buffer index of the current iteration position.
int32_t fBufIdx;
// Circular buffer of boundary positions; valid entries run from fStartBufIdx to fEndBufIdx.
int32_t fBoundaries[CACHE_SIZE];
// Rule status index for each entry of fBoundaries. Stored as 16 bits; the add
// functions assert that the value fits.
uint16_t fStatuses[CACHE_SIZE];
// Scratch storage used by populatePreceding() to collect (position, status index)
// pairs before they are moved into the circular buffer.
UVector32 fSideBuffer;
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_BREAK_ITERATION
#endif // RBBI_CACHE_H

Просмотреть файл

@ -14,7 +14,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "rbbidata.h" #include "rbbidata.h"
#include "rbbirb.h" #include "rbbirb.h"
#include "utrie.h" #include "utrie2.h"
#include "udatamem.h" #include "udatamem.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
@ -23,23 +23,6 @@
#include "uassert.h" #include "uassert.h"
//-----------------------------------------------------------------------------------
//
// Trie access folding function. Copied as-is from properties code in uchar.c
//
//-----------------------------------------------------------------------------------
U_CDECL_BEGIN
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
/* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */
if(data&0x8000) {
return (int32_t)(data&0x7fff);
} else {
return 0;
}
}
U_CDECL_END
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
@ -71,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk " dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
dh->info.dataFormat[1] == 0x72 && dh->info.dataFormat[1] == 0x72 &&
dh->info.dataFormat[2] == 0x6b && dh->info.dataFormat[2] == 0x6b &&
dh->info.dataFormat[3] == 0x20) dh->info.dataFormat[3] == 0x20 &&
// Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is isDataVersionAcceptable(dh->info.formatVersion))
// validated when checking that.
) { ) {
status = U_INVALID_FORMAT_ERROR; status = U_INVALID_FORMAT_ERROR;
return; return;
@ -84,6 +66,11 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
fUDataMem = udm; fUDataMem = udm;
} }
UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) {
return RBBI_DATA_FORMAT_VERSION[0] == version[0];
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// //
// init(). Does most of the work of construction, shared between the // init(). Does most of the work of construction, shared between the
@ -96,10 +83,11 @@ void RBBIDataWrapper::init0() {
fReverseTable = NULL; fReverseTable = NULL;
fSafeFwdTable = NULL; fSafeFwdTable = NULL;
fSafeRevTable = NULL; fSafeRevTable = NULL;
fRuleSource = NULL; fRuleSource = NULL;
fRuleStatusTable = NULL; fRuleStatusTable = NULL;
fUDataMem = NULL; fTrie = NULL;
fRefCount = 0; fUDataMem = NULL;
fRefCount = 0;
fDontFreeData = TRUE; fDontFreeData = TRUE;
} }
@ -108,8 +96,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
return; return;
} }
fHeader = data; fHeader = data;
if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3) if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
{
status = U_INVALID_FORMAT_ERROR; status = U_INVALID_FORMAT_ERROR;
return; return;
} }
@ -131,16 +118,23 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable); fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
} }
// Rule Compatibility Hacks
// If a rule set includes reverse rules but does not explicitly include safe reverse rules,
// the reverse rules are to be treated as safe reverse rules.
utrie_unserialize(&fTrie, if (fSafeRevTable == NULL && fReverseTable != NULL) {
(uint8_t *)data + fHeader->fTrie, fSafeRevTable = fReverseTable;
fHeader->fTrieLen, fReverseTable = NULL;
&status); }
fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
(uint8_t *)data + fHeader->fTrie,
fHeader->fTrieLen,
NULL, // *actual length
&status);
if (U_FAILURE(status)) { if (U_FAILURE(status)) {
return; return;
} }
fTrie.getFoldingOffset=getFoldingOffset;
fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource); fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
fRuleString.setTo(TRUE, fRuleSource, -1); fRuleString.setTo(TRUE, fRuleSource, -1);
@ -165,6 +159,8 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
RBBIDataWrapper::~RBBIDataWrapper() { RBBIDataWrapper::~RBBIDataWrapper() {
U_ASSERT(fRefCount == 0); U_ASSERT(fRefCount == 0);
utrie2_close(fTrie);
fTrie = NULL;
if (fUDataMem) { if (fUDataMem) {
udata_close(fUDataMem); udata_close(fUDataMem);
} else if (!fDontFreeData) { } else if (!fDontFreeData) {
@ -323,7 +319,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6b && pInfo->dataFormat[2]==0x6b &&
pInfo->dataFormat[3]==0x20 && pInfo->dataFormat[3]==0x20 &&
pInfo->formatVersion[0]==3 )) { RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[0], pInfo->dataFormat[1],
pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->dataFormat[2], pInfo->dataFormat[3],
@ -344,17 +340,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
// //
// Get the RRBI Data Header, and check that it appears to be OK. // Get the RRBI Data Header, and check that it appears to be OK.
// //
// Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
// an int32_t with a value of 1. Starting with ICU 3.4,
// RBBI's fDataFormat matches the dataFormat field from the
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
//
const uint8_t *inBytes =(const uint8_t *)inData+headerSize; const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes; RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
rbbiDH->fFormatVersion[0] != 3 || !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
{
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
*status=U_UNSUPPORTED_ERROR; *status=U_UNSUPPORTED_ERROR;
return 0; return 0;
@ -451,8 +441,8 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
} }
// Trie table for character categories // Trie table for character categories
utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
outBytes+ds->readUInt32(rbbiDH->fTrie), status); outBytes+ds->readUInt32(rbbiDH->fTrie), status);
// Source Rules Text. It's UChar data // Source Rules Text. It's UChar data
ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen), ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),

Просмотреть файл

@ -51,22 +51,23 @@ ubrk_swap(const UDataSwapper *ds,
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/uversion.h"
#include "umutex.h" #include "umutex.h"
#include "utrie.h" #include "utrie2.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// The current RBBI data format version.
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
/* /*
* The following structs map exactly onto the raw data from ICU common data file. * The following structs map exactly onto the raw data from ICU common data file.
*/ */
struct RBBIDataHeader { struct RBBIDataHeader {
uint32_t fMagic; /* == 0xbla0 */ uint32_t fMagic; /* == 0xbla0 */
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
/* if there is one associated with this data. */ /* if there is one associated with this data. */
/* (version originates in rbbi, is copied to UDataInfo) */ /* (version originates in rbbi, is copied to UDataInfo) */
/* For ICU 3.2 and earlier, this field was */
/* uint32_t fVersion */
/* with a value of 1. */
uint32_t fLength; /* Total length in bytes of this RBBI Data, */ uint32_t fLength; /* Total length in bytes of this RBBI Data, */
/* including all sections, not just the header. */ /* including all sections, not just the header. */
uint32_t fCatCount; /* Number of character categories. */ uint32_t fCatCount; /* Number of character categories. */
@ -152,6 +153,8 @@ public:
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
~RBBIDataWrapper(); ~RBBIDataWrapper();
static UBool isDataVersionAcceptable(const UVersionInfo version);
void init0(); void init0();
void init(const RBBIDataHeader *data, UErrorCode &status); void init(const RBBIDataHeader *data, UErrorCode &status);
RBBIDataWrapper *addReference(); RBBIDataWrapper *addReference();
@ -181,11 +184,11 @@ public:
/* number of int32_t values in the rule status table. Used to sanity check indexing */ /* number of int32_t values in the rule status table. Used to sanity check indexing */
int32_t fStatusMaxIdx; int32_t fStatusMaxIdx;
UTrie fTrie; UTrie2 *fTrie;
private: private:
u_atomic_int32_t fRefCount; u_atomic_int32_t fRefCount;
UDataMemory *fUDataMem; UDataMemory *fUDataMem;
UnicodeString fRuleString; UnicodeString fRuleString;
UBool fDontFreeData; UBool fDontFreeData;

Просмотреть файл

@ -24,16 +24,16 @@
#include "unicode/uchriter.h" #include "unicode/uchriter.h"
#include "unicode/parsepos.h" #include "unicode/parsepos.h"
#include "unicode/parseerr.h" #include "unicode/parseerr.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
#include "rbbirb.h" #include "rbbirb.h"
#include "rbbinode.h" #include "rbbinode.h"
#include "rbbiscan.h" #include "rbbiscan.h"
#include "rbbisetb.h" #include "rbbisetb.h"
#include "rbbitblb.h" #include "rbbitblb.h"
#include "rbbidata.h" #include "rbbidata.h"
#include "uassert.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
@ -164,8 +164,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar)); int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
int32_t totalSize = headerSize + forwardTableSize + reverseTableSize (void)safeFwdTableSize;
+ safeFwdTableSize + safeRevTableSize
int32_t totalSize = headerSize
+ forwardTableSize
+ /* reverseTableSize */ 0
+ /* safeFwdTableSize */ 0
+ (safeRevTableSize ? safeRevTableSize : reverseTableSize)
+ statusTableSize + trieSize + rulesSize; + statusTableSize + trieSize + rulesSize;
RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize); RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
@ -177,23 +182,45 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fMagic = 0xb1a0; data->fMagic = 0xb1a0;
data->fFormatVersion[0] = 3; data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
data->fFormatVersion[1] = 1; data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
data->fFormatVersion[2] = 0; data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
data->fFormatVersion[3] = 0; data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
data->fLength = totalSize; data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories(); data->fCatCount = fSetBuilder->getNumCharCategories();
// Only save the forward table and the safe reverse table,
// because these are the only ones used at run-time.
//
// For the moment, we still build the other tables if they are present in the rule source files,
// for backwards compatibility. Old rule files need to work, and this is the simplest approach.
//
// Additional backwards compatibility consideration: if no safe rules are provided, consider the
// reverse rules to actually be the safe reverse rules.
data->fFTable = headerSize; data->fFTable = headerSize;
data->fFTableLen = forwardTableSize; data->fFTableLen = forwardTableSize;
data->fRTable = data->fFTable + forwardTableSize;
data->fRTableLen = reverseTableSize;
data->fSFTable = data->fRTable + reverseTableSize;
data->fSFTableLen = safeFwdTableSize;
data->fSRTable = data->fSFTable + safeFwdTableSize;
data->fSRTableLen = safeRevTableSize;
data->fTrie = data->fSRTable + safeRevTableSize; // Do not save Reverse Table.
data->fRTable = data->fFTable + forwardTableSize;
data->fRTableLen = 0;
// Do not save the Safe Forward table.
data->fSFTable = data->fRTable + 0;
data->fSFTableLen = 0;
data->fSRTable = data->fSFTable + 0;
if (safeRevTableSize > 0) {
data->fSRTableLen = safeRevTableSize;
} else if (reverseTableSize > 0) {
data->fSRTableLen = reverseTableSize;
} else {
U_ASSERT(FALSE); // Rule build should have failed for lack of a reverse table
// before reaching this point.
}
data->fTrie = data->fSRTable + data->fSRTableLen;
data->fTrieLen = fSetBuilder->getTrieSize(); data->fTrieLen = fSetBuilder->getTrieSize();
data->fStatusTable = data->fTrie + trieSize; data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize; data->fStatusTableLen= statusTableSize;
@ -203,9 +230,14 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
fForwardTables->exportTable((uint8_t *)data + data->fFTable); fForwardTables->exportTable((uint8_t *)data + data->fFTable);
fReverseTables->exportTable((uint8_t *)data + data->fRTable); // fReverseTables->exportTable((uint8_t *)data + data->fRTable);
fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable); // fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable); if (safeRevTableSize > 0) {
fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
} else {
fReverseTables->exportTable((uint8_t *)data + data->fSRTable);
}
fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie); fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable); int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);

Просмотреть файл

@ -15,6 +15,9 @@
#define RBBIRB_H #define RBBIRB_H
#include "unicode/utypes.h" #include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "unicode/rbbi.h" #include "unicode/rbbi.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
@ -207,6 +210,9 @@ struct RBBISetTableEl {
#endif #endif
U_NAMESPACE_END U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif #endif

Просмотреть файл

@ -47,6 +47,7 @@
// //
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
static const UChar gRuleSet_rule_char_pattern[] = { static const UChar gRuleSet_rule_char_pattern[] = {
// Characters that may appear as literals in patterns without escaping or quoting.
// [ ^ [ \ p { Z } \ u 0 0 2 0 // [ ^ [ \ p { Z } \ u 0 0 2 0
0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30, 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
// - \ u 0 0 7 f ] - [ \ p // - \ u 0 0 7 f ] - [ \ p
@ -558,6 +559,10 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
fRB->fDefaultTree = &fRB->fSafeRevTree; fRB->fDefaultTree = &fRB->fSafeRevTree;
} else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) { } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
fRB->fLookAheadHardBreak = TRUE; fRB->fLookAheadHardBreak = TRUE;
} else if (opt == UNICODE_STRING("quoted_literals_only", 20)) {
fRuleSets[kRuleSet_rule_char-128].clear();
} else if (opt == UNICODE_STRING("unquoted_literals", 17)) {
fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus);
} else { } else {
error(U_BRK_UNRECOGNIZED_OPTION); error(U_BRK_UNRECOGNIZED_OPTION);
} }

Просмотреть файл

@ -35,7 +35,7 @@
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "utrie.h" #include "utrie2.h"
#include "uvector.h" #include "uvector.h"
#include "uassert.h" #include "uassert.h"
#include "cmemory.h" #include "cmemory.h"
@ -44,43 +44,6 @@
#include "rbbisetb.h" #include "rbbisetb.h"
#include "rbbinode.h" #include "rbbinode.h"
//------------------------------------------------------------------------
//
// getFoldedRBBIValue Call-back function used during building of Trie table.
// Folding value: just store the offset (16 bits)
// if there is any non-0 entry.
// (It'd really be nice if the Trie builder would provide a
// simple default, so this function could go away from here.)
//
//------------------------------------------------------------------------
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
U_CDECL_BEGIN
static uint32_t U_CALLCONV
getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
uint32_t value;
UChar32 limit;
UBool inBlockZero;
limit=start+0x400;
while(start<limit) {
value=utrie_get32(trie, start, &inBlockZero);
if(inBlockZero) {
start+=UTRIE_DATA_BLOCK_LENGTH;
} else if(value!=0) {
return (uint32_t)(offset|0x8000);
} else {
++start;
}
}
return 0;
}
U_CDECL_END
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
//------------------------------------------------------------------------ //------------------------------------------------------------------------
@ -116,7 +79,7 @@ RBBISetBuilder::~RBBISetBuilder()
delete r; delete r;
} }
utrie_close(fTrie); utrie2_close(fTrie);
} }
@ -287,33 +250,38 @@ void RBBISetBuilder::build() {
// Build the Trie table for mapping UChar32 values to the corresponding // Build the Trie table for mapping UChar32 values to the corresponding
// range group number // range group number
// //
fTrie = utrie_open(NULL, // Pre-existing trie to be filled in fTrie = utrie2_open(0, // Initial value for all code points.
NULL, // Data array (utrie will allocate one) 0, // Error value for out-of-range input.
100000, // Max Data Length fStatus);
0, // Initial value for all code points
0, // Lead surrogate unit value
TRUE); // Keep Latin 1 in separately
for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { utrie2_setRange32(fTrie,
utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE); rlRange->fStartChar, // Range start
rlRange->fEndChar, // Range end (inclusive)
rlRange->fNum, // value for range
TRUE, // Overwrite previously written values
fStatus);
} }
} }
//----------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------
// //
// getTrieSize() Return the size that will be required to serialize the Trie. // getTrieSize() Return the size that will be required to serialize the Trie.
// //
//----------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------
int32_t RBBISetBuilder::getTrieSize() /*const*/ { int32_t RBBISetBuilder::getTrieSize() {
fTrieSize = utrie_serialize(fTrie, if (U_FAILURE(*fStatus)) {
NULL, // Buffer return 0;
0, // Capacity }
getFoldedRBBIValue, utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
TRUE, // Reduce to 16 bits fTrieSize = utrie2_serialize(fTrie,
fStatus); NULL, // Buffer
0, // Capacity
fStatus);
if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
*fStatus = U_ZERO_ERROR;
}
// RBBIDebugPrintf("Trie table size is %d\n", trieSize); // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
return fTrieSize; return fTrieSize;
} }
@ -327,12 +295,10 @@ int32_t RBBISetBuilder::getTrieSize() /*const*/ {
// //
//----------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------
void RBBISetBuilder::serializeTrie(uint8_t *where) { void RBBISetBuilder::serializeTrie(uint8_t *where) {
utrie_serialize(fTrie, utrie2_serialize(fTrie,
where, // Buffer where, // Buffer
fTrieSize, // Capacity fTrieSize, // Capacity
getFoldedRBBIValue, fStatus);
TRUE, // Reduce to 16 bits
fStatus);
} }
//------------------------------------------------------------------------ //------------------------------------------------------------------------

Просмотреть файл

@ -13,12 +13,14 @@
#define RBBISETB_H #define RBBISETB_H
#include "unicode/utypes.h" #include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uobject.h" #include "unicode/uobject.h"
#include "rbbirb.h" #include "rbbirb.h"
#include "utrie2.h"
#include "uvector.h" #include "uvector.h"
struct UNewTrie;
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// //
@ -109,8 +111,8 @@ private:
RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
UNewTrie *fTrie; // The mapping TRIE that is the end result of processing UTrie2 *fTrie; // The mapping TRIE that is the end result of processing
uint32_t fTrieSize; // the Unicode Sets. uint32_t fTrieSize; // the Unicode Sets.
// Groups correspond to character categories - // Groups correspond to character categories -
// groups of ranges that are in the same original UnicodeSets. // groups of ranges that are in the same original UnicodeSets.
@ -129,4 +131,7 @@ private:
U_NAMESPACE_END U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif #endif

Просмотреть файл

@ -11,37 +11,37 @@
#ifdef INCLUDED_FROM_UBIDI_PROPS_C #ifdef INCLUDED_FROM_UBIDI_PROPS_C
static const UVersionInfo ubidi_props_dataVersion={9,0,0,0}; static const UVersionInfo ubidi_props_dataVersion={0xa,0,0,0};
static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6060,0x5ce8,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x5802b6}; static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6028,0x5cb0,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x6302b6};
static const uint16_t ubidi_props_trieIndex[11884]={ static const uint16_t ubidi_props_trieIndex[11856]={
0x36a,0x372,0x37a,0x382,0x39a,0x3a2,0x3aa,0x3b2,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x38a,0x392, 0x36a,0x372,0x37a,0x382,0x39a,0x3a2,0x3aa,0x3b2,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x38a,0x392,
0x38a,0x392,0x38a,0x392,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,0x3dc,0x3e4,0x3ec,0x3f4,0x3ef,0x3f7, 0x38a,0x392,0x38a,0x392,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,0x3dc,0x3e4,0x3ec,0x3f4,0x3ef,0x3f7,
0x38a,0x392,0x38a,0x392,0x3ff,0x407,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x40d,0x415,0x41d,0x425, 0x38a,0x392,0x38a,0x392,0x3ff,0x407,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x40d,0x415,0x41d,0x425,
0x42d,0x435,0x43d,0x445,0x44b,0x453,0x45b,0x463,0x46b,0x473,0x479,0x481,0x489,0x491,0x499,0x4a1, 0x42d,0x435,0x43d,0x445,0x44b,0x453,0x45b,0x463,0x46b,0x473,0x479,0x481,0x489,0x491,0x499,0x4a1,
0x4ad,0x4a9,0x4b5,0x4bd,0x41f,0x4cd,0x4d5,0x4c5,0x4dd,0x4df,0x4e7,0x4ef,0x4f7,0x4f8,0x500,0x508, 0x4ad,0x4a9,0x4b5,0x4bd,0x41f,0x4cd,0x4d5,0x4c5,0x4dd,0x4df,0x4e7,0x4ef,0x4f7,0x4f8,0x500,0x508,
0x510,0x4f8,0x518,0x51d,0x510,0x4f8,0x525,0x52d,0x4f7,0x535,0x53d,0x4ef,0x542,0x38a,0x54a,0x54e, 0x510,0x4f8,0x518,0x51d,0x510,0x4f8,0x525,0x52d,0x4f7,0x535,0x53d,0x4ef,0x542,0x38a,0x54a,0x54e,
0x556,0x557,0x55f,0x567,0x4f7,0x56f,0x577,0x4ef,0x57f,0x581,0x500,0x4ef,0x38a,0x38a,0x589,0x38a, 0x556,0x557,0x55f,0x567,0x4f7,0x56f,0x577,0x4ef,0x401,0x57b,0x500,0x4ef,0x38a,0x38a,0x583,0x38a,
0x38a,0x58f,0x597,0x38a,0x38a,0x59b,0x5a3,0x38a,0x5a7,0x5ae,0x38a,0x5b6,0x5be,0x5c5,0x541,0x38a, 0x38a,0x589,0x591,0x38a,0x38a,0x595,0x59d,0x38a,0x5a1,0x5a8,0x38a,0x5b0,0x5b8,0x5bf,0x541,0x38a,
0x38a,0x5cd,0x5d5,0x5dd,0x5e5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x5c7,0x5cf,0x5d7,0x5df,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5ed,0x38a,0x5f5,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5e7,0x38a,0x5ef,0x38a,0x38a,0x38a,
0x5fd,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x5f7,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x605,0x38a,0x38a,0x38a,0x60d,0x60d,0x504,0x504,0x38a,0x613,0x61b,0x5f5, 0x38a,0x38a,0x38a,0x38a,0x5ff,0x38a,0x38a,0x38a,0x607,0x607,0x504,0x504,0x38a,0x60d,0x615,0x5ef,
0x631,0x623,0x623,0x639,0x640,0x629,0x38a,0x38a,0x38a,0x648,0x650,0x38a,0x38a,0x38a,0x652,0x65a, 0x62b,0x61d,0x61d,0x633,0x63a,0x623,0x38a,0x38a,0x38a,0x642,0x64a,0x38a,0x38a,0x38a,0x64c,0x654,
0x662,0x38a,0x669,0x671,0x38a,0x679,0x38a,0x38a,0x681,0x684,0x542,0x68c,0x401,0x694,0x38a,0x69b, 0x65c,0x38a,0x663,0x66b,0x38a,0x673,0x38a,0x38a,0x534,0x67b,0x542,0x683,0x401,0x68b,0x38a,0x692,
0x38a,0x6a0,0x38a,0x38a,0x38a,0x38a,0x6a6,0x6ae,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x6b6, 0x38a,0x697,0x38a,0x38a,0x38a,0x38a,0x69d,0x6a5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x6ad,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6be,0x6c6,0x6ca, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6b5,0x6bd,0x6c1,
0x6e2,0x6e8,0x6d2,0x6da,0x6f0,0x6f8,0x6fc,0x5c8,0x704,0x70c,0x714,0x38a,0x71c,0x65a,0x65a,0x65a, 0x6d9,0x6df,0x6c9,0x6d1,0x6e7,0x6ef,0x6f3,0x5c2,0x6fb,0x703,0x70b,0x38a,0x713,0x654,0x654,0x654,
0x72c,0x734,0x73c,0x744,0x749,0x751,0x759,0x724,0x761,0x769,0x38a,0x76f,0x776,0x65a,0x65a,0x65a, 0x723,0x72b,0x733,0x73b,0x740,0x748,0x750,0x71b,0x758,0x760,0x38a,0x766,0x76d,0x654,0x654,0x654,
0x65a,0x56d,0x77c,0x65a,0x784,0x38a,0x38a,0x657,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, 0x654,0x56d,0x773,0x654,0x77b,0x38a,0x38a,0x651,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,
0x65a,0x65a,0x65a,0x65a,0x65a,0x78c,0x65a,0x65a,0x65a,0x65a,0x65a,0x792,0x65a,0x65a,0x79a,0x7a2, 0x654,0x654,0x654,0x654,0x654,0x783,0x654,0x654,0x654,0x654,0x654,0x789,0x654,0x654,0x791,0x799,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a,0x65a,0x65a,0x7b2,0x7b9,0x7c1,0x7aa, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x654,0x654,0x654,0x7a9,0x7b0,0x7b8,0x7a1,
0x7d1,0x7d9,0x7e1,0x7e8,0x7f0,0x7f8,0x7ff,0x7c9,0x65a,0x65a,0x65a,0x807,0x80d,0x813,0x81b,0x820, 0x7c8,0x7d0,0x7d8,0x7df,0x7e7,0x7ef,0x7f6,0x7c0,0x654,0x654,0x654,0x7fe,0x804,0x80a,0x812,0x817,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x827,0x38a,0x38a,0x38a,0x82f,0x38a,0x38a,0x38a,0x3d8, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x81e,0x38a,0x38a,0x38a,0x826,0x38a,0x38a,0x38a,0x3d8,
0x837,0x83f,0x76c,0x38a,0x842,0x65a,0x65a,0x65d,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x849,0x84f, 0x82e,0x836,0x763,0x38a,0x839,0x654,0x654,0x657,0x654,0x654,0x654,0x654,0x654,0x654,0x840,0x846,
0x85f,0x857,0x38a,0x38a,0x867,0x5fd,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x82e, 0x856,0x84e,0x38a,0x38a,0x85e,0x5f7,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x825,
0x3bf,0x38a,0x86f,0x877,0x38a,0x87f,0x820,0x38a,0x38a,0x38a,0x38a,0x887,0x38a,0x38a,0x652,0x3b0, 0x3bf,0x38a,0x866,0x86e,0x38a,0x876,0x817,0x38a,0x38a,0x38a,0x38a,0x87e,0x38a,0x38a,0x64c,0x3b0,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -54,7 +54,7 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x654,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -98,10 +98,10 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x86f,0x65a,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x866,0x654,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x88e,0x38a,0x38a,0x893,0x557,0x38a,0x38a,0x5a9,0x65a,0x651,0x38a,0x38a,0x89b,0x38a,0x38a,0x38a, 0x885,0x38a,0x38a,0x88a,0x557,0x38a,0x38a,0x5a3,0x654,0x64b,0x38a,0x38a,0x892,0x38a,0x38a,0x38a,
0x8a3,0x8aa,0x623,0x8b2,0x38a,0x38a,0x8b9,0x8c1,0x38a,0x8c8,0x8cf,0x38a,0x4dd,0x8d4,0x38a,0x4f6, 0x89a,0x8a1,0x61d,0x8a9,0x38a,0x38a,0x579,0x8b1,0x38a,0x8b8,0x8bf,0x38a,0x4dd,0x8c4,0x38a,0x4f6,
0x38a,0x8dc,0x8e4,0x4f8,0x38a,0x8e8,0x4f7,0x8f0,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8f7, 0x38a,0x8cc,0x8d4,0x4f8,0x38a,0x8d8,0x4f7,0x8e0,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8e7,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -141,9 +141,9 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x90b,0x8ff,0x903,0x489,0x489,0x489,0x489,0x489, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8fb,0x8ef,0x8f3,0x489,0x489,0x489,0x489,0x489,
0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x913,0x489,0x489,0x489,0x489,0x91b,0x91f, 0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x903,0x489,0x489,0x489,0x489,0x90b,0x90f,
0x927,0x92f,0x933,0x93b,0x489,0x489,0x489,0x93f,0x947,0x37a,0x94f,0x957,0x38a,0x38a,0x38a,0x95f, 0x917,0x91f,0x923,0x92b,0x489,0x489,0x489,0x92f,0x937,0x37a,0x93f,0x947,0x38a,0x38a,0x38a,0x94f,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0xe28,0xe28,0xe68,0xea8,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xee0,0xf20,0xf60,0xf70,0xfb0,0xfbc, 0xe28,0xe28,0xe68,0xea8,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xee0,0xf20,0xf60,0xf70,0xfb0,0xfbc,
@ -180,61 +180,61 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17,
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17, 0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x967,0x38a,0x65a,0x65a,0x96f,0x5fd,0x38a,0x4f0, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x957,0x38a,0x654,0x654,0x95f,0x5f7,0x38a,0x4f0,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x977,0x38a,0x38a,0x38a,0x97e,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x967,0x38a,0x38a,0x38a,0x96e,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x986,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x976,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x98e,0x992,0x41f,0x41f,0x41f,0x41f,0x9a2,0x99a,0x41f,0x9aa,0x41f,0x41f,0x9b2,0x9b8,0x41f,0x41f, 0x97e,0x982,0x41f,0x41f,0x41f,0x41f,0x992,0x98a,0x41f,0x99a,0x41f,0x41f,0x9a2,0x9a8,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x9c0,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x9b0,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x4f7,0x8bb,0x9c8,0x9cf,0x401,0x9d2,0x38a,0x38a,0x4dd,0x9da,0x38a,0x9e0,0x401,0x9e5,0x60f,0x38a, 0x4f7,0x9b8,0x9bf,0x9c6,0x401,0x9c9,0x38a,0x38a,0x4dd,0x9d1,0x38a,0x9d7,0x401,0x9dc,0x609,0x38a,
0x38a,0x9ed,0x38a,0x38a,0x38a,0x38a,0x82f,0x9f5,0x401,0x4f8,0x556,0x9fc,0x38a,0x38a,0x38a,0x38a, 0x38a,0x9e4,0x38a,0x38a,0x38a,0x38a,0x826,0x9ec,0x401,0x4f8,0x556,0x9f3,0x38a,0x38a,0x38a,0x38a,
0x38a,0x8bb,0xa04,0x38a,0x38a,0xa08,0xa10,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa14,0xa1c,0x38a, 0x38a,0x9b8,0x9fb,0x38a,0x38a,0x9ff,0xa07,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa0b,0xa13,0x38a,
0x38a,0xa24,0x556,0xa2c,0x38a,0xa32,0x38a,0x38a,0x5ed,0xa3a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0xa1b,0x556,0xa23,0x38a,0xa29,0x38a,0x38a,0x5e7,0xa31,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa42,0xa46,0xa4e,0x38a,0xa55,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa39,0xa3d,0xa45,0x38a,0xa4c,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa5c,0x38a,0x38a,0xa64,0xa6a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa53,0x38a,0x38a,0xa61,0xa5b,
0x38a,0x38a,0x38a,0xa70,0xa78,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0xa69,0xa71,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa7c,0x38a,0xa82,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa75,0x38a,0xa7b,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0xa88,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0xa81,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x511,0xa90,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x511,0xa89,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0xa97,0xa9f,0xaa5,0x38a,0x38a,0x65a,0x65a,0xaad,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a, 0x38a,0x38a,0xa90,0xa98,0xa9e,0x38a,0x38a,0x654,0x654,0xaa6,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,
0x65a,0xab5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x654,0xaae,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xabb,0x38a,0xac2, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xab4,0x38a,0xabb,
0x38a,0xabe,0x38a,0xac5,0x38a,0xacd,0xad1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0xab7,0x38a,0xabe,0x38a,0xac6,0xaca,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xad9,0x3d8,0xae0,0xae7,0xaef,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xad2,0x3d8,0xad9,0xae0,0xae8,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xaf7,0xaff,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xaf0,0xaf8,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xb07,0x41f,0xb0f, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xb00,0x41f,0xb08,
0xb0f,0xb16,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0xb08,0xb0f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xb1e,0x41f, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xb17,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x65a,0xb26,0x65a,0x65a,0x65d,0xb2b,0xb2f,0x849,0xb37, 0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x654,0xb1f,0x654,0x654,0x657,0xb24,0xb28,0x840,0xb30,
0x38a,0x38a,0xb3d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x76d,0x38a,0x38a,0x38a,0x38a,0x65a, 0x38a,0x38a,0xb36,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x764,0x38a,0x38a,0x38a,0x38a,0x654,
0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, 0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,
0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0xb45,0xb4d,0x65a, 0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0xb3e,0xb46,0x654,
0x65a,0x65a,0x65d,0x65a,0x65a,0xb45,0x38a,0xb26,0x65a,0xb55,0x65a,0xb5d,0x84b,0x38a,0x38a,0xb26, 0x654,0x654,0x657,0x654,0x654,0xb3e,0x38a,0xb1f,0x654,0xb4e,0x654,0xb56,0x842,0x38a,0x38a,0xb1f,
0xb61,0xb69,0x65f,0x65c,0x38a,0xb71,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0xb5a,0xb62,0x659,0x656,0x38a,0xb6a,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb72,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0xb89,0xb81,0xb81,0xb81,0xb8a,0xb8a,0xb8a,0xb8a,0x3d8, 0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb72,0xb82,0xb7a,0xb7a,0xb7a,0xb83,0xb83,0xb83,0xb83,0x3d8,
0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0xb92,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0xb8b,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4,4,4,0xa,0xa, 0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4,4,4,0xa,0xa,
0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2,2,2,2,2, 0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2,2,2,2,2,
@ -319,7 +319,7 @@ static const uint16_t ubidi_props_trieIndex[11884]={
1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1, 1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,
0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41, 0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x81,0x41,1,1,1,0xb1,0xb1,0xb1, 0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x81,0x41,1,1,1,0xb1,0xb1,0xb1,
1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, 1,1,1,1,0x4d,0xd,0x4d,0x4d,0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,
0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1,1,1,1,1, 0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0xb1,0xb1,5,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 1,1,1,1,0xb1,0xb1,5,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
@ -348,8 +348,8 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0, 0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0x11,0x11, 0,0,0,0,0,4,0,0,0,0,0,0,0,0,0xb1,0xb1,
0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, 0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0, 0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
@ -367,215 +367,211 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,0,0,0,0, 0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,0,0,0,0,
0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0, 0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x11,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x11, 0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,
0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, 0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0, 0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, 0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0, 0,0,0,4,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, 0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, 0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0, 0xb1,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, 0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,
0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,
0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0xb1,0,0xb1,0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0, 0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x310a, 0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,
0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,
0xb1,0xb1,0x12,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,
0xa,0,0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,
0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x11,0x11,
0x11,0x11,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,
0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,
2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,
9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,
4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
2,2,2,2,2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa, 0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0,0,0,4,0, 0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xa,0xa,0,0,0,0,
0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,
0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,
0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,
0x100a,0x100a,0x100a,0xa,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,
0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,
0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,
0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, 0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,
0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,
0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,
0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,
0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,
0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,
0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,
0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,
0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,
0x100a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,
0xf00a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1,0xb1,0xb1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xa,0xa,0x300a,0xf00a, 0,0,0,0,0,0,0,0x310a,0xf20a,0,0,0,0,0,0,0,
0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a, 0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,4,
0,0xb1,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,0xb1,0xb1,0x12,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0xb1,0xb1,0,0,0,0,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0,0,0,0,0xa,0,0,0,0xa,0xa,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,
0,0,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,
0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0xb1,0xb1,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0, 0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa, 0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0, 0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0, 0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,
0,0,0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0, 0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0, 0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,0,0,0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, 0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa, 0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0, 0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0xb1,0xb1,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,0x813,0x816,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,2,2,3,3,
0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,9,9,9,0xb2,
0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,4,4,4,4,
4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0xa,
0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
0,0xa,0,0xa,0,0xa,0,0,0,0,4,0,0,0,0,0,
0,0,0,0,0,0,0xa,0xa,0,0,0,0,0x100a,0xa,0xa,0xa,
0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x900a,0x900a,
0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,0xa,0x100a,0x100a,0x100a,
0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,0xa,0x100a,3,4,
0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,
0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0x100a,0xa,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,0x300a,0xf00a,0xa,0x500a,
0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,
0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,
0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,
0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,
0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,
0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,
0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,
0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,
0xa,0xa,0xa,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,
0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,
0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,
0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0xb1,0,
0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, 0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
@ -642,123 +638,125 @@ static const uint16_t ubidi_props_trieIndex[11884]={
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,
0,0xb1,0xb1,0,0,0xa0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1,0,0,0,0,
0,0,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, 0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, 0,0xa0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0, 0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0xb1,
0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0,
0,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,
0x11,0,0,0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0x11,
0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0,
0,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0x11,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0x11,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x11,0x11,0x11,0x11,0x11,0x11,0,0,0,0x11,0,0x11,0x11,0,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb2,0xb2,0xb2,0xb2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb2, 0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, 0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,
0,0,0,0,0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
2,2,2,2,2,2,2,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0, 0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0, 0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, 0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, 0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0,
0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb2,0xb2,0xb2,0xb2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x100a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,
0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
2,2,2,2,2,2,2,2,2,2,2,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, 0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, 0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0,0,0,0 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0,0,0,0
}; };
static const uint32_t ubidi_props_mirrors[26]={ static const uint32_t ubidi_props_mirrors[26]={
@ -803,7 +801,7 @@ static const uint8_t ubidi_props_jgArray[672]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0x5d,0x5a,0x60,0x63,0x5e,0x5f,0x59,0x61,0x5b,0x5c,0x62,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -828,13 +826,13 @@ static const UBiDiProps ubidi_props_singleton={
ubidi_props_trieIndex+3496, ubidi_props_trieIndex+3496,
NULL, NULL,
3496, 3496,
8388, 8360,
0x1a0, 0x1a0,
0xe28, 0xe28,
0x0, 0x0,
0x0, 0x0,
0x110000, 0x110000,
0x2e68, 0x2e4c,
NULL, 0, FALSE, FALSE, 0, NULL NULL, 0, FALSE, FALSE, 0, NULL
}, },
{ 2,2,0,0 } { 2,2,0,0 }

Просмотреть файл

@ -961,6 +961,7 @@ ucase_toFullLower(UChar32 c,
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/ */
*pString=nullptr;
return 0; /* remove the dot (continue without output) */ return 0; /* remove the dot (continue without output) */
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) { } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
/* /*
@ -1059,6 +1060,7 @@ toUpperOrTitle(UChar32 c,
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
*/ */
*pString=nullptr;
return 0; /* remove the dot (continue without output) */ return 0; /* remove the dot (continue without output) */
} else { } else {
/* no known conditional special case mapping, use a normal mapping */ /* no known conditional special case mapping, use a normal mapping */

Просмотреть файл

@ -61,7 +61,7 @@ enum {
/** /**
* Bit mask for getting just the options from a string compare options word * Bit mask for getting just the options from a string compare options word
* that are relevant for case-insensitive string comparison. * that are relevant for case-insensitive string comparison.
* See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
* @internal * @internal
*/ */
#define _STRCASECMP_OPTIONS_MASK 0xffff #define _STRCASECMP_OPTIONS_MASK 0xffff
@ -69,10 +69,16 @@ enum {
/** /**
* Bit mask for getting just the options from a string compare options word * Bit mask for getting just the options from a string compare options word
* that are relevant for case folding (of a single string or code point). * that are relevant for case folding (of a single string or code point).
* See uchar.h. *
* Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* It is conceivable that at some point we might use one more bit for using uppercase sharp s.
* It is conceivable that at some point we might want the option to use only simple case foldings
* when operating on strings.
*
* See stringoptions.h.
* @internal * @internal
*/ */
#define _FOLD_CASE_OPTIONS_MASK 0xff #define _FOLD_CASE_OPTIONS_MASK 7
/* single-code point functions */ /* single-code point functions */

Просмотреть файл

@ -11,36 +11,36 @@
#ifdef INCLUDED_FROM_UCASE_CPP #ifdef INCLUDED_FROM_UCASE_CPP
static const UVersionInfo ucase_props_dataVersion={9,0,0,0}; static const UVersionInfo ucase_props_dataVersion={0xa,0,0,0};
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x6c6c,0x5a10,0x79c,0x172,0,0,0,0,0,0,0,0,0,0,3}; static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x6dfc,0x5ba0,0x79c,0x172,0,0,0,0,0,0,0,0,0,0,3};
static const uint16_t ucase_props_trieIndex[11520]={ static const uint16_t ucase_props_trieIndex[11720]={
0x327,0x32f,0x337,0x33f,0x34d,0x355,0x35d,0x365,0x36d,0x375,0x37c,0x384,0x38c,0x394,0x39c,0x3a4, 0x327,0x32f,0x337,0x33f,0x34d,0x355,0x35d,0x365,0x36d,0x375,0x37c,0x384,0x38c,0x394,0x39c,0x3a4,
0x3aa,0x3b2,0x3ba,0x3c2,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3fa,0x402,0x40a,0x412,0x41a,0x422, 0x3aa,0x3b2,0x3ba,0x3c2,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3fa,0x402,0x40a,0x412,0x41a,0x422,
0x42a,0x432,0x43a,0x442,0x44a,0x452,0x45a,0x462,0x45e,0x466,0x46b,0x473,0x47a,0x482,0x48a,0x492, 0x42a,0x432,0x43a,0x442,0x44a,0x452,0x45a,0x462,0x45e,0x466,0x46b,0x473,0x47a,0x482,0x48a,0x492,
0x49a,0x4a2,0x4aa,0x4b2,0x346,0x34e,0x4b7,0x4bf,0x4c4,0x4cc,0x4d4,0x4dc,0x4db,0x4e3,0x4e8,0x4f0, 0x49a,0x4a2,0x4aa,0x4b2,0x346,0x34e,0x4b7,0x4bf,0x4c4,0x4cc,0x4d4,0x4dc,0x4db,0x4e3,0x4e8,0x4f0,
0x4f7,0x4fe,0x502,0x346,0x346,0x327,0x512,0x50a,0x51a,0x51c,0x524,0x52c,0x530,0x531,0x539,0x541, 0x4f7,0x4fe,0x502,0x346,0x346,0x327,0x512,0x50a,0x51a,0x51c,0x524,0x52c,0x530,0x531,0x539,0x541,
0x549,0x531,0x551,0x556,0x549,0x531,0x55e,0x541,0x530,0x562,0x56a,0x541,0x56f,0x346,0x577,0x346, 0x549,0x531,0x551,0x556,0x549,0x531,0x55e,0x566,0x530,0x56e,0x576,0x541,0x57b,0x346,0x583,0x346,
0x4a1,0x4dd,0x57f,0x541,0x530,0x562,0x586,0x541,0x530,0x346,0x539,0x541,0x346,0x346,0x58c,0x346, 0x4a1,0x4dd,0x58b,0x541,0x530,0x56e,0x592,0x541,0x59a,0x59c,0x539,0x541,0x346,0x346,0x5a4,0x346,
0x346,0x592,0x599,0x346,0x346,0x59d,0x5a5,0x346,0x5a9,0x5b0,0x346,0x5b7,0x5bf,0x5c6,0x5ce,0x346, 0x346,0x5aa,0x5b1,0x346,0x346,0x5b5,0x5bd,0x346,0x5c1,0x5c8,0x346,0x5cf,0x5d7,0x5de,0x5e6,0x346,
0x346,0x5d3,0x5db,0x5e3,0x5eb,0x5f3,0x5fb,0x490,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x5eb,0x5f3,0x5fb,0x603,0x60b,0x613,0x490,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x5ff,0x346,0x346,0x60f,0x617,0x607, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x617,0x346,0x346,0x627,0x62f,0x61f,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x61f,0x61f,0x53d,0x53d,0x346,0x625,0x62d,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x637,0x637,0x53d,0x53d,0x346,0x63d,0x645,0x346,
0x635,0x346,0x63d,0x346,0x548,0x643,0x346,0x346,0x346,0x64b,0x346,0x346,0x346,0x346,0x346,0x346, 0x64d,0x346,0x655,0x346,0x548,0x65b,0x346,0x346,0x346,0x663,0x346,0x346,0x346,0x346,0x346,0x346,
0x652,0x346,0x659,0x661,0x346,0x669,0x346,0x346,0x671,0x674,0x67c,0x682,0x68a,0x692,0x346,0x699, 0x66a,0x346,0x671,0x679,0x346,0x681,0x346,0x346,0x56d,0x689,0x691,0x697,0x59a,0x69f,0x346,0x6a6,
0x346,0x69e,0x346,0x6a4,0x6ac,0x346,0x6b0,0x6b8,0x6c0,0x6c5,0x6c8,0x6d0,0x6e0,0x6d8,0x6f0,0x6e8, 0x346,0x6ab,0x346,0x6b1,0x6b9,0x346,0x6bd,0x6c5,0x6cd,0x6d2,0x6d5,0x6dd,0x6ed,0x6e5,0x6fd,0x6f5,
0x36d,0x6f8,0x36d,0x700,0x703,0x36d,0x70b,0x36d,0x713,0x71b,0x723,0x72b,0x733,0x73b,0x743,0x74b, 0x36d,0x705,0x36d,0x70d,0x710,0x36d,0x718,0x36d,0x720,0x728,0x730,0x738,0x740,0x748,0x750,0x758,
0x753,0x75a,0x346,0x762,0x76a,0x346,0x772,0x77a,0x782,0x78a,0x792,0x79a,0x7a2,0x346,0x346,0x346, 0x760,0x767,0x346,0x76f,0x777,0x346,0x77f,0x787,0x78f,0x797,0x79f,0x7a7,0x7af,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x7a5,0x7ab,0x7b1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x7b2,0x7b8,0x7be,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x7b9,0x7be,0x7c2,0x7ca,0x36d,0x36d,0x36d,0x7d2,0x7da,0x7e2,0x346,0x7e7,0x346,0x346,0x346,0x7ef, 0x7c6,0x7cb,0x7cf,0x7d7,0x36d,0x36d,0x36d,0x7df,0x7e7,0x7ef,0x346,0x7f4,0x346,0x346,0x346,0x7fc,
0x346,0x63a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x652,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x52f,0x7f7,0x346,0x346,0x7fe,0x346,0x346,0x806,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x52f,0x804,0x346,0x346,0x80b,0x346,0x346,0x813,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
@ -96,12 +96,12 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x80e,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x81b,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x6a4,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x6b1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x814,0x346,0x81c,0x821,0x829,0x346,0x346,0x831,0x839,0x841,0x36d,0x846,0x84e,0x854,0x346,0x85a, 0x821,0x346,0x829,0x82e,0x836,0x346,0x346,0x83e,0x846,0x84e,0x36d,0x853,0x85b,0x861,0x346,0x867,
0x862,0x548,0x346,0x346,0x346,0x346,0x869,0x871,0x346,0x878,0x87f,0x346,0x51a,0x884,0x88c,0x548, 0x86f,0x548,0x346,0x346,0x346,0x346,0x876,0x87e,0x346,0x885,0x88c,0x346,0x51a,0x891,0x899,0x548,
0x346,0x892,0x89a,0x89e,0x346,0x8a6,0x8ae,0x8b6,0x346,0x8bc,0x8c0,0x8c8,0x8d8,0x8d0,0x346,0x8e0, 0x346,0x89f,0x8a7,0x8ab,0x346,0x8b3,0x8bb,0x8c3,0x346,0x8c9,0x8cd,0x8d5,0x8e5,0x8dd,0x346,0x8ed,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
@ -141,15 +141,15 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x8e8,0x346,0x346,0x346,0x346,0x8f0,0x68a,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x8f5,0x346,0x346,0x346,0x346,0x8fd,0x59a,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x8f5,0x8fd,0x901,0x346,0x346,0x346,0x346,0x329,0x32f,0x909,0x911,0x918,0x4dd,0x346,0x346,0x920, 0x902,0x90a,0x90e,0x346,0x346,0x346,0x346,0x329,0x32f,0x916,0x91e,0x925,0x4dd,0x346,0x346,0x92d,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0xd1c,0xd1c,0xd34,0xd74,0xdb4,0xdf0,0xe30,0xe70,0xea8,0xee8,0xf28,0xf68,0xfa8,0xfe8,0x1028,0x1068, 0xd1c,0xd1c,0xd34,0xd74,0xdb4,0xdf0,0xe30,0xe70,0xea8,0xee8,0xf28,0xf68,0xfa8,0xfe8,0x1028,0x1068,
0x10a8,0x10e8,0x1128,0x1168,0x1178,0x11ac,0x11e8,0x1228,0x1268,0x12a8,0xd18,0x12dc,0x1310,0x1350,0x136c,0x13a0, 0x10a8,0x10e8,0x1128,0x1168,0x1178,0x11ac,0x11e8,0x1228,0x1268,0x12a8,0xd18,0x12dc,0x1310,0x1350,0x136c,0x13a0,
0x9e1,0xa11,0xa51,0xa8c,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xab5,0x188,0x188, 0x9e1,0xa11,0xa51,0xa8c,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xab7,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xaf5,0x188,0x188,0xb2a,0xb69,0xba9,0xbe3,0xc1a,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xaf7,0x188,0x188,0xb2c,0xb6b,0xbab,0xbe5,0xc1c,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
@ -174,50 +174,50 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0xc5a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0xc5c,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x63e,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x928,0x346,0x346,0x346,0x92b,0x346,0x346,0x346, 0x656,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x935,0x346,0x346,0x346,0x938,0x346,0x346,0x346,
0x346,0x933,0x939,0x93d,0x346,0x346,0x941,0x945,0x94b,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x940,0x946,0x94a,0x346,0x346,0x94e,0x952,0x958,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x953,0x957,0x346,0x346,0x346,0x346,0x346,0x95f,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x960,0x964,0x346,0x346,0x346,0x346,0x346,0x96c,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x967,0x96b,0x973,0x977,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x974,0x978,0x980,0x984,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x530,0x97c,0x983,0x985,0x68a,0x98d,0x346,0x346,0x995,0x99c,0x346,0x988,0x68a,0x9a2,0x9aa, 0x346,0x530,0x989,0x990,0x59b,0x59a,0x994,0x346,0x346,0x99c,0x9a3,0x346,0x9a9,0x59a,0x9ae,0x9b6,
0x346,0x346,0x9af,0x346,0x346,0x346,0x346,0x329,0x9b7,0x68a,0x531,0x9bf,0x9c6,0x346,0x346,0x346, 0x346,0x346,0x9bb,0x346,0x346,0x346,0x346,0x329,0x9c3,0x59a,0x531,0x9cb,0x9d2,0x346,0x346,0x346,
0x346,0x346,0x97c,0x9ce,0x346,0x346,0x9d2,0x9da,0x346,0x346,0x346,0x346,0x346,0x346,0x9de,0x9e6, 0x346,0x346,0x989,0x9da,0x346,0x346,0x9de,0x9e6,0x346,0x346,0x346,0x346,0x346,0x346,0x9ea,0x9f2,
0x346,0x346,0x9ee,0x4a1,0x346,0x346,0x9f6,0x346,0x346,0x9fc,0xa04,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x9fa,0x4a1,0x346,0x346,0xa02,0x346,0x346,0xa08,0xa10,0x346,0x346,0x346,0x346,0x346,
0x346,0xa0c,0xa14,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0xa18,0xa20,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa28,0xa2c,0xa34,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa1c,0x346,0x346, 0xa3b,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa42,0x346,0x346,
0x8f0,0xa24,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x8fd,0xa4a,0x346,0x346,0x346,0xa50,0xa58,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa2a,0x346,0xa30,0x671, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa5c,0x346,
0xa62,0x56d,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0xa68,0x346,0x346,0x59a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0xa36,0x346,0x346,0x4a1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa70,0x56d,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa3e,0x671,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa78,0xa80,0xa86,0x346,0x346,0x346,0x346,0xa8e,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa96,0xa9e,0xaa3,0xaa9,
0xab1,0xab9,0xac1,0xa9a,0xac9,0xad1,0xad9,0xae0,0xa9b,0xa96,0xa9e,0xa99,0xaa9,0xa9c,0xa97,0xae8,
0xa9a,0xaf0,0xaf8,0xb00,0xb07,0xaf3,0xafb,0xb03,0xb0a,0xaf6,0xb12,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x846,0xb1a,0x846,0xb21,0xb28,
0xb30,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0xa46,0xa4e,0xa54,0x346,0x346,0x346,0x346,0xa5c,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb38,0xb40,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa64,0xa6c,0xa71,0xa77,0xa7f,0xa87,
0xa8f,0xa68,0xa97,0xa9f,0xaa7,0xaae,0xa69,0xa64,0xa6c,0xa67,0xa77,0xa6a,0xa65,0xab6,0xa68,0xabe,
0xac6,0xace,0xad5,0xac1,0xac9,0xad1,0xad8,0xac4,0xae0,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x839,0xae8,0x839,0xaef,0xaf6,0xafe,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb06,0xb0e,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb12,0x346,0xb1a,0xb22,0xb29,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb44,0x346,0xb4c,0xb54,0xb5b,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0xa60,0xb31,0xb31,0xb37,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0xa92,0xb63,0xb63,0xb69,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x997,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x99e,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x530,0x839,0x839,0x839,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x530,0x846,0x846,0x846,
0x346,0x346,0x839,0x839,0x839,0x839,0x839,0x839,0x839,0xa3a,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0xa6c,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x326,0x326,0,0,0,0, 0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,4,0,0,0,0,0, 0,0,0,4,0,0,0,0,0,0,4,0,0,0,0,0,
@ -361,6 +361,9 @@ static const uint16_t ucase_props_trieIndex[11520]={
0,0,0,0,0,0,0,0,4,4,0,0,0,4,0,0, 0,0,0,0,0,0,0,0,4,4,0,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,4,4,4,4,4,0,4, 0,0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,
4,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0, 4,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x64,0,0,4,0,4,4,4,4,0,0,0, 0,0,0,0,0x64,0,0,4,0,4,4,4,4,0,0,0,
0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,4,0, 0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,4,0,
@ -371,6 +374,9 @@ static const uint16_t ucase_props_trieIndex[11520]={
0,0,4,4,4,0,4,4,4,0x64,0,0,0,0,0,0, 0,0,4,4,4,0,4,4,4,0x64,0,0,0,0,0,0,
0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,4,0, 0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,4,0,
0,0,0,0,4,0x64,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,4,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0,
0,0,4,4,4,0,4,0,0,0,0,0,0,0,0,0, 0,0,4,4,4,0,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,0,0,4,4,4,4, 0,0,0,0,0,0,0,0,0,4,0,0,4,4,4,4,
@ -428,211 +434,208 @@ static const uint16_t ucase_props_trieIndex[11520]={
4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44, 4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0, 0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44, 0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,
0x44,0x64,4,0,4,4,4,4,0,0,0,0,0,0,0,0, 0x44,0x64,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0x64,0,4,4,4,4,4,0,
0,0,0,0,0x64,0,4,4,4,4,4,0,4,0,0,0, 4,0,0,0,0,0,4,0,0x60,0,0,0,0,0,0,0,
0,0,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,4,4,0,0,
4,4,0x60,0x64,4,4,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,0,0,
0,4,0,4,4,4,0x60,0x60,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,4,0,0,4,0x64,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,0,0,
0x5cd9,0x5d39,0x5d99,0x5df9,0x5e59,0x5ef9,0x5f99,0x5ff9,0x6059,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x64,0x64,
0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0x64,0,0,
0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,0x25,5,5,5,5,5,5,5,5,1,1,1,1,1,
1,1,1,1,1,1,1,1,5,0x60b9,1,1,1,0x60f9,1,1,
5,5,5,5,0x25,5,5,5,0x25,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0x21,1,1,1,1,5,5,5,5,5,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x44,0x64,0x64,0x44,0x64,
0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x64,
0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x613a,0x61b9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x6239,0x6339,0x6439,0x6539,0x6639,0x6739,1,1,0x679a,1,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,0x6839,0x409,0x6939,0x409,
0x6a99,0x409,0x6bf9,0x409,0,0xfc0a,0,0xfc0a,0,0xfc0a,0,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x2509,0x2509,0x2b09,0x2b09,
0x2b09,0x2b09,0x3209,0x3209,0x4009,0x4009,0x3809,0x3809,0x3f09,0x3f09,0,0,0x6d59,0x6e39,0x6f19,0x6ff9,
0x70d9,0x71b9,0x7299,0x7379,0x745b,0x753b,0x761b,0x76fb,0x77db,0x78bb,0x799b,0x7a7b,0x7b59,0x7c39,0x7d19,0x7df9,
0x7ed9,0x7fb9,0x8099,0x8179,0x825b,0x833b,0x841b,0x84fb,0x85db,0x86bb,0x879b,0x887b,0x8959,0x8a39,0x8b19,0x8bf9,
0x8cd9,0x8db9,0x8e99,0x8f79,0x905b,0x913b,0x921b,0x92fb,0x93db,0x94bb,0x959b,0x967b,0x409,0x409,0x9759,0x9859,
0x9939,0,0x9a39,0x9b39,0xfc0a,0xfc0a,0xdb0a,0xdb0a,0x9c9b,4,0x9d79,4,4,4,0x9e19,0x9f19,
0x9ff9,0,0xa0f9,0xa1f9,0xd50a,0xd50a,0xd50a,0xd50a,0xa35b,4,4,4,0x409,0x409,0xa439,0xa599,
0,0,0xa739,0xa839,0xfc0a,0xfc0a,0xce0a,0xce0a,0,4,4,4,0x409,0x409,0xa999,0xaaf9,
0xac99,0x389,0xad99,0xae99,0xfc0a,0xfc0a,0xc80a,0xc80a,0xfc8a,4,4,4,0,0,0xaff9,0xb0f9,
0xb1d9,0,0xb2d9,0xb3d9,0xc00a,0xc00a,0xc10a,0xc10a,0xb53b,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,
0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4,
0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,
4,4,4,4,4,4,4,4,0,0x25,0,0,0,0,0,0,
0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,
0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,4,0x64,0x64,0x44,
0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0,
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0xb61a,0,
2,0,0xb69a,0xb71a,2,2,0,1,2,2,0xe0a,2,1,0,0,0,
0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1,
0x21,0x21,0,0,0,0,0xf209,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,
0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,
0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0,0,0,0x8a,0xff89,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,
0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xf309,0xf309,0xf309,0xf309,
0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0,0x8a,0xff89,0xb79a,0xb7da,0xb81a,0xb859,0xb899,0x8a,
0xff89,0x8a,0xff89,0x8a,0xff89,0xb8da,0xb91a,0xb95a,0xb99a,1,0x8a,0xff89,1,0x8a,0xff89,1,
1,1,1,1,0x25,5,0xb9da,0xba1a,0x8a,0xff89,0x8a,0xff89,1,0,0,0,
0,0,0,0x8a,0xff89,0x8a,0xff89,0x44,0x44,0x44,0x8a,0xff89,0,0,0,0,
0,0,0,0,0,0,0,0,0xba59,0xba99,0xbad9,0xbb19,0xbb59,0xbb99,0xbbd9,0xbc19,
0xbc59,0xbc99,0xbcd9,0xbd19,0xbd59,0xbd99,0xbdd9,0xbe19,0xbe59,0xbe99,0xbed9,0xbf19,0xbf59,0xbf99,0xbfd9,0xc019,
0xc059,0xc099,0xc0d9,0xc119,0xc159,0xc199,0xc1d9,0xc219,0xc259,0xc299,0xc2d9,0xc319,0xc359,0xc399,0,0xc3d9,
0,0,0,0,0,0xc419,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x60,0x60,0,4,4,4,
4,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x64,0x64,4,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc45a,0xc4d9,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0,0x44,4,4,4,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
5,5,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,5,1,1,1,1,1,1,1,
1,0x8a,0xff89,0x8a,0xff89,0xc55a,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
4,4,4,0x8a,0xff89,0xc59a,1,0,0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc5da,0xc61a,0xc65a,0xc69a,0xc6da,0,
0xc71a,0xc75a,0xc79a,0xc7da,0x8a,0xff89,0x8a,0xff89,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
5,5,1,0,0,0,0,0,0,0,4,0,0,0,0x64,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,0x64,0x64,0x64,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,0,0x60,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x64,0,0,4,4,4,4,0,0,4,0,0,0,
0x60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,4,4,4,4,4,4,0,0,4,4,0,0,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,0x64,0,0,0x44,
0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,0,0,0,0,0,4,4,0,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0xc819,1,1,1,1,1,1,1,4,5,5,5,5,
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0xc859,0xc8b9,0xc919,0xc979,0xc9d9,0xca39,0xca99,0xcaf9,0xcb59,0xcbb9,0xcc19,0xcc79,0xccd9,0xcd39,0xcd99,0xcdf9,
0xda59,0xdab9,0xdb19,0xdb79,0xdbd9,0xdc39,0xdc99,0xdcf9,0xdd59,0xddb9,0xde19,0xde79,0xded9,0xdf39,0xdf99,0xdff9,
0xe059,0xe0b9,0xe119,0xe179,0xe1d9,0xe239,0xe299,0xe2f9,0xe359,0xe3b9,0xe419,0xe479,0xe4d9,0xe539,0xe599,0xe5f9,
0xce59,0xceb9,0xcf19,0xcf79,0xcfd9,0xd039,0xd099,0xd0f9,0xd159,0xd1b9,0xd219,0xd279,0xd2d9,0xd339,0xd399,0xd3f9,
0xd459,0xd4b9,0xd519,0xd579,0xd5d9,0xd639,0xd699,0xd6f9,0xd759,0xd7b9,0xd819,0xd879,0xd8d9,0xd939,0xd999,0xd9f9,
0,0,0,0,0,4,0,0,4,0,0,0,0,0x64,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xe659,0xe759,0xe859,0xe959,0xeab9,0xec19,0xed59,0,0,0,0,0,0,0,0,0,
0,0,0,0xee99,0xef99,0xf099,0xf199,0xf299,0,0,0,0,0,0,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,
0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,0,0,4,0,0,0,0,0,0,
0,0,0,0,0,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0,
0,0,4,0,4,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,
0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0,0,0,0,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,4,4,4,
0,4,4,0,0,0,0,0,4,0x64,4,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,0,0,0,0,
0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
4,4,0,0,4,4,0x60,0x64,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,
4,4,0,0,0,4,0,4,4,4,0x60,0x60,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4, 0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,0,0,0x5cd9,0x5d39,0x5d99,0x5df9,0x5e59,0x5ef9,0x5f99,0x5ff9,0x6059,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,
0,0,0,0,0,0,0,4,4,4,4,0,0,0x64,0x64,0, 0x64,0x64,0x64,0x64,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,
0,4,0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0, 0,0x64,0,0,0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,0x25,5,5,5,5,5,5,5,5,1,
1,1,1,1,1,1,1,1,1,1,1,1,5,0x60b9,1,1,
1,0x60f9,1,1,5,5,5,5,0x25,5,5,5,0x25,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,0x21,1,1,1,1,5,
5,5,5,5,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0,0x44,
0x64,0x64,0x44,0x64,0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,
0x44,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x613a,0x61b9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x6239,0x6339,0x6439,0x6539,0x6639,0x6739,1,1,0x679a,1,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,
0x6839,0x409,0x6939,0x409,0x6a99,0x409,0x6bf9,0x409,0,0xfc0a,0,0xfc0a,0,0xfc0a,0,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x2509,0x2509,0x2b09,0x2b09,0x2b09,0x2b09,0x3209,0x3209,0x4009,0x4009,0x3809,0x3809,0x3f09,0x3f09,0,0,
0x6d59,0x6e39,0x6f19,0x6ff9,0x70d9,0x71b9,0x7299,0x7379,0x745b,0x753b,0x761b,0x76fb,0x77db,0x78bb,0x799b,0x7a7b,
0x7b59,0x7c39,0x7d19,0x7df9,0x7ed9,0x7fb9,0x8099,0x8179,0x825b,0x833b,0x841b,0x84fb,0x85db,0x86bb,0x879b,0x887b,
0x8959,0x8a39,0x8b19,0x8bf9,0x8cd9,0x8db9,0x8e99,0x8f79,0x905b,0x913b,0x921b,0x92fb,0x93db,0x94bb,0x959b,0x967b,
0x409,0x409,0x9759,0x9859,0x9939,0,0x9a39,0x9b39,0xfc0a,0xfc0a,0xdb0a,0xdb0a,0x9c9b,4,0x9d79,4,
4,4,0x9e19,0x9f19,0x9ff9,0,0xa0f9,0xa1f9,0xd50a,0xd50a,0xd50a,0xd50a,0xa35b,4,4,4,
0x409,0x409,0xa439,0xa599,0,0,0xa739,0xa839,0xfc0a,0xfc0a,0xce0a,0xce0a,0,4,4,4,
0x409,0x409,0xa999,0xaaf9,0xac99,0x389,0xad99,0xae99,0xfc0a,0xfc0a,0xc80a,0xc80a,0xfc8a,4,4,4,
0,0,0xaff9,0xb0f9,0xb1d9,0,0xb2d9,0xb3d9,0xc00a,0xc00a,0xc10a,0xc10a,0xb53b,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
4,0,0,4,0,0,4,4,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,0,4,4,4,4,4,4,4,4,4,4,0,0x25,0,0,
0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,
0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,
4,0x64,0x64,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
0,0,0,2,0,0,1,2,2,2,1,1,2,2,2,1,
0,2,0,0,0,2,2,2,2,2,0,0,0,0,0,0,
2,0,0xb61a,0,2,0,0xb69a,0xb71a,2,2,0,1,2,2,0xe0a,2,
1,0,0,0,0,1,0,0,1,1,2,2,0,0,0,0,
0,2,1,1,0x21,0x21,0,0,0,0,0xf209,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x80a,0x80a,0x80a,0x80a,
0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0xf809,0xf809,0xf809,0xf809,
0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0,0,0,0x8a,
0xff89,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xd0a,0xd0a,
0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,
0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,
0xf309,0xf309,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0,0x8a,0xff89,0xb79a,0xb7da,
0xb81a,0xb859,0xb899,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xb8da,0xb91a,0xb95a,0xb99a,1,0x8a,0xff89,
1,0x8a,0xff89,1,1,1,1,1,0x25,5,0xb9da,0xba1a,0x8a,0xff89,0x8a,0xff89,
1,0,0,0,0,0,0,0x8a,0xff89,0x8a,0xff89,0x44,0x44,0x44,0x8a,0xff89,
0,0,0,0,0,0,0,0,0,0,0,0,0xba59,0xba99,0xbad9,0xbb19,
0xbb59,0xbb99,0xbbd9,0xbc19,0xbc59,0xbc99,0xbcd9,0xbd19,0xbd59,0xbd99,0xbdd9,0xbe19,0xbe59,0xbe99,0xbed9,0xbf19,
0xbf59,0xbf99,0xbfd9,0xc019,0xc059,0xc099,0xc0d9,0xc119,0xc159,0xc199,0xc1d9,0xc219,0xc259,0xc299,0xc2d9,0xc319,
0xc359,0xc399,0,0xc3d9,0,0,0,0,0,0xc419,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x60,0x60,
0,4,4,4,4,4,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,4, 0,0,0,0,0,0x64,0x64,4,4,4,4,0,0,0,0,0,
4,4,4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc45a,0xc4d9,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0,0x44,4,4,4,0,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,5,5,0x44,0x44,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,5,1,1,1,
1,1,1,1,1,0x8a,0xff89,0x8a,0xff89,0xc55a,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,4,4,4,0x8a,0xff89,0xc59a,1,0,0x8a,0xff89,0x8a,0xff89,
1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc5da,0xc61a,
0xc65a,0xc69a,0xc6da,0,0xc71a,0xc75a,0xc79a,0xc7da,0x8a,0xff89,0x8a,0xff89,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,5,5,1,0,0,0,0,0,0,0,4,0,
0,0,0x64,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x64,4,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,0x64,
0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
4,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x64,0,0,4,4,4,4,0,0,
4,0,0,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,4,4,4,4,4,0,0,4,4,0,
0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,
0x64,0,0,0x44,0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,4,
4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0xc819,1,1,1,1,1,1,1,4,
5,5,5,5,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,0xc859,0xc8b9,0xc919,0xc979,0xc9d9,0xca39,0xca99,0xcaf9,0xcb59,0xcbb9,0xcc19,0xcc79,
0xccd9,0xcd39,0xcd99,0xcdf9,0xda59,0xdab9,0xdb19,0xdb79,0xdbd9,0xdc39,0xdc99,0xdcf9,0xdd59,0xddb9,0xde19,0xde79,
0xded9,0xdf39,0xdf99,0xdff9,0xe059,0xe0b9,0xe119,0xe179,0xe1d9,0xe239,0xe299,0xe2f9,0xe359,0xe3b9,0xe419,0xe479,
0xe4d9,0xe539,0xe599,0xe5f9,0xce59,0xceb9,0xcf19,0xcf79,0xcfd9,0xd039,0xd099,0xd0f9,0xd159,0xd1b9,0xd219,0xd279,
0xd2d9,0xd339,0xd399,0xd3f9,0xd459,0xd4b9,0xd519,0xd579,0xd5d9,0xd639,0xd699,0xd6f9,0xd759,0xd7b9,0xd819,0xd879,
0xd8d9,0xd939,0xd999,0xd9f9,0,0,0,0,0,4,0,0,4,0,0,0,
0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xe659,0xe759,0xe859,0xe959,0xeab9,0xec19,0xed59,0,0,0,0,0,
0,0,0,0,0,0,0,0xee99,0xef99,0xf099,0xf199,0xf299,0,0,0,0,
0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,
0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0,0,0,4,0,4,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,
0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0,0,0,0,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,
0,4,4,4,0,4,4,0,0,0,0,0,4,0x64,4,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,
0,0,0,0,0,0x44,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,0,0,0x64,0x64,0,0,4,0,0,
0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,0,4,4,4,4,4,4,0x64,
0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
4,4,4,4,4,4,4,0,0x60,0,0,0,0,0,0,0, 4,4,4,4,4,4,4,0,0x60,0,0,0,0,0,0,0,
0,0,0x64,4,4,0,0,0,0,0,0,0,0,0,0,0, 0,0,0x64,4,4,0,0,0,0,0,0,0,0,0,0,0,
@ -663,79 +666,89 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, 0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009, 0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009, 0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0,4,4,4,4,4,4,0,0,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,0,4,4,4,4,4,4,0,0x64, 0,0,0,4,0x64,4,4,4,4,0,0,4,4,4,4,0,
4,4,4,4,4,4,4,4,0,0,4,4,4,4,4,4, 0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,
4,0,4,4,0,4,4,0,0,0,0,0,0,0,0,0, 0,4,4,4,4,4,4,0,0,4,4,4,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0, 0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,4,
4,4,4,0,4,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,0,
4,4,4,4,4,4,0,0x64,4,4,4,4,4,4,4,4,
0,0,4,4,4,4,4,4,4,0,4,4,0,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0, 0,4,4,4,4,4,4,0,0,0,4,0,4,4,0,4,
0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4, 4,4,0x64,4,0x64,0x64,0,4,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0x64,0,0,0,0,0,0,0x60,0x60,0x64, 0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
0x64,0x64,0,0,0,0x60,0x60,0x60,0x60,0x60,0x60,4,4,4,4,4,
4,4,4,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0x44,0x44,0x44,
0x44,0x44,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,1,1,1,1,1,1,1,0,0x21,0x21,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,2,0,2,2,0,0,2,0,
0,2,2,0,0,2,2,2,2,0,2,2,2,2,2,2,
2,2,1,1,1,1,0,1,0,1,0x21,0x21,1,1,1,1,
0,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
2,2,0,2,2,2,2,0,0,2,2,2,2,2,2,2,
2,0,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,0,2,2,2,2,0,2,2,2,2,
2,0,2,0,0,0,2,2,2,2,2,2,2,0,1,1,
1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0,1,1,1,1,1,1,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,0,1,1,1,1,1,1,2,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,0,0,0,0,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,
0,4,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,0,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0, 0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0x44,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0,0x44,0x44, 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,
0,0,0,0,0,0,0,0,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,2,2,2,2,2,2,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,4,0x64,0,
0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2, 0,0,0,0,0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,
2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0, 0x60,0x60,0x60,4,4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 0x64,0x64,0x64,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,0,0x21,0x21,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,0,2,2,0,0,2,0,0,2,2,0,0,2,2,2,
2,0,2,2,2,2,2,2,2,2,1,1,1,1,0,1,
0,1,0x21,0x21,1,1,1,1,0,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,1,1,1,1,2,2,0,2,2,2,2,0,
0,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,0,2,
2,2,2,0,2,2,2,2,2,0,2,0,0,0,2,2,
2,2,2,2,2,0,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,
1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,0,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,0,1,1,1,1,
1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0x44,0x44,0x44,0x44,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0
}; };
static const uint16_t ucase_props_exceptions[1948]={ static const uint16_t ucase_props_exceptions[1948]={
@ -900,13 +913,13 @@ static const UCaseProps ucase_props_singleton={
ucase_props_trieIndex+3228, ucase_props_trieIndex+3228,
NULL, NULL,
3228, 3228,
8292, 8492,
0x188, 0x188,
0xd18, 0xd18,
0x0, 0x0,
0x0, 0x0,
0xe0800, 0xe0800,
0x2cfc, 0x2dc4,
NULL, 0, FALSE, FALSE, 0, NULL NULL, 0, FALSE, FALSE, 0, NULL
}, },
{ 3,0,0,0 } { 3,0,0,0 }

Просмотреть файл

@ -20,8 +20,11 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/brkiter.h" #include "unicode/brkiter.h"
#include "unicode/bytestream.h"
#include "unicode/casemap.h" #include "unicode/casemap.h"
#include "unicode/edits.h" #include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/stringpiece.h"
#include "unicode/ubrk.h" #include "unicode/ubrk.h"
#include "unicode/uloc.h" #include "unicode/uloc.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
@ -32,6 +35,7 @@
#include "unicode/utf.h" #include "unicode/utf.h"
#include "unicode/utf8.h" #include "unicode/utf8.h"
#include "unicode/utf16.h" #include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
#include "uassert.h" #include "uassert.h"
@ -39,27 +43,6 @@
#include "ucasemap_imp.h" #include "ucasemap_imp.h"
#include "ustr_imp.h" #include "ustr_imp.h"
U_NAMESPACE_BEGIN
namespace {
// TODO: share with UTF-16? inline in ucasemap_imp.h?
/**
 * Settles the final error code after a case-mapping pass.
 *
 * An errorCode that already indicates failure is left alone. Otherwise, an
 * output length greater than destCapacity is reported as
 * U_BUFFER_OVERFLOW_ERROR; only when there is no overflow, and edits is
 * non-NULL, is edits->copyErrorTo() asked to update errorCode.
 *
 * @param destIndex    output length produced by the mapping
 * @param destCapacity capacity of the destination buffer
 * @param edits        optional Edits object; may be NULL
 * @param errorCode    in/out error code
 * @return destIndex, always unchanged
 */
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
                                   Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        // Keep the earlier failure; do not overwrite it.
        return destIndex;
    }
    if (destIndex > destCapacity) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
        return destIndex;
    }
    if (edits != NULL) {
        edits->copyErrorTo(errorCode);
    }
    return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE U_NAMESPACE_USE
/* UCaseMap service object -------------------------------------------------- */ /* UCaseMap service object -------------------------------------------------- */
@ -150,148 +133,39 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
/* TODO(markus): Move to a new, separate utf8case.cpp file. */ /* TODO(markus): Move to a new, separate utf8case.cpp file. */
namespace {
/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
static inline int32_t inline UBool
appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, appendResult(int32_t cpLength, int32_t result, const UChar *s,
int32_t result, const UChar *s, ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
int32_t cpLength, uint32_t options, icu::Edits *edits) { U_ASSERT(U_SUCCESS(errorCode));
UChar32 c;
int32_t length;
UErrorCode errorCode;
/* decode the result */ /* decode the result */
if(result<0) { if(result<0) {
/* (not) original code point */ /* (not) original code point */
if(edits!=NULL) { if(edits!=NULL) {
edits->addUnchanged(cpLength); edits->addUnchanged(cpLength);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
} }
c=~result; if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
if(destIndex<destCapacity && c<=0x7f) { // ASCII slightly-fastpath ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
dest[destIndex++]=(uint8_t)c;
return destIndex;
} }
length=cpLength;
} else { } else {
if(result<=UCASE_MAX_STRING_LENGTH) { if(result<=UCASE_MAX_STRING_LENGTH) {
// string: "result" is the UTF-16 length // string: "result" is the UTF-16 length
errorCode=U_ZERO_ERROR; return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
if(destIndex<destCapacity) {
u_strToUTF8((char *)(dest+destIndex), destCapacity-destIndex, &length,
s, result, &errorCode);
} else {
u_strToUTF8(NULL, 0, &length, s, result, &errorCode);
}
if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
return -1;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if(edits!=NULL) {
edits->addReplace(cpLength, length);
}
// We might have an overflow, but we know the actual length.
return destIndex+length;
} else if(destIndex<destCapacity && result<=0x7f) { // ASCII slightly-fastpath
dest[destIndex++]=(uint8_t)result;
if(edits!=NULL) {
edits->addReplace(cpLength, 1);
}
return destIndex;
} else { } else {
c=result; ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
length=U8_LENGTH(c);
if(edits!=NULL) {
edits->addReplace(cpLength, length);
}
} }
} }
// c>=0 single code point return TRUE;
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if(destIndex<destCapacity) {
/* append the result */
UBool isError=FALSE;
U8_APPEND(dest, destIndex, destCapacity, c, isError);
if(isError) {
/* overflow, nothing written */
destIndex+=length;
}
} else {
/* preflight */
destIndex+=length;
}
return destIndex;
}
static inline int32_t
appendASCII(uint8_t *dest, int32_t destIndex, int32_t destCapacity, uint8_t c) {
if(destIndex<destCapacity) {
dest[destIndex]=c;
} else if(destIndex==INT32_MAX) {
return -1; // integer overflow
}
return destIndex+1;
} }
// See unicode/utf8.h U8_APPEND_UNSAFE(). // See unicode/utf8.h U8_APPEND_UNSAFE().
static inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
static inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
static inline int32_t } // namespace
appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar32 c) {
U_ASSERT(0x370 <= c && c <= 0x3ff); // 2-byte UTF-8, main Greek block
if(2>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
int32_t limit=destIndex+2;
if(limit<=destCapacity) {
dest+=destIndex;
dest[0]=getTwoByteLead(c);
dest[1]=getTwoByteTrail(c);
}
return limit;
}
static inline int32_t
appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, const char *s) {
if(2>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
int32_t limit=destIndex+2;
if(limit<=destCapacity) {
dest+=destIndex;
dest[0]=(uint8_t)s[0];
dest[1]=(uint8_t)s[1];
}
return limit;
}
static inline int32_t
appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
const uint8_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, s, length);
}
destIndex+=length;
}
return destIndex;
}
static UChar32 U_CALLCONV static UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) { utf8_caseContextIterator(void *context, int8_t dir) {
@ -329,17 +203,15 @@ utf8_caseContextIterator(void *context, int8_t dir) {
* Case-maps [srcStart..srcLimit[ but takes * Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account. * context [0..srcLength[ into account.
*/ */
static int32_t static void
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map, _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, UCaseContext *csc, const uint8_t *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit, int32_t srcStart, int32_t srcLimit,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
/* case mapping loop */ /* case mapping loop */
int32_t srcIndex=srcStart; int32_t srcIndex=srcStart;
int32_t destIndex=0; while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
while(srcIndex<srcLimit) {
int32_t cpStart; int32_t cpStart;
csc->cpStart=cpStart=srcIndex; csc->cpStart=cpStart=srcIndex;
UChar32 c; UChar32 c;
@ -347,45 +219,32 @@ _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
csc->cpLimit=srcIndex; csc->cpLimit=srcIndex;
if(c<0) { if(c<0) {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
src+cpStart, srcIndex-cpStart, options, edits); sink, options, edits, errorCode);
if(destIndex<0) { } else {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR; const UChar *s;
return 0; c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
} appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
continue;
}
const UChar *s;
c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
return destIndex;
} }
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle( ucasemap_internalUTF8ToTitle(
int32_t caseLocale, uint32_t options, BreakIterator *iter, int32_t caseLocale, uint32_t options, BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
return 0; return;
} }
/* set up local variables */ /* set up local variables */
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
int32_t destIndex=0;
int32_t prev=0; int32_t prev=0;
UBool isFirstIndex=TRUE; UBool isFirstIndex=TRUE;
@ -404,45 +263,36 @@ ucasemap_internalUTF8ToTitle(
} }
/* /*
* Unicode 4 & 5 section 3.13 Default Case Operations: * Segment [prev..index[ into 3 parts:
* * a) skipped characters (copy as-is) [prev..titleStart[
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex * b) first letter (titlecase) [titleStart..titleLimit[
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* In this implementation, segment [prev..index[ into 3 parts:
* a) uncased characters (copy as-is) [prev..titleStart[
* b) first case letter (titlecase) [titleStart..titleLimit[
* c) subsequent characters (lowercase) [titleLimit..index[ * c) subsequent characters (lowercase) [titleLimit..index[
*/ */
if(prev<index) { if(prev<index) {
/* find and copy uncased characters [prev..titleStart[ */ /* find and copy skipped characters [prev..titleStart[ */
int32_t titleStart=prev; int32_t titleStart=prev;
int32_t titleLimit=prev; int32_t titleLimit=prev;
UChar32 c; UChar32 c;
U8_NEXT(src, titleLimit, index, c); U8_NEXT(src, titleLimit, index, c);
if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) { if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
/* Adjust the titlecasing index (titleStart) to the next cased character. */ // Adjust the titlecasing index to the next cased character,
for(;;) { // or to the next letter/number/symbol/private use.
// Stop with titleStart<titleLimit<=index
// if there is a character to be titlecased,
// or else stop with titleStart==titleLimit==index.
UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
titleStart=titleLimit; titleStart=titleLimit;
if(titleLimit==index) { if(titleLimit==index) {
/*
* only uncased characters in [prev..index[
* stop with titleStart==titleLimit==index
*/
break; break;
} }
U8_NEXT(src, titleLimit, index, c); U8_NEXT(src, titleLimit, index, c);
if(UCASE_NONE!=ucase_getType(c)) {
break; /* cased letter at [titleStart..titleLimit[ */
}
} }
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (prev < titleStart) {
src+prev, titleStart-prev, options, edits); if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
if(destIndex<0) { sink, options, edits, errorCode)) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return;
return 0; }
} }
} }
@ -453,16 +303,15 @@ ucasemap_internalUTF8ToTitle(
csc.cpLimit=titleLimit; csc.cpLimit=titleLimit;
const UChar *s; const UChar *s;
c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale); c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
destIndex=appendResult(dest, destIndex, destCapacity, c, s, if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
titleLimit-titleStart, options, edits); return;
}
} else { } else {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
src+titleStart, titleLimit-titleStart, options, edits); sink, options, edits, errorCode)) {
} return;
if(destIndex<0) { }
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
/* Special case Dutch IJ titlecasing */ /* Special case Dutch IJ titlecasing */
@ -470,22 +319,13 @@ ucasemap_internalUTF8ToTitle(
caseLocale == UCASE_LOC_DUTCH && caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
if (src[titleStart+1] == 0x006A) { if (src[titleStart+1] == 0x006A) {
destIndex=appendASCII(dest, destIndex, destCapacity, 0x004A); ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if(edits!=NULL) {
edits->addReplace(1, 1);
}
titleLimit++; titleLimit++;
} else if (src[titleStart+1] == 0x004A) { } else if (src[titleStart+1] == 0x004A) {
// Keep the capital J from getting lowercased. // Keep the capital J from getting lowercased.
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
src+titleStart+1, 1, options, edits); sink, options, edits, errorCode)) {
if(destIndex<0) { return;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
titleLimit++; titleLimit++;
} }
@ -495,26 +335,18 @@ ucasemap_internalUTF8ToTitle(
if(titleLimit<index) { if(titleLimit<index) {
if((options&U_TITLECASE_NO_LOWERCASE)==0) { if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */ /* Normal operation: Lowercase the rest of the word. */
destIndex+= _caseMap(caseLocale, options, ucase_toFullLower,
_caseMap( src, &csc,
caseLocale, options, ucase_toFullLower, titleLimit, index,
dest+destIndex, destCapacity-destIndex, sink, edits, errorCode);
src, &csc,
titleLimit, index,
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
}
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return destIndex; return;
} }
} else { } else {
/* Optionally just copy the rest of the word unchanged. */ /* Optionally just copy the rest of the word unchanged. */
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
src+titleLimit, index-titleLimit, options, edits); sink, options, edits, errorCode)) {
if(destIndex<0) { return;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
} }
@ -523,8 +355,6 @@ ucasemap_internalUTF8ToTitle(
prev=index; prev=index;
} }
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
#endif #endif
@ -549,12 +379,10 @@ UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
} }
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
int32_t toUpper(uint32_t options, void toUpper(uint32_t options,
uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength,
const uint8_t *src, int32_t srcLength, ByteSink &sink, Edits *edits,
Edits *edits, UErrorCode &errorCode) {
UErrorCode &errorCode) {
int32_t destIndex=0;
uint32_t state = 0; uint32_t state = 0;
for (int32_t i = 0; i < srcLength;) { for (int32_t i = 0; i < srcLength;) {
int32_t nextIndex = i; int32_t nextIndex = i;
@ -630,8 +458,10 @@ int32_t toUpper(uint32_t options,
} }
} }
UBool change = TRUE; UBool change;
if (edits != NULL) { if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
change = TRUE; // common, simple usage
} else {
// Find out first whether we are changing the text. // Find out first whether we are changing the text.
U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
change = (i + 2) > nextIndex || change = (i + 2) > nextIndex ||
@ -662,148 +492,146 @@ int32_t toUpper(uint32_t options,
edits->addUnchanged(oldLength); edits->addUnchanged(oldLength);
} }
// Write unchanged text? // Write unchanged text?
change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0; change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
} }
} }
if (change) { if (change) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, upper); ByteSinkUtil::appendTwoBytes(upper, sink);
if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { if ((data & HAS_EITHER_DIALYTIKA) != 0) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0308"); // restore or add a dialytika sink.Append(u8"\u0308", 2); // restore or add a dialytika
} }
if (destIndex >= 0 && addTonos) { if (addTonos) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0301"); sink.Append(u8"\u0301", 2);
} }
while (destIndex >= 0 && numYpogegrammeni > 0) { while (numYpogegrammeni > 0) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0399"); sink.Append(u8"\u0399", 2);
--numYpogegrammeni; --numYpogegrammeni;
} }
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
} }
} else if(c>=0) { } else if(c>=0) {
const UChar *s; const UChar *s;
c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
destIndex = appendResult(dest, destIndex, destCapacity, c, s, if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
nextIndex - i, options, edits); return;
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} else { } else {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
src+i, nextIndex-i, options, edits); sink, options, edits, errorCode)) {
if(destIndex<0) { return;
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
i = nextIndex; i = nextIndex;
state = nextState; state = nextState;
} }
return destIndex;
} }
} // namespace GreekUpper } // namespace GreekUpper
U_NAMESPACE_END U_NAMESPACE_END
static int32_t U_CALLCONV static void U_CALLCONV
ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
int32_t destIndex = _caseMap( _caseMap(
caseLocale, options, ucase_toFullLower, caseLocale, options, ucase_toFullLower,
dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
edits, errorCode); sink, edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
static int32_t U_CALLCONV static void U_CALLCONV
ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
int32_t destIndex;
if (caseLocale == UCASE_LOC_GREEK) { if (caseLocale == UCASE_LOC_GREEK) {
destIndex = GreekUpper::toUpper(options, dest, destCapacity, GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
src, srcLength, edits, errorCode);
} else { } else {
UCaseContext csc=UCASECONTEXT_INITIALIZER; UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src; csc.p=(void *)src;
csc.limit=srcLength; csc.limit=srcLength;
destIndex = _caseMap( _caseMap(
caseLocale, options, ucase_toFullUpper, caseLocale, options, ucase_toFullUpper,
dest, destCapacity,
src, &csc, 0, srcLength, src, &csc, 0, srcLength,
edits, errorCode); sink, edits, errorCode);
} }
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
static int32_t U_CALLCONV static void U_CALLCONV
ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
/* case mapping loop */ /* case mapping loop */
int32_t srcIndex = 0; int32_t srcIndex = 0;
int32_t destIndex = 0; while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
while (srcIndex < srcLength) {
int32_t cpStart = srcIndex; int32_t cpStart = srcIndex;
UChar32 c; UChar32 c;
U8_NEXT(src, srcIndex, srcLength, c); U8_NEXT(src, srcIndex, srcLength, c);
if(c<0) { if(c<0) {
// Malformed UTF-8. // Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity, ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
src+cpStart, srcIndex-cpStart, options, edits); sink, options, edits, errorCode);
if(destIndex<0) { } else {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR; const UChar *s;
return 0; c = ucase_toFullFolding(c, &s, options);
} appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
continue;
}
const UChar *s;
c = ucase_toFullFolding(c, &s, options);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
} }
} }
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
} }
U_CFUNC int32_t void
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity, const char *src, int32_t srcLength,
const uint8_t *src, int32_t srcLength, UTF8CaseMapper *stringCaseMapper,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* check argument values */
if (U_FAILURE(errorCode)) {
return;
}
if ((src == nullptr && srcLength != 0) || srcLength < -1) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Get the string length.
if (srcLength == -1) {
srcLength = (int32_t)uprv_strlen((const char *)src);
}
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset();
}
stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
(const uint8_t *)src, srcLength, sink, edits, errorCode);
sink.Flush();
if (U_SUCCESS(errorCode)) {
if (edits != nullptr) {
edits->copyErrorTo(errorCode);
}
}
}
int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper, UTF8CaseMapper *stringCaseMapper,
icu::Edits *edits, icu::Edits *edits,
UErrorCode &errorCode) { UErrorCode &errorCode) {
int32_t destLength;
/* check argument values */ /* check argument values */
if(U_FAILURE(errorCode)) { if(U_FAILURE(errorCode)) {
return 0; return 0;
} }
if( destCapacity<0 || if( destCapacity<0 ||
(dest==NULL && destCapacity>0) || (dest==NULL && destCapacity>0) ||
src==NULL || (src==NULL && srcLength!=0) || srcLength<-1
srcLength<-1
) { ) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR; errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0; return 0;
@ -823,12 +651,21 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
return 0; return 0;
} }
if(edits!=NULL) { CheckedArrayByteSink sink(dest, destCapacity);
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset(); edits->reset();
} }
destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
dest, destCapacity, src, srcLength, edits, errorCode); (const uint8_t *)src, srcLength, sink, edits, errorCode);
return u_terminateChars((char *)dest, destCapacity, destLength, &errorCode); sink.Flush();
if (U_SUCCESS(errorCode)) {
if (sink.Overflowed()) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != nullptr) {
edits->copyErrorTo(errorCode);
}
}
return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
} }
/* public API functions */ /* public API functions */
@ -840,8 +677,8 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToLower, NULL, *pErrorCode); ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
} }
@ -852,8 +689,8 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToUpper, NULL, *pErrorCode); ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
} }
@ -864,13 +701,43 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
UErrorCode *pErrorCode) { UErrorCode *pErrorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8Fold, NULL, *pErrorCode); ucasemap_internalUTF8Fold, NULL, *pErrorCode);
} }
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
void CaseMap::utf8ToLower(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
src.data(), src.length(),
ucasemap_internalUTF8ToLower, sink, edits, errorCode);
}
void CaseMap::utf8ToUpper(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
src.data(), src.length(),
ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
}
/**
 * ByteSink overload: forwards to the sink-based ucasemap_mapUTF8() using the
 * ucasemap_internalUTF8Fold mapper. No locale is taken; UCASE_LOC_ROOT is
 * passed as the case locale.
 *
 * @param options   option bit set, passed through unchanged
 * @param src       input text (data pointer and length are forwarded)
 * @param sink      receives the mapped output
 * @param edits     optional edits recorder, passed through unchanged
 * @param errorCode in/out error code, passed through unchanged
 */
void CaseMap::utf8Fold(
        uint32_t options,
        StringPiece src, ByteSink &sink, Edits *edits,
        UErrorCode &errorCode) {
    const char *text = src.data();
    const int32_t textLength = src.length();
    // UCASEMAP_BREAK_ITERATOR_NULL is deliberately written without a trailing
    // comma, exactly as at the other call sites in this file.
    ucasemap_mapUTF8(
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
        text, textLength,
        ucasemap_internalUTF8Fold, sink, edits, errorCode);
}
int32_t CaseMap::utf8ToLower( int32_t CaseMap::utf8ToLower(
const char *locale, uint32_t options, const char *locale, uint32_t options,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
@ -878,8 +745,8 @@ int32_t CaseMap::utf8ToLower(
UErrorCode &errorCode) { UErrorCode &errorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToLower, edits, errorCode); ucasemap_internalUTF8ToLower, edits, errorCode);
} }
@ -890,8 +757,8 @@ int32_t CaseMap::utf8ToUpper(
UErrorCode &errorCode) { UErrorCode &errorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToUpper, edits, errorCode); ucasemap_internalUTF8ToUpper, edits, errorCode);
} }
@ -902,8 +769,8 @@ int32_t CaseMap::utf8Fold(
UErrorCode &errorCode) { UErrorCode &errorCode) {
return ucasemap_mapUTF8( return ucasemap_mapUTF8(
UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8Fold, edits, errorCode); ucasemap_internalUTF8Fold, edits, errorCode);
} }

Просмотреть файл

@ -9,16 +9,26 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/ucasemap.h" #include "unicode/ucasemap.h"
#include "unicode/uchar.h"
#include "ucase.h" #include "ucase.h"
#ifndef U_COMPARE_IGNORE_CASE
/* see also unorm.h */
/** /**
* Option bit for unorm_compare: * Bit mask for the titlecasing iterator options bit field.
* Perform case-insensitive comparison. * Currently only 3 out of 8 values are used:
* 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* See stringoptions.h.
* @internal
*/ */
#define U_COMPARE_IGNORE_CASE 0x10000 #define U_TITLECASE_ITERATOR_MASK 0xe0
#endif
/**
* Bit mask for the titlecasing index adjustment options bit set.
* Currently two bits are defined:
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED.
* See stringoptions.h.
* @internal
*/
#define U_TITLECASE_ADJUSTMENT_MASK 0x600
/** /**
* Internal API, used by u_strcasecmp() etc. * Internal API, used by u_strcasecmp() etc.
@ -32,7 +42,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,
UErrorCode *pErrorCode); UErrorCode *pErrorCode);
/** /**
* Interanl API, used for detecting length of * Internal API, used for detecting length of
* shared prefix case-insensitively. * shared prefix case-insensitively.
* @param s1 input string 1 * @param s1 input string 1
* @param length1 length of string 1, or -1 (NULL terminated) * @param length1 length of string 1, or -1 (NULL terminated)
@ -61,6 +71,44 @@ uprv_haveProperties(UErrorCode *pErrorCode);
#ifdef __cplusplus #ifdef __cplusplus
U_NAMESPACE_BEGIN
class BreakIterator; // unicode/brkiter.h
class ByteSink;
class Locale; // unicode/locid.h
/**
 * Validates the titlecasing index-adjustment bits of an options word.
 *
 * @param options   option bit set; only the bits in U_TITLECASE_ADJUSTMENT_MASK are examined
 * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR when
 *                  every bit of the mask is set at once
 * @return TRUE if the options are acceptable; FALSE if errorCode was already
 *         a failure on entry or the options were rejected
 */
inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const uint32_t adjustment = options & U_TITLECASE_ADJUSTMENT_MASK;
    if (adjustment != U_TITLECASE_ADJUSTMENT_MASK) {
        return TRUE;
    }
    // Both adjustment options were requested together.
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return FALSE;
}
/**
 * Tests whether c is a letter, number, symbol, or private use code point.
 * Private use code points are included because they are typically used as
 * letters or numbers. Modifier letters are excluded from the general-category
 * mask and count only when ucase_getType() reports them as cased.
 *
 * @param c code point to classify
 * @return nonzero if c qualifies, zero otherwise
 */
inline UBool ustrcase_isLNS(UChar32 c) {
    // L, N, S and Co categories, with Lm removed so it can be handled separately.
    const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;
    int gc = u_charType(c);
    if ((U_MASK(gc) & LNS) != 0) {
        return TRUE;
    }
    // Consider modifier letters only if they are cased.
    return gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE;
}
#if !UCONFIG_NO_BREAK_ITERATION
/**
 * Returns nullptr if error. Pass in either locale or locID, not both.
 *
 * @param locale    Locale object, or nullptr when locID is used instead.
 * @param locID     Locale ID string, or nullptr when locale is used instead.
 * @param options   Option bits forwarded by the titlecasing entry points.
 * @param iter      Break iterator supplied by the caller; callers in this file
 *                  also pass nullptr here.
 * @param ownedIter NOTE(review): presumably receives ownership of an iterator
 *                  created by this function when iter is nullptr (one caller
 *                  orphan()s it after a non-null return) -- confirm against
 *                  the definition.
 * @param errorCode In/out ICU error code.
 * @return The break iterator to use, or nullptr if there was an error.
 */
U_CFUNC
BreakIterator *ustrcase_getTitleBreakIterator(
const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);
#endif
U_NAMESPACE_END
#include "unicode/unistr.h" // for UStringCaseMapper #include "unicode/unistr.h" // for UStringCaseMapper
/* /*
@ -163,39 +211,43 @@ ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITE
* UTF-8 version of UStringCaseMapper. * UTF-8 version of UStringCaseMapper.
* All error checking must be done. * All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed. * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/ */
typedef int32_t U_CALLCONV typedef void U_CALLCONV
UTF8CaseMapper(int32_t caseLocale, uint32_t options, UTF8CaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter, icu::BreakIterator *iter,
#endif #endif
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
/** Implements UTF8CaseMapper. */ /** Implements UTF8CaseMapper. */
U_CFUNC int32_t U_CALLCONV U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options, ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter, icu::BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const uint8_t *src, int32_t srcLength,
icu::Edits *edits, icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);
#endif #endif
/**
 * ByteSink variant of ucasemap_mapUTF8(): it returns void and takes an
 * icu::ByteSink where the buffer variant takes dest/destCapacity.
 * stringCaseMapper is the UTF8CaseMapper to apply
 * (for example ucasemap_internalUTF8ToTitle).
 * NOTE(review): presumably the mapped UTF-8 is written to sink, changes are
 * recorded in edits when it is not NULL, and failures are reported only
 * through errorCode -- confirm against the definition.
 */
void
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode);
/** /**
* Implements argument checking and buffer handling * Implements argument checking and buffer handling
* for UTF-8 string case mapping as a common function. * for UTF-8 string case mapping as a common function.
*/ */
U_CFUNC int32_t int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity, char *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength, const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper, UTF8CaseMapper *stringCaseMapper,
icu::Edits *edits, icu::Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);

Просмотреть файл

@ -31,6 +31,29 @@
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
void CaseMap::utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
UText utext = UTEXT_INITIALIZER;
utext_openUTF8(&utext, src.data(), src.length(), &errorCode);
LocalPointer<BreakIterator> ownedIter;
iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
if (iter == nullptr) {
utext_close(&utext);
return;
}
iter->setText(&utext, errorCode);
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, iter,
src.data(), src.length(),
ucasemap_internalUTF8ToTitle, sink, edits, errorCode);
utext_close(&utext);
}
int32_t CaseMap::utf8ToTitle( int32_t CaseMap::utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter, const char *locale, uint32_t options, BreakIterator *iter,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
@ -42,19 +65,16 @@ int32_t CaseMap::utf8ToTitle(
UText utext=UTEXT_INITIALIZER; UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, src, srcLength, &errorCode); utext_openUTF8(&utext, src, srcLength, &errorCode);
LocalPointer<BreakIterator> ownedIter; LocalPointer<BreakIterator> ownedIter;
iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
if(iter==NULL) { if(iter==NULL) {
iter=BreakIterator::createWordInstance(Locale(locale), errorCode);
ownedIter.adoptInstead(iter);
}
if(U_FAILURE(errorCode)) {
utext_close(&utext); utext_close(&utext);
return 0; return 0;
} }
iter->setText(&utext, errorCode); iter->setText(&utext, errorCode);
int32_t length=ucasemap_mapUTF8( int32_t length=ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, iter, ustrcase_getCaseLocale(locale), options, iter,
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToTitle, edits, errorCode); ucasemap_internalUTF8ToTitle, edits, errorCode);
utext_close(&utext); utext_close(&utext);
return length; return length;
@ -88,17 +108,24 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
} }
UText utext=UTEXT_INITIALIZER; UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(csm->iter==NULL) {
csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
}
if (U_FAILURE(*pErrorCode)) { if (U_FAILURE(*pErrorCode)) {
return 0; return 0;
} }
if(csm->iter==NULL) {
LocalPointer<BreakIterator> ownedIter;
BreakIterator *iter = ustrcase_getTitleBreakIterator(
nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
if (iter == nullptr) {
utext_close(&utext);
return 0;
}
csm->iter = ownedIter.orphan();
}
csm->iter->setText(&utext, *pErrorCode); csm->iter->setText(&utext, *pErrorCode);
int32_t length=ucasemap_mapUTF8( int32_t length=ucasemap_mapUTF8(
csm->caseLocale, csm->options, csm->iter, csm->caseLocale, csm->options, csm->iter,
(uint8_t *)dest, destCapacity, dest, destCapacity,
(const uint8_t *)src, srcLength, src, srcLength,
ucasemap_internalUTF8ToTitle, NULL, *pErrorCode); ucasemap_internalUTF8ToTitle, NULL, *pErrorCode);
utext_close(&utext); utext_close(&utext);
return length; return length;

Просмотреть файл

@ -729,8 +729,5 @@ upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
} }
/* add the start code point of each same-value range of the properties vectors trie */ /* add the start code point of each same-value range of the properties vectors trie */
if(propsVectorsColumns>0) { utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
/* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
}
} }

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -287,7 +287,7 @@ UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UCha
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode) UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(units) { : LinearMatchNode(len, nextNode), s(units) {
hash=hash*37+ustr_hashUCharsN(units, len); hash=hash*37u+ustr_hashUCharsN(units, len);
} }
UBool UBool

Просмотреть файл

@ -35,7 +35,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_START = -1, UCLN_COMMON_START = -1,
UCLN_COMMON_USPREP, UCLN_COMMON_USPREP,
UCLN_COMMON_BREAKITERATOR, UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_BREAKITERATOR_DICT, UCLN_COMMON_RBBI,
UCLN_COMMON_SERVICE, UCLN_COMMON_SERVICE,
UCLN_COMMON_LOCALE_KEY_TYPE, UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE, UCLN_COMMON_LOCALE,

Просмотреть файл

@ -315,6 +315,7 @@ _CompoundTextClose(UConverter *converter) {
} }
uprv_free(converter->extraInfo); uprv_free(converter->extraInfo);
converter->extraInfo = NULL;
} }
} }
@ -519,7 +520,7 @@ UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args,
currentState = tmpState; currentState = tmpState;
} }
sourceOffset = uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength; sourceOffset = static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength);
mySource += sourceOffset; mySource += sourceOffset;

Просмотреть файл

@ -966,26 +966,26 @@ _LMBCSFromUnicode(UConverterFromUnicodeArgs* args,
if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START)
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
ULMBCS_GRP_L1, pLMBCS, &uniChar, ULMBCS_GRP_L1, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
if(!bytes_written) if(!bytes_written)
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
} }
if(!bytes_written) if(!bytes_written)
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
extraInfo->localeConverterIndex, pLMBCS, &uniChar, extraInfo->localeConverterIndex, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
} }
} }
else else
{ {
bytes_written = LMBCSConversionWorker (extraInfo, bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
extraInfo->localeConverterIndex, pLMBCS, &uniChar, extraInfo->localeConverterIndex, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried); &lastConverterIndex, groups_tried);
} }

Просмотреть файл

@ -1323,9 +1323,17 @@ _UTF16GetName(const UConverter *cnv) {
U_CDECL_END U_CDECL_END
extern const UConverterSharedData _UTF16Data; extern const UConverterSharedData _UTF16Data;
#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData) static inline bool IS_UTF16BE(const UConverter *cnv) {
#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData) return ((cnv)->sharedData == &_UTF16BEData);
#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data) }
static inline bool IS_UTF16LE(const UConverter *cnv) {
return ((cnv)->sharedData == &_UTF16LEData);
}
static inline bool IS_UTF16(const UConverter *cnv) {
return ((cnv)->sharedData==&_UTF16Data) || ((cnv)->sharedData == &_UTF16v2Data);
}
U_CDECL_BEGIN U_CDECL_BEGIN
static void U_CALLCONV static void U_CALLCONV

Просмотреть файл

@ -31,6 +31,7 @@
#include "ucnv_bld.h" #include "ucnv_bld.h"
#include "ucnv_cnv.h" #include "ucnv_cnv.h"
#include "cmemory.h" #include "cmemory.h"
#include "ustr_imp.h"
/* Prototypes --------------------------------------------------------------- */ /* Prototypes --------------------------------------------------------------- */
@ -44,51 +45,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args
/* UTF-8 -------------------------------------------------------------------- */ /* UTF-8 -------------------------------------------------------------------- */
/* UTF-8 Conversion DATA
* for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9
*/
/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/
#define MAXIMUM_UCS2 0x0000FFFF #define MAXIMUM_UCS2 0x0000FFFF
#define MAXIMUM_UTF 0x0010FFFF
#define MAXIMUM_UCS4 0x7FFFFFFF
#define HALF_SHIFT 10
#define HALF_BASE 0x0010000
#define HALF_MASK 0x3FF
#define SURROGATE_HIGH_START 0xD800
#define SURROGATE_HIGH_END 0xDBFF
#define SURROGATE_LOW_START 0xDC00
#define SURROGATE_LOW_END 0xDFFF
/* -SURROGATE_LOW_START + HALF_BASE */ static const uint32_t offsetsFromUTF8[5] = {0,
#define SURROGATE_LOW_BASE 9216
static const uint32_t offsetsFromUTF8[7] = {0,
(uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
(uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080 (uint32_t) 0x03C82080
}; };
/* END OF UTF-8 Conversion DATA */
static const int8_t bytesFromUTF8[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};
/*
* Starting with Unicode 3.0.1:
* UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
* byte sequences with more than 4 bytes are illegal in UTF-8,
* which is tested with impossible values for them
*/
static const uint32_t
utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
static UBool hasCESU8Data(const UConverter *cnv) static UBool hasCESU8Data(const UConverter *cnv)
{ {
#if UCONFIG_ONLY_HTML_CONVERSION #if UCONFIG_ONLY_HTML_CONVERSION
@ -127,7 +90,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
while (mySource < sourceLimit && myTarget < targetLimit) while (mySource < sourceLimit && myTarget < targetLimit)
{ {
ch = *(mySource++); ch = *(mySource++);
if (ch < 0x80) /* Simple case */ if (U8_IS_SINGLE(ch)) /* Simple case */
{ {
*(myTarget++) = (UChar) ch; *(myTarget++) = (UChar) ch;
} }
@ -135,7 +98,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
{ {
/* store the first char */ /* store the first char */
toUBytes[0] = (char)ch; toUBytes[0] = (char)ch;
inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */ inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
i = 1; i = 1;
morebytes: morebytes:
@ -144,7 +107,8 @@ morebytes:
if (mySource < sourceLimit) if (mySource < sourceLimit)
{ {
toUBytes[i] = (char) (ch2 = *mySource); toUBytes[i] = (char) (ch2 = *mySource);
if (!U8_IS_TRAIL(ch2)) if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{ {
break; /* i < inBytes */ break; /* i < inBytes */
} }
@ -162,24 +126,12 @@ morebytes:
} }
} }
/* Remove the accumulated high bits */ // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
ch -= offsetsFromUTF8[inBytes]; if (i == inBytes && (!isCESU8 || i <= 3))
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
*/
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
{ {
/* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes];
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= MAXIMUM_UCS2) if (ch <= MAXIMUM_UCS2)
{ {
@ -189,9 +141,8 @@ morebytes:
else else
{ {
/* write out the surrogates */ /* write out the surrogates */
ch -= HALF_BASE; *(myTarget++) = U16_LEAD(ch);
*(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START); ch = U16_TRAIL(ch);
ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
if (myTarget < targetLimit) if (myTarget < targetLimit)
{ {
*(myTarget++) = (UChar)ch; *(myTarget++) = (UChar)ch;
@ -256,7 +207,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
while (mySource < sourceLimit && myTarget < targetLimit) while (mySource < sourceLimit && myTarget < targetLimit)
{ {
ch = *(mySource++); ch = *(mySource++);
if (ch < 0x80) /* Simple case */ if (U8_IS_SINGLE(ch)) /* Simple case */
{ {
*(myTarget++) = (UChar) ch; *(myTarget++) = (UChar) ch;
*(myOffsets++) = offsetNum++; *(myOffsets++) = offsetNum++;
@ -264,7 +215,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
else else
{ {
toUBytes[0] = (char)ch; toUBytes[0] = (char)ch;
inBytes = bytesFromUTF8[ch]; inBytes = U8_COUNT_BYTES_NON_ASCII(ch);
i = 1; i = 1;
morebytes: morebytes:
@ -273,7 +224,8 @@ morebytes:
if (mySource < sourceLimit) if (mySource < sourceLimit)
{ {
toUBytes[i] = (char) (ch2 = *mySource); toUBytes[i] = (char) (ch2 = *mySource);
if (!U8_IS_TRAIL(ch2)) if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{ {
break; /* i < inBytes */ break; /* i < inBytes */
} }
@ -290,24 +242,12 @@ morebytes:
} }
} }
/* Remove the accumulated high bits */ // In CESU-8, only surrogates, not supplementary code points, are encoded directly.
ch -= offsetsFromUTF8[inBytes]; if (i == inBytes && (!isCESU8 || i <= 3))
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
*/
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
{ {
/* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes];
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= MAXIMUM_UCS2) if (ch <= MAXIMUM_UCS2)
{ {
@ -318,10 +258,9 @@ morebytes:
else else
{ {
/* write out the surrogates */ /* write out the surrogates */
ch -= HALF_BASE; *(myTarget++) = U16_LEAD(ch);
*(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
*(myOffsets++) = offsetNum; *(myOffsets++) = offsetNum;
ch = (ch & HALF_MASK) + SURROGATE_LOW_START; ch = U16_TRAIL(ch);
if (myTarget < targetLimit) if (myTarget < targetLimit)
{ {
*(myTarget++) = (UChar)ch; *(myTarget++) = (UChar)ch;
@ -616,10 +555,9 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
UConverter *cnv; UConverter *cnv;
const uint8_t *sourceInitial; const uint8_t *sourceInitial;
const uint8_t *source; const uint8_t *source;
uint16_t extraBytesToWrite;
uint8_t myByte; uint8_t myByte;
UChar32 ch; UChar32 ch;
int8_t i, isLegalSequence; int8_t i;
/* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
@ -633,14 +571,14 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
} }
myByte = (uint8_t)*(source++); myByte = (uint8_t)*(source++);
if (myByte < 0x80) if (U8_IS_SINGLE(myByte))
{ {
args->source = (const char *)source; args->source = (const char *)source;
return (UChar32)myByte; return (UChar32)myByte;
} }
extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte]; uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte);
if (extraBytesToWrite == 0) { if (countTrailBytes == 0) {
cnv->toUBytes[0] = myByte; cnv->toUBytes[0] = myByte;
cnv->toULength = 1; cnv->toULength = 1;
*err = U_ILLEGAL_CHAR_FOUND; *err = U_ILLEGAL_CHAR_FOUND;
@ -649,15 +587,17 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
} }
/*The byte sequence is longer than the buffer area passed*/ /*The byte sequence is longer than the buffer area passed*/
if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit) if (((const char *)source + countTrailBytes) > args->sourceLimit)
{ {
/* check if all of the remaining bytes are trail bytes */ /* check if all of the remaining bytes are trail bytes */
uint16_t extraBytesToWrite = countTrailBytes + 1;
cnv->toUBytes[0] = myByte; cnv->toUBytes[0] = myByte;
i = 1; i = 1;
*err = U_TRUNCATED_CHAR_FOUND; *err = U_TRUNCATED_CHAR_FOUND;
while(source < (const uint8_t *)args->sourceLimit) { while(source < (const uint8_t *)args->sourceLimit) {
if(U8_IS_TRAIL(myByte = *source)) { uint8_t b = *source;
cnv->toUBytes[i++] = myByte; if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) {
cnv->toUBytes[i++] = b;
++source; ++source;
} else { } else {
/* error even before we run out of input */ /* error even before we run out of input */
@ -670,81 +610,28 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
return 0xffff; return 0xffff;
} }
isLegalSequence = 1;
ch = myByte << 6; ch = myByte << 6;
switch(extraBytesToWrite) if(countTrailBytes == 2) {
{ uint8_t t1 = *source, t2;
/* note: code falls through cases! (sic)*/ if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) {
case 6: args->source = (const char *)(source + 1);
ch += (myByte = *source); return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3];
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
} }
++source; } else if(countTrailBytes == 1) {
U_FALLTHROUGH; uint8_t t1 = *source;
case 5: if(U8_IS_TRAIL(t1)) {
ch += (myByte = *source); args->source = (const char *)(source + 1);
ch <<= 6; return (ch + t1) - offsetsFromUTF8[2];
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
} }
++source; } else { // countTrailBytes == 3
U_FALLTHROUGH; uint8_t t1 = *source, t2, t3;
case 4: if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) &&
ch += (myByte = *source); U8_IS_TRAIL(t3 = *++source)) {
ch <<= 6; args->source = (const char *)(source + 1);
if (!U8_IS_TRAIL(myByte)) return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4];
{
isLegalSequence = 0;
break;
} }
++source;
U_FALLTHROUGH;
case 3:
ch += (myByte = *source);
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
}
++source;
U_FALLTHROUGH;
case 2:
ch += (myByte = *source);
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
}
++source;
};
ch -= offsetsFromUTF8[extraBytesToWrite];
args->source = (const char *)source;
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
*/
if (isLegalSequence &&
(uint32_t)ch <= MAXIMUM_UTF &&
(uint32_t)ch >= utf8_minChar32[extraBytesToWrite] &&
!U_IS_SURROGATE(ch)
) {
return ch; /* return the code point */
} }
args->source = (const char *)source;
for(i = 0; sourceInitial < source; ++i) { for(i = 0; sourceInitial < source; ++i) {
cnv->toUBytes[i] = *sourceInitial++; cnv->toUBytes[i] = *sourceInitial++;
@ -757,14 +644,6 @@ U_CDECL_END
/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ /* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
static const UChar32
utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
static const UChar32
utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
U_CDECL_BEGIN U_CDECL_BEGIN
/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ /* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
static void U_CALLCONV static void U_CALLCONV
@ -812,39 +691,35 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
*pErrorCode=U_USING_DEFAULT_WARNING; *pErrorCode=U_USING_DEFAULT_WARNING;
return; return;
} else { } else {
/* // Use a single counter for source and target, counting the minimum of
* Use a single counter for source and target, counting the minimum of // the source length and the target capacity.
* the source length and the target capacity. // Let the standard converter handle edge cases.
* As a result, the source length is checked only once per multi-byte
* character instead of twice.
*
* Make sure that the last byte sequence is complete, or else
* stop just before it.
* (The longest legal byte sequence has 3 trail bytes.)
* Count oldToULength (number of source bytes from a previous buffer)
* into the source length but reduce the source index by toULimit
* while going back over trail bytes in order to not go back into
* the bytes that will be read for finishing a partial
* sequence from the previous buffer.
* Let the standard converter handle edge cases.
*/
int32_t i;
if(count>targetCapacity) { if(count>targetCapacity) {
count=targetCapacity; count=targetCapacity;
} }
i=0; // The conversion loop checks count>0 only once per 1/2/3-byte character.
while(i<3 && i<(count-toULimit)) { // If the buffer ends with a truncated 2- or 3-byte sequence,
b=source[count-oldToULength-i-1]; // then we reduce the count to stop before that,
if(U8_IS_TRAIL(b)) { // and collect the remaining bytes after the conversion loop.
++i; {
} else { // Do not go back into the bytes that will be read for finishing a partial
if(i<U8_COUNT_TRAIL_BYTES(b)) { // sequence from the previous buffer.
/* stop converting before the lead byte if there are not enough trail bytes for it */ int32_t length=count-toULimit;
count-=i+1; if(length>0) {
uint8_t b1=*(sourceLimit-1);
if(U8_IS_SINGLE(b1)) {
// common ASCII character
} else if(U8_IS_TRAIL(b1) && length>=2) {
uint8_t b2=*(sourceLimit-2);
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
// truncated 3-byte sequence
count-=2;
}
} else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--count;
} }
break;
} }
} }
} }
@ -859,17 +734,17 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
/* conversion loop */ /* conversion loop */
while(count>0) { while(count>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
*target++=b; *target++=b;
--count; --count;
continue; continue;
} else { } else {
if(b>0xe0) { if(b>=0xe0) {
if( /* handle U+1000..U+D7FF inline */ if( /* handle U+0800..U+FFFF inline */
(t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) || b<0xf0 &&
(b==0xed && (t1 <= 0x9f))) && U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
(t2=source[1]) >= 0x80 && t2 <= 0xbf U8_IS_TRAIL(t2=source[1])
) { ) {
source+=2; source+=2;
*target++=b; *target++=b;
@ -878,10 +753,10 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
count-=3; count-=3;
continue; continue;
} }
} else if(b<0xe0) { } else {
if( /* handle U+0080..U+07FF inline */ if( /* handle U+0080..U+07FF inline */
b>=0xc2 && b>=0xc2 &&
(t1=*source) >= 0x80 && t1 <= 0xbf U8_IS_TRAIL(t1=*source)
) { ) {
++source; ++source;
*target++=b; *target++=b;
@ -889,30 +764,18 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
count-=2; count-=2;
continue; continue;
} }
} else if(b==0xe0) {
if( /* handle U+0800..U+0FFF inline */
(t1=source[0]) >= 0xa0 && t1 <= 0xbf &&
(t2=source[1]) >= 0x80 && t2 <= 0xbf
) {
source+=2;
*target++=b;
*target++=t1;
*target++=t2;
count-=3;
continue;
}
} }
/* handle "complicated" and error cases, and continuing partial characters */ /* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0; oldToULength=0;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b; c=b;
moreBytes: moreBytes:
while(toULength<toULimit) { while(toULength<toULimit) {
if(source<sourceLimit) { if(source<sourceLimit) {
b=*source; b=*source;
if(U8_IS_TRAIL(b)) { if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source; ++source;
++toULength; ++toULength;
c=(c<<6)+b; c=(c<<6)+b;
@ -934,18 +797,7 @@ moreBytes:
} }
} }
if( toULength==toULimit && /* consumed all trail bytes */ if(toULength!=toULimit) {
(toULength==3 || toULength==2) && /* BMP */
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
/* legal byte sequence for BMP code point */
} else if(
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* legal byte sequence for supplementary code point */
} else {
/* error handling: illegal UTF-8 byte sequence */ /* error handling: illegal UTF-8 byte sequence */
source-=(toULength-oldToULength); source-=(toULength-oldToULength);
while(oldToULength<toULength) { while(oldToULength<toULength) {
@ -979,7 +831,7 @@ moreBytes:
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
} else { } else {
b=*source; b=*source;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES(b);
if(toULimit>(sourceLimit-source)) { if(toULimit>(sourceLimit-source)) {
/* collect a truncated byte sequence */ /* collect a truncated byte sequence */
toULength=0; toULength=0;

Просмотреть файл

@ -23,6 +23,7 @@
#include "unicode/utf8.h" #include "unicode/utf8.h"
#include "ucnv_bld.h" #include "ucnv_bld.h"
#include "ucnv_cnv.h" #include "ucnv_cnv.h"
#include "ustr_imp.h"
/* control optimizations according to the platform */ /* control optimizations according to the platform */
#define LATIN1_UNROLL_FROM_UNICODE 1 #define LATIN1_UNROLL_FROM_UNICODE 1
@ -374,7 +375,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) { while(source<sourceLimit) {
if(targetCapacity>0) { if(targetCapacity>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
*target++=(uint8_t)b; *target++=(uint8_t)b;
--targetCapacity; --targetCapacity;
@ -409,7 +410,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++; utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
utf8->toULength=1; utf8->toULength=1;
utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1; utf8->mode=U8_COUNT_BYTES(b);
} }
/* write back the updated pointers */ /* write back the updated pointers */

Просмотреть файл

@ -59,6 +59,7 @@
#include "cmemory.h" #include "cmemory.h"
#include "cstring.h" #include "cstring.h"
#include "umutex.h" #include "umutex.h"
#include "ustr_imp.h"
/* control optimizations according to the platform */ /* control optimizations according to the platform */
#define MBCS_UNROLL_SINGLE_TO_BMP 1 #define MBCS_UNROLL_SINGLE_TO_BMP 1
@ -5011,13 +5012,9 @@ ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
/* MBCS-from-UTF-8 conversion functions ------------------------------------- */ /* MBCS-from-UTF-8 conversion functions ------------------------------------- */
/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
static const UChar32
utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */ /* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
static const UChar32 static const UChar32
utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
static void U_CALLCONV static void U_CALLCONV
ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
@ -5037,7 +5034,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint8_t b, t1, t2; uint8_t b, t1, t2;
uint32_t asciiRoundtrips; uint32_t asciiRoundtrips;
uint16_t value, minValue; uint16_t value, minValue = 0;
UBool hasSupplementary; UBool hasSupplementary;
/* set up the local pointers */ /* set up the local pointers */
@ -5075,28 +5072,27 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
toULength=oldToULength=toULimit=0; toULength=oldToULength=toULimit=0;
} }
/* // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
* Make sure that the last byte sequence before sourceLimit is complete // If the buffer ends with a truncated 2- or 3-byte sequence,
* or runs into a lead byte. // then we reduce the sourceLimit to before that,
* Do not go back into the bytes that will be read for finishing a partial // and collect the remaining bytes after the conversion loop.
* sequence from the previous buffer.
* In the conversion loop compare source with sourceLimit only once
* per multi-byte character.
*/
{ {
int32_t i, length; // Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
for(i=0; i<3 && i<length;) { if(length>0) {
b=*(sourceLimit-i-1); uint8_t b1=*(sourceLimit-1);
if(U8_IS_TRAIL(b)) { if(U8_IS_SINGLE(b1)) {
++i; // common ASCII character
} else { } else if(U8_IS_TRAIL(b1) && length>=2) {
if(i<U8_COUNT_TRAIL_BYTES(b)) { uint8_t b2=*(sourceLimit-2);
/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */ if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
sourceLimit-=i+1; // truncated 3-byte sequence
sourceLimit-=2;
} }
break; } else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--sourceLimit;
} }
} }
} }
@ -5130,7 +5126,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) { while(source<sourceLimit) {
if(targetCapacity>0) { if(targetCapacity>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
*target++=(uint8_t)b; *target++=(uint8_t)b;
@ -5185,7 +5181,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
/* handle "complicated" and error cases, and continuing partial characters */ /* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0; oldToULength=0;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b; c=b;
moreBytes: moreBytes:
while(toULength<toULimit) { while(toULength<toULimit) {
@ -5198,7 +5194,7 @@ moreBytes:
*/ */
if(source<(uint8_t *)pToUArgs->sourceLimit) { if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source; b=*source;
if(U8_IS_TRAIL(b)) { if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source; ++source;
++toULength; ++toULength;
c=(c<<6)+b; c=(c<<6)+b;
@ -5220,22 +5216,18 @@ moreBytes:
} }
} }
if( toULength==toULimit && /* consumed all trail bytes */ if(toULength==toULimit) {
(toULength==3 || toULength==2) && /* BMP */ c-=utf8_offsets[toULength];
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && if(toULength<=3) { /* BMP */
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
} else if(
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
value=0;
} else {
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
} else {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
value=0;
} else {
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
}
} }
} else { } else {
/* error handling: illegal UTF-8 byte sequence */ /* error handling: illegal UTF-8 byte sequence */
@ -5310,7 +5302,7 @@ moreBytes:
source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
c=utf8->toUBytes[0]=b=*source++; c=utf8->toUBytes[0]=b=*source++;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES(b);
while(source<sourceLimit) { while(source<sourceLimit) {
utf8->toUBytes[toULength++]=b=*source++; utf8->toUBytes[toULength++]=b=*source++;
c=(c<<6)+b; c=(c<<6)+b;
@ -5344,7 +5336,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint32_t stage2Entry; uint32_t stage2Entry;
uint32_t asciiRoundtrips; uint32_t asciiRoundtrips;
uint16_t value; uint16_t value = 0;
UBool hasSupplementary; UBool hasSupplementary;
/* set up the local pointers */ /* set up the local pointers */
@ -5375,28 +5367,27 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
toULength=oldToULength=toULimit=0; toULength=oldToULength=toULimit=0;
} }
/* // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
* Make sure that the last byte sequence before sourceLimit is complete // If the buffer ends with a truncated 2- or 3-byte sequence,
* or runs into a lead byte. // then we reduce the sourceLimit to before that,
* Do not go back into the bytes that will be read for finishing a partial // and collect the remaining bytes after the conversion loop.
* sequence from the previous buffer.
* In the conversion loop compare source with sourceLimit only once
* per multi-byte character.
*/
{ {
int32_t i, length; // Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
for(i=0; i<3 && i<length;) { if(length>0) {
b=*(sourceLimit-i-1); uint8_t b1=*(sourceLimit-1);
if(U8_IS_TRAIL(b)) { if(U8_IS_SINGLE(b1)) {
++i; // common ASCII character
} else { } else if(U8_IS_TRAIL(b1) && length>=2) {
if(i<U8_COUNT_TRAIL_BYTES(b)) { uint8_t b2=*(sourceLimit-2);
/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */ if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
sourceLimit-=i+1; // truncated 3-byte sequence
sourceLimit-=2;
} }
break; } else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--sourceLimit;
} }
} }
} }
@ -5412,7 +5403,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) { while(source<sourceLimit) {
if(targetCapacity>0) { if(targetCapacity>0) {
b=*source++; b=*source++;
if((int8_t)b>=0) { if(U8_IS_SINGLE(b)) {
/* convert ASCII */ /* convert ASCII */
if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
*target++=b; *target++=b;
@ -5426,13 +5417,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} }
} }
} else { } else {
if(b>0xe0) { if(b>=0xe0) {
if( /* handle U+1000..U+D7FF inline */ if( /* handle U+0800..U+D7FF inline */
(((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) || b<=0xed && // do not assume maxFastUChar>0xd7ff
(b==0xed && (t1 <= 0x1f))) && U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
(t2=(uint8_t)(source[1]-0x80)) <= 0x3f (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
) { ) {
c=((b&0xf)<<6)|t1; c=((b&0xf)<<6)|(t1&0x3f);
source+=2; source+=2;
value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2); value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
if(value==0) { if(value==0) {
@ -5442,7 +5433,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} else { } else {
c=-1; c=-1;
} }
} else if(b<0xe0) { } else {
if( /* handle U+0080..U+07FF inline */ if( /* handle U+0080..U+07FF inline */
b>=0xc2 && b>=0xc2 &&
(t1=(uint8_t)(*source-0x80)) <= 0x3f (t1=(uint8_t)(*source-0x80)) <= 0x3f
@ -5457,15 +5448,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} else { } else {
c=-1; c=-1;
} }
} else {
c=-1;
} }
if(c<0) { if(c<0) {
/* handle "complicated" and error cases, and continuing partial characters */ /* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0; oldToULength=0;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b; c=b;
moreBytes: moreBytes:
while(toULength<toULimit) { while(toULength<toULimit) {
@ -5478,7 +5467,7 @@ moreBytes:
*/ */
if(source<(uint8_t *)pToUArgs->sourceLimit) { if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source; b=*source;
if(U8_IS_TRAIL(b)) { if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source; ++source;
++toULength; ++toULength;
c=(c<<6)+b; c=(c<<6)+b;
@ -5500,22 +5489,18 @@ moreBytes:
} }
} }
if( toULength==toULimit && /* consumed all trail bytes */ if(toULength==toULimit) {
(toULength==3 || toULength==2) && /* BMP */ c-=utf8_offsets[toULength];
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && if(toULength<=3) { /* BMP */
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
} else if(
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
stage2Entry=0;
} else {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c); stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
} else {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
stage2Entry=0;
} else {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
}
} }
} else { } else {
/* error handling: illegal UTF-8 byte sequence */ /* error handling: illegal UTF-8 byte sequence */
@ -5620,7 +5605,7 @@ unassigned:
source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
c=utf8->toUBytes[0]=b=*source++; c=utf8->toUBytes[0]=b=*source++;
toULength=1; toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1; toULimit=U8_COUNT_BYTES(b);
while(source<sourceLimit) { while(source<sourceLimit) {
utf8->toUBytes[toULength++]=b=*source++; utf8->toUBytes[toULength++]=b=*source++;
c=(c<<6)+b; c=(c<<6)+b;

Просмотреть файл

@ -25,6 +25,7 @@
#include "uenumimp.h" #include "uenumimp.h"
#include "uhash.h" #include "uhash.h"
#include "hash.h" #include "hash.h"
#include "uinvchar.h"
#include "uresimp.h" #include "uresimp.h"
#include "ulist.h" #include "ulist.h"
#include "ureslocs.h" #include "ureslocs.h"
@ -545,93 +546,97 @@ U_CAPI int32_t U_EXPORT2
ucurr_forLocale(const char* locale, ucurr_forLocale(const char* locale,
UChar* buff, UChar* buff,
int32_t buffCapacity, int32_t buffCapacity,
UErrorCode* ec) UErrorCode* ec) {
{ if (U_FAILURE(*ec)) { return 0; }
int32_t resLen = 0; if (buffCapacity < 0 || (buff == nullptr && buffCapacity > 0)) {
const UChar* s = NULL; *ec = U_ILLEGAL_ARGUMENT_ERROR;
if (ec != NULL && U_SUCCESS(*ec)) { return 0;
if ((buff && buffCapacity) || !buffCapacity) { }
UErrorCode localStatus = U_ZERO_ERROR;
char id[ULOC_FULLNAME_CAPACITY];
if ((resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus))) {
// there is a currency keyword. Try to see if it's valid
if(buffCapacity > resLen) {
/* Normalize the currency keyword value to upper case. */
T_CString_toUpperCase(id);
u_charsToUChars(id, buff, resLen);
}
} else {
// get country or country_variant in `id'
uint32_t variantType = idForLocale(locale, id, sizeof(id), ec);
if (U_FAILURE(*ec)) { char currency[4]; // ISO currency codes are alpha3 codes.
return 0; UErrorCode localStatus = U_ZERO_ERROR;
} int32_t resLen = uloc_getKeywordValue(locale, "currency",
currency, UPRV_LENGTHOF(currency), &localStatus);
if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) {
if (resLen < buffCapacity) {
T_CString_toUpperCase(currency);
u_charsToUChars(currency, buff, resLen);
}
return u_terminateUChars(buff, buffCapacity, resLen, ec);
}
// get country or country_variant in `id'
char id[ULOC_FULLNAME_CAPACITY];
uint32_t variantType = idForLocale(locale, id, UPRV_LENGTHOF(id), ec);
if (U_FAILURE(*ec)) {
return 0;
}
#if !UCONFIG_NO_SERVICE #if !UCONFIG_NO_SERVICE
const UChar* result = CReg::get(id); const UChar* result = CReg::get(id);
if (result) { if (result) {
if(buffCapacity > u_strlen(result)) { if(buffCapacity > u_strlen(result)) {
u_strcpy(buff, result); u_strcpy(buff, result);
} }
return u_strlen(result); resLen = u_strlen(result);
} return u_terminateUChars(buff, buffCapacity, resLen, ec);
}
#endif #endif
// Remove variants, which is only needed for registration. // Remove variants, which is only needed for registration.
char *idDelim = strchr(id, VAR_DELIM); char *idDelim = uprv_strchr(id, VAR_DELIM);
if (idDelim) { if (idDelim) {
idDelim[0] = 0; idDelim[0] = 0;
} }
// Look up the CurrencyMap element in the root bundle. const UChar* s = NULL; // Currency code from data file.
UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); if (id[0] == 0) {
UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); // No point looking in the data for an empty string.
UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); // This is what we would get.
UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus); localStatus = U_MISSING_RESOURCE_ERROR;
} else {
// Look up the CurrencyMap element in the root bundle.
localStatus = U_ZERO_ERROR;
UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
// Get the second item when PREEURO is requested, and this is a known Euro country.
// If the requested variant is PREEURO, and this isn't a Euro country,
// assume that the country changed over to the Euro in the future.
// This is probably an old version of ICU that hasn't been updated yet.
// The latest currency is probably correct.
if (U_SUCCESS(localStatus)) {
if ((variantType & VARIANT_IS_PREEURO) && u_strcmp(s, EUR_STR) == 0) {
currencyReq = ures_getByIndex(countryArray, 1, currencyReq, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus); s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
} else if ((variantType & VARIANT_IS_EURO)) {
/* s = EUR_STR;
Get the second item when PREEURO is requested, and this is a known Euro country.
If the requested variant is PREEURO, and this isn't a Euro country, assume
that the country changed over to the Euro in the future. This is probably
an old version of ICU that hasn't been updated yet. The latest currency is
probably correct.
*/
if (U_SUCCESS(localStatus)) {
if ((variantType & VARIANT_IS_PREEURO) && u_strcmp(s, EUR_STR) == 0) {
currencyReq = ures_getByIndex(countryArray, 1, currencyReq, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
}
else if ((variantType & VARIANT_IS_EURO)) {
s = EUR_STR;
}
}
ures_close(countryArray);
ures_close(currencyReq);
if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0)
{
// We don't know about it. Check to see if we support the variant.
uloc_getParent(locale, id, sizeof(id), ec);
*ec = U_USING_FALLBACK_WARNING;
return ucurr_forLocale(id, buff, buffCapacity, ec);
}
else if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
// There is nothing to fallback to. Report the failure/warning if possible.
*ec = localStatus;
}
if (U_SUCCESS(*ec)) {
if(buffCapacity > resLen) {
u_strcpy(buff, s);
}
}
} }
return u_terminateUChars(buff, buffCapacity, resLen, ec); }
} else { ures_close(currencyReq);
*ec = U_ILLEGAL_ARGUMENT_ERROR; ures_close(countryArray);
}
if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) {
// We don't know about it. Check to see if we support the variant.
uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec);
*ec = U_USING_FALLBACK_WARNING;
// TODO: Loop over the shortened id rather than recursing and
// looking again for a currency keyword.
return ucurr_forLocale(id, buff, buffCapacity, ec);
}
if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
// There is nothing to fallback to. Report the failure/warning if possible.
*ec = localStatus;
}
if (U_SUCCESS(*ec)) {
if(buffCapacity > resLen) {
u_strcpy(buff, s);
} }
} }
return resLen; return u_terminateUChars(buff, buffCapacity, resLen, ec);
} }
// end registration // end registration
@ -648,7 +653,16 @@ static UBool fallback(char *loc) {
return FALSE; return FALSE;
} }
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status); if (uprv_strcmp(loc, "en_GB") == 0) {
// HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en"
// in order to consume the correct data strings. This hack will be removed
// when proper data sink loading is implemented here.
// NOTE: "001" adds 1 char over "GB". However, both call sites allocate
// arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001).
uprv_strcpy(loc + 3, "001");
} else {
uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
}
/* /*
char *i = uprv_strrchr(loc, '_'); char *i = uprv_strrchr(loc, '_');
if (i == NULL) { if (i == NULL) {
@ -2216,6 +2230,7 @@ ucurr_countCurrencies(const char* locale,
UErrorCode localStatus = U_ZERO_ERROR; UErrorCode localStatus = U_ZERO_ERROR;
char id[ULOC_FULLNAME_CAPACITY]; char id[ULOC_FULLNAME_CAPACITY];
uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus); uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus);
// get country or country_variant in `id' // get country or country_variant in `id'
/*uint32_t variantType =*/ idForLocale(locale, id, sizeof(id), ec); /*uint32_t variantType =*/ idForLocale(locale, id, sizeof(id), ec);

Просмотреть файл

@ -206,6 +206,8 @@ setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to ca
return didUpdate; return didUpdate;
} }
#if U_PLATFORM_HAS_WINUWP_API == 0
static UBool static UBool
setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) { setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
UDataMemory tData; UDataMemory tData;
@ -215,6 +217,8 @@ setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCod
return setCommonICUData(&tData, FALSE, pErrorCode); return setCommonICUData(&tData, FALSE, pErrorCode);
} }
#endif
static const char * static const char *
findBasename(const char *path) { findBasename(const char *path) {
const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR); const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
@ -982,7 +986,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
/* init path iterator for individual files */ /* init path iterator for individual files */
UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode); UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
while((pathBuffer = iter.next(pErrorCode))) while((pathBuffer = iter.next(pErrorCode)) != NULL)
{ {
#ifdef UDATA_DEBUG #ifdef UDATA_DEBUG
fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer); fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
@ -1165,7 +1169,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) { if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
altSepPath.append(path, *pErrorCode); altSepPath.append(path, *pErrorCode);
char *p; char *p;
while((p=uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR))) { while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR; *p = U_FILE_SEP_CHAR;
} }
#if defined (UDATA_DEBUG) #if defined (UDATA_DEBUG)

Просмотреть файл

@ -79,14 +79,14 @@
* prime number while being less than a power of two. * prime number while being less than a power of two.
*/ */
static const int32_t PRIMES[] = { static const int32_t PRIMES[] = {
13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
16777213, 33554393, 67108859, 134217689, 268435399, 536870909, 16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
1073741789, 2147483647 /*, 4294967291 */ 1073741789, 2147483647 /*, 4294967291 */
}; };
#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES) #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
#define DEFAULT_PRIME_INDEX 3 #define DEFAULT_PRIME_INDEX 4
/* These ratios are tuned to the PRIMES array such that a resize /* These ratios are tuned to the PRIMES array such that a resize
* places the table back into the zone of non-resizing. That is, * places the table back into the zone of non-resizing. That is,
@ -231,7 +231,7 @@ _uhash_allocate(UHashtable *hash,
emptytok.pointer = NULL; /* Only one of these two is needed */ emptytok.pointer = NULL; /* Only one of these two is needed */
emptytok.integer = 0; /* but we don't know which one. */ emptytok.integer = 0; /* but we don't know which one. */
limit = p + hash->length; limit = p + hash->length;
while (p < limit) { while (p < limit) {
p->key = emptytok; p->key = emptytok;
@ -247,7 +247,7 @@ _uhash_allocate(UHashtable *hash,
static UHashtable* static UHashtable*
_uhash_init(UHashtable *result, _uhash_init(UHashtable *result,
UHashFunction *keyHash, UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
int32_t primeIndex, int32_t primeIndex,
@ -275,7 +275,7 @@ _uhash_init(UHashtable *result,
} }
static UHashtable* static UHashtable*
_uhash_create(UHashFunction *keyHash, _uhash_create(UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
int32_t primeIndex, int32_t primeIndex,
@ -415,7 +415,7 @@ _uhash_rehash(UHashtable *hash, UErrorCode *status) {
if (U_FAILURE(*status)) { if (U_FAILURE(*status)) {
hash->elements = old; hash->elements = old;
hash->length = oldLength; hash->length = oldLength;
return; return;
} }
@ -536,7 +536,7 @@ _uhash_put(UHashtable *hash,
********************************************************************/ ********************************************************************/
U_CAPI UHashtable* U_EXPORT2 U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash, uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
UErrorCode *status) { UErrorCode *status) {
@ -545,7 +545,7 @@ uhash_open(UHashFunction *keyHash,
} }
U_CAPI UHashtable* U_EXPORT2 U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash, uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
int32_t size, int32_t size,
@ -562,7 +562,7 @@ uhash_openSize(UHashFunction *keyHash,
U_CAPI UHashtable* U_EXPORT2 U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *fillinResult, uhash_init(UHashtable *fillinResult,
UHashFunction *keyHash, UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
UErrorCode *status) { UErrorCode *status) {
@ -570,6 +570,22 @@ uhash_init(UHashtable *fillinResult,
return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
} }
/**
 * Initialize a caller-supplied UHashtable, deriving its initial length from
 * a requested size.
 *
 * The requested size is mapped to an index into the PRIMES table: the first
 * entry that is >= size is chosen, and if no entry is large enough the last
 * entry is used. All other arguments are forwarded unchanged to _uhash_init,
 * whose result is returned as-is.
 *
 * @param fillinResult The hash table structure to initialize.
 * @param keyHash      The key hashing function.
 * @param keyComp      The key comparison function.
 * @param valueComp    The value comparison function.
 * @param size         The requested initial capacity.
 * @param status       Receives any error reported by _uhash_init.
 * @return Whatever _uhash_init returns for the selected prime index.
 */
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *fillinResult,
               UHashFunction *keyHash,
               UKeyComparator *keyComp,
               UValueComparator *valueComp,
               int32_t size,
               UErrorCode *status) {
    // Scan the prime table from the front; never advance past its last slot,
    // so an oversized request falls back to the largest available prime.
    const int32_t lastIndex = PRIMES_LENGTH - 1;
    int32_t primeIndex = 0;
    for (; primeIndex < lastIndex; ++primeIndex) {
        if (PRIMES[primeIndex] >= size) {
            break;  // first prime that can hold `size` entries
        }
    }
    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, primeIndex, status);
}
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash) { uhash_close(UHashtable *hash) {
if (hash == NULL) { if (hash == NULL) {
@ -604,7 +620,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
hash->keyComparator = fn; hash->keyComparator = fn;
return result; return result;
} }
U_CAPI UValueComparator *U_EXPORT2 U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
UValueComparator *result = hash->valueComparator; UValueComparator *result = hash->valueComparator;
hash->valueComparator = fn; hash->valueComparator = fn;
@ -630,7 +646,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
_uhash_internalSetResizePolicy(hash, policy); _uhash_internalSetResizePolicy(hash, policy);
hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
_uhash_rehash(hash, &status); _uhash_rehash(hash, &status);
} }
@ -844,7 +860,7 @@ uhash_hashUChars(const UHashTok key) {
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key) { uhash_hashChars(const UHashTok key) {
const char *s = (const char *)key.pointer; const char *s = (const char *)key.pointer;
return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s)); return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, uprv_strlen(s)));
} }
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
@ -853,7 +869,7 @@ uhash_hashIChars(const UHashTok key) {
return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s)); return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
} }
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
int32_t count1, count2, pos, i; int32_t count1, count2, pos, i;
@ -886,14 +902,14 @@ uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
if(count1!=count2){ if(count1!=count2){
return FALSE; return FALSE;
} }
pos=UHASH_FIRST; pos=UHASH_FIRST;
for(i=0; i<count1; i++){ for(i=0; i<count1; i++){
const UHashElement* elem1 = uhash_nextElement(hash1, &pos); const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
const UHashTok key1 = elem1->key; const UHashTok key1 = elem1->key;
const UHashTok val1 = elem1->value; const UHashTok val1 = elem1->value;
/* here the keys are not compared, instead the key from hash1 is used to fetch /* here the keys are not compared, instead the key from hash1 is used to fetch
* value from hash2. If the hashes are equal then both hashes should * value from hash2. If the hashes are equal then both hashes should
* contain equal values for the same key! * contain equal values for the same key!
*/ */
const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1)); const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));

Просмотреть файл

@ -154,7 +154,7 @@ struct UHashtable {
* If NULL won't do anything */ * If NULL won't do anything */
/* Size parameters */ /* Size parameters */
int32_t count; /* The number of key-value pairs in this table. int32_t count; /* The number of key-value pairs in this table.
* 0 <= count <= length. In practice we * 0 <= count <= length. In practice we
* never let count == length (see code). */ * never let count == length (see code). */
@ -162,12 +162,12 @@ struct UHashtable {
* and values. Must be prime. */ * and values. Must be prime. */
/* Rehashing thresholds */ /* Rehashing thresholds */
int32_t highWaterMark; /* If count > highWaterMark, rehash */ int32_t highWaterMark; /* If count > highWaterMark, rehash */
int32_t lowWaterMark; /* If count < lowWaterMark, rehash */ int32_t lowWaterMark; /* If count < lowWaterMark, rehash */
float highWaterRatio; /* 0..1; high water as a fraction of length */ float highWaterRatio; /* 0..1; high water as a fraction of length */
float lowWaterRatio; /* 0..1; low water as a fraction of length */ float lowWaterRatio; /* 0..1; low water as a fraction of length */
int8_t primeIndex; /* Index into our prime table for length. int8_t primeIndex; /* Index into our prime table for length.
* length == PRIMES[primeIndex] */ * length == PRIMES[primeIndex] */
UBool allocated; /* Was this UHashtable allocated? */ UBool allocated; /* Was this UHashtable allocated? */
@ -190,7 +190,7 @@ U_CDECL_END
* @return A pointer to a UHashtable, or 0 if an error occurred. * @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize * @see uhash_openSize
*/ */
U_CAPI UHashtable* U_EXPORT2 U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash, uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
@ -207,7 +207,7 @@ uhash_open(UHashFunction *keyHash,
* @return A pointer to a UHashtable, or 0 if an error occurred. * @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_open * @see uhash_open
*/ */
U_CAPI UHashtable* U_EXPORT2 U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash, uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
@ -224,18 +224,37 @@ uhash_openSize(UHashFunction *keyHash,
* @return A pointer to a UHashtable, or 0 if an error occurred. * @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize * @see uhash_openSize
*/ */
U_CAPI UHashtable* U_EXPORT2 U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *hash, uhash_init(UHashtable *hash,
UHashFunction *keyHash, UHashFunction *keyHash,
UKeyComparator *keyComp, UKeyComparator *keyComp,
UValueComparator *valueComp, UValueComparator *valueComp,
UErrorCode *status); UErrorCode *status);
/**
* Initialize an existing UHashtable, choosing its initial length from a
* requested size.
* @param hash The caller-supplied UHashtable structure to initialize.
* @param keyHash A pointer to the key hashing function. Must not be
* NULL.
* @param keyComp A pointer to the function that compares keys. Must
* not be NULL.
* @param valueComp A pointer to the function that compares values.
* @param size The initial capacity of this hash table. The implementation
* uses the first entry of its internal table of primes that is >= size,
* or the largest entry if no entry is that large.
* @param status A pointer to an UErrorCode to receive any errors.
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
* @see uhash_init
*/
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *hash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
UErrorCode *status);
/** /**
* Close a UHashtable, releasing the memory used. * Close a UHashtable, releasing the memory used.
* @param hash The UHashtable to close. If hash is NULL no operation is performed. * @param hash The UHashtable to close. If hash is NULL no operation is performed.
*/ */
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash); uhash_close(UHashtable *hash);
@ -246,7 +265,7 @@ uhash_close(UHashtable *hash);
* @param fn the function to be used hash keys; must not be NULL * @param fn the function to be used hash keys; must not be NULL
* @return the previous key hasher; non-NULL * @return the previous key hasher; non-NULL
*/ */
U_CAPI UHashFunction *U_EXPORT2 U_CAPI UHashFunction *U_EXPORT2
uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn); uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
/** /**
@ -256,7 +275,7 @@ uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
* @param fn the function to be used compare keys; must not be NULL * @param fn the function to be used compare keys; must not be NULL
* @return the previous key comparator; non-NULL * @return the previous key comparator; non-NULL
*/ */
U_CAPI UKeyComparator *U_EXPORT2 U_CAPI UKeyComparator *U_EXPORT2
uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn); uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
/** /**
@ -266,7 +285,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
* @param fn the function to be used to compare values; must not be NULL * @param fn the function to be used to compare values; must not be NULL
* @return the previous value comparator; non-NULL * @return the previous value comparator; non-NULL
*/ */
U_CAPI UValueComparator *U_EXPORT2 U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn); uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
/** /**
@ -279,7 +298,7 @@ uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
* @param fn the function to be used delete keys, or NULL * @param fn the function to be used delete keys, or NULL
* @return the previous key deleter; may be NULL * @return the previous key deleter; may be NULL
*/ */
U_CAPI UObjectDeleter *U_EXPORT2 U_CAPI UObjectDeleter *U_EXPORT2
uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn); uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
/** /**
@ -292,7 +311,7 @@ uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
* @param fn the function to be used delete values, or NULL * @param fn the function to be used delete values, or NULL
* @return the previous value deleter; may be NULL * @return the previous value deleter; may be NULL
*/ */
U_CAPI UObjectDeleter *U_EXPORT2 U_CAPI UObjectDeleter *U_EXPORT2
uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn); uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
/** /**
@ -302,7 +321,7 @@ uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
* @param hash The UHashtable to set * @param hash The UHashtable to set
* @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED} * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
*/ */
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy); uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
/** /**
@ -310,7 +329,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
* @param hash The UHashtable to query. * @param hash The UHashtable to query.
* @return The number of key-value pairs stored in hash. * @return The number of key-value pairs stored in hash.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_count(const UHashtable *hash); uhash_count(const UHashtable *hash);
/** /**
@ -326,7 +345,7 @@ uhash_count(const UHashtable *hash);
* @return The previous value, or NULL if none. * @return The previous value, or NULL if none.
* @see uhash_get * @see uhash_get
*/ */
U_CAPI void* U_EXPORT2 U_CAPI void* U_EXPORT2
uhash_put(UHashtable *hash, uhash_put(UHashtable *hash,
void *key, void *key,
void *value, void *value,
@ -344,7 +363,7 @@ uhash_put(UHashtable *hash,
* @return The previous value, or NULL if none. * @return The previous value, or NULL if none.
* @see uhash_get * @see uhash_get
*/ */
U_CAPI void* U_EXPORT2 U_CAPI void* U_EXPORT2
uhash_iput(UHashtable *hash, uhash_iput(UHashtable *hash,
int32_t key, int32_t key,
void* value, void* value,
@ -362,7 +381,7 @@ uhash_iput(UHashtable *hash,
* @return The previous value, or 0 if none. * @return The previous value, or 0 if none.
* @see uhash_get * @see uhash_get
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_puti(UHashtable *hash, uhash_puti(UHashtable *hash,
void* key, void* key,
int32_t value, int32_t value,
@ -380,7 +399,7 @@ uhash_puti(UHashtable *hash,
* @return The previous value, or 0 if none. * @return The previous value, or 0 if none.
* @see uhash_get * @see uhash_get
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_iputi(UHashtable *hash, uhash_iputi(UHashtable *hash,
int32_t key, int32_t key,
int32_t value, int32_t value,
@ -393,8 +412,8 @@ uhash_iputi(UHashtable *hash,
* @param key A pointer key stored in a hashtable * @param key A pointer key stored in a hashtable
* @return The requested item, or NULL if not found. * @return The requested item, or NULL if not found.
*/ */
U_CAPI void* U_EXPORT2 U_CAPI void* U_EXPORT2
uhash_get(const UHashtable *hash, uhash_get(const UHashtable *hash,
const void *key); const void *key);
/** /**
@ -404,7 +423,7 @@ uhash_get(const UHashtable *hash,
* @param key An integer key stored in a hashtable * @param key An integer key stored in a hashtable
* @return The requested item, or NULL if not found. * @return The requested item, or NULL if not found.
*/ */
U_CAPI void* U_EXPORT2 U_CAPI void* U_EXPORT2
uhash_iget(const UHashtable *hash, uhash_iget(const UHashtable *hash,
int32_t key); int32_t key);
@ -415,7 +434,7 @@ uhash_iget(const UHashtable *hash,
* @param key A pointer key stored in a hashtable * @param key A pointer key stored in a hashtable
* @return The requested item, or 0 if not found. * @return The requested item, or 0 if not found.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_geti(const UHashtable *hash, uhash_geti(const UHashtable *hash,
const void* key); const void* key);
/** /**
@ -425,7 +444,7 @@ uhash_geti(const UHashtable *hash,
* @param key An integer key stored in a hashtable * @param key An integer key stored in a hashtable
* @return The requested item, or 0 if not found. * @return The requested item, or 0 if not found.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_igeti(const UHashtable *hash, uhash_igeti(const UHashtable *hash,
int32_t key); int32_t key);
@ -435,7 +454,7 @@ uhash_igeti(const UHashtable *hash,
* @param key A key stored in a hashtable * @param key A key stored in a hashtable
* @return The item removed, or NULL if not found. * @return The item removed, or NULL if not found.
*/ */
U_CAPI void* U_EXPORT2 U_CAPI void* U_EXPORT2
uhash_remove(UHashtable *hash, uhash_remove(UHashtable *hash,
const void *key); const void *key);
@ -445,7 +464,7 @@ uhash_remove(UHashtable *hash,
* @param key An integer key stored in a hashtable * @param key An integer key stored in a hashtable
* @return The item removed, or NULL if not found. * @return The item removed, or NULL if not found.
*/ */
U_CAPI void* U_EXPORT2 U_CAPI void* U_EXPORT2
uhash_iremove(UHashtable *hash, uhash_iremove(UHashtable *hash,
int32_t key); int32_t key);
@ -455,7 +474,7 @@ uhash_iremove(UHashtable *hash,
* @param key A key stored in a hashtable * @param key A key stored in a hashtable
* @return The item removed, or 0 if not found. * @return The item removed, or 0 if not found.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_removei(UHashtable *hash, uhash_removei(UHashtable *hash,
const void* key); const void* key);
@ -465,7 +484,7 @@ uhash_removei(UHashtable *hash,
* @param key An integer key stored in a hashtable * @param key An integer key stored in a hashtable
* @return The item removed, or 0 if not found. * @return The item removed, or 0 if not found.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_iremovei(UHashtable *hash, uhash_iremovei(UHashtable *hash,
int32_t key); int32_t key);
@ -473,7 +492,7 @@ uhash_iremovei(UHashtable *hash,
* Remove all items from a UHashtable. * Remove all items from a UHashtable.
* @param hash The target UHashtable. * @param hash The target UHashtable.
*/ */
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable *hash); uhash_removeAll(UHashtable *hash);
/** /**
@ -487,7 +506,7 @@ uhash_removeAll(UHashtable *hash);
* @param key A key stored in a hashtable * @param key A key stored in a hashtable
* @return a hash element, or NULL if the key is not found. * @return a hash element, or NULL if the key is not found.
*/ */
U_CAPI const UHashElement* U_EXPORT2 U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable *hash, const void* key); uhash_find(const UHashtable *hash, const void* key);
/** /**
@ -510,7 +529,7 @@ uhash_find(const UHashtable *hash, const void* key);
* @return a hash element, or NULL if no further key-value pairs * @return a hash element, or NULL if no further key-value pairs
* exist in the table. * exist in the table.
*/ */
U_CAPI const UHashElement* U_EXPORT2 U_CAPI const UHashElement* U_EXPORT2
uhash_nextElement(const UHashtable *hash, uhash_nextElement(const UHashtable *hash,
int32_t *pos); int32_t *pos);
@ -525,7 +544,7 @@ uhash_nextElement(const UHashtable *hash,
* modified. * modified.
* @return the value that was removed. * @return the value that was removed.
*/ */
U_CAPI void* U_EXPORT2 U_CAPI void* U_EXPORT2
uhash_removeElement(UHashtable *hash, const UHashElement* e); uhash_removeElement(UHashtable *hash, const UHashElement* e);
/******************************************************************** /********************************************************************
@ -537,7 +556,7 @@ uhash_removeElement(UHashtable *hash, const UHashElement* e);
* @param i The given integer * @param i The given integer
* @return a UHashTok for an integer. * @return a UHashTok for an integer.
*/ */
/*U_CAPI UHashTok U_EXPORT2 /*U_CAPI UHashTok U_EXPORT2
uhash_toki(int32_t i);*/ uhash_toki(int32_t i);*/
/** /**
@ -545,7 +564,7 @@ uhash_toki(int32_t i);*/
* @param p The given pointer * @param p The given pointer
* @return a UHashTok for a pointer. * @return a UHashTok for a pointer.
*/ */
/*U_CAPI UHashTok U_EXPORT2 /*U_CAPI UHashTok U_EXPORT2
uhash_tokp(void* p);*/ uhash_tokp(void* p);*/
/******************************************************************** /********************************************************************
@ -559,7 +578,7 @@ uhash_tokp(void* p);*/
* @param key The string (const UChar*) to hash. * @param key The string (const UChar*) to hash.
* @return A hash code for the key. * @return A hash code for the key.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key); uhash_hashUChars(const UHashTok key);
/** /**
@ -569,7 +588,7 @@ uhash_hashUChars(const UHashTok key);
* @param key The string (const char*) to hash. * @param key The string (const char*) to hash.
* @return A hash code for the key. * @return A hash code for the key.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key); uhash_hashChars(const UHashTok key);
/** /**
@ -589,7 +608,7 @@ uhash_hashIChars(const UHashTok key);
* @param key2 The string for comparison * @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise. * @return true if key1 and key2 are equal, return false otherwise.
*/ */
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uhash_compareUChars(const UHashTok key1, const UHashTok key2); uhash_compareUChars(const UHashTok key1, const UHashTok key2);
/** /**
@ -599,7 +618,7 @@ uhash_compareUChars(const UHashTok key1, const UHashTok key2);
* @param key2 The string for comparison * @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise. * @return true if key1 and key2 are equal, return false otherwise.
*/ */
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uhash_compareChars(const UHashTok key1, const UHashTok key2); uhash_compareChars(const UHashTok key1, const UHashTok key2);
/** /**
@ -609,7 +628,7 @@ uhash_compareChars(const UHashTok key1, const UHashTok key2);
* @param key2 The string for comparison * @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise. * @return true if key1 and key2 are equal, return false otherwise.
*/ */
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uhash_compareIChars(const UHashTok key1, const UHashTok key2); uhash_compareIChars(const UHashTok key1, const UHashTok key2);
/******************************************************************** /********************************************************************
@ -621,7 +640,7 @@ uhash_compareIChars(const UHashTok key1, const UHashTok key2);
* @param key The string (const char*) to hash. * @param key The string (const char*) to hash.
* @return A hash code for the key. * @return A hash code for the key.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key); uhash_hashUnicodeString(const UElement key);
/** /**
@ -630,7 +649,7 @@ uhash_hashUnicodeString(const UElement key);
* @param key The string (const char*) to hash. * @param key The string (const char*) to hash.
* @return A hash code for the key. * @return A hash code for the key.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key); uhash_hashCaselessUnicodeString(const UElement key);
/******************************************************************** /********************************************************************
@ -642,7 +661,7 @@ uhash_hashCaselessUnicodeString(const UElement key);
* @param key The string (const char*) to hash. * @param key The string (const char*) to hash.
* @return A hash code for the key. * @return A hash code for the key.
*/ */
U_CAPI int32_t U_EXPORT2 U_CAPI int32_t U_EXPORT2
uhash_hashLong(const UHashTok key); uhash_hashLong(const UHashTok key);
/** /**
@ -651,7 +670,7 @@ uhash_hashLong(const UHashTok key);
* @param key2 The integer for comparison * @param key2 The integer for comparison
* @return true if key1 and key2 are equal, return false otherwise * @return true if key1 and key2 are equal, return false otherwise
*/ */
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uhash_compareLong(const UHashTok key1, const UHashTok key2); uhash_compareLong(const UHashTok key1, const UHashTok key2);
/******************************************************************** /********************************************************************
@ -662,7 +681,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2);
* Deleter for Hashtable objects. * Deleter for Hashtable objects.
* @param obj The object to be deleted * @param obj The object to be deleted
*/ */
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
uhash_deleteHashtable(void *obj); uhash_deleteHashtable(void *obj);
/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */ /* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
@ -673,7 +692,7 @@ uhash_deleteHashtable(void *obj);
* @param hash2 * @param hash2
* @return true if the hashtables are equal and false if not. * @return true if the hashtables are equal and false if not.
*/ */
U_CAPI UBool U_EXPORT2 U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2); uhash_equals(const UHashtable* hash1, const UHashtable* hash2);

Просмотреть файл

@ -573,7 +573,7 @@ uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
uint8_t *orig_dst = dst; uint8_t *orig_dst = dst;
if(n==-1) { if(n==-1) {
n = uprv_strlen((const char*)src)+1; /* copy NUL */ n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
} }
/* copy non-null */ /* copy non-null */
while(*src && n>0) { while(*src && n>0) {
@ -594,7 +594,7 @@ uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
uint8_t *orig_dst = dst; uint8_t *orig_dst = dst;
if(n==-1) { if(n==-1) {
n = uprv_strlen((const char*)src)+1; /* copy NUL */ n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
} }
/* copy non-null */ /* copy non-null */
while(*src && n>0) { while(*src && n>0) {

Просмотреть файл

@ -252,7 +252,7 @@ U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t
s = (const char *)ulist_getNext((UList *)(en->context)); s = (const char *)ulist_getNext((UList *)(en->context));
if (s != NULL && resultLength != NULL) { if (s != NULL && resultLength != NULL) {
*resultLength = uprv_strlen(s); *resultLength = static_cast<int32_t>(uprv_strlen(s));
} }
return s; return s;
} }

Просмотреть файл

@ -98,6 +98,7 @@ locale_getKeywords(const char *localeID,
*/ */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */ /* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = { static const char * const LANGUAGES[] = {
"aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb", "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
"af", "afh", "agq", "ain", "ak", "akk", "akz", "ale", "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
@ -109,7 +110,7 @@ static const char * const LANGUAGES[] = {
"bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla", "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
"bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
"brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv", "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
"ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg", "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
"ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp", "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
"chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh", "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
"cs", "csb", "cu", "cv", "cy", "cs", "csb", "cu", "cv", "cy",
@ -213,6 +214,7 @@ static const char* const REPLACEMENT_LANGUAGES[]={
*/ */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */ /* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = { static const char * const LANGUAGES_3[] = {
"aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb", "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
"afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale", "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
@ -224,7 +226,7 @@ static const char * const LANGUAGES_3[] = {
"bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
"bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
"brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv", "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
"cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg", "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
"cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp", "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
"chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh", "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
"ces", "csb", "chu", "chv", "cym", "ces", "csb", "chu", "chv", "cym",
@ -529,14 +531,16 @@ static const VariantMap VARIANT_MAP[] = {
#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
#define _ConvertBCP47(finalID, id, buffer, length,err) \ #define _ConvertBCP47(finalID, id, buffer, length,err) \
if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
finalID=id; \ finalID=id; \
if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
} else { \ } else { \
finalID=buffer; \ finalID=buffer; \
} }
/* Gets the size of the shortest subtag in the given localeID. */ /* Gets the size of the shortest subtag in the given localeID. */
static int32_t getShortestSubtagLength(const char *localeID) { static int32_t getShortestSubtagLength(const char *localeID) {
int32_t localeIDLength = uprv_strlen(localeID); int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
int32_t length = localeIDLength; int32_t length = localeIDLength;
int32_t tmpLength = 0; int32_t tmpLength = 0;
int32_t i; int32_t i;
@ -2486,7 +2490,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr,"%02d: %s\n", i, acceptList[i]); fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
#endif #endif
while((l=uenum_next(availableLocales, NULL, status))) { while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr," %s\n", l); fprintf(stderr," %s\n", l);
#endif #endif
@ -2526,7 +2530,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr,"Try: [%s]", fallbackList[i]); fprintf(stderr,"Try: [%s]", fallbackList[i]);
#endif #endif
while((l=uenum_next(availableLocales, NULL, status))) { while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG) #if defined(ULOC_DEBUG)
fprintf(stderr," %s\n", l); fprintf(stderr," %s\n", l);
#endif #endif

Просмотреть файл

@ -1022,7 +1022,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
no known mapping. This implementation normalizes the no known mapping. This implementation normalizes the
the value to lower case the value to lower case
*/ */
int32_t bcpValueLen = uprv_strlen(bcpValue); int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
if (bcpValueLen < extBufCapacity) { if (bcpValueLen < extBufCapacity) {
uprv_strcpy(pExtBuf, bcpValue); uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf); T_CString_toLowerCase(pExtBuf);
@ -1288,7 +1288,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
bufIdx++; bufIdx++;
} }
len = uprv_strlen(attr->attribute); len = static_cast<int32_t>(uprv_strlen(attr->attribute));
uprv_memcpy(buf + bufIdx, attr->attribute, len); uprv_memcpy(buf + bufIdx, attr->attribute, len);
bufIdx += len; bufIdx += len;
@ -1841,7 +1841,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
int32_t newTagLength; int32_t newTagLength;
grandfatheredLen = tagLen; /* back up for output parsedLen */ grandfatheredLen = tagLen; /* back up for output parsedLen */
newTagLength = uprv_strlen(GRANDFATHERED[i+1]); newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
if (tagLen < newTagLength) { if (tagLen < newTagLength) {
uprv_free(tagBuf); uprv_free(tagBuf);
tagBuf = (char*)uprv_malloc(newTagLength + 1); tagBuf = (char*)uprv_malloc(newTagLength + 1);

Просмотреть файл

@ -102,10 +102,7 @@
{ {
HANDLE map; HANDLE map;
HANDLE file; HANDLE file;
SECURITY_ATTRIBUTES mappingAttributes;
SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL;
SECURITY_DESCRIPTOR securityDesc;
UDataMemory_init(pData); /* Clear the output struct. */ UDataMemory_init(pData); /* Clear the output struct. */
/* open the input file */ /* open the input file */
@ -143,6 +140,11 @@
This is required for multiuser systems on Windows 2000 SP4 and beyond */ This is required for multiuser systems on Windows 2000 SP4 and beyond */
// TODO: UWP does not have this function and I do not think it is required? // TODO: UWP does not have this function and I do not think it is required?
#if U_PLATFORM_HAS_WINUWP_API == 0 #if U_PLATFORM_HAS_WINUWP_API == 0
SECURITY_ATTRIBUTES mappingAttributes;
SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL;
SECURITY_DESCRIPTOR securityDesc;
if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) { if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) {
/* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */ /* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */
if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) { if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) {

Просмотреть файл

@ -132,7 +132,7 @@ umtx_condBroadcast(UConditionVar *condition) {
} }
U_CAPI void U_EXPORT2 U_CAPI void U_EXPORT2
umtx_condSignal(UConditionVar *condition) { umtx_condSignal(UConditionVar * /* condition */) {
// Function not implemented. There is no immediate requirement from ICU to have it. // Function not implemented. There is no immediate requirement from ICU to have it.
// Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be // Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be
// changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function // changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function

Просмотреть файл

@ -250,7 +250,7 @@ public:
virtual int32_t next(void) = 0; virtual int32_t next(void) = 0;
/** /**
* Return character index of the current interator position within the text. * Return character index of the current iterator position within the text.
* @return The boundary most recently returned. * @return The boundary most recently returned.
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
@ -277,7 +277,7 @@ public:
virtual int32_t preceding(int32_t offset) = 0; virtual int32_t preceding(int32_t offset) = 0;
/** /**
* Return true if the specfied position is a boundary position. * Return true if the specified position is a boundary position.
* As a side effect, the current position of the iterator is set * As a side effect, the current position of the iterator is set
* to the first boundary position at or following the specified offset. * to the first boundary position at or following the specified offset.
* @param offset the offset to check. * @param offset the offset to check.
@ -331,7 +331,7 @@ public:
* @param fillInVec an array to be filled in with the status values. * @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes * @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the * the function to return the number of status values, in the
* normal way, without attemtping to store any values. * normal way, without attempting to store any values.
* @param status receives error codes. * @param status receives error codes.
* @return The number of rule status values from rules that determined * @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator. * the most recent boundary returned by the break iterator.
@ -469,7 +469,7 @@ public:
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/** /**
* Get name of the object for the desired Locale, in the desired langauge. * Get name of the object for the desired Locale, in the desired language.
* @param objectLocale must be from getAvailableLocales. * @param objectLocale must be from getAvailableLocales.
* @param displayLocale specifies the desired locale for output. * @param displayLocale specifies the desired locale for output.
* @param name the fill-in parameter of the return value * @param name the fill-in parameter of the return value
@ -482,7 +482,7 @@ public:
UnicodeString& name); UnicodeString& name);
/** /**
* Get name of the object for the desired Locale, in the langauge of the * Get name of the object for the desired Locale, in the language of the
* default locale. * default locale.
* @param objectLocale must be from getMatchingLocales * @param objectLocale must be from getMatchingLocales
* @param name the fill-in parameter of the return value * @param name the fill-in parameter of the return value
@ -629,10 +629,12 @@ protected:
/** @internal */ /** @internal */
BreakIterator(); BreakIterator();
/** @internal */ /** @internal */
BreakIterator (const BreakIterator &other) : UObject(other) {} BreakIterator (const BreakIterator &other);
#ifndef U_HIDE_INTERNAL_API #ifndef U_HIDE_INTERNAL_API
/** @internal */ /** @internal */
BreakIterator (const Locale& valid, const Locale& actual); BreakIterator (const Locale& valid, const Locale &actual);
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
BreakIterator &operator = (const BreakIterator &other);
#endif /* U_HIDE_INTERNAL_API */ #endif /* U_HIDE_INTERNAL_API */
private: private:
@ -640,12 +642,6 @@ private:
/** @internal */ /** @internal */
char actualLocale[ULOC_FULLNAME_CAPACITY]; char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY]; char validLocale[ULOC_FULLNAME_CAPACITY];
/**
* The assignment operator has no real implementation.
* It's provided to make the compiler happy. Do not call.
*/
BreakIterator& operator=(const BreakIterator&);
}; };
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
@ -661,5 +657,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif // _BRKITER #endif // BRKITER_H
//eof //eof

Просмотреть файл

@ -126,8 +126,8 @@ public:
virtual void Flush(); virtual void Flush();
private: private:
ByteSink(const ByteSink &); // copy constructor not implemented ByteSink(const ByteSink &) = delete;
ByteSink &operator=(const ByteSink &); // assignment operator not implemented ByteSink &operator=(const ByteSink &) = delete;
}; };
// ------------------------------------------------------------- // -------------------------------------------------------------
@ -217,9 +217,10 @@ private:
int32_t size_; int32_t size_;
int32_t appended_; int32_t appended_;
UBool overflowed_; UBool overflowed_;
CheckedArrayByteSink(); ///< default constructor not implemented
CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented CheckedArrayByteSink() = delete;
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
}; };
/** /**
@ -236,6 +237,21 @@ class StringByteSink : public ByteSink {
* @stable ICU 4.2 * @stable ICU 4.2
*/ */
StringByteSink(StringClass* dest) : dest_(dest) { } StringByteSink(StringClass* dest) : dest_(dest) { }
#ifndef U_HIDE_DRAFT_API
/**
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
* reserve() is not called when initialAppendCapacity is zero or negative, or when
* dest's spare capacity (capacity() - length()) is already at least initialAppendCapacity.
*
* @param dest pointer to string object to append to
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
* @draft ICU 60
*/
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
// The unsigned cast is only evaluated after the > 0 check, so it never sees a negative value.
if (initialAppendCapacity > 0 &&
(uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
dest->reserve(dest->length() + initialAppendCapacity);
}
}
#endif // U_HIDE_DRAFT_API
/** /**
* Append "bytes[0,n-1]" to this. * Append "bytes[0,n-1]" to this.
* @param data the pointer to the bytes * @param data the pointer to the bytes
@ -245,9 +261,10 @@ class StringByteSink : public ByteSink {
virtual void Append(const char* data, int32_t n) { dest_->append(data, n); } virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
private: private:
StringClass* dest_; StringClass* dest_;
StringByteSink(); ///< default constructor not implemented
StringByteSink(const StringByteSink &); ///< copy constructor not implemented StringByteSink() = delete;
StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented StringByteSink(const StringByteSink &) = delete;
StringByteSink &operator=(const StringByteSink &) = delete;
}; };
U_NAMESPACE_END U_NAMESPACE_END

Просмотреть файл

@ -8,6 +8,7 @@
#define __CASEMAP_H__ #define __CASEMAP_H__
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h" #include "unicode/uobject.h"
/** /**
@ -20,6 +21,7 @@ U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API #ifndef U_HIDE_DRAFT_API
class BreakIterator; class BreakIterator;
class ByteSink;
class Edits; class Edits;
/** /**
@ -36,7 +38,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -48,7 +50,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -71,7 +74,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -83,7 +86,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -112,8 +116,10 @@ public:
* all others. (This can be modified with options bits.) * all others. (This can be modified with options bits.)
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. * U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased. * @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText()) * It is set to the source string (setText())
* and used one or more times for iteration (first() and next()). * and used one or more times for iteration (first() and next()).
@ -130,7 +136,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -159,7 +166,7 @@ public:
* The result may be longer or shorter than the original. * The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -172,7 +179,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -188,6 +196,129 @@ public:
char16_t *dest, int32_t destCapacity, Edits *edits, char16_t *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode); UErrorCode &errorCode);
/**
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToLower
* @draft ICU 60
*/
static void utf8ToLower(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
/**
* Uppercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToUpper
* @draft ICU 60
*/
static void utf8ToUpper(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Titlecases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setUText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToTitle
* @draft ICU 60
*/
static void utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Case-folds a UTF-8 string and optionally records edits.
* The folded text is written to the sink; the function itself returns nothing.
*
* Case folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* The result may be longer or shorter than the original.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8FoldCase
* @draft ICU 60
*/
static void utf8Fold(
uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
/** /**
* Lowercases a UTF-8 string and optionally records edits. * Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive. * Casing is locale-dependent and context-sensitive.
@ -195,7 +326,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -207,7 +338,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -217,7 +349,7 @@ public:
* @see ucasemap_utf8ToLower * @see ucasemap_utf8ToLower
* @draft ICU 59 * @draft ICU 59
*/ */
static int32_t utf8ToLower( static int32_t utf8ToLower(
const char *locale, uint32_t options, const char *locale, uint32_t options,
const char *src, int32_t srcLength, const char *src, int32_t srcLength,
char *dest, int32_t destCapacity, Edits *edits, char *dest, int32_t destCapacity, Edits *edits,
@ -230,7 +362,7 @@ public:
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if * @param dest A buffer for the result string. The result will be NUL-terminated if
@ -242,7 +374,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -271,10 +404,12 @@ public:
* all others. (This can be modified with options bits.) * all others. (This can be modified with options bits.)
* *
* @param locale The locale ID. ("" = root locale, NULL = default locale.) * @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. * U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased. * @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText()) * It is set to the source string (setUText())
* and used one or more times for iteration (first() and next()). * and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used * If NULL, then a word break iterator for the locale is used
* (or something equivalent). * (or something equivalent).
@ -289,7 +424,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.
@ -317,7 +453,7 @@ public:
* The result may be longer or shorter than the original. * The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap. * The source string and the destination buffer must not overlap.
* *
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string. * @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -330,7 +466,8 @@ public:
* @param edits Records edits for index mapping, working with styled text, * @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any). * and getting only changes (if any).
* The Edits contents is undefined if any error occurs. * The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL. * This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value * @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call. * which must not indicate a failure before the function call.
* @return The length of the result string, if successful. * @return The length of the result string, if successful.

Просмотреть файл

@ -95,45 +95,45 @@ private:
return reinterpret_cast<char16_t *>(t); return reinterpret_cast<char16_t *>(t);
} }
char16_t *p; char16_t *p_;
#else #else
union { union {
char16_t *cp; char16_t *cp;
uint16_t *up; uint16_t *up;
wchar_t *wp; wchar_t *wp;
} u; } u_;
#endif #endif
}; };
#ifdef U_ALIASING_BARRIER #ifdef U_ALIASING_BARRIER
Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
#endif #endif
Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
Char16Ptr::~Char16Ptr() { Char16Ptr::~Char16Ptr() {
U_ALIASING_BARRIER(p); U_ALIASING_BARRIER(p_);
} }
char16_t *Char16Ptr::get() const { return p; } char16_t *Char16Ptr::get() const { return p_; }
#else #else
Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
#endif #endif
Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
Char16Ptr::~Char16Ptr() {} Char16Ptr::~Char16Ptr() {}
char16_t *Char16Ptr::get() const { return u.cp; } char16_t *Char16Ptr::get() const { return u_.cp; }
#endif #endif
@ -203,45 +203,45 @@ private:
return reinterpret_cast<const char16_t *>(t); return reinterpret_cast<const char16_t *>(t);
} }
const char16_t *p; const char16_t *p_;
#else #else
union { union {
const char16_t *cp; const char16_t *cp;
const uint16_t *up; const uint16_t *up;
const wchar_t *wp; const wchar_t *wp;
} u; } u_;
#endif #endif
}; };
#ifdef U_ALIASING_BARRIER #ifdef U_ALIASING_BARRIER
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
#endif #endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
ConstChar16Ptr::~ConstChar16Ptr() { ConstChar16Ptr::~ConstChar16Ptr() {
U_ALIASING_BARRIER(p); U_ALIASING_BARRIER(p_);
} }
const char16_t *ConstChar16Ptr::get() const { return p; } const char16_t *ConstChar16Ptr::get() const { return p_; }
#else #else
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF #if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
#endif #endif
#if U_SIZEOF_WCHAR_T==2 #if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
#endif #endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
ConstChar16Ptr::~ConstChar16Ptr() {} ConstChar16Ptr::~ConstChar16Ptr() {}
const char16_t *ConstChar16Ptr::get() const { return u.cp; } const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif #endif

Просмотреть файл

@ -140,7 +140,7 @@
* <tr> * <tr>
* <td>Number Formatting</td> * <td>Number Formatting</td>
* <td>unum.h</td> * <td>unum.h</td>
* <td>icu::NumberFormat</td> * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td>
* </tr> * </tr>
* <tr> * <tr>
* <td>Number Spellout<br/>(Rule Based Number Formatting)</td> * <td>Number Spellout<br/>(Rule Based Number Formatting)</td>

Просмотреть файл

@ -36,19 +36,61 @@ public:
* @draft ICU 59 * @draft ICU 59
*/ */
Edits() : Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
errorCode(U_ZERO_ERROR) {} errorCode_(U_ZERO_ERROR) {}
/**
* Copy constructor.
* Copies the scalar state (length, delta, number of changes, error code) from other,
* then calls copyArray(other) to take over the edit records.
* @param other source edits
* @draft ICU 60
*/
Edits(const Edits &other) :
// Always start on the built-in stack buffer with its fixed capacity.
// NOTE(review): copyArray() is defined elsewhere; presumably it copies the records
// and switches to a larger buffer when other.length does not fit -- confirm.
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
delta(other.delta), numChanges(other.numChanges),
errorCode_(other.errorCode_) {
copyArray(other);
}
/**
* Move constructor, might leave src empty.
* This object will have the same contents that the source object had.
* Copies the scalar state (length, delta, number of changes, error code) from src,
* then calls moveArray(src) to take over the edit records.
* @param src source edits
* @draft ICU 60
*/
Edits(Edits &&src) U_NOEXCEPT :
// Always start on the built-in stack buffer with its fixed capacity.
// NOTE(review): moveArray() is defined elsewhere; presumably it either copies the
// records into this buffer or adopts src's heap buffer -- confirm.
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
delta(src.delta), numChanges(src.numChanges),
errorCode_(src.errorCode_) {
moveArray(src);
}
/** /**
* Destructor. * Destructor.
* @draft ICU 59 * @draft ICU 59
*/ */
~Edits(); ~Edits();
/**
* Assignment operator.
* @param other source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(const Edits &other);
/**
* Move assignment operator, might leave src empty.
* This object will have the same contents that the source object had.
* The behavior is undefined if *this and src are the same object.
* @param src source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(Edits &&src) U_NOEXCEPT;
/** /**
* Resets the data but may not release memory. * Resets the data but may not release memory.
* @draft ICU 59 * @draft ICU 59
*/ */
void reset(); void reset() U_NOEXCEPT;
/** /**
* Adds a record for an unchanged segment of text. * Adds a record for an unchanged segment of text.
@ -66,6 +108,9 @@ public:
* Sets the UErrorCode if an error occurred while recording edits. * Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode. * Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code. * Normally called from inside ICU string transformation functions, not user code.
* @param outErrorCode Set to an error code if it does not contain one already
* and an error occurred while recording edits.
* Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode) * @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59 * @draft ICU 59
*/ */
@ -81,7 +126,13 @@ public:
* @return TRUE if there are any change edits * @return TRUE if there are any change edits
* @draft ICU 59 * @draft ICU 59
*/ */
UBool hasChanges() const; UBool hasChanges() const { return numChanges != 0; }
/**
* @return the number of change edits
* @draft ICU 60
*/
int32_t numberOfChanges() const { return numChanges; }
/** /**
* Access to the list of edits. * Access to the list of edits.
@ -90,6 +141,15 @@ public:
* @draft ICU 59 * @draft ICU 59
*/ */
struct U_COMMON_API Iterator U_FINAL : public UMemory { struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
* Default constructor, empty iterator.
* The iterator refers to no edits array (nullptr), all indexes, lengths and
* counters are zero, all flags are FALSE, and the iteration direction is
* "initial" (dir == 0).
* @draft ICU 60
*/
Iterator() :
array(nullptr), index(0), length(0),
remaining(0), onlyChanges_(FALSE), coarse(FALSE),
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
/** /**
* Copy constructor. * Copy constructor.
* @draft ICU 59 * @draft ICU 59
@ -103,6 +163,9 @@ public:
/** /**
* Advances to the next edit. * Advances to the next edit.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if there is another edit * @return TRUE if there is another edit
* @draft ICU 59 * @draft ICU 59
*/ */
@ -121,10 +184,86 @@ public:
* if the source index is out of bounds for the source string. * if the source index is out of bounds for the source string.
* *
* @param i source index * @param i source index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the source index was found * @return TRUE if the edit for the source index was found
* @draft ICU 59 * @draft ICU 59
*/ */
UBool findSourceIndex(int32_t i, UErrorCode &errorCode); UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, TRUE, errorCode) == 0;
}
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* The iterator state after this search is undefined
* if the destination index is out of bounds for the destination string.
*
* @param i destination index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the destination index was found
* @draft ICU 60
*/
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
  // FALSE selects the destination-index search; findIndex() yields 0 only when found.
  const int32_t outcome = findIndex(i, FALSE, errorCode);
  return outcome == 0;
}
/**
* Returns the destination index corresponding to the given source index.
* If the source index is inside a change edit (not at its start),
* then the destination index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
*
* (This means that indexes to the start and middle of an edit,
* for example around a grapheme cluster, are mapped to indexes
* encompassing the entire edit.
* The alternative, mapping an interior index to the start,
* would map such an interval to an empty one.)
*
* This operation will usually but not always modify this object.
* The iterator state after this search is undefined.
*
* @param i source index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return destination index; undefined if i is not in the range 0..source string length
* @draft ICU 60
*/
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* Returns the source index corresponding to the given destination index.
* If the destination index is inside a change edit (not at its start),
* then the source index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
*
* (This means that indexes to the start and middle of an edit,
* for example around a grapheme cluster, are mapped to indexes
* encompassing the entire edit.
* The alternative, mapping an interior index to the start,
* would map such an interval to an empty one.)
*
* This operation will usually but not always modify this object.
* The iterator state after this search is undefined.
*
* @param i destination index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return source index; undefined if i is not in the range 0..destination string length
* @draft ICU 60
*/
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
/** /**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones. * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
@ -167,15 +306,22 @@ public:
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
int32_t readLength(int32_t head); int32_t readLength(int32_t head);
void updateIndexes(); void updateNextIndexes();
void updatePreviousIndexes();
UBool noNext(); UBool noNext();
UBool next(UBool onlyChanges, UErrorCode &errorCode); UBool next(UBool onlyChanges, UErrorCode &errorCode);
UBool previous(UErrorCode &errorCode);
/** @return -1: error or i<0; 0: found; 1: i>=string length */
int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
const uint16_t *array; const uint16_t *array;
int32_t index, length; int32_t index, length;
// 0 if we are not within compressed equal-length changes.
// Otherwise the number of remaining changes, including the current one.
int32_t remaining; int32_t remaining;
UBool onlyChanges_, coarse; UBool onlyChanges_, coarse;
int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
UBool changed; UBool changed;
int32_t oldLength_, newLength_; int32_t oldLength_, newLength_;
int32_t srcIndex, replIndex, destIndex; int32_t srcIndex, replIndex, destIndex;
@ -219,9 +365,39 @@ public:
return Iterator(array, length, FALSE, FALSE); return Iterator(array, length, FALSE, FALSE);
} }
/**
* Merges the two input Edits and appends the result to this object.
*
* Consider two string transformations (for example, normalization and case mapping)
* where each records Edits in addition to writing an output string.<br>
* Edits ab reflect how substrings of input string a
* map to substrings of intermediate string b.<br>
* Edits bc reflect how substrings of intermediate string b
* map to substrings of output string c.<br>
* This function merges ab and bc such that the additional edits
* recorded in this object reflect how substrings of input string a
* map to substrings of output string c.
*
* If unrelated Edits are passed in where the output string of the first
* has a different length than the input string of the second,
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
*
* @param ab reflects how substrings of input string a
* map to substrings of intermediate string b.
* @param bc reflects how substrings of intermediate string b
* map to substrings of output string c.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return *this, with the merged edits appended
* @draft ICU 60
*/
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
private: private:
Edits(const Edits &) = delete; void releaseArray() U_NOEXCEPT;
Edits &operator=(const Edits &) = delete; Edits &copyArray(const Edits &other);
Edits &moveArray(Edits &src) U_NOEXCEPT;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
@ -234,7 +410,8 @@ private:
int32_t capacity; int32_t capacity;
int32_t length; int32_t length;
int32_t delta; int32_t delta;
UErrorCode errorCode; int32_t numChanges;
UErrorCode errorCode_;
uint16_t stackArray[STACK_CAPACITY]; uint16_t stackArray[STACK_CAPACITY];
}; };

Просмотреть файл

@ -55,14 +55,30 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/ */
static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of createEmptyInstance, which has
* identical behavior.
* @param status The error code.
* @return the new builder
* @deprecated ICU 60 use createEmptyInstance instead
* @see createEmptyInstance()
*/
static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
return createEmptyInstance(status);
}
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/** /**
* Construct an empty FilteredBreakIteratorBuilder. * Construct an empty FilteredBreakIteratorBuilder.
* In this state, it will not suppress any segment boundaries. * In this state, it will not suppress any segment boundaries.
* @param status The error code. * @param status The error code.
* @return the new builder * @return the new builder
* @stable ICU 56 * @draft ICU 60
*/ */
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
#endif /* U_HIDE_DRAFT_API */
/** /**
* Suppress a certain string from being the end of a segment. * Suppress a certain string from being the end of a segment.
@ -89,6 +105,20 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/ */
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of wrapIteratorWithFilter().
* The behavior is identical.
* @param adoptBreakIterator the break iterator to adopt
* @param status error code
* @return the new BreakIterator, owned by the caller.
* @deprecated ICU 60 use wrapIteratorWithFilter() instead
* @see wrapIteratorWithFilter()
*/
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/** /**
* Wrap (adopt) an existing break iterator in a new filtered instance. * Wrap (adopt) an existing break iterator in a new filtered instance.
* The resulting BreakIterator is owned by the caller. * The resulting BreakIterator is owned by the caller.
@ -96,12 +126,16 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* Note that the adoptBreakIterator is adopted by the new BreakIterator * Note that the adoptBreakIterator is adopted by the new BreakIterator
* and should no longer be used by the caller. * and should no longer be used by the caller.
* The FilteredBreakIteratorBuilder may be reused. * The FilteredBreakIteratorBuilder may be reused.
* This function is an alias for build()
* @param adoptBreakIterator the break iterator to adopt * @param adoptBreakIterator the break iterator to adopt
* @param status error code * @param status error code
* @return the new BreakIterator, owned by the caller. * @return the new BreakIterator, owned by the caller.
* @stable ICU 56 * @draft ICU 60
*/ */
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
return build(adoptBreakIterator, status);
}
#endif /* U_HIDE_DRAFT_API */
protected: protected:
/** /**

Просмотреть файл

@ -213,7 +213,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR; errorCode=U_MEMORY_ALLOCATION_ERROR;
} }
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move constructor, leaves src with isNull(). * Move constructor, leaves src with isNull().
* @param src source smart pointer * @param src source smart pointer
@ -222,7 +221,6 @@ public:
LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL; src.ptr=NULL;
} }
#endif
/** /**
* Destructor deletes the object it owns. * Destructor deletes the object it owns.
* @stable ICU 4.4 * @stable ICU 4.4
@ -230,7 +228,6 @@ public:
~LocalPointer() { ~LocalPointer() {
delete LocalPointerBase<T>::ptr; delete LocalPointerBase<T>::ptr;
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move assignment operator, leaves src with isNull(). * Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -241,7 +238,6 @@ public:
LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT { LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
return moveFrom(src); return moveFrom(src);
} }
#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/** /**
* Move assignment, leaves src with isNull(). * Move assignment, leaves src with isNull().
@ -362,7 +358,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR; errorCode=U_MEMORY_ALLOCATION_ERROR;
} }
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move constructor, leaves src with isNull(). * Move constructor, leaves src with isNull().
* @param src source smart pointer * @param src source smart pointer
@ -371,7 +366,6 @@ public:
LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL; src.ptr=NULL;
} }
#endif
/** /**
* Destructor deletes the array it owns. * Destructor deletes the array it owns.
* @stable ICU 4.4 * @stable ICU 4.4
@ -379,7 +373,6 @@ public:
~LocalArray() { ~LocalArray() {
delete[] LocalPointerBase<T>::ptr; delete[] LocalPointerBase<T>::ptr;
} }
#if U_HAVE_RVALUE_REFERENCES
/** /**
* Move assignment operator, leaves src with isNull(). * Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object. * The behavior is undefined if *this and src are the same object.
@ -390,7 +383,6 @@ public:
LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT { LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
return moveFrom(src); return moveFrom(src);
} }
#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/** /**
* Move assignment, leaves src with isNull(). * Move assignment, leaves src with isNull().
@ -492,7 +484,6 @@ public:
* @see LocalPointer * @see LocalPointer
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
#if U_HAVE_RVALUE_REFERENCES
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ #define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \ class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \ public: \
@ -526,34 +517,6 @@ public:
ptr=p; \ ptr=p; \
} \ } \
} }
#else
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \
using LocalPointerBase<Type>::operator*; \
using LocalPointerBase<Type>::operator->; \
explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
~LocalPointerClassName() { closeFunction(ptr); } \
LocalPointerClassName &moveFrom(LocalPointerClassName &src) U_NOEXCEPT { \
if (ptr != NULL) { closeFunction(ptr); } \
LocalPointerBase<Type>::ptr=src.ptr; \
src.ptr=NULL; \
return *this; \
} \
void swap(LocalPointerClassName &other) U_NOEXCEPT { \
Type *temp=LocalPointerBase<Type>::ptr; \
LocalPointerBase<Type>::ptr=other.ptr; \
other.ptr=temp; \
} \
friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
p1.swap(p2); \
} \
void adoptInstead(Type *p) { \
if (ptr != NULL) { closeFunction(ptr); } \
ptr=p; \
} \
}
#endif
U_NAMESPACE_END U_NAMESPACE_END

Просмотреть файл

@ -88,7 +88,7 @@ class UnicodeString;
* <P> * <P>
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG> * The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific. * The Variant codes are vendor and browser-specific.
* For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX. * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and * Where there are two variants, separate them with an underscore, and
* put the most important one first. For * put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with * example, a Traditional Spanish collation might be referenced, with

Просмотреть файл

@ -28,12 +28,15 @@
#if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_NORMALIZATION
#include "unicode/stringpiece.h"
#include "unicode/uniset.h" #include "unicode/uniset.h"
#include "unicode/unistr.h" #include "unicode/unistr.h"
#include "unicode/unorm2.h" #include "unicode/unorm2.h"
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
class ByteSink;
/** /**
* Unicode normalization functionality for standard Unicode normalization or * Unicode normalization functionality for standard Unicode normalization or
* for using custom mapping tables. * for using custom mapping tables.
@ -215,6 +218,35 @@ public:
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
UnicodeString &dest, UnicodeString &dest,
UErrorCode &errorCode) const = 0; UErrorCode &errorCode) const = 0;
/**
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
Edits *edits, UErrorCode &errorCode) const;
/** /**
* Appends the normalized form of the second string to the first string * Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string. * (merging them at the boundary) and returns the first string.
@ -340,6 +372,30 @@ public:
*/ */
virtual UBool virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
/**
* Tests if the UTF-8 string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
* (which is only possible for the two COMPOSE modes) this method
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
/** /**
* Tests if the string is normalized. * Tests if the string is normalized.
@ -479,7 +535,36 @@ public:
virtual UnicodeString & virtual UnicodeString &
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
UnicodeString &dest, UnicodeString &dest,
UErrorCode &errorCode) const; UErrorCode &errorCode) const U_OVERRIDE;
/**
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Appends the normalized form of the second string to the first string * Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string. * (merging them at the boundary) and returns the first string.
@ -497,7 +582,7 @@ public:
virtual UnicodeString & virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first, normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const; UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Appends the second string to the first string * Appends the second string to the first string
* (merging them at the boundary) and returns the first string. * (merging them at the boundary) and returns the first string.
@ -515,7 +600,7 @@ public:
virtual UnicodeString & virtual UnicodeString &
append(UnicodeString &first, append(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,
UErrorCode &errorCode) const; UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Gets the decomposition mapping of c. * Gets the decomposition mapping of c.
@ -529,7 +614,7 @@ public:
* @stable ICU 4.6 * @stable ICU 4.6
*/ */
virtual UBool virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const; getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/** /**
* Gets the raw decomposition mapping of c. * Gets the raw decomposition mapping of c.
@ -543,7 +628,7 @@ public:
* @stable ICU 49 * @stable ICU 49
*/ */
virtual UBool virtual UBool
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/** /**
* Performs pairwise composition of a & b and returns the composite if there is one. * Performs pairwise composition of a & b and returns the composite if there is one.
@ -556,7 +641,7 @@ public:
* @stable ICU 49 * @stable ICU 49
*/ */
virtual UChar32 virtual UChar32
composePair(UChar32 a, UChar32 b) const; composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
/** /**
* Gets the combining class of c. * Gets the combining class of c.
@ -567,7 +652,7 @@ public:
* @stable ICU 49 * @stable ICU 49
*/ */
virtual uint8_t virtual uint8_t
getCombiningClass(UChar32 c) const; getCombiningClass(UChar32 c) const U_OVERRIDE;
/** /**
* Tests if the string is normalized. * Tests if the string is normalized.
@ -581,7 +666,30 @@ public:
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Tests if the UTF-8 string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
* (which is only possible for the two COMPOSE modes) this method
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Tests if the string is normalized. * Tests if the string is normalized.
* For details see the Normalizer2 base class documentation. * For details see the Normalizer2 base class documentation.
@ -594,7 +702,7 @@ public:
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UNormalizationCheckResult virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Returns the end of the normalized substring of the input string. * Returns the end of the normalized substring of the input string.
* For details see the Normalizer2 base class documentation. * For details see the Normalizer2 base class documentation.
@ -607,7 +715,7 @@ public:
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual int32_t virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/** /**
* Tests if the character always has a normalization boundary before it, * Tests if the character always has a normalization boundary before it,
@ -617,7 +725,7 @@ public:
* @return TRUE if c has a normalization boundary before it * @return TRUE if c has a normalization boundary before it
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool hasBoundaryBefore(UChar32 c) const; virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
/** /**
* Tests if the character always has a normalization boundary after it, * Tests if the character always has a normalization boundary after it,
@ -627,7 +735,7 @@ public:
* @return TRUE if c has a normalization boundary after it * @return TRUE if c has a normalization boundary after it
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool hasBoundaryAfter(UChar32 c) const; virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
/** /**
* Tests if the character is normalization-inert. * Tests if the character is normalization-inert.
@ -636,7 +744,7 @@ public:
* @return TRUE if c is normalization-inert * @return TRUE if c is normalization-inert
* @stable ICU 4.4 * @stable ICU 4.4
*/ */
virtual UBool isInert(UChar32 c) const; virtual UBool isInert(UChar32 c) const U_OVERRIDE;
private: private:
UnicodeString & UnicodeString &
normalize(const UnicodeString &src, normalize(const UnicodeString &src,
@ -644,6 +752,12 @@ private:
USetSpanCondition spanCondition, USetSpanCondition spanCondition,
UErrorCode &errorCode) const; UErrorCode &errorCode) const;
void
normalizeUTF8(uint32_t options, const char *src, int32_t length,
ByteSink &sink, Edits *edits,
USetSpanCondition spanCondition,
UErrorCode &errorCode) const;
UnicodeString & UnicodeString &
normalizeSecondAndAppend(UnicodeString &first, normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second, const UnicodeString &second,

Просмотреть файл

@ -132,6 +132,8 @@
#define U_PF_BROWSER_NATIVE_CLIENT 4020 #define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android is based on Linux. @internal */ /** Android is based on Linux. @internal */
#define U_PF_ANDROID 4050 #define U_PF_ANDROID 4050
/** Fuchsia is a POSIX-ish platform. @internal */
#define U_PF_FUCHSIA 4100
/* Maximum value for Linux-based platform is 4499 */ /* Maximum value for Linux-based platform is 4499 */
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ /** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
#define U_PF_OS390 9000 #define U_PF_OS390 9000
@ -152,6 +154,8 @@
# include <android/api-level.h> # include <android/api-level.h>
#elif defined(__pnacl__) || defined(__native_client__) #elif defined(__pnacl__) || defined(__native_client__)
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT # define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
#elif defined(__Fuchsia__)
# define U_PLATFORM U_PF_FUCHSIA
#elif defined(linux) || defined(__linux__) || defined(__linux) #elif defined(linux) || defined(__linux__) || defined(__linux)
# define U_PLATFORM U_PF_LINUX # define U_PLATFORM U_PF_LINUX
#elif defined(__APPLE__) && defined(__MACH__) #elif defined(__APPLE__) && defined(__MACH__)
@ -192,6 +196,20 @@
# define U_PLATFORM U_PF_UNKNOWN # define U_PLATFORM U_PF_UNKNOWN
#endif #endif
/**
* \def UPRV_INCOMPLETE_CPP11_SUPPORT
* This switch turns off ICU 60 NumberFormatter code.
* By default, this switch is enabled on AIX, z/OS, and Solaris,
* which have poor C++11 support.
*
* NOTE: This switch is intended to be temporary; see #13393.
*
* @internal
*/
#ifndef UPRV_INCOMPLETE_CPP11_SUPPORT
# define UPRV_INCOMPLETE_CPP11_SUPPORT (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_SOLARIS )
#endif
/** /**
* \def CYGWINMSVC * \def CYGWINMSVC
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc. * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
@ -330,31 +348,6 @@
# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H # define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
#endif #endif
/**
* \def U_IOSTREAM_SOURCE
* Defines what support for C++ streams is available.
*
* If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
* (the ISO/IEC C++ FDIS was published in November 1997), and then
* one should qualify streams using the std namespace in ICU header
* files.
* Starting with ICU 49, this is the only supported version.
*
* If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
* available instead (in June 1985 Stroustrup published
* "An Extensible I/O Facility for C++" at the summer USENIX conference).
* Starting with ICU 49, this version is not supported any more.
*
* If U_IOSTREAM_SOURCE is 0 (or any value less than 199711),
* then C++ streams are not available and
* support for them will be silently suppressed in ICU.
*
* @internal
*/
#ifndef U_IOSTREAM_SOURCE
#define U_IOSTREAM_SOURCE 199711
#endif
/*===========================================================================*/ /*===========================================================================*/
/** @{ Compiler and environment features */ /** @{ Compiler and environment features */
/*===========================================================================*/ /*===========================================================================*/
@ -505,22 +498,6 @@ namespace std {
}; };
#endif #endif
/**
* \def U_HAVE_RVALUE_REFERENCES
* Set to 1 if the compiler supports rvalue references.
* C++11 feature, necessary for move constructor & move assignment.
* @internal
*/
#ifdef U_HAVE_RVALUE_REFERENCES
/* Use the predefined value. */
#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_rvalue_references) \
|| defined(__GXX_EXPERIMENTAL_CXX0X__) \
|| (defined(_MSC_VER) && _MSC_VER >= 1600) /* Visual Studio 2010 */
# define U_HAVE_RVALUE_REFERENCES 1
#else
# define U_HAVE_RVALUE_REFERENCES 0
#endif
/** /**
* \def U_NOEXCEPT * \def U_NOEXCEPT
* "noexcept" if supported, otherwise empty. * "noexcept" if supported, otherwise empty.
@ -871,6 +848,16 @@ namespace std {
# define U_CALLCONV U_EXPORT2 # define U_CALLCONV U_EXPORT2
#endif #endif
/**
* \def U_CALLCONV_FPTR
* Similar to U_CALLCONV, but only used on function pointers.
* @internal
*/
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
# define U_CALLCONV_FPTR U_CALLCONV
#else
# define U_CALLCONV_FPTR
#endif
/* @} */ /* @} */
#endif #endif

Просмотреть файл

@ -31,23 +31,14 @@
#include "unicode/schriter.h" #include "unicode/schriter.h"
#include "unicode/uchriter.h" #include "unicode/uchriter.h"
struct UTrie;
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
/** @internal */ /** @internal */
struct RBBIDataHeader;
class RuleBasedBreakIteratorTables;
class BreakIterator;
class RBBIDataWrapper;
class UStack;
class LanguageBreakEngine; class LanguageBreakEngine;
struct RBBIDataHeader;
class RBBIDataWrapper;
class UnhandledEngine; class UnhandledEngine;
struct RBBIStateTable; class UStack;
/** /**
* *
@ -96,19 +87,36 @@ private:
*/ */
RBBIDataWrapper *fData; RBBIDataWrapper *fData;
/** Index of the Rule {tag} values for the most recent match. /**
* The iteration state - current position, rule status for the current position,
* and whether the iterator ran off the end, yielding UBRK_DONE.
* Current position is pinned to be 0 < position <= text.length.
* Current position is always set to a boundary.
* @internal * @internal
*/ */
int32_t fLastRuleStatusIndex; /**
* The current position of the iterator. Pinned, 0 < fPosition <= text.length.
* Never has the value UBRK_DONE (-1).
*/
int32_t fPosition;
/** /**
* Rule tag value valid flag. * TODO:
* Some iterator operations don't intrinsically set the correct tag value. */
* This flag lets us lazily compute the value if we are ever asked for it. int32_t fRuleStatusIndex;
* @internal
*/
UBool fLastStatusIndexValid;
/**
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
*/
UBool fDone;
/**
* Cache of previously determined boundary positions.
*/
public: // TODO: debug, return to private.
class BreakCache;
BreakCache *fBreakCache;
private:
/** /**
* Counter for the number of characters encountered with the "dictionary" * Counter for the number of characters encountered with the "dictionary"
* flag set. * flag set.
@ -117,26 +125,11 @@ private:
uint32_t fDictionaryCharCount; uint32_t fDictionaryCharCount;
/** /**
* When a range of characters is divided up using the dictionary, the break * Cache of boundary positions within a region of text that has been
* positions that are discovered are stored here, preventing us from having * sub-divided by dictionary based breaking.
* to use either the dictionary or the state table again until the iterator
* leaves this range of text. Has the most impact for line breaking.
* @internal
*/ */
int32_t* fCachedBreakPositions; class DictionaryCache;
DictionaryCache *fDictionaryCache;
/**
* The number of elements in fCachedBreakPositions
* @internal
*/
int32_t fNumCachedBreakPositions;
/**
* if fCachedBreakPositions is not null, this indicates which item in the
* cache the current iteration position refers to
* @internal
*/
int32_t fPositionInCache;
/** /**
* *
@ -179,13 +172,11 @@ private:
*/ */
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
/** @internal */
friend class RBBIRuleBuilder; friend class RBBIRuleBuilder;
/** @internal */ /** @internal */
friend class BreakIterator; friend class BreakIterator;
public: public:
/** Default constructor. Creates an empty shell of an iterator, with no /** Default constructor. Creates an empty shell of an iterator, with no
@ -469,7 +460,10 @@ public:
virtual UBool isBoundary(int32_t offset); virtual UBool isBoundary(int32_t offset);
/** /**
* Returns the current iteration position. * Returns the current iteration position. Note that UBRK_DONE is never
* returned from this function; if iteration has run to the end of a
* string, current() will return the length of the string while
* next() will return UBRK_DONE.
* @return The current iteration position. * @return The current iteration position.
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
@ -501,6 +495,7 @@ public:
* Note: this function is not thread safe. It should not have been * Note: this function is not thread safe. It should not have been
* declared const, and the const remains only for compatibility * declared const, and the const remains only for compatibility
* reasons. (The function is logically const, but not bit-wise const). * reasons. (The function is logically const, but not bit-wise const).
* TODO: check this. Probably thread safe now.
* <p> * <p>
* @return the status from the break rule that determined the most recently * @return the status from the break rule that determined the most recently
* returned break position. * returned break position.
@ -660,46 +655,31 @@ private:
* Common initialization function, used by constructors and bufferClone. * Common initialization function, used by constructors and bufferClone.
* @internal * @internal
*/ */
void init(); void init(UErrorCode &status);
/** /**
* This method backs the iterator back up to a "safe position" in the text. * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
* This is a position that we know, without any context, must be a break position. * This locates a "Safe Position" from which the forward break rules
* The various calling methods then iterate forward from this safe position to * will operate correctly. A Safe Position is not necessarily a boundary itself.
* the appropriate position to return. (For more information, see the description *
* of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.) * @param fromPosition the position in the input text to begin the iteration.
* @param statetable state table used of moving backwards
* @internal * @internal
*/ */
int32_t handlePrevious(const RBBIStateTable *statetable); int32_t handlePrevious(int32_t fromPosition);
/** /**
* This method is the actual implementation of the next() method. All iteration * Find a rule-based boundary by running the state machine.
* vectors through here. This method initializes the state machine to state 1 * Input
* and advances through the text character by character until we reach the end * fPosition, the position in the text to begin from.
* of the text or the state machine transitions to state 0. We update our return * Output
* value every time the state machine passes through a possible end state. * fPosition: the boundary following the starting position.
* @param statetable state table used of moving forwards * fDictionaryCharCount the number of dictionary characters encountered.
* If > 0, the segment will be further subdivided
* fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
*
* @internal * @internal
*/ */
int32_t handleNext(const RBBIStateTable *statetable); int32_t handleNext();
/**
* This is the function that actually implements dictionary-based
* breaking. Covering at least the range from startPos to endPos,
* it checks for dictionary characters, and if it finds them determines
* the appropriate object to deal with them. It may cache found breaks in
* fCachedBreakPositions as it goes. It may well also look at text outside
* the range startPos to endPos.
* If going forward, endPos is the normal Unicode break result, and
* if going in reverse, startPos is the normal Unicode break result
* @param startPos The start position of a range of text
* @param endPos The end position of a range of text
* @param reverse The call is for the reverse direction
* @internal
*/
int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
/** /**
@ -710,11 +690,14 @@ private:
*/ */
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
public:
#ifndef U_HIDE_INTERNAL_API
/** /**
* @internal * Debugging function only.
* @internal
*/ */
void makeRuleStatusValid(); void dumpCache();
#endif /* U_HIDE_INTERNAL_API */
}; };
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

Просмотреть файл

@ -21,6 +21,13 @@
U_NAMESPACE_BEGIN U_NAMESPACE_BEGIN
// Forward declaration:
namespace number {
namespace impl {
class SimpleModifier;
}
}
/** /**
* Formats simple patterns like "{1} was born in {0}". * Formats simple patterns like "{1} was born in {0}".
* Minimal subset of MessageFormat; fast, simple, minimal dependencies. * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
@ -286,6 +293,9 @@ private:
UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
int32_t *offsets, int32_t offsetsLength, int32_t *offsets, int32_t offsetsLength,
UErrorCode &errorCode); UErrorCode &errorCode);
// Give access to internals to SimpleModifier for number formatting
friend class number::impl::SimpleModifier;
}; };
U_NAMESPACE_END U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,198 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// stringoptions.h
// created: 2017jun08 Markus W. Scherer
#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Bit set option bit constants for various string and character processing functions.
*/
// Case-folding options. These two are documented as option *values* (0 and 1)
// rather than option bits; the "Related definitions elsewhere" note at the end
// of this header lists ucase.h's internal _FOLD_CASE_OPTIONS_MASK as 7.
/**
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
#ifndef U_HIDE_DRAFT_API
// Titlecasing "iterator" options (draft API, hidden when U_HIDE_DRAFT_API is
// defined). Both bits (0x20, 0x40) lie inside the 0xe0 range that the note at
// the end of this header lists as ucasemap_imp.h's U_TITLECASE_ITERATOR_MASK;
// 0x80 is also covered by that mask but is not defined in this header.
// NOTE(review): presumably the mask is what detects the "multiple iterator
// options" error described below -- confirm in ucasemap_imp.h.
/**
* Titlecase the string as a whole rather than each word.
* (Titlecase only the character at index 0, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
*/
#define U_TITLECASE_WHOLE_STRING 0x20
/**
* Titlecase sentences rather than words.
* (Titlecase only the first character of each sentence, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
*/
#define U_TITLECASE_SENTENCES 0x40
#endif // U_HIDE_DRAFT_API
// Stable titlecasing option bits. 0x100 lies outside both titlecasing masks
// listed in the note at the end of this header (iterator mask 0xe0, adjustment
// mask 0x600); 0x200 lies inside the adjustment mask, together with the draft
// U_TITLECASE_ADJUST_TO_CASED (0x400).
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the character at each
* (possibly adjusted) BreakIterator index and
* lowercase all other characters up to the next iterator index.
* With this option, the other characters will not be modified.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing BreakIterator indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
* and titlecase that one.
*
* Other characters are lowercased.
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see U_TITLECASE_NO_LOWERCASE
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
#ifndef U_HIDE_DRAFT_API
// Draft option bits (hidden when U_HIDE_DRAFT_API is defined).
// U_TITLECASE_ADJUST_TO_CASED (0x400) shares the 0x600 adjustment-mask range
// with U_TITLECASE_NO_BREAK_ADJUSTMENT (0x200); see the note at the end of
// this header. The two Edits-related bits (0x2000, 0x4000) do not collide with
// the internal _STRNCMP_STYLE value (0x1000) listed in that note.
/**
* Adjust each titlecasing BreakIterator index to the next cased character.
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
* Option bit for titlecasing APIs that take an options bit set.
*
* This used to be the default index adjustment in ICU.
* Since ICU 60, the default index adjustment is to the next character that is
* a letter, number, symbol, or private use code point.
* (Uncased modifier letters are skipped.)
* The difference in behavior is small for word titlecasing,
* but the new adjustment is much better for whole-string and sentence titlecasing:
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
* @draft ICU 60
*/
#define U_TITLECASE_ADJUST_TO_CASED 0x400
/**
* Option for string transformation functions to not first reset the Edits object.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
*/
#define U_EDITS_NO_RESET 0x2000
// NOTE(review): same numeric value (0x4000) as UCASEMAP_OMIT_UNCHANGED_TEXT,
// the "@draft ICU 59" macro that this ICU update removes from ucasemap.h;
// this appears to be its replacement -- confirm against the ICU 60 change log.
/**
* Omit unchanged text when recording how source substrings
* relate to changed and unchanged result substrings.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
*/
#define U_OMIT_UNCHANGED_TEXT 0x4000
#endif // U_HIDE_DRAFT_API
// String-comparison option bits. Relative to the values listed in the note at
// the end of this header: 0x8000 is the top bit inside ucase.h's
// _STRCASECMP_OPTIONS_MASK (0xffff); 0x10000 and 0x20000 sit above that mask,
// below unormcmp.cpp's internal _COMPARE_EQUIV (0x80000) and below bit 20
// (UNORM_COMPARE_NORM_OPTIONS_SHIFT).
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
#endif // __STRINGOPTIONS_H__

Просмотреть файл

@ -256,7 +256,7 @@ protected:
/** @internal */ /** @internal */
class FinalValueNode : public Node { class FinalValueNode : public Node {
public: public:
FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {} FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual void write(StringTrieBuilder &builder); virtual void write(StringTrieBuilder &builder);
protected: protected:
@ -276,7 +276,7 @@ protected:
void setValue(int32_t v) { void setValue(int32_t v) {
hasValue=TRUE; hasValue=TRUE;
value=v; value=v;
hash=hash*37+v; hash=hash*37u+v;
} }
protected: protected:
UBool hasValue; UBool hasValue;
@ -290,7 +290,7 @@ protected:
class IntermediateValueNode : public ValueNode { class IntermediateValueNode : public ValueNode {
public: public:
IntermediateValueNode(int32_t v, Node *nextNode) IntermediateValueNode(int32_t v, Node *nextNode)
: ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); } : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(StringTrieBuilder &builder); virtual void write(StringTrieBuilder &builder);
@ -307,7 +307,7 @@ protected:
class LinearMatchNode : public ValueNode { class LinearMatchNode : public ValueNode {
public: public:
LinearMatchNode(int32_t len, Node *nextNode) LinearMatchNode(int32_t len, Node *nextNode)
: ValueNode((0x333333*37+len)*37+hashCode(nextNode)), : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
length(len), next(nextNode) {} length(len), next(nextNode) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@ -342,7 +342,7 @@ protected:
equal[length]=NULL; equal[length]=NULL;
values[length]=value; values[length]=value;
++length; ++length;
hash=(hash*37+c)*37+value; hash=(hash*37u+c)*37u+value;
} }
// Adds a unit which leads to another match node. // Adds a unit which leads to another match node.
void add(int32_t c, Node *node) { void add(int32_t c, Node *node) {
@ -350,7 +350,7 @@ protected:
equal[length]=node; equal[length]=node;
values[length]=0; values[length]=0;
++length; ++length;
hash=(hash*37+c)*37+hashCode(node); hash=(hash*37u+c)*37u+hashCode(node);
} }
protected: protected:
Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
@ -365,8 +365,8 @@ protected:
class SplitBranchNode : public BranchNode { class SplitBranchNode : public BranchNode {
public: public:
SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
: BranchNode(((0x555555*37+middleUnit)*37+ : BranchNode(((0x555555u*37u+middleUnit)*37u+
hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@ -382,7 +382,7 @@ protected:
class BranchHeadNode : public ValueNode { class BranchHeadNode : public ValueNode {
public: public:
BranchHeadNode(int32_t len, Node *subNode) BranchHeadNode(int32_t len, Node *subNode)
: ValueNode((0x666666*37+len)*37+hashCode(subNode)), : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
length(len), next(subNode) {} length(len), next(subNode) {}
virtual UBool operator==(const Node &other) const; virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual int32_t markRightEdgesFirst(int32_t edgeNumber);

Просмотреть файл

@ -23,8 +23,6 @@
#include "unicode/uchar.h" #include "unicode/uchar.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#ifndef U_HIDE_DRAFT_API
/** /**
* \file * \file
* \brief Bidi Transformations * \brief Bidi Transformations
@ -60,17 +58,17 @@
* @see UBIDI_REORDER_DEFAULT * @see UBIDI_REORDER_DEFAULT
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
* @see UBIDI_REORDER_RUNS_ONLY * @see UBIDI_REORDER_RUNS_ONLY
* @draft ICU 58 * @stable ICU 58
*/ */
typedef enum { typedef enum {
/** 0: Constant indicating a logical order. /** 0: Constant indicating a logical order.
* This is the default for input text. * This is the default for input text.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_LOGICAL = 0, UBIDI_LOGICAL = 0,
/** 1: Constant indicating a visual order. /** 1: Constant indicating a visual order.
* This is a default for output text. * This is a default for output text.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_VISUAL UBIDI_VISUAL
} UBiDiOrder; } UBiDiOrder;
@ -83,20 +81,20 @@ typedef enum {
* @see ubidi_setReorderingOptions * @see ubidi_setReorderingOptions
* @see ubidi_writeReordered * @see ubidi_writeReordered
* @see ubidi_writeReverse * @see ubidi_writeReverse
* @draft ICU 58 * @stable ICU 58
*/ */
typedef enum { typedef enum {
/** 0: Constant indicating that character mirroring should not be /** 0: Constant indicating that character mirroring should not be
* performed. * performed.
* This is the default. * This is the default.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_MIRRORING_OFF = 0, UBIDI_MIRRORING_OFF = 0,
/** 1: Constant indicating that character mirroring should be performed. /** 1: Constant indicating that character mirroring should be performed.
* This corresponds to calling <code>ubidi_writeReordered</code> or * This corresponds to calling <code>ubidi_writeReordered</code> or
* <code>ubidi_writeReverse</code> with the * <code>ubidi_writeReverse</code> with the
* <code>UBIDI_DO_MIRRORING</code> option bit set. * <code>UBIDI_DO_MIRRORING</code> option bit set.
* @draft ICU 58 * @stable ICU 58
*/ */
UBIDI_MIRRORING_ON UBIDI_MIRRORING_ON
} UBiDiMirroring; } UBiDiMirroring;
@ -104,7 +102,7 @@ typedef enum {
/** /**
* Forward declaration of the <code>UBiDiTransform</code> structure that stores * Forward declaration of the <code>UBiDiTransform</code> structure that stores
* information used by the layout transformation engine. * information used by the layout transformation engine.
* @draft ICU 58 * @stable ICU 58
*/ */
typedef struct UBiDiTransform UBiDiTransform; typedef struct UBiDiTransform UBiDiTransform;
@ -240,9 +238,9 @@ typedef struct UBiDiTransform UBiDiTransform;
* @see UBiDiMirroring * @see UBiDiMirroring
* @see ubidi_setPara * @see ubidi_setPara
* @see u_shapeArabic * @see u_shapeArabic
* @draft ICU 58 * @stable ICU 58
*/ */
U_DRAFT uint32_t U_EXPORT2 U_STABLE uint32_t U_EXPORT2
ubiditransform_transform(UBiDiTransform *pBiDiTransform, ubiditransform_transform(UBiDiTransform *pBiDiTransform,
const UChar *src, int32_t srcLength, const UChar *src, int32_t srcLength,
UChar *dest, int32_t destSize, UChar *dest, int32_t destSize,
@ -286,16 +284,16 @@ ubiditransform_transform(UBiDiTransform *pBiDiTransform,
* <code>ubiditransform_close()</code>. * <code>ubiditransform_close()</code>.
* *
* @return An empty <code>UBiDiTransform</code> object. * @return An empty <code>UBiDiTransform</code> object.
* @draft ICU 58 * @stable ICU 58
*/ */
U_DRAFT UBiDiTransform* U_EXPORT2 U_STABLE UBiDiTransform* U_EXPORT2
ubiditransform_open(UErrorCode *pErrorCode); ubiditransform_open(UErrorCode *pErrorCode);
/** /**
* Deallocates the given <code>UBiDiTransform</code> object. * Deallocates the given <code>UBiDiTransform</code> object.
* @draft ICU 58 * @stable ICU 58
*/ */
U_DRAFT void U_EXPORT2 U_STABLE void U_EXPORT2
ubiditransform_close(UBiDiTransform *pBidiTransform); ubiditransform_close(UBiDiTransform *pBidiTransform);
#if U_SHOW_CPLUSPLUS_API #if U_SHOW_CPLUSPLUS_API
@ -309,7 +307,7 @@ U_NAMESPACE_BEGIN
* *
* @see LocalPointerBase * @see LocalPointerBase
* @see LocalPointer * @see LocalPointer
* @draft ICU 58 * @stable ICU 58
*/ */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
@ -317,5 +315,4 @@ U_NAMESPACE_END
#endif #endif
#endif /* U_HIDE_DRAFT_API */
#endif #endif

Просмотреть файл

@ -230,7 +230,8 @@ typedef enum USentenceBreakTag {
* @param locale The locale specifying the text-breaking conventions. Note that * @param locale The locale specifying the text-breaking conventions. Note that
* locale keys such as "lb" and "ss" may be used to modify text break behavior, * locale keys such as "lb" and "ss" may be used to modify text break behavior,
* see general discussion of BreakIterator C API. * see general discussion of BreakIterator C API.
* @param text The text to be iterated over. * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
* used to specify the text to be iterated.
* @param textLength The number of characters in text, or -1 if null-terminated. * @param textLength The number of characters in text, or -1 if null-terminated.
* @param status A UErrorCode to receive any errors. * @param status A UErrorCode to receive any errors.
* @return A UBreakIterator for the specified locale. * @return A UBreakIterator for the specified locale.

Просмотреть файл

@ -23,6 +23,7 @@
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/localpointer.h" #include "unicode/localpointer.h"
#include "unicode/stringoptions.h"
#include "unicode/ustring.h" #include "unicode/ustring.h"
/** /**
@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
U_STABLE void U_EXPORT2 U_STABLE void U_EXPORT2
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the first cased character
* of a word and lowercase all other characters.
* With this option, the other characters will not be modified.
*
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @see UnicodeString::toTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it by looking for the next cased character, and titlecase that one.
* Other characters are lowercased.
*
* This follows Unicode 4 & 5 section 3.13 Default Case Operations:
*
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @see UnicodeString::toTitle
* @see U_TITLECASE_NO_LOWERCASE
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
/**
* Omit unchanged text when case-mapping with Edits.
*
* @see CaseMap
* @see Edits
* @draft ICU 59
*/
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
#if !UCONFIG_NO_BREAK_ITERATION #if !UCONFIG_NO_BREAK_ITERATION
/** /**
@ -251,7 +202,7 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
* The standard titlecase iterator for the root locale implements the * The standard titlecase iterator for the root locale implements the
* algorithm of Unicode TR 21. * algorithm of Unicode TR 21.
* *
* This function uses only the setUText(), first(), next() and close() methods of the * This function uses only the setText(), first() and next() methods of the
* provided break iterator. * provided break iterator.
* *
* The result may be longer or shorter than the original. * The result may be longer or shorter than the original.

Просмотреть файл

@ -26,6 +26,7 @@
#define UCHAR_H #define UCHAR_H
#include "unicode/utypes.h" #include "unicode/utypes.h"
#include "unicode/stringoptions.h"
U_CDECL_BEGIN U_CDECL_BEGIN
@ -41,7 +42,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion * @see u_getUnicodeVersion
* @stable ICU 2.0 * @stable ICU 2.0
*/ */
#define U_UNICODE_VERSION "9.0" #define U_UNICODE_VERSION "10.0"
/** /**
* \file * \file
@ -148,8 +149,9 @@ U_CDECL_BEGIN
* *
* The properties APIs are intended to reflect Unicode properties as defined * The properties APIs are intended to reflect Unicode properties as defined
* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
* For details about the properties see http://www.unicode.org/ucd/ . *
* For names of Unicode properties see the UCD file PropertyAliases.txt. * For details about the properties see
* UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
* *
* Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
* then properties marked with "new in Unicode 3.2" are not or not fully available. * then properties marked with "new in Unicode 3.2" are not or not fully available.
@ -427,12 +429,29 @@ typedef enum UProperty {
* @stable ICU 57 * @stable ICU 57
*/ */
UCHAR_EMOJI_MODIFIER_BASE=60, UCHAR_EMOJI_MODIFIER_BASE=60,
/**
* Binary property Emoji_Component.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 60
*/
UCHAR_EMOJI_COMPONENT=61,
/**
* Binary property Regional_Indicator.
* @stable ICU 60
*/
UCHAR_REGIONAL_INDICATOR=62,
/**
* Binary property Prepended_Concatenation_Mark.
* @stable ICU 60
*/
UCHAR_PREPENDED_CONCATENATION_MARK=63,
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the last constant for binary Unicode properties. * One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/ */
UCHAR_BINARY_LIMIT=61, UCHAR_BINARY_LIMIT,
#endif // U_HIDE_DEPRECATED_API #endif // U_HIDE_DEPRECATED_API
/** Enumerated property Bidi_Class. /** Enumerated property Bidi_Class.
@ -1647,6 +1666,23 @@ enum UBlockCode {
/** @stable ICU 58 */ /** @stable ICU 58 */
UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/ UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
// New blocks in Unicode 10.0
/** @stable ICU 60 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/
/** @stable ICU 60 */
UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
/** @stable ICU 60 */
UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
/** @stable ICU 60 */
UBLOCK_NUSHU = 277, /*[1B170]*/
/** @stable ICU 60 */
UBLOCK_SOYOMBO = 278, /*[11A50]*/
/** @stable ICU 60 */
UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
/** @stable ICU 60 */
UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the highest normal UBlockCode value. * One more than the highest normal UBlockCode value.
@ -1654,7 +1690,7 @@ enum UBlockCode {
* *
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/ */
UBLOCK_COUNT = 274, UBLOCK_COUNT = 281,
#endif // U_HIDE_DEPRECATED_API #endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */ /** @stable ICU 2.0 */
@ -1930,6 +1966,19 @@ typedef enum UJoiningGroup {
U_JG_AFRICAN_FEH, /**< @stable ICU 58 */ U_JG_AFRICAN_FEH, /**< @stable ICU 58 */
U_JG_AFRICAN_NOON, /**< @stable ICU 58 */ U_JG_AFRICAN_NOON, /**< @stable ICU 58 */
U_JG_AFRICAN_QAF, /**< @stable ICU 58 */ U_JG_AFRICAN_QAF, /**< @stable ICU 58 */
U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */
U_JG_MALAYALAM_JA, /**< @stable ICU 60 */
U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */
U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */
U_JG_MALAYALAM_RA, /**< @stable ICU 60 */
U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */
U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */
#ifndef U_HIDE_DEPRECATED_API #ifndef U_HIDE_DEPRECATED_API
/** /**
* One more than the highest normal UJoiningGroup value. * One more than the highest normal UJoiningGroup value.
@ -3521,27 +3570,6 @@ u_toupper(UChar32 c);
U_STABLE UChar32 U_EXPORT2 U_STABLE UChar32 U_EXPORT2
u_totitle(UChar32 c); u_totitle(UChar32 c);
/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
/** /**
* The given character is mapped to its case folding equivalent according to * The given character is mapped to its case folding equivalent according to
* UnicodeData.txt and CaseFolding.txt; * UnicodeData.txt and CaseFolding.txt;

Просмотреть файл

@ -148,15 +148,9 @@ typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
* @stable ICU 2.8 * @stable ICU 2.8
* @system * @system
*/ */
#ifndef _MSC_VER
U_STABLE void U_EXPORT2 U_STABLE void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV a, UMemReallocFn * U_CALLCONV r, UMemFreeFn * U_CALLCONV f, u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f,
UErrorCode *status); UErrorCode *status);
#else
U_STABLE void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn * a, UMemReallocFn * r, UMemFreeFn * f,
UErrorCode *status);
#endif
U_CDECL_END U_CDECL_END

Просмотреть файл

@ -76,7 +76,7 @@
#endif #endif
/** /**
* Determines wheter to enable auto cleanup of libraries. * Determines whether to enable auto cleanup of libraries.
* @internal * @internal
*/ */
#ifndef UCLN_NO_AUTO_CLEANUP #ifndef UCLN_NO_AUTO_CLEANUP
@ -262,7 +262,8 @@
/** /**
* \def UCONFIG_NO_CONVERSION * \def UCONFIG_NO_CONVERSION
* ICU will not completely build with this switch turned on. * ICU will not completely build (compiling the tools fails) with this
* switch turned on.
* This switch turns off all converters. * This switch turns off all converters.
* *
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
@ -320,7 +321,9 @@
*/ */
#ifndef UCONFIG_NO_NORMALIZATION #ifndef UCONFIG_NO_NORMALIZATION
# define UCONFIG_NO_NORMALIZATION 0 # define UCONFIG_NO_NORMALIZATION 0
#elif UCONFIG_NO_NORMALIZATION #endif
#if UCONFIG_NO_NORMALIZATION
/* common library */ /* common library */
/* ICU 50 CJK dictionary BreakIterator uses normalization */ /* ICU 50 CJK dictionary BreakIterator uses normalization */
# define UCONFIG_NO_BREAK_ITERATION 1 # define UCONFIG_NO_BREAK_ITERATION 1

Просмотреть файл

@ -44,14 +44,12 @@ enum UDisplayContextType {
* @stable ICU 54 * @stable ICU 54
*/ */
UDISPCTX_TYPE_DISPLAY_LENGTH = 2, UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
#ifndef U_HIDE_DRAFT_API
/** /**
* Type to retrieve the substitute handling setting, e.g. * Type to retrieve the substitute handling setting, e.g.
* UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE. * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
* @draft ICU 58 * @stable ICU 58
*/ */
UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3 UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
#endif /* U_HIDE_DRAFT_API */
}; };
/** /**
* @stable ICU 51 * @stable ICU 51
@ -143,7 +141,6 @@ enum UDisplayContext {
* @stable ICU 54 * @stable ICU 54
*/ */
UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1, UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
#ifndef U_HIDE_DRAFT_API
/** /**
* ================================ * ================================
* SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
@ -154,16 +151,15 @@ enum UDisplayContext {
* A possible setting for SUBSTITUTE_HANDLING: * A possible setting for SUBSTITUTE_HANDLING:
* Returns a fallback value (e.g., the input code) when no data is available. * Returns a fallback value (e.g., the input code) when no data is available.
* This is the default value. * This is the default value.
* @draft ICU 58 * @stable ICU 58
*/ */
UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0, UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
/** /**
* A possible setting for SUBSTITUTE_HANDLING: * A possible setting for SUBSTITUTE_HANDLING:
* Returns a null value when no data is available. * Returns a null value when no data is available.
* @draft ICU 58 * @stable ICU 58
*/ */
UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1 UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
#endif /* U_HIDE_DRAFT_API */
}; };
/** /**

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше