Bug 1405993 - Part 3: Update in-tree ICU to release 60.1. rs=Waldo

--HG--
extra : rebase_source : 468a4fc2e1fa7215b1224d998024a7121a05af62
This commit is contained in:
André Bargull 2017-11-01 11:56:15 -07:00
Родитель db02e51a31
Коммит 3296f48ec9
1202 изменённых файлов: 133567 добавлений и 36786 удалений

3
config/external/icu/common/sources.mozbuild поставляемый
Просмотреть файл

@ -4,6 +4,7 @@ SOURCES += [
'/intl/icu/source/common/bmpset.cpp',
'/intl/icu/source/common/brkeng.cpp',
'/intl/icu/source/common/brkiter.cpp',
'/intl/icu/source/common/bytesinkutil.cpp',
'/intl/icu/source/common/bytestream.cpp',
'/intl/icu/source/common/bytestrie.cpp',
'/intl/icu/source/common/bytestriebuilder.cpp',
@ -47,6 +48,7 @@ SOURCES += [
'/intl/icu/source/common/punycode.cpp',
'/intl/icu/source/common/putil.cpp',
'/intl/icu/source/common/rbbi.cpp',
'/intl/icu/source/common/rbbi_cache.cpp',
'/intl/icu/source/common/rbbidata.cpp',
'/intl/icu/source/common/rbbinode.cpp',
'/intl/icu/source/common/rbbirb.cpp',
@ -225,6 +227,7 @@ EXPORTS.unicode += [
'/intl/icu/source/common/unicode/simpleformatter.h',
'/intl/icu/source/common/unicode/std_string.h',
'/intl/icu/source/common/unicode/strenum.h',
'/intl/icu/source/common/unicode/stringoptions.h',
'/intl/icu/source/common/unicode/stringpiece.h',
'/intl/icu/source/common/unicode/stringtriebuilder.h',
'/intl/icu/source/common/unicode/symtable.h',

Двоичные данные
config/external/icu/data/icudt59l.dat → config/external/icu/data/icudt60l.dat поставляемый

Двоичный файл не отображается.

20
config/external/icu/i18n/sources.mozbuild поставляемый
Просмотреть файл

@ -95,7 +95,25 @@ SOURCES += [
'/intl/icu/source/i18n/nfrule.cpp',
'/intl/icu/source/i18n/nfsubs.cpp',
'/intl/icu/source/i18n/nortrans.cpp',
'/intl/icu/source/i18n/nounit.cpp',
'/intl/icu/source/i18n/nultrans.cpp',
'/intl/icu/source/i18n/number_affixutils.cpp',
'/intl/icu/source/i18n/number_compact.cpp',
'/intl/icu/source/i18n/number_decimalquantity.cpp',
'/intl/icu/source/i18n/number_decimfmtprops.cpp',
'/intl/icu/source/i18n/number_fluent.cpp',
'/intl/icu/source/i18n/number_formatimpl.cpp',
'/intl/icu/source/i18n/number_grouping.cpp',
'/intl/icu/source/i18n/number_integerwidth.cpp',
'/intl/icu/source/i18n/number_longnames.cpp',
'/intl/icu/source/i18n/number_modifiers.cpp',
'/intl/icu/source/i18n/number_notation.cpp',
'/intl/icu/source/i18n/number_padding.cpp',
'/intl/icu/source/i18n/number_patternmodifier.cpp',
'/intl/icu/source/i18n/number_patternstring.cpp',
'/intl/icu/source/i18n/number_rounding.cpp',
'/intl/icu/source/i18n/number_scientific.cpp',
'/intl/icu/source/i18n/number_stringbuilder.cpp',
'/intl/icu/source/i18n/numfmt.cpp',
'/intl/icu/source/i18n/numsys.cpp',
'/intl/icu/source/i18n/olsontz.cpp',
@ -227,6 +245,8 @@ EXPORTS.unicode += [
'/intl/icu/source/i18n/unicode/measunit.h',
'/intl/icu/source/i18n/unicode/measure.h',
'/intl/icu/source/i18n/unicode/msgfmt.h',
'/intl/icu/source/i18n/unicode/nounit.h',
'/intl/icu/source/i18n/unicode/numberformatter.h',
'/intl/icu/source/i18n/unicode/numfmt.h',
'/intl/icu/source/i18n/unicode/numsys.h',
'/intl/icu/source/i18n/unicode/plurfmt.h',

Просмотреть файл

@ -1,10 +1,10 @@
Path: icu4c
URL: https://ssl.icu-project.org/repos/icu/tags/release-59-1/icu4c
Relative URL: ^/tags/release-59-1/icu4c
URL: https://ssl.icu-project.org/repos/icu/tags/release-60-1/icu4c
Relative URL: ^/tags/release-60-1/icu4c
Repository Root: https://ssl.icu-project.org/repos/icu
Repository UUID: 251d0590-4201-4cf1-90de-194747b24ca1
Node Kind: directory
Last Changed Author: yoshito
Last Changed Rev: 40047
Last Changed Date: 2017-04-13 09:55:03 +0000 (Thu, 13 Apr 2017)
Last Changed Rev: 40662
Last Changed Date: 2017-10-31 15:14:15 +0000 (Tue, 31 Oct 2017)

Просмотреть файл

@ -194,7 +194,7 @@ EXPAND_ONLY_PREDEF = YES
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW=\ "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE=override U_FINAL=final UCONFIG_ENABLE_PLUGINS=1 U_CHAR16_IS_TYPEDEF=0 U_CPLUSPLUS_VERSION=11 U_HAVE_RVALUE_REFERENCES=1 U_WCHAR_IS_UTF16
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV_FPTR= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW=\ "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE= U_FINAL=final UCONFIG_ENABLE_PLUGINS=1 U_CHAR16_IS_TYPEDEF=0 U_CPLUSPLUS_VERSION=11 U_WCHAR_IS_UTF16 U_NOEXCEPT=
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------

Просмотреть файл

@ -34,9 +34,11 @@ subdir = .
@LAYOUTEX_TRUE@LAYOUTEX = layoutex
@ICUIO_TRUE@ICUIO = io
@EXTRAS_TRUE@EXTRA = extra
@TESTS_TRUE@TEST = test
# need tools for tests
@TOOLS_TRUE@@TESTS_TRUE@TEST = test
@SAMPLES_TRUE@SAMPLE = samples
@TOOLS_TRUE@TOOLS = tools
@TOOLS_TRUE@DATASUBDIR = data
## pkgconfig setup. Always have uc and i18n. Others are optional.
ALL_PKGCONFIG_SUFFIX=uc i18n
@ -58,7 +60,7 @@ INSTALLED_BUILT_FILES = $(top_builddir)/config/Makefile.inc $(top_builddir)/conf
LOCAL_BUILT_FILES = icudefs.mk config/icucross.mk config/icucross.inc
DOCDIRS = common i18n
SUBDIRS = stubdata common i18n $(LAYOUTEX) $(ICUIO) $(TOOLS) data $(EXTRA) $(SAMPLE) $(TEST)
SUBDIRS = stubdata common i18n $(LAYOUTEX) $(ICUIO) $(TOOLS) $(DATASUBDIR) $(EXTRA) $(SAMPLE) $(TEST)
SECTION = 1
@ -85,7 +87,7 @@ all: all-local all-recursive
install: install-recursive install-local
clean: clean-recursive-with-twist clean-local
distclean : distclean-recursive distclean-local
dist: dist-recursive dist-local
dist: dist-recursive
check: all check-recursive
check-recursive: all
xcheck: all xcheck-recursive

Просмотреть файл

@ -89,7 +89,7 @@ ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_
resource.o uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucurr.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o locdspnm.o loclikely.o locresdata.o \
bytestream.o stringpiece.o \
bytestream.o stringpiece.o bytesinkutil.o \
stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
@ -104,7 +104,7 @@ patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwr
uscript.o uscript_props.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o rbbi_cache.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \

Просмотреть файл

@ -28,7 +28,7 @@ U_NAMESPACE_BEGIN
BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
list(parentList), listLength(parentListLength) {
uprv_memset(asciiBytes, 0, sizeof(asciiBytes));
uprv_memset(latin1Contains, 0, sizeof(latin1Contains));
uprv_memset(table7FF, 0, sizeof(table7FF));
uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits));
@ -45,14 +45,16 @@ BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) :
list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1);
}
list4kStarts[0x11]=listLength-1;
containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]);
initBits();
overrideIllegal();
}
BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) :
containsFFFD(otherBMPSet.containsFFFD),
list(newParentList), listLength(newParentListLength) {
uprv_memcpy(asciiBytes, otherBMPSet.asciiBytes, sizeof(asciiBytes));
uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains));
uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF));
uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits));
uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts));
@ -120,7 +122,7 @@ void BMPSet::initBits() {
UChar32 start, limit;
int32_t listIndex=0;
// Set asciiBytes[].
// Set latin1Contains[].
do {
start=list[listIndex++];
if(listIndex<listLength) {
@ -128,13 +130,30 @@ void BMPSet::initBits() {
} else {
limit=0x110000;
}
if(start>=0x80) {
if(start>=0x100) {
break;
}
do {
asciiBytes[start++]=1;
} while(start<limit && start<0x80);
} while(limit<=0x80);
latin1Contains[start++]=1;
} while(start<limit && start<0x100);
} while(limit<=0x100);
// Find the first range overlapping with (or after) 80..FF again,
// to include them in table7FF as well.
for(listIndex=0;;) {
start=list[listIndex++];
if(listIndex<listLength) {
limit=list[listIndex++];
} else {
limit=0x110000;
}
if(limit>0x80) {
if(start<0x80) {
start=0x80;
}
break;
}
}
// Set table7FF[].
while(start<0x800) {
@ -204,19 +223,14 @@ void BMPSet::initBits() {
* for faster validity checking at runtime.
* No need to set 0 values where they were reset to 0 in the constructor
* and not modified by initBits().
* (asciiBytes[] trail bytes, table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
* (table7FF[] 0..7F, bmpBlockBits[] 0..7FF)
* Need to set 0 values for surrogates D800..DFFF.
*/
void BMPSet::overrideIllegal() {
uint32_t bits, mask;
int32_t i;
if(containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10])) {
// contains(FFFD)==TRUE
for(i=0x80; i<0xc0; ++i) {
asciiBytes[i]=1;
}
if(containsFFFD) {
bits=3; // Lead bytes 0xC0 and 0xC1.
for(i=0; i<64; ++i) {
table7FF[i]|=bits;
@ -233,7 +247,6 @@ void BMPSet::overrideIllegal() {
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
}
} else {
// contains(FFFD)==FALSE
mask=~(0x10001<<0xd); // Lead byte 0xED.
for(i=32; i<64; ++i) { // Second half of 4k block.
bmpBlockBits[i]&=mask;
@ -277,8 +290,8 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
UBool
BMPSet::contains(UChar32 c) const {
if((uint32_t)c<=0x7f) {
return (UBool)asciiBytes[c];
if((uint32_t)c<=0xff) {
return (UBool)latin1Contains[c];
} else if((uint32_t)c<=0x7ff) {
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
@ -314,8 +327,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
// span
do {
c=*s;
if(c<=0x7f) {
if(!asciiBytes[c]) {
if(c<=0xff) {
if(!latin1Contains[c]) {
break;
}
} else if(c<=0x7ff) {
@ -354,8 +367,8 @@ BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition
// span not
do {
c=*s;
if(c<=0x7f) {
if(asciiBytes[c]) {
if(c<=0xff) {
if(latin1Contains[c]) {
break;
}
} else if(c<=0x7ff) {
@ -403,8 +416,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
// span
for(;;) {
c=*(--limit);
if(c<=0x7f) {
if(!asciiBytes[c]) {
if(c<=0xff) {
if(!latin1Contains[c]) {
break;
}
} else if(c<=0x7ff) {
@ -446,8 +459,8 @@ BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondi
// span not
for(;;) {
c=*(--limit);
if(c<=0x7f) {
if(asciiBytes[c]) {
if(c<=0xff) {
if(latin1Contains[c]) {
break;
}
} else if(c<=0x7ff) {
@ -497,22 +510,22 @@ const uint8_t *
BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const {
const uint8_t *limit=s+length;
uint8_t b=*s;
if((int8_t)b>=0) {
if(U8_IS_SINGLE(b)) {
// Initial all-ASCII span.
if(spanCondition) {
do {
if(!asciiBytes[b] || ++s==limit) {
if(!latin1Contains[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
} while(U8_IS_SINGLE(b));
} else {
do {
if(asciiBytes[b] || ++s==limit) {
if(latin1Contains[b] || ++s==limit) {
return s;
}
b=*s;
} while((int8_t)b>=0);
} while(U8_IS_SINGLE(b));
}
length=(int32_t)(limit-s);
}
@ -540,20 +553,20 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
// single trail byte, check for preceding 3- or 4-byte lead byte
if(length>=2 && (b=*(limit-2))>=0xe0) {
limit-=2;
if(asciiBytes[0x80]!=spanCondition) {
if(containsFFFD!=spanCondition) {
limit0=limit;
}
} else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) {
// 4-byte lead byte with only two trail bytes
limit-=3;
if(asciiBytes[0x80]!=spanCondition) {
if(containsFFFD!=spanCondition) {
limit0=limit;
}
}
} else {
// lead byte with no trail bytes
--limit;
if(asciiBytes[0x80]!=spanCondition) {
if(containsFFFD!=spanCondition) {
limit0=limit;
}
}
@ -563,26 +576,26 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
while(s<limit) {
b=*s;
if(b<0xc0) {
// ASCII; or trail bytes with the result of contains(FFFD).
if(U8_IS_SINGLE(b)) {
// ASCII
if(spanCondition) {
do {
if(!asciiBytes[b]) {
if(!latin1Contains[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
} while(U8_IS_SINGLE(b));
} else {
do {
if(asciiBytes[b]) {
if(latin1Contains[b]) {
return s;
} else if(++s==limit) {
return limit0;
}
b=*s;
} while(b<0xc0);
} while(U8_IS_SINGLE(b));
}
}
++s; // Advance past the lead byte.
@ -619,7 +632,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
asciiBytes[0x80]
containsFFFD
) != spanCondition
) {
return s-1;
@ -627,8 +640,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
s+=3;
continue;
}
} else /* 0xc0<=b<0xe0 */ {
} else {
if( /* handle U+0000..U+07FF inline */
b>=0xc0 &&
(t1=(uint8_t)(*s-0x80)) <= 0x3f
) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
@ -642,7 +656,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
// Give an illegal sequence the same value as the result of contains(FFFD).
// Handle each byte of an illegal sequence separately to simplify the code;
// no need to optimize error handling.
if(asciiBytes[0x80]!=spanCondition) {
if(containsFFFD!=spanCondition) {
return s-1;
}
}
@ -667,26 +681,26 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
do {
b=s[--length];
if((int8_t)b>=0) {
if(U8_IS_SINGLE(b)) {
// ASCII sub-span
if(spanCondition) {
do {
if(!asciiBytes[b]) {
if(!latin1Contains[b]) {
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
} while(U8_IS_SINGLE(b));
} else {
do {
if(asciiBytes[b]) {
if(latin1Contains[b]) {
return length+1;
} else if(length==0) {
return 0;
}
b=s[--length];
} while((int8_t)b>=0);
} while(U8_IS_SINGLE(b));
}
}

Просмотреть файл

@ -28,11 +28,12 @@ U_NAMESPACE_BEGIN
* Helper class for frozen UnicodeSets, implements contains() and span()
* optimized for BMP code points. Structured to be UTF-8-friendly.
*
* ASCII: Look up bytes.
* Latin-1: Look up bytes.
* 2-byte characters: Bits organized vertically.
* 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
* with mixed for illegal ranges.
* Supplementary characters: Call contains() on the parent set.
* Supplementary characters: Binary search over
* the supplementary part of the parent set's inversion list.
*/
class BMPSet : public UMemory {
public:
@ -96,12 +97,12 @@ private:
inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;
/*
* One byte per ASCII character, or trail byte in lead position.
* 0 or 1 for ASCII characters.
* The value for trail bytes is the result of contains(FFFD)
* for faster validity checking at runtime.
* One byte 0 or 1 per Latin-1 character.
*/
UBool asciiBytes[0xc0];
UBool latin1Contains[0x100];
/* TRUE if contains(U+FFFD). */
UBool containsFFFD;
/*
* One bit per code point from U+0000..U+07FF.

Просмотреть файл

@ -11,9 +11,6 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/chariter.h"
@ -24,6 +21,10 @@
#include "unicode/uscript.h"
#include "unicode/ucharstrie.h"
#include "unicode/bytestrie.h"
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
#include "charstr.h"
#include "dictionarydata.h"
#include "mutex.h"
@ -80,23 +81,15 @@ UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &/*foundBreaks*/ ) const {
int32_t /* startPos */,
int32_t endPos,
int32_t breakType,
UVector32 &/*foundBreaks*/ ) const {
if (breakType >= 0 && breakType < UPRV_LENGTHOF(fHandled)) {
UChar32 c = utext_current32(text);
if (reverse) {
while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
c = utext_previous32(text);
}
}
else {
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
}
return 0;

Просмотреть файл

@ -19,6 +19,7 @@ U_NAMESPACE_BEGIN
class UnicodeSet;
class UStack;
class UVector32;
class DictionaryMatcher;
/*******************************************************************
@ -67,18 +68,15 @@ class LanguageBreakEngine : public UMemory {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any
* @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const = 0;
UVector32 &foundBreaks ) const = 0;
};
@ -192,8 +190,6 @@ class UnhandledEngine : public LanguageBreakEngine {
* is capable of handling.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks A Vector of int32_t to receive the breaks.
* @return The number of breaks found.
@ -201,9 +197,8 @@ class UnhandledEngine : public LanguageBreakEngine {
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
UVector32 &foundBreaks ) const;
/**
* <p>Tell the engine to handle a particular character and break type.</p>

Просмотреть файл

@ -195,7 +195,7 @@ BreakIterator::getAvailableLocales(int32_t& count)
// ------------------------------------------
//
// Default constructor and destructor
// Constructors, destructor and assignment operator
//
//-------------------------------------------
@ -204,6 +204,19 @@ BreakIterator::BreakIterator()
*validLocale = *actualLocale = 0;
}
// Copy constructor: copies the UObject base and both locale ID buffers.
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
    // The two buffers are independent of each other, so the copy order is irrelevant.
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
}
// Assignment operator: copies both locale ID buffers from other.
// Self-assignment is a no-op.
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this == &other) {
        return *this;
    }
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    return *this;
}
BreakIterator::~BreakIterator()
{
}
@ -265,7 +278,7 @@ ICUBreakIteratorService::~ICUBreakIteratorService() {}
// defined in ucln_cmn.h
U_NAMESPACE_END
static icu::UInitOnce gInitOnce;
static icu::UInitOnce gInitOnceBrkiter;
static icu::ICULocaleService* gService = NULL;
@ -280,7 +293,7 @@ static UBool U_CALLCONV breakiterator_cleanup(void) {
delete gService;
gService = NULL;
}
gInitOnce.reset();
gInitOnceBrkiter.reset();
#endif
return TRUE;
}
@ -296,7 +309,7 @@ initService(void) {
static ICULocaleService*
getService(void)
{
umtx_initOnce(gInitOnce, &initService);
umtx_initOnce(gInitOnceBrkiter, &initService);
return gService;
}
@ -306,7 +319,7 @@ getService(void)
static inline UBool
hasService(void)
{
return !gInitOnce.isReset() && getService() != NULL;
return !gInitOnceBrkiter.isReset() && getService() != NULL;
}
// -------------------------------------

Просмотреть файл

@ -0,0 +1,123 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// bytesinkutil.cpp
// created: 2017sep14 Markus W. Scherer
#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "cmemory.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
/**
 * Converts the UTF-16 replacement text (s16, s16Length) to UTF-8, appends it
 * to the sink one buffer-full at a time, and optionally records the change.
 *
 * @param length     Number of source bytes this text replaces; only passed
 *                   through to edits->addReplace().
 * @param s16        Replacement text. It is read with U16_NEXT_UNSAFE, so it
 *                   must be valid UTF-16.
 * @param s16Length  Number of UTF-16 code units in s16.
 * @param sink       Receives the UTF-8 bytes.
 * @param edits      If not nullptr, receives one addReplace(length, bytes written).
 * @param errorCode  In/out error code. Set to U_INDEX_OUTOFBOUNDS_ERROR when the
 *                   total number of bytes written would exceed INT32_MAX.
 * @return FALSE if errorCode was a failure on entry or was set here, otherwise TRUE.
 */
UBool
ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    char scratch[200];
    int32_t totalBytes = 0;
    int32_t srcIndex = 0;
    while (srcIndex < s16Length) {
        // Capacity hint for the sink, clamped so the multiplication cannot overflow.
        const int32_t remaining = s16Length - srcIndex;
        int32_t desiredCapacity;
        if (remaining < (INT32_MAX / 3)) {
            desiredCapacity = remaining * 3;  // max 3 UTF-8 bytes per UTF-16 code unit
        } else if (remaining < (INT32_MAX / 2)) {
            desiredCapacity = remaining * 2;
        } else {
            desiredCapacity = INT32_MAX;
        }
        int32_t capacity;
        char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
                                            scratch, UPRV_LENGTHOF(scratch), &capacity);
        // Keep U8_MAX_LENGTH-1 bytes in reserve so that, whenever the loop
        // condition holds, a complete code point still fits into the buffer.
        capacity -= U8_MAX_LENGTH - 1;
        int32_t chunkBytes = 0;
        while (srcIndex < s16Length && chunkBytes < capacity) {
            UChar32 c;
            U16_NEXT_UNSAFE(s16, srcIndex, c);
            U8_APPEND_UNSAFE(buffer, chunkBytes, c);
        }
        // Refuse to let the running byte count overflow int32_t.
        if (chunkBytes > (INT32_MAX - totalBytes)) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return FALSE;
        }
        sink.Append(buffer, chunkBytes);
        totalBytes += chunkBytes;
    }
    if (edits != nullptr) {
        edits->addReplace(length, totalBytes);
    }
    return TRUE;
}
/**
 * Pointer-range overload: the bytes at [s, limit[ were mapped to (s16, s16Length).
 * Checks that the range length fits into int32_t and forwards to the
 * length-based appendChange().
 *
 * @param errorCode  In/out error code. Set to U_INDEX_OUTOFBOUNDS_ERROR when
 *                   limit - s exceeds INT32_MAX.
 * @return FALSE on failure, otherwise the result of the length-based overload.
 */
UBool
ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
                           const char16_t *s16, int32_t s16Length,
                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const auto sourceLength = limit - s;
    if (sourceLength > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }
    return appendChange((int32_t)sourceLength, s16, s16Length, sink, edits, errorCode);
}
void
ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
char s8[U8_MAX_LENGTH];
int32_t s8Length = 0;
U8_APPEND_UNSAFE(s8, s8Length, c);
if (edits != nullptr) {
edits->addReplace(length, s8Length);
}
sink.Append(s8, s8Length);
}
namespace {

// Byte construction for a 2-byte UTF-8 sequence;
// see unicode/utf8.h U8_APPEND_UNSAFE().

/** Lead byte: 0xc0 combined with the bits of c above the low six. */
inline uint8_t getTwoByteLead(UChar32 c) {
    return static_cast<uint8_t>(0xc0 | (c >> 6));
}

/** Trail byte: 0x80 combined with the low six bits of c. */
inline uint8_t getTwoByteTrail(UChar32 c) {
    return static_cast<uint8_t>(0x80 | (c & 0x3f));
}

}  // namespace
/**
 * Appends the 2-byte UTF-8 encoding of c to the sink.
 * The caller must pass a code point in U+0080..U+07FF (asserted).
 */
void
ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
    char s8[2];
    s8[0] = (char)getTwoByteLead(c);
    s8[1] = (char)getTwoByteTrail(c);
    sink.Append(s8, 2);
}
/**
 * Handles a run of (length) source bytes at s that are carried over unchanged.
 *
 * For length > 0, the run is recorded in edits (if not nullptr) and the bytes
 * are appended to the sink unless options contains U_OMIT_UNCHANGED_TEXT.
 * For length <= 0 nothing is recorded or appended.
 *
 * @return FALSE if errorCode was already a failure on entry, otherwise TRUE.
 */
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, int32_t length,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    if (length <= 0) {
        return TRUE;  // nothing to record or copy
    }
    if (edits != nullptr) {
        edits->addUnchanged(length);
    }
    const UBool omitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) != 0;
    if (!omitUnchanged) {
        sink.Append(reinterpret_cast<const char *>(s), length);
    }
    return TRUE;
}
/**
 * Pointer-range overload: the bytes at [s, limit[ are carried over unchanged.
 * Checks that the range length fits into int32_t and forwards to the
 * length-based appendUnchanged().
 *
 * @param errorCode  In/out error code. Set to U_INDEX_OUTOFBOUNDS_ERROR when
 *                   limit - s exceeds INT32_MAX.
 * @return FALSE on failure, otherwise the result of the length-based overload.
 */
UBool
ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
                              ByteSink &sink, uint32_t options, Edits *edits,
                              UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const auto runLength = limit - s;
    if (runLength > INT32_MAX) {
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return FALSE;
    }
    return appendUnchanged(s, (int32_t)runLength, sink, options, edits, errorCode);
}
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,53 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// bytesinkutil.h
// created: 2017sep14 Markus W. Scherer
// Include guard: without it, including this header twice in one translation
// unit would redefine class ByteSinkUtil.
#ifndef BYTESINKUTIL_H
#define BYTESINKUTIL_H

#include "unicode/utypes.h"
#include "unicode/bytestream.h"
#include "unicode/edits.h"
#include "cmemory.h"
#include "uassert.h"

U_NAMESPACE_BEGIN

class ByteSink;
class Edits;

/**
 * Static helper functions for writing UTF-8 output to a ByteSink while
 * optionally recording the changes in an Edits object.
 * Not instantiable: all members are static.
 */
class U_COMMON_API ByteSinkUtil {
public:
    ByteSinkUtil() = delete;  // all static

    /** (length) bytes were mapped to valid (s16, s16Length). */
    static UBool appendChange(int32_t length,
                              const char16_t *s16, int32_t s16Length,
                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);

    /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */
    static UBool appendChange(const uint8_t *s, const uint8_t *limit,
                              const char16_t *s16, int32_t s16Length,
                              ByteSink &sink, Edits *edits, UErrorCode &errorCode);

    /** (length) bytes were mapped/changed to valid code point c. */
    static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr);

    /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
    static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
                                       ByteSink &sink, Edits *edits = nullptr) {
        appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
    }

    /** Append the two-byte character (U+0080..U+07FF). */
    static void appendTwoBytes(UChar32 c, ByteSink &sink);

    /**
     * (length) bytes at s are unchanged: records them in edits (if not nullptr)
     * and appends them to the sink unless options contains U_OMIT_UNCHANGED_TEXT.
     * Does nothing for length <= 0.
     */
    static UBool appendUnchanged(const uint8_t *s, int32_t length,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode);

    /** Same as above for the unchanged bytes at [s, limit[. */
    static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit,
                                 ByteSink &sink, uint32_t options, Edits *edits,
                                 UErrorCode &errorCode);
};

U_NAMESPACE_END

#endif  // BYTESINKUTIL_H

Просмотреть файл

@ -45,6 +45,12 @@ void CheckedArrayByteSink::Append(const char* bytes, int32_t n) {
if (n <= 0) {
return;
}
if (n > (INT32_MAX - appended_)) {
// TODO: Report as integer overflow, not merely buffer overflow.
appended_ = INT32_MAX;
overflowed_ = TRUE;
return;
}
appended_ += n;
int32_t available = capacity_ - size_;
if (n > available) {

Просмотреть файл

@ -405,7 +405,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
//String[] finalResult = new String[result.size()];
UnicodeString *finalResult = NULL;
int32_t resultCount;
if((resultCount = result.count())) {
if((resultCount = result.count()) != 0) {
finalResult = new UnicodeString[resultCount];
if (finalResult == 0) {
status = U_MEMORY_ALLOCATION_ERROR;

Просмотреть файл

@ -162,7 +162,6 @@ public:
* @param p simple pointer to an array of T items that is adopted
*/
explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
@ -170,14 +169,12 @@ public:
LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
#endif
/**
* Destructor deletes the memory it owns.
*/
~LocalMemory() {
uprv_free(LocalPointerBase<T>::ptr);
}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
@ -187,7 +184,6 @@ public:
LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT {
return moveFrom(src);
}
#endif
/**
* Move assignment, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
@ -312,6 +308,14 @@ public:
* Default constructor initializes with internal T[stackCapacity] buffer.
*/
MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {}
/**
 * Starts out like the default constructor (internal stack buffer) and then
 * switches to a heap array if the requested capacity exceeds the stack capacity.
 * Intended for use when an approximate capacity is known at compile time but
 * the true capacity is not known until runtime.
 */
MaybeStackArray(int32_t newCapacity) : MaybeStackArray() {
    if (newCapacity > capacity) {
        resize(newCapacity);
    }
}
/**
* Destructor deletes the array (if owned).
*/

Просмотреть файл

@ -20,6 +20,7 @@
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}</ProjectGuid>
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@ -109,7 +110,7 @@
<Culture>0x0409</Culture>
</ResourceCompile>
<Link>
<OutputFile>..\..\bin\icuuc59.dll</OutputFile>
<OutputFile>..\..\bin\icuuc60.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner>
<ProgramDatabaseFile>.\..\..\lib\icuuc.pdb</ProgramDatabaseFile>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -151,7 +152,7 @@
<Culture>0x0409</Culture>
</ResourceCompile>
<Link>
<OutputFile>..\..\bin\icuuc59d.dll</OutputFile>
<OutputFile>..\..\bin\icuuc60d.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\..\..\lib\icuucd.pdb</ProgramDatabaseFile>
@ -190,7 +191,7 @@
<Culture>0x0409</Culture>
</ResourceCompile>
<Link>
<OutputFile>..\..\bin64\icuuc59.dll</OutputFile>
<OutputFile>..\..\bin64\icuuc60.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner>
<ProgramDatabaseFile>.\..\..\lib64\icuuc.pdb</ProgramDatabaseFile>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -230,7 +231,7 @@
<Culture>0x0409</Culture>
</ResourceCompile>
<Link>
<OutputFile>..\..\bin64\icuuc59d.dll</OutputFile>
<OutputFile>..\..\bin64\icuuc60d.dll</OutputFile>
<SuppressStartupBanner>true</SuppressStartupBanner>
<GenerateDebugInformation>true</GenerateDebugInformation>
<ProgramDatabaseFile>.\..\..\lib64\icuucd.pdb</ProgramDatabaseFile>
@ -267,6 +268,8 @@
</ClCompile>
<ClCompile Include="rbbitblb.cpp">
</ClCompile>
<ClCompile Include="rbbi_cache.cpp">
</ClCompile>
<ClCompile Include="dictionarydata.cpp" />
<ClCompile Include="ubrk.cpp" />
<ClCompile Include="ucol_swp.cpp">
@ -444,6 +447,7 @@
</ClCompile>
<ClCompile Include="usprep.cpp" />
<ClCompile Include="appendable.cpp" />
<ClCompile Include="bytesinkutil.cpp" />
<ClCompile Include="bytestream.cpp" />
<ClCompile Include="bytestrie.cpp" />
<ClCompile Include="bytestriebuilder.cpp" />
@ -571,6 +575,7 @@
<ClInclude Include="rbbiscan.h" />
<ClInclude Include="rbbisetb.h" />
<ClInclude Include="rbbitblb.h" />
<ClInclude Include="rbbi_cache.h" />
<ClInclude Include="dictionarydata.h" />
<CustomBuild Include="unicode\ubrk.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
@ -1477,6 +1482,7 @@
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="bytesinkutil.h" />
<CustomBuild Include="unicode\bytestream.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>

Просмотреть файл

@ -97,6 +97,9 @@
<ClCompile Include="rbbitblb.cpp">
<Filter>break iteration</Filter>
</ClCompile>
<ClCompile Include="rbbi_cache.cpp">
<Filter>break iteration</Filter>
</ClCompile>
<ClCompile Include="ubrk.cpp">
<Filter>break iteration</Filter>
</ClCompile>
@ -460,6 +463,9 @@
<ClCompile Include="usprep.cpp">
<Filter>sprep</Filter>
</ClCompile>
<ClCompile Include="bytesinkutil.cpp">
<Filter>strings</Filter>
</ClCompile>
<ClCompile Include="bytestream.cpp">
<Filter>strings</Filter>
</ClCompile>
@ -636,6 +642,9 @@
<ClInclude Include="rbbitblb.h">
<Filter>break iteration</Filter>
</ClInclude>
<ClInclude Include="rbbi_cache.h">
<Filter>break iteration</Filter>
</ClInclude>
<ClInclude Include="ubrkimpl.h">
<Filter>break iteration</Filter>
</ClInclude>
@ -861,6 +870,9 @@
<ClInclude Include="sprpimpl.h">
<Filter>sprep</Filter>
</ClInclude>
<ClInclude Include="bytesinkutil.h">
<Filter>strings</Filter>
</ClInclude>
<ClInclude Include="charstr.h">
<Filter>strings</Filter>
</ClInclude>

Просмотреть файл

@ -70,15 +70,17 @@
<LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup>
<!-- Options that are common to *all* configurations -->
<Midl>
<PreprocessorDefinitions>U_DISABLE_RENAMING=1;U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MkTypLibCompatible>true</MkTypLibCompatible>
<SuppressStartupBanner>true</SuppressStartupBanner>
</Midl>
<ClCompile>
<AdditionalIncludeDirectories>..\..\include;..\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<!-- U_DISABLE_RENAMING -->
<!-- U_HIDE_DRAFT_API & U_HIDE_DEPRECATED_API -->
<PreprocessorDefinitions>U_DISABLE_RENAMING=1;U_PLATFORM_HAS_WINUWP_API=1;U_ATTRIBUTE_DEPRECATED=;_CRT_SECURE_NO_DEPRECATE;U_COMMON_IMPLEMENTATION;U_PLATFORM_USES_ONLY_WIN32_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>U_PLATFORM_HAS_WINUWP_API=1;U_ATTRIBUTE_DEPRECATED=;_CRT_SECURE_NO_DEPRECATE;U_COMMON_IMPLEMENTATION;U_PLATFORM_USES_ONLY_WIN32_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<ExceptionHandling>
</ExceptionHandling>
@ -93,7 +95,7 @@
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>U_DISABLE_RENAMING=1;U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>U_PLATFORM_HAS_WINUWP_API=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Culture>0x0409</Culture>
<AdditionalIncludeDirectories>../common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
@ -108,6 +110,7 @@
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<!-- Options that are common to all 'Release' configurations -->
<Midl>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
@ -120,9 +123,11 @@
</ResourceCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<!-- Options that are common to all 'Debug' configurations -->
<Midl>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
@ -137,8 +142,13 @@
<ResourceCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ResourceCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
<!-- Options that are common to all 32-bit configurations -->
<Midl>
<TargetEnvironment>Win32</TargetEnvironment>
</Midl>
@ -147,6 +157,7 @@
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
<!-- Options that are common to all 64-bit configurations -->
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
@ -158,6 +169,7 @@
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='ARM'">
<!-- Options that are common to all ARM configurations -->
<Midl>
<TargetEnvironment>ARM</TargetEnvironment>
</Midl>
@ -168,6 +180,7 @@
<TargetMachine>MachineARM</TargetMachine>
</Link>
</ItemDefinitionGroup>
<!-- Options that are specific to a particular configuration -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Midl>
<TypeLibraryName>.\..\..\lib32uwp\icuuc.tlb</TypeLibraryName>
@ -179,10 +192,9 @@
<ProgramDataBaseFileName>.\x86\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin32uwp\icuuc.dll</OutputFile>
<OutputFile>..\..\bin32uwp\icuuc60.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuuc.lib</ImportLibrary>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@ -196,10 +208,9 @@
<ProgramDataBaseFileName>.\x86\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin32uwp\icuucd.dll</OutputFile>
<OutputFile>..\..\bin32uwp\icuuc60d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib32uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib32uwp\icuucd.lib</ImportLibrary>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@ -213,10 +224,9 @@
<ProgramDataBaseFileName>.\x64\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin64uwp\icuuc.dll</OutputFile>
<OutputFile>..\..\bin64uwp\icuuc60.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuuc.lib</ImportLibrary>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -230,10 +240,9 @@
<ProgramDataBaseFileName>.\x64\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\bin64uwp\icuucd.dll</OutputFile>
<OutputFile>..\..\bin64uwp\icuuc60d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\lib64uwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\lib64uwp\icuucd.lib</ImportLibrary>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
@ -247,10 +256,9 @@
<ProgramDataBaseFileName>.\ARM\ReleaseUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\binARMuwp\icuuc.dll</OutputFile>
<OutputFile>..\..\binARMuwp\icuuc60.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuuc.lib</ImportLibrary>
<AdditionalDependencies>vccorlib.lib;WindowsApp.lib;msvcrt.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
@ -264,10 +272,9 @@
<ProgramDataBaseFileName>.\ARM\DebugUWP/</ProgramDataBaseFileName>
</ClCompile>
<Link>
<OutputFile>..\..\binARMuwp\icuucd.dll</OutputFile>
<OutputFile>..\..\binARMuwp\icuuc60d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\libARMuwp\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\libARMuwp\icuucd.lib</ImportLibrary>
<AdditionalDependencies>vccorlibd.lib;WindowsApp.lib;msvcrtd.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
@ -280,25 +287,19 @@
<ClCompile Include="ubidiwrt.cpp" />
<ClCompile Include="uloc_keytype.cpp" />
<ClCompile Include="ushape.cpp" />
<ClCompile Include="brkeng.cpp">
</ClCompile>
<ClCompile Include="brkiter.cpp">
</ClCompile>
<ClCompile Include="brkeng.cpp" />
<ClCompile Include="brkiter.cpp" />
<ClCompile Include="dictbe.cpp" />
<ClCompile Include="pluralmap.cpp" />
<ClCompile Include="rbbi.cpp">
</ClCompile>
<ClCompile Include="rbbidata.cpp">
</ClCompile>
<ClCompile Include="rbbi.cpp" />
<ClCompile Include="rbbidata.cpp" />
<ClCompile Include="rbbinode.cpp" />
<ClCompile Include="rbbirb.cpp">
</ClCompile>
<ClCompile Include="rbbirb.cpp" />
<ClCompile Include="rbbiscan.cpp" />
<ClCompile Include="rbbisetb.cpp" />
<ClCompile Include="rbbistbl.cpp">
</ClCompile>
<ClCompile Include="rbbitblb.cpp">
</ClCompile>
<ClCompile Include="rbbistbl.cpp" />
<ClCompile Include="rbbitblb.cpp" />
<ClCompile Include="rbbi_cache.cpp" />
<ClCompile Include="dictionarydata.cpp" />
<ClCompile Include="ubrk.cpp" />
<ClCompile Include="ucol_swp.cpp">
@ -320,14 +321,12 @@
<ClCompile Include="uvectr64.cpp" />
<ClCompile Include="errorcode.cpp" />
<ClCompile Include="icudataver.cpp" />
<ClCompile Include="locmap.cpp">
</ClCompile>
<ClCompile Include="locmap.cpp" />
<ClCompile Include="putil.cpp">
<CompileAsWinRT>true</CompileAsWinRT>
</ClCompile>
<ClCompile Include="umath.cpp" />
<ClCompile Include="umutex.cpp">
</ClCompile>
<ClCompile Include="umutex.cpp" />
<ClCompile Include="utrace.cpp" />
<ClCompile Include="utypes.cpp" />
<ClCompile Include="wintz.cpp">
@ -335,15 +334,13 @@
</ClCompile>
<ClCompile Include="ucnv.cpp" />
<ClCompile Include="ucnv2022.cpp" />
<ClCompile Include="ucnv_bld.cpp">
</ClCompile>
<ClCompile Include="ucnv_bld.cpp" />
<ClCompile Include="ucnv_cb.cpp" />
<ClCompile Include="ucnv_cnv.cpp" />
<ClCompile Include="ucnv_ct.cpp" />
<ClCompile Include="ucnv_err.cpp" />
<ClCompile Include="ucnv_ext.cpp" />
<ClCompile Include="ucnv_io.cpp">
</ClCompile>
<ClCompile Include="ucnv_io.cpp" />
<ClCompile Include="ucnv_lmb.cpp" />
<ClCompile Include="ucnv_set.cpp" />
<ClCompile Include="ucnv_u16.cpp" />
@ -357,19 +354,15 @@
<ClCompile Include="ucnvlat1.cpp" />
<ClCompile Include="ucnvmbcs.cpp" />
<ClCompile Include="ucnvscsu.cpp" />
<ClCompile Include="ucnvsel.cpp">
</ClCompile>
<ClCompile Include="ucnvsel.cpp" />
<ClCompile Include="cmemory.cpp" />
<ClCompile Include="ucln_cmn.cpp">
</ClCompile>
<ClCompile Include="ucln_cmn.cpp" />
<ClCompile Include="ucmndata.cpp" />
<ClCompile Include="udata.cpp" />
<ClCompile Include="udatamem.cpp" />
<ClCompile Include="udataswp.cpp" />
<ClCompile Include="uinit.cpp">
</ClCompile>
<ClCompile Include="umapfile.cpp">
</ClCompile>
<ClCompile Include="uinit.cpp" />
<ClCompile Include="umapfile.cpp" />
<ClCompile Include="uobject.cpp" />
<ClCompile Include="dtintrv.cpp" />
<ClCompile Include="parsepos.cpp" />
@ -379,19 +372,15 @@
<ClCompile Include="punycode.cpp" />
<ClCompile Include="uidna.cpp" />
<ClCompile Include="uts46.cpp" />
<ClCompile Include="locavailable.cpp">
</ClCompile>
<ClCompile Include="locavailable.cpp" />
<ClCompile Include="locbased.cpp" />
<ClCompile Include="locdispnames.cpp" />
<ClCompile Include="locdspnm.cpp" />
<ClCompile Include="locid.cpp">
</ClCompile>
<ClCompile Include="locid.cpp" />
<ClCompile Include="loclikely.cpp" />
<ClCompile Include="locresdata.cpp" />
<ClCompile Include="locutil.cpp">
</ClCompile>
<ClCompile Include="resbund.cpp">
</ClCompile>
<ClCompile Include="locutil.cpp" />
<ClCompile Include="resbund.cpp" />
<ClCompile Include="resbund_cnv.cpp" />
<ClCompile Include="ucat.cpp" />
<ClCompile Include="uloc.cpp" />
@ -401,27 +390,22 @@
<ClCompile Include="uresdata.cpp" />
<ClCompile Include="resource.cpp" />
<ClCompile Include="ucurr.cpp" />
<ClCompile Include="caniter.cpp">
</ClCompile>
<ClCompile Include="caniter.cpp" />
<ClCompile Include="filterednormalizer2.cpp" />
<ClCompile Include="loadednormalizer2impl.cpp" />
<ClCompile Include="normalizer2.cpp" />
<ClCompile Include="normalizer2impl.cpp" />
<ClCompile Include="normlzr.cpp">
</ClCompile>
<ClCompile Include="normlzr.cpp" />
<ClCompile Include="unorm.cpp" />
<ClCompile Include="unormcmp.cpp" />
<ClCompile Include="bmpset.cpp" />
<ClCompile Include="patternprops.cpp" />
<ClCompile Include="propname.cpp">
</ClCompile>
<ClCompile Include="propname.cpp" />
<ClCompile Include="ruleiter.cpp" />
<ClCompile Include="ucase.cpp">
</ClCompile>
<ClCompile Include="ucase.cpp" />
<ClCompile Include="uchar.cpp" />
<ClCompile Include="unames.cpp" />
<ClCompile Include="unifiedcache.cpp">
</ClCompile>
<ClCompile Include="unifiedcache.cpp" />
<ClCompile Include="unifilt.cpp" />
<ClCompile Include="unifunct.cpp" />
<ClCompile Include="uniset.cpp" />
@ -436,22 +420,16 @@
<ClCompile Include="uset_props.cpp" />
<ClCompile Include="usetiter.cpp" />
<ClCompile Include="icuplug.cpp" />
<ClCompile Include="serv.cpp">
</ClCompile>
<ClCompile Include="servlk.cpp">
</ClCompile>
<ClCompile Include="servlkf.cpp">
</ClCompile>
<ClCompile Include="servls.cpp">
</ClCompile>
<ClCompile Include="servnotf.cpp">
</ClCompile>
<ClCompile Include="servrbf.cpp">
</ClCompile>
<ClCompile Include="servslkf.cpp">
</ClCompile>
<ClCompile Include="serv.cpp" />
<ClCompile Include="servlk.cpp" />
<ClCompile Include="servlkf.cpp" />
<ClCompile Include="servls.cpp" />
<ClCompile Include="servnotf.cpp" />
<ClCompile Include="servrbf.cpp" />
<ClCompile Include="servslkf.cpp" />
<ClCompile Include="usprep.cpp" />
<ClCompile Include="appendable.cpp" />
<ClCompile Include="bytesinkutil.cpp" />
<ClCompile Include="bytestream.cpp" />
<ClCompile Include="bytestrie.cpp" />
<ClCompile Include="bytestriebuilder.cpp" />
@ -494,8 +472,7 @@
</ItemGroup>
<ItemGroup>
<CustomBuild Include="unicode\ubidi.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="localsvc.h" />
@ -529,6 +506,7 @@
<ClInclude Include="rbbiscan.h" />
<ClInclude Include="rbbisetb.h" />
<ClInclude Include="rbbitblb.h" />
<ClInclude Include="rbbi_cache.h" />
<ClInclude Include="dictionarydata.h" />
<CustomBuild Include="unicode\ubrk.h">
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
@ -599,65 +577,54 @@
</CustomBuild>
<ClInclude Include="putilimp.h" />
<CustomBuild Include="unicode\std_string.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uassert.h" />
<CustomBuild Include="unicode\uconfig.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\umachine.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="umutex.h" />
<ClInclude Include="uposixdefs.h" />
<CustomBuild Include="unicode\urename.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utrace.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="utracimp.h" />
<CustomBuild Include="unicode\utypes.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uvernum.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uversion.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="wintz.h" />
<CustomBuild Include="unicode\ucnv.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucnv_bld.h" />
<CustomBuild Include="unicode\ucnv_cb.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucnv_cnv.h" />
<CustomBuild Include="unicode\ucnv_err.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucnv_ext.h" />
@ -665,19 +632,16 @@
<ClInclude Include="ucnv_io.h" />
<ClInclude Include="ucnvmbcs.h" />
<CustomBuild Include="unicode\ucnvsel.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="cmemory.h" />
<CustomBuild Include="unicode\localpointer.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uclean.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucln.h" />
@ -685,99 +649,82 @@
<ClInclude Include="ucln_imp.h" />
<ClInclude Include="ucmndata.h" />
<CustomBuild Include="unicode\udata.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="udatamem.h" />
<ClInclude Include="udataswp.h" />
<ClInclude Include="umapfile.h" />
<CustomBuild Include="unicode\uobject.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\dtintrv.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\parseerr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\parsepos.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\umisc.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ustrfmt.h" />
<ClInclude Include="util.h" />
<CustomBuild Include="unicode\idna.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="punycode.h" />
<CustomBuild Include="unicode\uidna.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="locbased.h" />
<CustomBuild Include="unicode\locid.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="locutil.h" />
<CustomBuild Include="unicode\resbund.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="sharedobject.h" />
<ClCompile Include="sharedobject.cpp" />
<CustomBuild Include="unicode\locdspnm.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\simpleformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucat.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\udisplaycontext.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uldnames.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uloc.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ulocimp.h" />
<CustomBuild Include="unicode\ures.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="unifiedcache.h" />
@ -786,36 +733,30 @@
<ClInclude Include="ureslocs.h" />
<ClInclude Include="resource.h" />
<CustomBuild Include="unicode\ucurr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucurrimp.h" />
<CustomBuild Include="unicode\caniter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="norm2allmodes.h" />
<CustomBuild Include="unicode\normalizer2.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="normalizer2impl.h" />
<CustomBuild Include="unicode\normlzr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unorm.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unorm2.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="unormimp.h" />
@ -825,58 +766,48 @@
<ClInclude Include="propname.h" />
<ClInclude Include="ruleiter.h" />
<CustomBuild Include="unicode\symtable.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ucase.h" />
<CustomBuild Include="unicode\uchar.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unifilt.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unifunct.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unimatch.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uniset.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="unisetspan.h" />
<ClInclude Include="uprops.h" />
<ClInclude Include="usc_impl.h" />
<CustomBuild Include="unicode\uscript.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uset.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uset_imp.h" />
<CustomBuild Include="unicode\usetiter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\icuplug.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="icuplugimp.h" />
@ -885,33 +816,28 @@
<ClInclude Include="servnotf.h" />
<ClInclude Include="sprpimpl.h" />
<CustomBuild Include="unicode\usprep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\appendable.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="bytesinkutil.h" />
<CustomBuild Include="unicode\bytestream.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\bytestrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\bytestriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\chariter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="charstr.h" />
@ -919,116 +845,94 @@
<ClInclude Include="cstr.h" />
<ClInclude Include="cwchar.h" />
<CustomBuild Include="unicode\messagepattern.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\rep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\schriter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\stringpiece.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\stringtriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucasemap.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucharstrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ucharstriebuilder.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\uchriter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="uinvchar.h" />
<CustomBuild Include="unicode\uiter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\unistr.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\urep.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="ustr_cnv.h" />
<ClInclude Include="ustr_imp.h" />
<CustomBuild Include="unicode\ustring.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ustringtrie.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utext.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf16.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf32.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf8.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\utf_old.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\listformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\ulistformatter.h">
<Command>copy "%(FullPath)" ..\..\include\unicode
</Command>
<Command>copy "%(FullPath)" ..\..\include\unicode</Command>
<Outputs>..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
</ItemGroup>

Просмотреть файл

@ -46,9 +46,9 @@ int32_t
DictionaryBreakEngine::findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const {
UVector32 &foundBreaks ) const {
(void)startPos; // TODO: remove this param?
int32_t result = 0;
// Find the span of characters included in the set.
@ -60,34 +60,12 @@ DictionaryBreakEngine::findBreaks( UText *text,
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
if (reverse) {
UBool isDict = fSet.contains(c);
while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
c = utext_previous32(text);
isDict = fSet.contains(c);
}
if (current < startPos) {
rangeStart = startPos;
} else {
rangeStart = current;
if (!isDict) {
utext_next32(text);
rangeStart = (int32_t)utext_getNativeIndex(text);
}
}
// rangeEnd = start + 1;
utext_setNativeIndex(text, start);
utext_next32(text);
rangeEnd = (int32_t)utext_getNativeIndex(text);
}
else {
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
rangeStart = start;
rangeEnd = current;
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
utext_setNativeIndex(text, current);
@ -248,7 +226,7 @@ int32_t
ThaiBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
UVector32 &foundBreaks ) const {
utext_setNativeIndex(text, rangeStart);
utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
if (utext_getNativeIndex(text) >= rangeEnd) {
@ -487,7 +465,7 @@ int32_t
LaoBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
@ -680,7 +658,7 @@ int32_t
BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
@ -885,7 +863,7 @@ int32_t
KhmerBreakEngine::divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
UVector32 &foundBreaks ) const {
if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
return 0; // Not enough characters for two words
}
@ -1110,9 +1088,9 @@ static inline uint32_t getKatakanaCost(int32_t wordLength){
return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength];
}
static inline bool isKatakana(uint16_t value) {
return (value >= 0x30A1u && value <= 0x30FEu && value != 0x30FBu) ||
(value >= 0xFF66u && value <= 0xFF9fu);
static inline bool isKatakana(UChar32 value) {
return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) ||
(value >= 0xFF66 && value <= 0xFF9f);
}
@ -1128,14 +1106,14 @@ static inline int32_t utext_i32_flag(int32_t bitIndex) {
* @param text A UText representing the text
* @param rangeStart The start of the range of dictionary characters
* @param rangeEnd The end of the range of dictionary characters
* @param foundBreaks Output of C array of int32_t break positions, or 0
* @param foundBreaks vector<int32> to receive the break positions
* @return The number of breaks found
*/
int32_t
CjkBreakEngine::divideUpDictionaryRange( UText *inText,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const {
UVector32 &foundBreaks ) const {
if (rangeStart >= rangeEnd) {
return 0;
}
@ -1405,6 +1383,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
prevCPPos = cpPos;
prevUTextPos = utextPos;
}
(void)prevCPPos; // suppress compiler warnings about unused variable
// inString goes out of scope
// inputMap goes out of scope

Просмотреть файл

@ -15,6 +15,7 @@
#include "unicode/utext.h"
#include "brkeng.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
@ -84,21 +85,18 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
*
* @param text A UText representing the text. The iterator is left at
* the end of the run of characters which the engine is capable of handling
* that starts from the first (or last) character in the range.
* that starts from the first character in the range.
* @param startPos The start of the run within the supplied text.
* @param endPos The end of the run within the supplied text.
* @param reverse Whether the caller is looking for breaks in a reverse
* direction.
* @param breakType The type of break desired, or -1.
* @param foundBreaks An allocated C array of the breaks found, if any
* @param foundBreaks vector of int32_t to receive the break positions
* @return The number of breaks found.
*/
virtual int32_t findBreaks( UText *text,
int32_t startPos,
int32_t endPos,
UBool reverse,
int32_t breakType,
UStack &foundBreaks ) const;
UVector32 &foundBreaks ) const;
protected:
@ -128,7 +126,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const = 0;
UVector32 &foundBreaks ) const = 0;
};
@ -185,7 +183,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
UVector32 &foundBreaks ) const;
};
@ -241,7 +239,7 @@ class LaoBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
UVector32 &foundBreaks ) const;
};
@ -297,7 +295,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
UVector32 &foundBreaks ) const;
};
@ -353,7 +351,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
UVector32 &foundBreaks ) const;
};
@ -417,7 +415,7 @@ class CjkBreakEngine : public DictionaryBreakEngine {
virtual int32_t divideUpDictionaryRange( UText *text,
int32_t rangeStart,
int32_t rangeEnd,
UStack &foundBreaks ) const;
UVector32 &foundBreaks ) const;
};

Просмотреть файл

@ -17,10 +17,10 @@ namespace {
const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
// 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
// No length change.
const int32_t MAX_SHORT_WIDTH = 6;
const int32_t MAX_SHORT_CHANGE_LENGTH = 0xfff;
// 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
const int32_t MAX_SHORT_CHANGE = 0x6fff;
// 0111mmmmmmnnnnnn records a replacement of m text units with n.
@ -33,20 +33,85 @@ const int32_t LENGTH_IN_2TRAIL = 62;
} // namespace
Edits::~Edits() {
if(array != stackArray) {
void Edits::releaseArray() U_NOEXCEPT {
if (array != stackArray) {
uprv_free(array);
}
}
void Edits::reset() {
length = delta = 0;
// Copies the first `length` 16-bit units of other.array into this object's array.
// The caller (copy assignment) has already copied length, delta, numChanges and
// errorCode_ from `other`; this helper only deals with the storage.
// If this object is in a failure state, or a larger buffer cannot be allocated,
// the counters are zeroed (and errorCode_ is set on allocation failure) instead.
// Always returns *this.
Edits &Edits::copyArray(const Edits &other) {
    if (U_FAILURE(errorCode_)) {
        // Nothing to copy for a failed object: leave it empty.
        numChanges = 0;
        delta = 0;
        length = 0;
        return *this;
    }
    // Each array unit is a uint16_t, i.e. two bytes.
    const size_t byteCount = (size_t)length * 2;
    if (capacity < length) {
        // The current buffer is too small; allocate one of exactly the needed size.
        uint16_t *replacement = (uint16_t *)uprv_malloc(byteCount);
        if (replacement == nullptr) {
            numChanges = 0;
            delta = 0;
            length = 0;
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        // Drop the old buffer only after the new one is secured.
        releaseArray();
        array = replacement;
        capacity = length;
    }
    if (length > 0) {
        uprv_memcpy(array, other.array, byteCount);
    }
    return *this;
}
// Takes over the array contents of `src` for move assignment.
// The caller has already copied length, delta, numChanges and errorCode_ from `src`.
// - In a failure state the counters are zeroed and nothing is moved.
// - If the content does not fit into the built-in stack array, src's buffer is
//   adopted and src is switched back to its own stack array and reset().
// - Otherwise the units are copied into this object's stack array.
// Always returns *this.
Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
    if (U_FAILURE(errorCode_)) {
        numChanges = 0;
        delta = 0;
        length = 0;
        return *this;
    }
    // Whatever buffer we owned so far is no longer needed.
    releaseArray();
    if (length <= STACK_CAPACITY) {
        // Small content: use our own stack array and copy the units.
        array = stackArray;
        capacity = STACK_CAPACITY;
        if (length > 0) {
            uprv_memcpy(array, src.array, (size_t)length * 2);
        }
    } else {
        // Large content: adopt src's buffer, then detach it from src.
        array = src.array;
        capacity = src.capacity;
        src.array = src.stackArray;
        src.capacity = STACK_CAPACITY;
        src.reset();
    }
    return *this;
}
// Copy assignment.
// Copies the counters (length, delta, numChanges) and the error code from `other`,
// then lets copyArray() duplicate the array units (allocating a larger buffer
// when the current capacity is insufficient).
// Returns *this. If `other` carries a failure code, or allocation fails,
// copyArray() leaves this object with zeroed counters.
Edits &Edits::operator=(const Edits &other) {
    if (this == &other) {
        // Self-assignment is a no-op. Without this guard copyArray() would call
        // uprv_memcpy() with identical source and destination, which is
        // undefined for memcpy, and would zero the counters of an object that
        // is in an error state.
        return *this;
    }
    length = other.length;
    delta = other.delta;
    numChanges = other.numChanges;
    errorCode_ = other.errorCode_;
    return copyArray(other);
}
// Move assignment.
// Takes the error code and the counters from `src`, then hands the array over
// via moveArray(), which either adopts src's buffer or copies into the
// stack array. Returns *this.
Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
    errorCode_ = src.errorCode_;
    numChanges = src.numChanges;
    delta = src.delta;
    length = src.length;
    return moveArray(src);
}
// Destructor: delegates to releaseArray() to dispose of the current array.
Edits::~Edits() {
releaseArray();
}
// Returns this object to the empty state: no recorded units, zero length delta,
// zero change count, and a cleared error code. The array buffer is kept as is.
void Edits::reset() U_NOEXCEPT {
    errorCode_ = U_ZERO_ERROR;
    numChanges = 0;
    delta = 0;
    length = 0;
}
void Edits::addUnchanged(int32_t unchangedLength) {
if(U_FAILURE(errorCode) || unchangedLength == 0) { return; }
if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
if(unchangedLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Merge into previous unchanged-text record, if any.
@ -72,38 +137,41 @@ void Edits::addUnchanged(int32_t unchangedLength) {
}
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
if(U_FAILURE(errorCode)) { return; }
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
int32_t last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
setLastUnit(last + 1);
return;
}
append(oldLength << 12);
return;
}
if(U_FAILURE(errorCode_)) { return; }
if(oldLength < 0 || newLength < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (oldLength == 0 && newLength == 0) {
return;
}
++numChanges;
int32_t newDelta = newLength - oldLength;
if (newDelta != 0) {
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
// Integer overflow or underflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
delta += newDelta;
}
if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
// Merge into previous same-lengths short-replacement record, if any.
int32_t u = (oldLength << 12) | (newLength << 9);
int32_t last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last & ~SHORT_CHANGE_NUM_MASK) == u &&
(last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
setLastUnit(last + 1);
return;
}
append(u);
return;
}
int32_t head = 0x7000;
if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
head |= oldLength << 6;
@ -149,7 +217,7 @@ UBool Edits::growArray() {
} else if (capacity == INT32_MAX) {
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
// with a result-string-buffer overflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
} else if (capacity >= (INT32_MAX / 2)) {
newCapacity = INT32_MAX;
@ -158,18 +226,16 @@ UBool Edits::growArray() {
}
// Grow by at least 5 units so that a maximal change record will fit.
if ((newCapacity - capacity) < 5) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
}
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
if (newArray == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
uprv_memcpy(newArray, array, (size_t)length * 2);
if (array != stackArray) {
uprv_free(array);
}
releaseArray();
array = newArray;
capacity = newCapacity;
return TRUE;
@ -177,27 +243,161 @@ UBool Edits::growArray() {
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
if (U_FAILURE(outErrorCode)) { return TRUE; }
if (U_SUCCESS(errorCode)) { return FALSE; }
outErrorCode = errorCode;
if (U_SUCCESS(errorCode_)) { return FALSE; }
outErrorCode = errorCode_;
return TRUE;
}
UBool Edits::hasChanges() const {
if (delta != 0) {
return TRUE;
}
for (int32_t i = 0; i < length; ++i) {
if (array[i] > MAX_UNCHANGED) {
return TRUE;
Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
if (copyErrorTo(errorCode)) { return *this; }
// Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
// Parallel iteration over both Edits.
Iterator abIter = ab.getFineIterator();
Iterator bcIter = bc.getFineIterator();
UBool abHasNext = TRUE, bcHasNext = TRUE;
// Copy iterator state into local variables, so that we can modify and subdivide spans.
// ab old & new length, bc old & new length
int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
// When we have different-intermediate-length changes, we accumulate a larger change.
int32_t pending_aLength = 0, pending_cLength = 0;
for (;;) {
// At this point, for each of the two iterators:
// Either we are done with the locally cached current edit,
// and its intermediate-string length has been reset,
// or we will continue to work with a truncated remainder of this edit.
//
// If the current edit is done, and the iterator has not yet reached the end,
// then we fetch the next edit. This is true for at least one of the iterators.
//
// Normally it does not matter whether we fetch from ab and then bc or vice versa.
// However, the result is observably different when
// ab deletions meet bc insertions at the same intermediate-string index.
// Some users expect the bc insertions to come first, so we fetch from bc first.
if (bc_bLength == 0) {
if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
bc_bLength = bcIter.oldLength();
cLength = bcIter.newLength();
if (bc_bLength == 0) {
// insertion
if (ab_bLength == 0 || !abIter.hasChange()) {
addReplace(pending_aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_cLength += cLength;
}
continue;
}
}
// else see if the other iterator is done, too.
}
if (ab_bLength == 0) {
if (abHasNext && (abHasNext = abIter.next(errorCode))) {
aLength = abIter.oldLength();
ab_bLength = abIter.newLength();
if (ab_bLength == 0) {
// deletion
if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
addReplace(pending_aLength + aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
} else {
pending_aLength += aLength;
}
continue;
}
} else if (bc_bLength == 0) {
// Both iterators are done at the same time:
// The intermediate-string lengths match.
break;
} else {
// The ab output string is shorter than the bc input string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
}
if (bc_bLength == 0) {
// The bc input string is shorter than the ab output string.
if (!copyErrorTo(errorCode)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
// Done fetching: ab_bLength > 0 && bc_bLength > 0
// The current state has two parts:
// - Past: We accumulate a longer ac edit in the "pending" variables.
// - Current: We have copies of the current ab/bc edits in local variables.
// At least one side is newly fetched.
// One side might be a truncated remainder of an edit we fetched earlier.
if (!abIter.hasChange() && !bcIter.hasChange()) {
// An unchanged span all the way from string a to string c.
if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
pending_aLength = pending_cLength = 0;
}
int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
addUnchanged(unchangedLength);
ab_bLength = aLength -= unchangedLength;
bc_bLength = cLength -= unchangedLength;
// At least one of the unchanged spans is now empty.
continue;
}
if (!abIter.hasChange() && bcIter.hasChange()) {
// Unchanged a->b but changed b->c.
if (ab_bLength >= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
aLength = ab_bLength -= bc_bLength;
bc_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else if (abIter.hasChange() && !bcIter.hasChange()) {
// Changed a->b and then unchanged b->c.
if (ab_bLength <= bc_bLength) {
// Split the longer unchanged span into change + remainder.
addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
pending_aLength = pending_cLength = 0;
cLength = bc_bLength -= ab_bLength;
ab_bLength = 0;
continue;
}
// Handle the shorter unchanged span below like a change.
} else { // both abIter.hasChange() && bcIter.hasChange()
if (ab_bLength == bc_bLength) {
// Changes on both sides up to the same position. Emit & reset.
addReplace(pending_aLength + aLength, pending_cLength + cLength);
pending_aLength = pending_cLength = 0;
ab_bLength = bc_bLength = 0;
continue;
}
}
// Accumulate the a->c change, reset the shorter side,
// keep a remainder of the longer one.
pending_aLength += aLength;
pending_cLength += cLength;
if (ab_bLength < bc_bLength) {
bc_bLength -= ab_bLength;
cLength = ab_bLength = 0;
} else { // ab_bLength > bc_bLength
ab_bLength -= bc_bLength;
aLength = bc_bLength = 0;
}
}
return FALSE;
if (pending_aLength != 0 || pending_cLength != 0) {
addReplace(pending_aLength, pending_cLength);
}
copyErrorTo(errorCode);
return *this;
}
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0),
onlyChanges_(oc), coarse(crs),
changed(FALSE), oldLength_(0), newLength_(0),
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
int32_t Edits::Iterator::readLength(int32_t head) {
@ -219,7 +419,7 @@ int32_t Edits::Iterator::readLength(int32_t head) {
}
}
void Edits::Iterator::updateIndexes() {
void Edits::Iterator::updateNextIndexes() {
srcIndex += oldLength_;
if (changed) {
replIndex += newLength_;
@ -227,22 +427,52 @@ void Edits::Iterator::updateIndexes() {
destIndex += newLength_;
}
// Backward counterpart of updateNextIndexes(): moves the string indexes back
// by the lengths of the current span. The replacement index only moves for
// change spans.
void Edits::Iterator::updatePreviousIndexes() {
    destIndex -= newLength_;
    srcIndex -= oldLength_;
    if (!changed) {
        return;
    }
    replIndex -= newLength_;
}
UBool Edits::Iterator::noNext() {
// No change beyond the string.
// No change before or beyond the string.
dir = 0;
changed = FALSE;
oldLength_ = newLength_ = 0;
return FALSE;
}
UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
// Forward iteration: Update the string indexes to the limit of the current span,
// and post-increment-read array units to assemble a new span.
// Leaves the array index one after the last unit of that span.
if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
updateIndexes();
if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of equal-length changes.
--remaining;
return TRUE;
if (dir > 0) {
updateNextIndexes();
} else {
if (dir < 0) {
// Turn around from previous() to next().
// Post-increment-read the same span again.
if (remaining > 0) {
// Fine-grained iterator:
// Stay on the current one of a sequence of compressed changes.
++index; // next() rests on the index after the sequence unit.
dir = 1;
return TRUE;
}
}
dir = 1;
}
if (remaining >= 1) {
// Fine-grained iterator: Continue a sequence of compressed changes.
if (remaining > 1) {
--remaining;
return TRUE;
}
remaining = 0;
}
if (index >= length) {
return noNext();
@ -258,7 +488,7 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
}
newLength_ = oldLength_;
if (onlyChanges) {
updateIndexes();
updateNextIndexes();
if (index >= length) {
return noNext();
}
@ -270,14 +500,19 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
}
changed = TRUE;
if (u <= MAX_SHORT_CHANGE) {
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
if (coarse) {
int32_t w = u >> 12;
int32_t len = (u & 0xfff) + 1;
oldLength_ = newLength_ = len * w;
oldLength_ = num * oldLen;
newLength_ = num * newLen;
} else {
// Split a sequence of equal-length changes that was compressed into one unit.
oldLength_ = newLength_ = u >> 12;
remaining = u & 0xfff;
// Split a sequence of changes that was compressed into one unit.
oldLength_ = oldLen;
newLength_ = newLen;
if (num > 1) {
remaining = num; // This is the first of two or more changes.
}
return TRUE;
}
} else {
@ -292,55 +527,250 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
while (index < length && (u = array[index]) > MAX_UNCHANGED) {
++index;
if (u <= MAX_SHORT_CHANGE) {
int32_t w = u >> 12;
int32_t len = (u & 0xfff) + 1;
len = len * w;
oldLength_ += len;
newLength_ += len;
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
oldLength_ += (u >> 12) * num;
newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
} else {
U_ASSERT(u <= 0x7fff);
int32_t oldLen = readLength((u >> 6) & 0x3f);
int32_t newLen = readLength(u & 0x3f);
oldLength_ += oldLen;
newLength_ += newLen;
oldLength_ += readLength((u >> 6) & 0x3f);
newLength_ += readLength(u & 0x3f);
}
}
return TRUE;
}
UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
if (i < srcIndex) {
// Reset the iterator to the start.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return TRUE;
}
while (next(FALSE, errorCode)) {
if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return TRUE;
}
if (remaining > 0) {
// Is the index in one of the remaining compressed edits?
// srcIndex is the start of the current span, before the remaining ones.
int32_t len = (remaining + 1) * oldLength_;
if (i < (srcIndex + len)) {
int32_t n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
len = n * oldLength_;
srcIndex += len;
replIndex += len;
destIndex += len;
remaining -= n;
// Moves this iterator one span backwards; the mirror image of next().
// @param errorCode in/out error code; if it already indicates a failure on entry,
//                  nothing is done and FALSE is returned.
// @return TRUE if a span was read and the string indexes now point to its start;
//         FALSE on a pre-set error or when the beginning of the array has been
//         reached (via noNext(), which also clears dir, changed and both lengths).
UBool Edits::Iterator::previous(UErrorCode &errorCode) {
// Backward iteration: Pre-decrement-read array units to assemble a new span,
// then update the string indexes to the start of that span.
// Leaves the array index on the head unit of that span.
if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
// dir records the direction of the last move: 1 after next(), -1 after previous(),
// 0 initially and after noNext().
if (dir >= 0) {
if (dir > 0) {
// Turn around from next() to previous().
// Set the string indexes to the span limit and
// pre-decrement-read the same span again.
if (remaining > 0) {
// Fine-grained iterator:
// Stay on the current one of a sequence of compressed changes.
--index; // previous() rests on the sequence unit.
dir = -1;
return TRUE;
}
updateNextIndexes();
}
dir = -1;
}
if (remaining > 0) {
// Fine-grained iterator: Continue a sequence of compressed changes.
int32_t u = array[index];
U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
// The unit encodes (u & SHORT_CHANGE_NUM_MASK) + 1 changes; while fewer than
// that many have been visited from the end, step to the next-earlier one.
if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
++remaining;
updatePreviousIndexes();
return TRUE;
}
// All changes of this compressed unit have been visited.
remaining = 0;
}
if (index <= 0) {
return noNext();
}
int32_t u = array[--index];
if (u <= MAX_UNCHANGED) {
// Combine adjacent unchanged ranges.
changed = FALSE;
oldLength_ = u + 1;
while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
--index;
oldLength_ += u + 1;
}
newLength_ = oldLength_;
// No need to handle onlyChanges as long as previous() is called only from findIndex().
updatePreviousIndexes();
return TRUE;
}
changed = TRUE;
if (u <= MAX_SHORT_CHANGE) {
// Short-change unit: old length, new length and repeat count are packed into u.
int32_t oldLen = u >> 12;
int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
if (coarse) {
oldLength_ = num * oldLen;
newLength_ = num * newLen;
} else {
// Split a sequence of changes that was compressed into one unit.
oldLength_ = oldLen;
newLength_ = newLen;
if (num > 1) {
remaining = 1; // This is the last of two or more changes.
}
updatePreviousIndexes();
return TRUE;
}
} else {
if (u <= 0x7fff) {
// The change is encoded in u alone.
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
} else {
// Back up to the head of the change, read the lengths,
// and reset the index to the head again.
U_ASSERT(index > 0);
while ((u = array[--index]) > 0x7fff) {}
U_ASSERT(u > MAX_SHORT_CHANGE);
// NOTE(review): readLength() presumably reads further units via index,
// which is why index is advanced past the head here and restored below -- confirm.
int32_t headIndex = index++;
oldLength_ = readLength((u >> 6) & 0x3f);
newLength_ = readLength(u & 0x3f);
index = headIndex;
}
if (!coarse) {
updatePreviousIndexes();
return TRUE;
}
}
// Only the coarse iterator reaches this point.
// Combine adjacent changes.
while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
--index;
if (u <= MAX_SHORT_CHANGE) {
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
oldLength_ += (u >> 12) * num;
newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
} else if (u <= 0x7fff) {
// Read the lengths, and reset the index to the head again.
int32_t headIndex = index++;
oldLength_ += readLength((u >> 6) & 0x3f);
newLength_ += readLength(u & 0x3f);
index = headIndex;
}
// Units above 0x7fff contribute nothing here; the loop just steps over them.
}
updatePreviousIndexes();
return TRUE;
}
int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
if (U_FAILURE(errorCode) || i < 0) { return -1; }
int32_t spanStart, spanLength;
if (findSource) { // find source index
spanStart = srcIndex;
spanLength = oldLength_;
} else { // find destination index
spanStart = destIndex;
spanLength = newLength_;
}
if (i < spanStart) {
if (i >= (spanStart / 2)) {
// Search backwards.
for (;;) {
UBool hasPrevious = previous(errorCode);
U_ASSERT(hasPrevious); // because i>=0 and the first span starts at 0
(void)hasPrevious; // avoid unused-variable warning
spanStart = findSource ? srcIndex : destIndex;
if (i >= spanStart) {
// The index is in the current span.
return 0;
}
if (remaining > 0) {
// Is the index in one of the remaining compressed edits?
// spanStart is the start of the current span, first of the remaining ones.
spanLength = findSource ? oldLength_ : newLength_;
int32_t u = array[index];
U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
int32_t len = num * spanLength;
if (i >= (spanStart - len)) {
int32_t n = ((spanStart - i - 1) / spanLength) + 1;
// 1 <= n <= num
srcIndex -= n * oldLength_;
replIndex -= n * newLength_;
destIndex -= n * newLength_;
remaining += n;
return 0;
}
// Skip all of these edits at once.
srcIndex -= num * oldLength_;
replIndex -= num * newLength_;
destIndex -= num * newLength_;
remaining = 0;
}
}
}
// Reset the iterator to the start.
dir = 0;
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (spanStart + spanLength)) {
// The index is in the current span.
return 0;
}
while (next(FALSE, errorCode)) {
if (findSource) {
spanStart = srcIndex;
spanLength = oldLength_;
} else {
spanStart = destIndex;
spanLength = newLength_;
}
if (i < (spanStart + spanLength)) {
// The index is in the current span.
return 0;
}
if (remaining > 1) {
// Is the index in one of the remaining compressed edits?
// spanStart is the start of the current span, first of the remaining ones.
int32_t len = remaining * spanLength;
if (i < (spanStart + len)) {
int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining - 1
srcIndex += n * oldLength_;
replIndex += n * newLength_;
destIndex += n * newLength_;
remaining -= n;
return 0;
}
// Make next() skip all of these edits at once.
oldLength_ = newLength_ = len;
oldLength_ *= remaining;
newLength_ *= remaining;
remaining = 0;
}
}
return FALSE;
return 1;
}
// Maps a source-string index to the corresponding destination-string index.
// Positions the iterator with findIndex(i, TRUE, ...) and then translates:
//   - findIndex() < 0 (error, or index before the string): 0
//   - beyond the last span, or exactly at the start of the found span:
//     the current destination index
//   - inside a change span: the end of that span in the destination
//   - inside an unchanged span: the same offset within the destination span
int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
    const int32_t location = findIndex(i, TRUE, errorCode);
    if (location < 0) {
        return 0;
    }
    if (location > 0 || i == srcIndex) {
        return destIndex;
    }
    // i lies strictly inside the current span.
    const int32_t offsetInDest = changed ? newLength_ : (i - srcIndex);
    return destIndex + offsetInDest;
}
// Maps a destination-string index to the corresponding source-string index.
// Positions the iterator with findIndex(i, FALSE, ...) and then translates:
//   - findIndex() < 0 (error, or index before the string): 0
//   - beyond the last span, or exactly at the start of the found span:
//     the current source index
//   - inside a change span: the end of that span in the source
//   - inside an unchanged span: the same offset within the source span
int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
    const int32_t location = findIndex(i, FALSE, errorCode);
    if (location < 0) {
        return 0;
    }
    if (location > 0 || i == destIndex) {
        return srcIndex;
    }
    // i lies strictly inside the current span.
    const int32_t offsetInSrc = changed ? oldLength_ : (i - destIndex);
    return srcIndex + offsetInSrc;
}
U_NAMESPACE_END

Просмотреть файл

@ -694,7 +694,7 @@ FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
}
FilteredBreakIteratorBuilder *
FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
if(U_FAILURE(status)) return NULL;
LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
return (U_SUCCESS(status))? ret.orphan(): NULL;

Просмотреть файл

@ -20,7 +20,9 @@
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
@ -85,6 +87,52 @@ FilteredNormalizer2::normalize(const UnicodeString &src,
return dest;
}
// Entry point for filtered UTF-8 normalization.
// Does nothing if errorCode already indicates a failure. Otherwise resets
// `edits` once (unless the caller passed U_EDITS_NO_RESET or edits is null)
// and forwards to the span-by-span overload with U_EDITS_NO_RESET forced on,
// so that the per-span calls do not reset the edits again.
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
                                   Edits *edits, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return;
    }
    const bool keepExistingEdits = (options & U_EDITS_NO_RESET) != 0;
    if (!keepExistingEdits && edits != nullptr) {
        edits->reset();
    }
    // Do not reset for each span.
    const uint32_t perSpanOptions = options | U_EDITS_NO_RESET;
    normalizeUTF8(perSpanOptions, src.data(), src.length(), sink, edits,
                  USET_SPAN_SIMPLE, errorCode);
}
// Walks the UTF-8 input as alternating spans, starting with `spanCondition`.
// Spans taken with USET_SPAN_NOT_CONTAINED are passed through unchanged:
// recorded via edits->addUnchanged() and appended to the sink unless
// U_OMIT_UNCHANGED_TEXT is set. All other spans are handed to
// norm2.normalizeUTF8(). Processing stops at the first failure reported
// through errorCode.
void
FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length,
                                   ByteSink &sink, Edits *edits,
                                   USetSpanCondition spanCondition,
                                   UErrorCode &errorCode) const {
    while (length > 0) {
        const int32_t spanLength = set.spanUTF8(src, length, spanCondition);
        const bool outsideFilter = (spanCondition == USET_SPAN_NOT_CONTAINED);
        if (spanLength != 0) {
            if (outsideFilter) {
                if (edits != nullptr) {
                    edits->addUnchanged(spanLength);
                }
                if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
                    sink.Append(src, spanLength);
                }
            } else {
                // Not norm2.normalizeSecondAndAppend() because we do not want
                // to modify the non-filter part of dest.
                norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode);
                if (U_FAILURE(errorCode)) {
                    return;
                }
            }
        }
        // Alternate between the two kinds of span.
        spanCondition = outsideFilter ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
        src += spanLength;
        length -= spanLength;
    }
}
UnicodeString &
FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
@ -196,6 +244,31 @@ FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode)
return TRUE;
}
// Tests whether the UTF-8 input is normalized with respect to the filter.
// The input is walked as alternating spans, starting with a USET_SPAN_SIMPLE
// span; each such span is checked with norm2.isNormalizedUTF8(), while the
// USET_SPAN_NOT_CONTAINED spans in between are skipped.
// Returns FALSE if errorCode indicates a failure (on entry or after a check)
// or if any checked span is not normalized; TRUE otherwise.
UBool
FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const char *cursor = sp.data();
    int32_t bytesLeft = sp.length();
    bool checkThisSpan = true;  // the first span is taken with USET_SPAN_SIMPLE
    while (bytesLeft > 0) {
        const USetSpanCondition condition =
            checkThisSpan ? USET_SPAN_SIMPLE : USET_SPAN_NOT_CONTAINED;
        const int32_t spanLength = set.spanUTF8(cursor, bytesLeft, condition);
        if (checkThisSpan) {
            const UBool spanIsNormalized =
                norm2.isNormalizedUTF8(StringPiece(cursor, spanLength), errorCode);
            if (!spanIsNormalized || U_FAILURE(errorCode)) {
                return FALSE;
            }
        }
        checkThisSpan = !checkThisSpan;
        cursor += spanLength;
        bytesLeft -= spanLength;
    }
    return TRUE;
}
UNormalizationCheckResult
FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
uprv_checkCanGetBuffer(s, errorCode);

Просмотреть файл

@ -33,6 +33,8 @@ class U_COMMON_API Hashtable : public UMemory {
inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status);
inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status);
public:
/**
* Construct a hashtable
@ -41,6 +43,14 @@ public:
*/
Hashtable(UBool ignoreKeyCase, UErrorCode& status);
/**
* Construct a hashtable
* @param ignoreKeyCase If true, keys are case insensitive.
* @param size initial size allocation
* @param status Error code
*/
Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status);
/**
* Construct a hashtable
* @param keyComp Comparator for comparing the keys
@ -76,9 +86,9 @@ public:
int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
void* get(const UnicodeString& key) const;
int32_t geti(const UnicodeString& key) const;
void* remove(const UnicodeString& key);
int32_t removei(const UnicodeString& key);
@ -92,9 +102,9 @@ public:
* @see uhash_nextElement
*/
const UHashElement* nextElement(int32_t& pos) const;
UKeyComparator* setKeyComparator(UKeyComparator*keyComp);
UValueComparator* setValueComparator(UValueComparator* valueComp);
UBool equals(const Hashtable& that) const;
@ -107,7 +117,7 @@ private:
* Implementation
********************************************************************/
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
UValueComparator *valueComp, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
@ -119,10 +129,23 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
}
}
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
// Sized counterpart of init(): sets up hashObj through uhash_initSize() with
// the given size. On success, points hash at hashObj and installs
// uprv_deleteUObject as the key deleter. Does nothing if status already
// indicates a failure, and leaves hash untouched if initialization fails.
inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
                                UValueComparator *valueComp, int32_t size, UErrorCode& status) {
    if (U_SUCCESS(status)) {
        uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status);
    }
    if (U_FAILURE(status)) {
        return;
    }
    hash = &hashObj;
    uhash_setKeyDeleter(hash, uprv_deleteUObject);
}
inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
UErrorCode& status) : hash(0) {
init( uhash_hashUnicodeString, keyComp, valueComp, status);
}
inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
: hash(0)
{
@ -134,6 +157,17 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
status);
}
// Constructs a UnicodeString-keyed table with the requested initial size.
// ignoreKeyCase selects the caseless hash/compare functions; no value
// comparator is installed.
inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
 : hash(0)
{
    UHashFunction *keyHash = uhash_hashUnicodeString;
    UKeyComparator *keyComp = uhash_compareUnicodeString;
    if (ignoreKeyCase) {
        keyHash = uhash_hashCaselessUnicodeString;
        keyComp = uhash_compareCaselessUnicodeString;
    }
    initSize(keyHash, keyComp, NULL, size, status);
}
inline Hashtable::Hashtable(UErrorCode& status)
: hash(0)
{
@ -200,7 +234,7 @@ inline void Hashtable::removeAll(void) {
inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){
return uhash_setKeyComparator(hash, keyComp);
}
inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){
return uhash_setValueComparator(hash, valueComp);
}

Просмотреть файл

@ -63,7 +63,7 @@ ListFormatInternal(const ListFormatInternal &other) :
static Hashtable* listPatternHash = NULL;
static UMutex listFormatterMutex = U_MUTEX_INITIALIZER;
static const char *STANDARD_STYLE = "standard";
static const char STANDARD_STYLE[] = "standard";
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_listformatter_cleanup() {

Просмотреть файл

@ -62,7 +62,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6d &&
pInfo->dataFormat[3]==0x32 &&
pInfo->formatVersion[0]==2
pInfo->formatVersion[0]==3
) {
// Normalizer2Impl *me=(Normalizer2Impl *)context;
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
@ -84,7 +84,7 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
const int32_t *inIndexes=(const int32_t *)inBytes;
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
if(indexesLength<=IX_MIN_MAYBE_YES) {
if(indexesLength<=IX_MIN_LCCC_CP) {
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
return;
}

Просмотреть файл

@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN
static icu::Locale* availableLocaleList = NULL;
static int32_t availableLocaleListCount;
static icu::UInitOnce gInitOnce = U_INITONCE_INITIALIZER;
static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER;
U_NAMESPACE_END
@ -50,7 +50,7 @@ static UBool U_CALLCONV locale_available_cleanup(void)
availableLocaleList = NULL;
}
availableLocaleListCount = 0;
gInitOnce.reset();
gInitOnceLocale.reset();
return TRUE;
}
@ -81,7 +81,7 @@ void U_CALLCONV locale_available_init() {
const Locale* U_EXPORT2
Locale::getAvailableLocales(int32_t& count)
{
umtx_initOnce(gInitOnce, &locale_available_init);
umtx_initOnce(gInitOnceLocale, &locale_available_init);
count = availableLocaleListCount;
return availableLocaleList;
}

Просмотреть файл

@ -542,7 +542,7 @@ uloc_getDisplayName(const char *locale,
return 0;
}
separator = (const UChar *)p0 + subLen;
sepLen = p1 - separator;
sepLen = static_cast<int32_t>(p1 - separator);
}
if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) {
@ -558,8 +558,8 @@ uloc_getDisplayName(const char *locale,
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
sub0Pos=p0-pattern;
sub1Pos=p1-pattern;
sub0Pos = static_cast<int32_t>(p0-pattern);
sub1Pos = static_cast<int32_t>(p1-pattern);
if (sub1Pos < sub0Pos) { /* a very odd pattern */
int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t;
langi=1;
@ -821,6 +821,8 @@ uloc_getDisplayKeywordValue( const char* locale,
/* get the keyword value */
keywordValue[0]=0;
keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
if (*status == U_STRING_NOT_TERMINATED_WARNING)
*status = U_BUFFER_OVERFLOW_ERROR;
/*
* if the keyword is equal to currency .. then to get the display name

Просмотреть файл

@ -54,7 +54,7 @@ static int32_t ncat(char *buffer, uint32_t buflen, ...) {
*p = 0;
va_end(args);
return p - buffer;
return static_cast<int32_t>(p - buffer);
}
U_NAMESPACE_BEGIN
@ -636,8 +636,9 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
const char* key;
while ((key = e->next((int32_t *)0, status)) != NULL) {
value[0] = 0;
locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
if (U_FAILURE(status)) {
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
return result;
}
keyDisplayName(key, temp, TRUE);

Просмотреть файл

@ -511,7 +511,7 @@ parseTagString(
unknownLanguage);
*langLength = (int32_t)uprv_strlen(lang);
}
else if (_isIDSeparator(*position)) {
if (_isIDSeparator(*position)) {
++position;
}
@ -1281,7 +1281,7 @@ uloc_minimizeSubtags(const char* localeID,
// Pairs of (language subtag, + or -) for finding out fast if common languages
// are LTR (minus) or RTL (plus).
static const char* LANG_DIR_STRING =
static const char LANG_DIR_STRING[] =
"root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
// Implemented here because this calls uloc_addLikelySubtags().

Просмотреть файл

@ -190,7 +190,10 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
ILCID_POSIX_SUBTABLE(bin) {
{0x66, "bin"},
{0x0466, "bin_NG"}
};
ILCID_POSIX_SUBTABLE(bn) {
{0x45, "bn"},
@ -214,7 +217,13 @@ ILCID_POSIX_SUBTABLE(ca) {
};
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
ILCID_POSIX_SUBTABLE(chr) {
{0x05c, "chr"},
{0x7c5c, "chr_Cher"},
{0x045c, "chr_Cher_US"},
{0x045c, "chr_US"}
};
// ICU has chosen different names for these.
ILCID_POSIX_SUBTABLE(ckb) {
@ -263,10 +272,10 @@ ILCID_POSIX_SUBTABLE(en) {
{0x2C09, "en_TT"},
{0x0409, "en_US"},
{0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
{0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
{0x2409, "en_029"},
{0x1c09, "en_ZA"},
{0x3009, "en_ZW"},
{0x2409, "en_029"},
{0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
{0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */
@ -419,7 +428,12 @@ ILCID_POSIX_SUBTABLE(hsb) {
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
ILCID_POSIX_SUBTABLE(ibb) {
{0x69, "ibb"},
{0x0469, "ibb_NG"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
@ -458,13 +472,18 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
{0x60, "ks"},
{0x0860, "ks_IN"}, /* Documentation doesn't mention script */
{0x0460, "ks_Arab_IN"},
{0x0860, "ks_Deva_IN"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
ILCID_POSIX_SUBTABLE(la) {
{0x76, "la"},
{0x0476, "la_001"},
{0x0476, "la_IT"} /*Left in for compatibility*/
};
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
@ -535,15 +554,19 @@ ILCID_POSIX_SUBTABLE(or_IN) {
{0x0448, "or_IN"},
};
ILCID_POSIX_SUBTABLE(pa) {
{0x46, "pa"},
{0x0446, "pa_IN"},
{0x0846, "pa_PK"},
{0x0846, "pa_Arab_PK"}
{0x0846, "pa_Arab_PK"},
{0x0846, "pa_PK"}
};
ILCID_POSIX_SUBTABLE(pap) {
{0x79, "pap"},
{0x0479, "pap_029"},
{0x0479, "pap_AN"} /*Left in for compatibility*/
};
ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
@ -619,9 +642,11 @@ ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
ILCID_POSIX_SUBTABLE(sd) {
{0x59, "sd"},
{0x0459, "sd_IN"},
{0x0459, "sd_Deva_IN"},
{0x0859, "sd_PK"}
{0x0459, "sd_IN"},
{0x0859, "sd_Arab_PK"},
{0x0859, "sd_PK"},
{0x7c59, "sd_Arab"}
};
ILCID_POSIX_SUBTABLE(se) {
@ -645,9 +670,8 @@ ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
ILCID_POSIX_SUBTABLE(so) {
{0x77, "so"},
{0x0477, "so_ET"},
{0x0477, "so_SO"}
};
@ -739,7 +763,12 @@ ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
ILCID_POSIX_SUBTABLE(yi) {
{0x003d, "yi"},
{0x043d, "yi_001"}
};
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
// Windows & ICU tend to different names for some of these
@ -1103,7 +1132,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
}
if (pPosixID) {
int32_t resLen = uprv_strlen(pPosixID);
int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
uprv_memcpy(posixID, pPosixID, copyLen);
if (resLen < posixIDCapacity) {
@ -1177,7 +1206,7 @@ uprv_convertToLCIDPlatform(const char* localeID)
char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
// this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
int32_t bcp47Len = uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
(void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
if (U_SUCCESS(myStatus))
{
@ -1214,6 +1243,8 @@ uprv_convertToLCIDPlatform(const char* localeID)
}
}
}
#else
(void)localeID; // Suppress unused variable warning.
#endif /* USE_WINDOWS_LCID_MAPPING_API */
// No found, or not implemented on platforms without native name->lcid conversion

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -5,7 +5,7 @@
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* loadednormalizer2impl.h
* norm2allmodes.h
*
* created on: 2014sep07
* created by: Markus W. Scherer
@ -18,7 +18,9 @@
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h"
#include "cpputils.h"
#include "normalizer2impl.h"
@ -210,8 +212,8 @@ private:
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
}
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
};
@ -224,19 +226,35 @@ public:
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
// UTF-8 normalization via impl.composeUTF8(). Resets edits first unless the
// caller passed U_EDITS_NO_RESET, and always flushes the sink once
// composeUTF8() has run. Does nothing if errorCode already indicates failure.
void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
              Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_FAILURE(errorCode)) {
        return;
    }
    const UBool resetEdits = (options & U_EDITS_NO_RESET) == 0;
    if (resetEdits && edits != nullptr) {
        edits->reset();
    }
    const uint8_t *begin = reinterpret_cast<const uint8_t *>(src.data());
    const uint8_t *end = begin + src.length();
    impl.composeUTF8(options, onlyContiguous, begin, end, &sink, edits, errorCode);
    sink.Flush();
}
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
}
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
return FALSE;
}
@ -252,8 +270,16 @@ private:
}
return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
}
// Normalization check on UTF-8 input: runs impl.composeUTF8() with no sink
// and no edits and returns its result. Returns FALSE if errorCode already
// indicates failure.
virtual UBool
isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_SUCCESS(errorCode)) {
        const uint8_t *begin = reinterpret_cast<const uint8_t *>(sp.data());
        const uint8_t *end = begin + sp.length();
        return impl.composeUTF8(0, onlyContiguous, begin, end, nullptr, nullptr, errorCode);
    }
    return FALSE;
}
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
return UNORM_MAYBE;
}
@ -267,21 +293,21 @@ private:
return qcResult;
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
return impl.getCompQuickCheck(impl.getNorm16(c));
}
virtual UBool hasBoundaryBefore(UChar32 c) const {
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryBefore(c);
}
virtual UBool hasBoundaryAfter(UChar32 c) const {
return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
return impl.hasCompBoundaryAfter(c, onlyContiguous);
}
virtual UBool isInert(UChar32 c) const {
return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
virtual UBool isInert(UChar32 c) const U_OVERRIDE {
return impl.isCompInert(c, onlyContiguous);
}
const UBool onlyContiguous;

Просмотреть файл

@ -20,7 +20,9 @@
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/edits.h"
#include "unicode/normalizer2.h"
#include "unicode/stringoptions.h"
#include "unicode/unistr.h"
#include "unicode/unorm.h"
#include "cstring.h"
@ -42,6 +44,20 @@ U_NAMESPACE_BEGIN
Normalizer2::~Normalizer2() {}
// Default UTF-8 implementation: converts src to a UnicodeString, runs the
// UTF-16 normalize(), and writes the result to sink as UTF-8.
// The options argument is ignored, and recording edits is not supported here:
// a non-null edits pointer yields U_UNSUPPORTED_ERROR.
void
Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink,
                           Edits *edits, UErrorCode &errorCode) const {
    if (U_SUCCESS(errorCode)) {
        if (edits == nullptr) {
            UnicodeString utf16Source = UnicodeString::fromUTF8(src);
            normalize(utf16Source, errorCode).toUTF8(sink);
        } else {
            errorCode = U_UNSUPPORTED_ERROR;
        }
    }
}
UBool
Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
return FALSE;
@ -57,6 +73,11 @@ Normalizer2::getCombiningClass(UChar32 /*c*/) const {
return 0;
}
// Default UTF-8 implementation: converts s to a UnicodeString and defers to
// the UTF-16 isNormalized(). Returns FALSE without converting if errorCode
// already indicates failure.
UBool
Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    return isNormalized(UnicodeString::fromUTF8(s), errorCode) != FALSE;
}
// Normalizer2 implementation for the old UNORM_NONE.
class NoopNormalizer2 : public Normalizer2 {
virtual ~NoopNormalizer2();
@ -64,7 +85,7 @@ class NoopNormalizer2 : public Normalizer2 {
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const {
UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) {
if(&dest!=&src) {
dest=src;
@ -74,10 +95,27 @@ class NoopNormalizer2 : public Normalizer2 {
}
return dest;
}
// No-op normalization of UTF-8 text: the whole input is reported to edits as
// unchanged and, unless U_OMIT_UNCHANGED_TEXT is set, copied to sink verbatim.
// edits is reset first unless U_EDITS_NO_RESET is set.
// Does nothing if errorCode already indicates failure.
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
              Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_FAILURE(errorCode)) {
        return;
    }
    if (edits != nullptr) {
        const UBool keepExistingEdits = (options & U_EDITS_NO_RESET) != 0;
        if (!keepExistingEdits) {
            edits->reset();
        }
        edits->addUnchanged(src.length());
    }
    const UBool omitUnchanged = (options & U_OMIT_UNCHANGED_TEXT) != 0;
    if (!omitUnchanged) {
        sink.Append(src.data(), src.length());
    }
    sink.Flush();
}
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) {
if(&first!=&second) {
first.append(second);
@ -90,7 +128,7 @@ class NoopNormalizer2 : public Normalizer2 {
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
UErrorCode &errorCode) const U_OVERRIDE {
if(U_SUCCESS(errorCode)) {
if(&first!=&second) {
first.append(second);
@ -101,25 +139,29 @@ class NoopNormalizer2 : public Normalizer2 {
return first;
}
virtual UBool
getDecomposition(UChar32, UnicodeString &) const {
getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE {
return FALSE;
}
// No need to override the default getRawDecomposition().
// No need to U_OVERRIDE the default getRawDecomposition().
virtual UBool
isNormalized(const UnicodeString &, UErrorCode &) const {
return TRUE;
isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE {
return U_SUCCESS(errorCode);
}
// The input text is never inspected: the result depends only on whether
// errorCode indicates success on entry.
virtual UBool
isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    return TRUE;
}
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &, UErrorCode &) const {
quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE {
return UNORM_YES;
}
virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE {
return s.length();
}
virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
virtual UBool isInert(UChar32) const { return TRUE; }
virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; }
virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; }
virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; }
};
NoopNormalizer2::~NoopNormalizer2() {}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -35,6 +35,11 @@ U_NAMESPACE_BEGIN
struct CanonIterData;
class ByteSink;
class Edits;
class InitCanonIterData;
class LcccContext;
class U_COMMON_API Hangul {
public:
/* Korean Hangul and Jamo constants */
@ -63,9 +68,9 @@ public:
return HANGUL_BASE<=c && c<HANGUL_LIMIT;
}
static inline UBool
isHangulWithoutJamoT(UChar c) {
isHangulLV(UChar32 c) {
c-=HANGUL_BASE;
return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
}
static inline UBool isJamoL(UChar32 c) {
return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT;
@ -73,6 +78,14 @@ public:
static inline UBool isJamoV(UChar32 c) {
return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT;
}
/**
 * Returns TRUE if c lies strictly above JAMO_T_BASE and fewer than
 * JAMO_T_COUNT positions from it. JAMO_T_BASE itself is excluded.
 */
static inline UBool isJamoT(UChar32 c) {
    const int32_t offset = c - JAMO_T_BASE;
    if (offset <= 0) {
        return FALSE;  // at or below JAMO_T_BASE
    }
    return offset < JAMO_T_COUNT;
}
/**
 * Returns TRUE if c falls in one of the three Jamo ranges described by the
 * class constants: c must lie in [JAMO_L_BASE, JAMO_T_END] and additionally be
 * at most JAMO_L_END, or within [JAMO_V_BASE, JAMO_V_END], or strictly
 * greater than JAMO_T_BASE (JAMO_T_BASE itself does not count).
 */
static UBool isJamo(UChar32 c) {
// Cheap outer range test first, then the per-range tests.
return JAMO_L_BASE<=c && c<=JAMO_T_END &&
(c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c);
}
/**
* Decomposes c, which must be a Hangul syllable, into buffer
@ -117,10 +130,13 @@ class Normalizer2Impl;
class U_COMMON_API ReorderingBuffer : public UMemory {
public:
/** Constructs only; init() should be called. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
impl(ni), str(dest),
start(NULL), reorderStart(NULL), limit(NULL),
remainingCapacity(0), lastCC(0) {}
/** Constructs, removes the string contents, and initializes for a small initial capacity. */
ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
~ReorderingBuffer() {
if(start!=NULL) {
str.releaseBuffer((int32_t)(limit-start));
@ -135,11 +151,7 @@ public:
uint8_t getLastCC() const { return lastCC; }
UBool equals(const UChar *start, const UChar *limit) const;
// For Hangul composition, replacing the Leading consonant Jamo with the syllable.
void setLastChar(UChar c) {
*(limit-1)=c;
}
UBool equals(const uint8_t *otherStart, const uint8_t *otherLimit) const;
UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) {
return (c<=0xffff) ?
@ -218,6 +230,12 @@ private:
UChar *codePointStart, *codePointLimit;
};
/**
* Low-level implementation of the Unicode Normalization Algorithm.
* For the data structure and details see the documentation at the end of
* this normalizer2impl.h and in the design doc at
* http://site.icu-project.org/design/normalization/custom
*/
class U_COMMON_API Normalizer2Impl : public UObject {
public:
Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) {
@ -234,8 +252,6 @@ public:
// low-level properties ------------------------------------------------ ***
const UTrie2 *getNormTrie() const { return normTrie; }
UBool ensureCanonIterData(UErrorCode &errorCode) const;
uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
@ -255,15 +271,22 @@ public:
uint8_t getCC(uint16_t norm16) const {
if(norm16>=MIN_NORMAL_MAYBE_YES) {
return (uint8_t)norm16;
return getCCFromNormalYesOrMaybe(norm16);
}
if(norm16<minNoNo || limitNoNo<=norm16) {
return 0;
}
return getCCFromNoNo(norm16);
}
/**
 * Extracts the combining class from norm16 by shifting it right by
 * OFFSET_SHIFT and truncating the result to 8 bits.
 * NOTE(review): the name and the visible callers suggest this is only
 * meaningful for norm16>=MIN_NORMAL_MAYBE_YES; this function does not check.
 */
static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
return (uint8_t)(norm16 >> OFFSET_SHIFT);
}
static uint8_t getCCFromYesOrMaybe(uint16_t norm16) {
return norm16>=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0;
return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
}
/**
 * Code point variant of getCCFromYesOrMaybe(): returns 0 without a data
 * lookup for c below minCompNoMaybeCP, otherwise looks up norm16 for c
 * and returns getCCFromYesOrMaybe() of it.
 */
uint8_t getCCFromYesOrMaybeCP(UChar32 c) const {
    return c < minCompNoMaybeCP ? 0 : getCCFromYesOrMaybe(getNorm16(c));
}
/**
@ -272,10 +295,8 @@ public:
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t getFCD16(UChar32 c) const {
if(c<0) {
if(c<minDecompNoCP) {
return 0;
} else if(c<0x180) {
return tccc180[c];
} else if(c<=0xffff) {
if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
}
@ -291,9 +312,7 @@ public:
*/
uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
UChar32 c=*s++;
if(c<0x180) {
return tccc180[c];
} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
return 0;
}
UChar c2;
@ -311,8 +330,8 @@ public:
*/
uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
UChar32 c=*--s;
if(c<0x180) {
return tccc180[c];
if(c<minDecompNoCP) {
return 0;
}
if(!U16_IS_TRAIL(c)) {
if(!singleLeadMightHaveNonZeroFCD16(c)) {
@ -328,8 +347,6 @@ public:
return getFCD16FromNormData(c);
}
/** Returns the FCD data for U+0000<=c<U+0180. */
uint16_t getFCD16FromBelow180(UChar32 c) const { return tccc180[c]; }
/** Returns TRUE if the single-or-lead code unit c might have non-zero FCD data. */
UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
// 0<=lead<=0xffff
@ -340,9 +357,6 @@ public:
/** Returns the FCD value from the regular normalization data. */
uint16_t getFCD16FromNormData(UChar32 c) const;
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
/**
* Gets the decomposition for one code point.
* @param c code point
@ -367,14 +381,25 @@ public:
UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
enum {
MIN_CCC_LCCC_CP=0x300
};
// Fixed norm16 values.
MIN_YES_YES_WITH_CC=0xfe02,
JAMO_VT=0xfe00,
MIN_NORMAL_MAYBE_YES=0xfc00,
JAMO_L=2, // offset=1 hasCompBoundaryAfter=FALSE
INERT=1, // offset=0 hasCompBoundaryAfter=TRUE
// norm16 bit 0 is comp-boundary-after.
HAS_COMP_BOUNDARY_AFTER=1,
OFFSET_SHIFT=1,
// For algorithmic one-way mappings, norm16 bits 2..1 indicate the
// tccc (0, 1, >1) for quick FCC boundary-after tests.
DELTA_TCCC_0=0,
DELTA_TCCC_1=2,
DELTA_TCCC_GT_1=4,
DELTA_TCCC_MASK=6,
DELTA_SHIFT=3,
enum {
MIN_YES_YES_WITH_CC=0xff01,
JAMO_VT=0xff00,
MIN_NORMAL_MAYBE_YES=0xfe00,
JAMO_L=1,
MAX_DELTA=0x40
};
@ -394,21 +419,32 @@ public:
IX_MIN_COMP_NO_MAYBE_CP,
// Norm16 value thresholds for quick check combinations and types of extra data.
IX_MIN_YES_NO, // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
/** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
IX_MIN_YES_NO,
/** Mappings are comp-normalized. */
IX_MIN_NO_NO,
IX_LIMIT_NO_NO,
IX_MIN_MAYBE_YES,
IX_MIN_YES_NO_MAPPINGS_ONLY, // Mappings only in [minYesNoMappingsOnly..minNoNo[.
/** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
IX_MIN_YES_NO_MAPPINGS_ONLY,
/** Mappings are not comp-normalized but have a comp boundary before. */
IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE,
/** Mappings do not have a comp boundary before. */
IX_MIN_NO_NO_COMP_NO_MAYBE_CC,
/** Mappings to the empty string. */
IX_MIN_NO_NO_EMPTY,
IX_RESERVED15,
IX_MIN_LCCC_CP,
IX_RESERVED19,
IX_COUNT
};
enum {
MAPPING_HAS_CCC_LCCC_WORD=0x80,
MAPPING_HAS_RAW_MAPPING=0x40,
MAPPING_NO_COMP_BOUNDARY_AFTER=0x20,
// unused bit 0x20,
MAPPING_LENGTH_MASK=0x1f
};
@ -457,6 +493,12 @@ public:
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
/** sink==nullptr: isNormalized() */
UBool composeUTF8(uint32_t options, UBool onlyContiguous,
const uint8_t *src, const uint8_t *limit,
ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;
const UChar *makeFCD(const UChar *src, const UChar *limit,
ReorderingBuffer *buffer, UErrorCode &errorCode) const;
void makeFCDAndAppend(const UChar *src, const UChar *limit,
@ -465,27 +507,42 @@ public:
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
UBool hasDecompBoundary(UChar32 c, UBool before) const;
UBool hasDecompBoundaryBefore(UChar32 c) const;
UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
UBool hasDecompBoundaryAfter(UChar32 c) const;
UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
UBool hasCompBoundaryBefore(UChar32 c) const {
return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
}
/**
 * Code point convenience wrapper: looks up norm16 for c and returns
 * norm16HasCompBoundaryAfter() for it, passing onlyContiguous through.
 */
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
}
/**
 * Returns TRUE if c passes all of the following tests on its norm16 value:
 * isCompYesAndZeroCC(), the HAS_COMP_BOUNDARY_AFTER bit is set, and, when
 * onlyContiguous is TRUE, either norm16 is INERT or the first unit of its
 * mapping is at most 0x1ff.
 * NOTE(review): the 0x1ff limit presumably means tccc<=1, since tccc is read
 * from the high byte of the mapping's first unit elsewhere in this class — confirm.
 */
UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
uint16_t norm16=getNorm16(c);
// Short-circuit order matters: getMapping() is only dereferenced when
// onlyContiguous is set and norm16 is not the INERT value.
return isCompYesAndZeroCC(norm16) &&
(norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
(!onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff);
}
UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;
UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; }
UBool hasFCDBoundaryAfter(UChar32 c) const {
uint16_t fcd16=getFCD16(c);
return fcd16<=1 || (fcd16&0xff)==0;
}
UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
private:
friend class InitCanonIterData;
friend class LcccContext;
UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
static UBool isInert(uint16_t norm16) { return norm16==0; }
static UBool isJamoL(uint16_t norm16) { return norm16==1; }
static UBool isInert(uint16_t norm16) { return norm16==INERT; }
static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; }
static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; }
uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; }
UBool isHangulLVT(uint16_t norm16) const {
return norm16==hangulLVT();
}
UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
// UBool isCompYes(uint16_t norm16) const {
// return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
@ -504,7 +561,7 @@ private:
/**
* A little faster and simpler than isDecompYesAndZeroCC() but does not include
* the MaybeYes which combine-forward and have ccc=0.
* (Standard Unicode 5.2 normalization does not have such characters.)
* (Standard Unicode 10 normalization does not have such characters.)
*/
UBool isMostDecompYesAndZeroCC(uint16_t norm16) const {
return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
@ -514,7 +571,7 @@ private:
// For use with isCompYes().
// Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
// static uint8_t getCCFromYes(uint16_t norm16) {
// return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
// return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
// }
uint8_t getCCFromNoNo(uint16_t norm16) const {
const uint16_t *mapping=getMapping(norm16);
@ -525,30 +582,47 @@ private:
}
}
// requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
uint8_t getTrailCCFromCompYesAndZeroCC(const UChar *cpStart, const UChar *cpLimit) const;
/**
 * Returns the trailing combining class (tccc) for a norm16 value:
 * 0 for values up to and including minYesNo, otherwise the high byte of
 * the first unit of the mapping.
 */
uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const {
    // yesYes and Hangul LV have ccc=tccc=0
    if (norm16 <= minYesNo) {
        return 0;
    }
    // tccc from yesNo.
    // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
    const uint16_t firstUnit = *getMapping(norm16);
    return (uint8_t)(firstUnit >> 8);
}
uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const;
uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const;
// Requires algorithmic-NoNo.
UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const {
return c+norm16-(minMaybeYes-MAX_DELTA-1);
return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
}
UChar32 getAlgorithmicDelta(uint16_t norm16) const {
return (norm16>>DELTA_SHIFT)-centerNoNoDelta;
}
// Requires minYesNo<norm16<limitNoNo.
const uint16_t *getMapping(uint16_t norm16) const { return extraData+norm16; }
const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
return NULL;
} else if(norm16<minMaybeYes) {
return extraData+norm16; // for yesYes; if Jamo L: harmless empty list
return getMapping(norm16); // for yesYes; if Jamo L: harmless empty list
} else {
return maybeYesCompositions+norm16-minMaybeYes;
}
}
const uint16_t *getCompositionsListForComposite(uint16_t norm16) const {
const uint16_t *list=extraData+norm16; // composite has both mapping & compositions list
// A composite has both mapping & compositions list.
const uint16_t *list=getMapping(norm16);
return list+ // mapping pointer
1+ // +1 to skip the first unit with the mapping lenth
1+ // +1 to skip the first unit with the mapping length
(*list&MAPPING_LENGTH_MASK); // + mapping length
}
const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const {
// minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES
return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT);
}
/**
* @param c code point must have compositions
* @return compositions list pointer
@ -563,46 +637,78 @@ private:
UChar32 minNeedDataCP,
ReorderingBuffer *buffer,
UErrorCode &errorCode) const;
UBool decomposeShort(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
const UChar *decomposeShort(const UChar *src, const UChar *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
UBool decompose(UChar32 c, uint16_t norm16,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
static int32_t combine(const uint16_t *list, UChar32 trail);
void addComposites(const uint16_t *list, UnicodeSet &set) const;
void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex,
UBool onlyContiguous) const;
UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const;
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;
UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const {
return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
}
UBool norm16HasCompBoundaryBefore(uint16_t norm16) const {
return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16);
}
UBool hasCompBoundaryBefore(const UChar *src, const UChar *limit) const;
UBool hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const;
UBool hasCompBoundaryAfter(const UChar *start, const UChar *p,
UBool onlyContiguous) const;
UBool hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p,
UBool onlyContiguous) const;
UBool norm16HasCompBoundaryAfter(uint16_t norm16, UBool onlyContiguous) const {
return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
(!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16));
}
/** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const {
return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ?
(norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : *getMapping(norm16) <= 0x1ff);
}
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const;
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const;
const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
int32_t getCanonValue(UChar32 c) const;
const UnicodeSet &getCanonStartSet(int32_t n) const;
// UVersionInfo dataVersion;
// Code point thresholds for quick check codes.
UChar32 minDecompNoCP;
UChar32 minCompNoMaybeCP;
// BMP code point thresholds for quick check loops looking at single UTF-16 code units.
UChar minDecompNoCP;
UChar minCompNoMaybeCP;
UChar minLcccCP;
// Norm16 value thresholds for quick check combinations and types of extra data.
uint16_t minYesNo;
uint16_t minYesNoMappingsOnly;
uint16_t minNoNo;
uint16_t minNoNoCompBoundaryBefore;
uint16_t minNoNoCompNoMaybeCC;
uint16_t minNoNoEmpty;
uint16_t limitNoNo;
uint16_t centerNoNoDelta;
uint16_t minMaybeYes;
const UTrie2 *normTrie;
const uint16_t *maybeYesCompositions;
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
public: // CanonIterData is public to allow access from C callback functions.
UInitOnce fCanonIterDataInitOnce;
CanonIterData *fCanonIterData;
};
@ -658,13 +764,14 @@ unorm_getFCD16(UChar32 c);
/**
* Format of Normalizer2 .nrm data files.
* Format version 2.0.
* Format version 3.0.
*
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
* ICU ships with data files for standard Unicode Normalization Forms
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
* Custom (application-specific) data can be built into additional .nrm files
* with the gennorm2 build tool.
* ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
*
* Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been
* cached already. Internally, Normalizer2Impl.load() reads the .nrm file.
@ -695,14 +802,20 @@ unorm_getFCD16(UChar32 c);
* with a decomposition mapping, that is, with NF*D_QC=No.
* minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point
* with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward).
* minLcccCP=indexes[IX_MIN_LCCC_CP] (index 18, new in formatVersion 3)
* is the lowest code point with lccc!=0.
*
* The next five indexes are thresholds of 16-bit trie values for ranges of
* The next eight indexes are thresholds of 16-bit trie values for ranges of
* values indicating multiple normalization properties.
* They are listed here in threshold order, not in the order they are stored in the indexes.
* minYesNo=indexes[IX_MIN_YES_NO];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* minNoNo=indexes[IX_MIN_NO_NO];
* minNoNoCompBoundaryBefore=indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
* minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
* minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY];
* limitNoNo=indexes[IX_LIMIT_NO_NO];
* minMaybeYes=indexes[IX_MIN_MAYBE_YES];
* minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
* See the normTrie description below and the design doc for details.
*
* UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h
@ -710,12 +823,14 @@ unorm_getFCD16(UChar32 c);
* The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
* Rather than using independent bits in the value (which would require more than 16 bits),
* information is extracted primarily via range checks.
* Except, format version 3 uses bit 0 for hasCompBoundaryAfter().
* For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo
* means that the character has NF*C_QC=Yes and NF*D_QC=No properties,
* which means it has a two-way (round-trip) decomposition mapping.
* Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData
* pointing to mappings, compositions lists, or both.
* Value norm16==0 means that the character is normalization-inert, that is,
* Value norm16==INERT (0 in versions 1 & 2, 1 in version 3)
* means that the character is normalization-inert, that is,
* it does not have a mapping, does not participate in composition, has a zero
* canonical combining class, and forms a boundary where text before it and after it
* can be normalized independently.
@ -729,7 +844,7 @@ unorm_getFCD16(UChar32 c);
* The trie has a value for each lead surrogate code unit representing the "worst case"
* properties of the 1024 supplementary characters whose UTF-16 form starts with
* the lead surrogate. If all of the 1024 supplementary characters are normalization-inert,
* then their lead surrogate code unit has the trie value 0.
* then their lead surrogate code unit has the trie value INERT.
* When the lead surrogate unit's value exceeds the quick check minimum during processing,
* the properties for the full supplementary code point need to be looked up.
*
@ -738,6 +853,7 @@ unorm_getFCD16(UChar32 c);
*
* There is only one byte offset for the end of these two arrays.
* The split between them is given by the constant and variable mentioned above.
* In version 3, the difference must be shifted right by OFFSET_SHIFT.
*
* The maybeYesCompositions array contains compositions lists for characters that
* combine both forward (as starters in composition pairs)
@ -754,6 +870,8 @@ unorm_getFCD16(UChar32 c);
* followed by only mappings for "NoNo" characters.
* (Referring to pairs of NFC/NFD quick check values.)
* The norm16 values of those characters are directly indexes into the extraData array.
* In version 3, the norm16 values must be shifted right by OFFSET_SHIFT
* for accessing extraData.
*
* The data structures for compositions lists and mappings are described in the design doc.
*
@ -784,6 +902,50 @@ unorm_getFCD16(UChar32 c);
* This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
* It is needed for the new (in ICU 49) composePair(), not for other normalization.
* - Addition of the smallFCD[] bit set.
*
* Changes from format version 2 to format version 3 (ICU 60) ------------------
*
* - norm16 bit 0 indicates hasCompBoundaryAfter(),
* except that for contiguous composition (FCC) the tccc must be checked as well.
* Data indexes and ccc values are shifted left by one (OFFSET_SHIFT).
* Thresholds like minNoNo are tested before shifting.
*
* - Algorithmic mapping deltas are shifted left by two more bits (total DELTA_SHIFT),
* to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater.
* See DELTA_TCCC_MASK etc.
* This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter().
* minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly.
*
* - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters,
* and ASCII characters are mapped algorithmically only to other ASCII characters.
* This helps with hasCompBoundaryBefore() and compose() fast paths.
* It is never necessary any more to loop for algorithmic mappings.
*
* - Addition of indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE],
* indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC], and indexes[IX_MIN_NO_NO_EMPTY],
* and separation of the noNo extraData into distinct ranges.
* With this, the noNo norm16 value indicates whether the mapping is
* compose-normalized, not normalized but hasCompBoundaryBefore(),
* not even that, or maps to an empty string.
* hasCompBoundaryBefore() can be determined solely from the norm16 value.
*
* - The norm16 value for Hangul LVT is now different from that for Hangul LV,
* so that hasCompBoundaryAfter() need not check for the syllable type.
* For Hangul LV, minYesNo continues to be used (no comp-boundary-after).
* For Hangul LVT, minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER is used.
* The extraData units at these indexes are set to firstUnit=2 and firstUnit=3, respectively,
* to simplify some code.
*
* - The extraData firstUnit bit 5 is no longer necessary
* (norm16 bit 0 used instead of firstUnit MAPPING_NO_COMP_BOUNDARY_AFTER),
* is reserved again, and always set to 0.
*
* - Addition of indexes[IX_MIN_LCCC_CP], the first code point where lccc!=0.
* This used to be hardcoded to U+0300, but in data like NFKC_Casefold it is lower:
* U+00AD Soft Hyphen maps to an empty string,
* which is artificially assigned "worst case" values lccc=1 and tccc=255.
*
* - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
*/
#endif /* !UCONFIG_NO_NORMALIZATION */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -676,8 +676,8 @@ extern U_IMPORT char *U_TZNAME[];
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
/* These platforms are likely to use Olson timezone IDs. */
/* common targets of the symbolic link at TZDEFAULT are:
* "/usr/share/zoneinfo/<olsonID>" default, older Linus distros, macOS to 10.12
* "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu, SuSe Linux
* "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
* "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
* "/usr/share/lib/zoneinfo/<olsonID>" Solaris
* "../usr/share/lib/zoneinfo/<olsonID>" Solaris
* "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
@ -949,30 +949,30 @@ static CharString *gSearchTZFileResult = NULL;
* This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
*/
static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
DIR* dirp = opendir(path);
DIR* subDirp = NULL;
DIR* dirp = NULL;
struct dirent* dirEntry = NULL;
char* result = NULL;
UErrorCode status = U_ZERO_ERROR;
/* Save the current path */
CharString curpath(path, -1, status);
if (U_FAILURE(status)) {
goto cleanupAndReturn;
}
dirp = opendir(path);
if (dirp == NULL) {
return result;
goto cleanupAndReturn;
}
if (gSearchTZFileResult == NULL) {
gSearchTZFileResult = new CharString;
if (gSearchTZFileResult == NULL) {
return NULL;
goto cleanupAndReturn;
}
ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
}
/* Save the current path */
UErrorCode status = U_ZERO_ERROR;
CharString curpath(path, -1, status);
if (U_FAILURE(status)) {
return NULL;
}
/* Check each entry in the directory. */
while((dirEntry = readdir(dirp)) != NULL) {
const char* dirName = dirEntry->d_name;
@ -981,15 +981,16 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
CharString newpath(curpath, status);
newpath.append(dirName, -1, status);
if (U_FAILURE(status)) {
return NULL;
break;
}
DIR* subDirp = NULL;
if ((subDirp = opendir(newpath.data())) != NULL) {
/* If this new path is a directory, make a recursive call with the newpath. */
closedir(subDirp);
newpath.append('/', status);
if (U_FAILURE(status)) {
return NULL;
break;
}
result = searchForTZFile(newpath.data(), tzInfo);
/*
@ -1013,7 +1014,7 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
gSearchTZFileResult->clear();
gSearchTZFileResult->append(zoneid, -1, status);
if (U_FAILURE(status)) {
return NULL;
break;
}
result = gSearchTZFileResult->data();
/* Get out after the first one found. */
@ -1022,7 +1023,11 @@ static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
}
}
}
closedir(dirp);
cleanupAndReturn:
if (dirp) {
closedir(dirp);
}
return result;
}
#endif
@ -1055,7 +1060,7 @@ uprv_getWindowsTimeZone()
hr = timezone->GetTimeZone(timezoneString.GetAddressOf());
if (SUCCEEDED(hr))
{
int32_t length = wcslen(timezoneString.GetRawBuffer(NULL));
int32_t length = static_cast<int32_t>(wcslen(timezoneString.GetRawBuffer(NULL)));
char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char));
if (asciiId != nullptr)
{
@ -1074,6 +1079,7 @@ uprv_getWindowsTimeZone()
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
(void)n; // Avoid unreferenced parameter warning.
const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
#if U_PLATFORM_HAS_WINUWP_API > 0
@ -1229,7 +1235,7 @@ UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
static CharString *gTimeZoneFilesDirectory = NULL;
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
static char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
static bool gCorrectedPOSIXLocaleHeapAllocated = false;
#endif
@ -1252,7 +1258,7 @@ static UBool U_CALLCONV putil_cleanup(void)
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
uprv_free(gCorrectedPOSIXLocale);
uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
gCorrectedPOSIXLocale = NULL;
gCorrectedPOSIXLocaleHeapAllocated = false;
}
@ -1288,7 +1294,7 @@ u_setDataDirectory(const char *directory) {
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
{
char *p;
while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR;
}
}
@ -1446,7 +1452,7 @@ static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
gTimeZoneFilesDirectory->append(path, status);
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
char *p = gTimeZoneFilesDirectory->data();
while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) {
while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR;
}
#endif
@ -1810,6 +1816,8 @@ The leftmost codepage (.xxx) wins.
}
// Now normalize the resulting name
correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
/* TODO: Should we just exit on memory allocation failure? */
if (correctedPOSIXLocale)
{
int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
@ -2327,19 +2335,16 @@ u_getVersion(UVersionInfo versionArray) {
* icucfg.h dependent code
*/
#if U_ENABLE_DYLOAD
#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
#if HAVE_DLFCN_H
#ifdef __MVS__
#ifndef __SUSV3
#define __SUSV3 1
#endif
#endif
#include <dlfcn.h>
#endif
#endif /* HAVE_DLFCN_H */
U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2379,38 +2384,10 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return uret.fp;
}
#else
#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
/* null (nonexistent) implementation. */
U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
if(U_FAILURE(*status)) return NULL;
*status = U_UNSUPPORTED_ERROR;
return NULL;
}
U_INTERNAL void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
if(U_FAILURE(*status)) return;
*status = U_UNSUPPORTED_ERROR;
return;
}
U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
if(U_SUCCESS(*status)) {
*status = U_UNSUPPORTED_ERROR;
}
return (UVoidFunction*)NULL;
}
#endif
#elif U_PLATFORM_USES_ONLY_WIN32_API
/* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */
U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2437,7 +2414,6 @@ uprv_dl_close(void *lib, UErrorCode *status) {
return;
}
U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
HMODULE handle = (HMODULE)lib;
@ -2459,10 +2435,9 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return addr;
}
#else
/* No dynamic loading set. */
/* No dynamic loading, null (nonexistent) implementation. */
U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
@ -2480,7 +2455,6 @@ uprv_dl_close(void *lib, UErrorCode *status) {
return;
}
U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
(void)lib;
@ -2491,7 +2465,7 @@ uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
return (UVoidFunction*)NULL;
}
#endif /* U_ENABLE_DYLOAD */
#endif
/*
* Hey, Emacs, please set the following:

Просмотреть файл

@ -72,15 +72,6 @@
typedef size_t uintptr_t;
#endif
/**
* \def U_HAVE_MSVC_2003_OR_EARLIER
* Flag for workaround of MSVC 2003 optimization bugs
* @internal
*/
#if !defined(U_HAVE_MSVC_2003_OR_EARLIER) && defined(_MSC_VER) && (_MSC_VER < 1400)
#define U_HAVE_MSVC_2003_OR_EARLIER
#endif
/*===========================================================================*/
/** @{ Information about POSIX support */
/*===========================================================================*/
@ -120,15 +111,15 @@ typedef size_t uintptr_t;
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID
# define U_TIMEZONE timezone
#elif defined(__UCLIBC__)
// uClibc does not have __timezone or _timezone.
#elif defined(_NEWLIB_VERSION)
# define U_TIMEZONE _timezone
#elif defined(__GLIBC__)
// glibc
# define U_TIMEZONE __timezone
#elif U_PLATFORM_IS_LINUX_BASED
# if defined(__UCLIBC__)
/* uClibc does not have __timezone or _timezone. */
# elif defined(_NEWLIB_VERSION)
# define U_TIMEZONE _timezone
# elif defined(__GLIBC__)
/* glibc */
# define U_TIMEZONE __timezone
# endif
// not defined
#elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_TIMEZONE _timezone
#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__)
@ -214,7 +205,7 @@ typedef size_t uintptr_t;
/**
* \def U_HAVE_STD_ATOMICS
* Defines whether the standard C++11 <atomic> is available.
* ICU will use this when avialable,
* ICU will use this when available,
* otherwise will fall back to compiler or platform specific alternatives.
* @internal
*/
@ -239,7 +230,7 @@ typedef size_t uintptr_t;
/**
* \def U_HAVE_CLANG_ATOMICS
* Defines whether Clang c11 style built-in atomics are avaialable.
* Defines whether Clang c11 style built-in atomics are available.
* These are used in preference to gcc atomics when both are available.
*/
#ifdef U_HAVE_CLANG_ATOMICS
@ -277,7 +268,7 @@ typedef size_t uintptr_t;
/**
* Platform utilities isolates the platform dependencies of the
* libarary. For each platform which this code is ported to, these
* library. For each platform which this code is ported to, these
* functions may have to be re-implemented.
*/
@ -425,7 +416,7 @@ U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
/**
* Please use uloc_getDefault() instead.
* Return the default locale ID string by querying ths system, or
* Return the default locale ID string by querying the system, or
* zero if one cannot be found.
* This function can call setlocale() on Unix platforms. Please read the
* platform documentation on setlocale() before calling this function.

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,630 @@
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// file: rbbi_cache.cpp
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/ubrk.h"
#include "unicode/rbbi.h"
#include "rbbi_cache.h"
#include "brkeng.h"
#include "cmemory.h"
#include "rbbidata.h"
#include "rbbirb.h"
#include "uassert.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/*
* DictionaryCache implementation
*/
/**
 * Construct an empty dictionary cache for a break iterator.
 *
 * @param bi      The break iterator this cache belongs to; stored in fBI.
 * @param status  Passed to the UVector32 constructor. Additionally set to
 *                U_MEMORY_ALLOCATION_ERROR if the vector itself could not be
 *                allocated and no earlier failure has been recorded.
 */
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
        fBI(bi), fBreaks(NULL), fPositionInCache(-1),
        fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
    fBreaks = new UVector32(status);
    // The other members dereference fBreaks unconditionally, so a failed
    // allocation must be reported to the caller instead of going unnoticed.
    if (fBreaks == NULL && U_SUCCESS(status)) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
}
// Releases the boundary vector allocated by the constructor.
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
    delete fBreaks;
    fBreaks = NULL;     // leave no dangling pointer behind
}
// Returns the cache to its empty state: no boundaries, an empty [0, 0) range,
// no current position, and both rule status indexes cleared.
void RuleBasedBreakIterator::DictionaryCache::reset() {
    fBreaks->removeAllElements();
    fPositionInCache = -1;
    fStart = fLimit = 0;
    fFirstRuleStatusIndex = fOtherRuleStatusIndex = 0;
}
/**
 * Look up the cached boundary that follows a text position.
 *
 * @param fromPos      The position to start from.
 * @param result       Receives the following boundary when TRUE is returned.
 * @param statusIndex  Receives fOtherRuleStatusIndex when TRUE is returned.
 * @return TRUE if a following boundary was found in the cache. On FALSE the
 *         outputs are untouched and the cache position is invalidated (-1).
 */
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    // Only positions inside [fStart, fLimit) can be answered from the cache.
    const UBool inCachedRange = (fStart <= fromPos && fromPos < fLimit);
    if (!inCachedRange) {
        fPositionInCache = -1;
        return FALSE;
    }

    int32_t boundary = 0;

    // Sequential iteration: the cache position already sits on fromPos,
    // so the answer is simply the next entry.
    const UBool onCurrentEntry =
            fPositionInCache >= 0 &&
            fPositionInCache < fBreaks->size() &&
            fBreaks->elementAti(fPositionInCache) == fromPos;
    if (onCurrentEntry) {
        ++fPositionInCache;
        if (fPositionInCache >= fBreaks->size()) {
            // Stepped past the last cached boundary.
            fPositionInCache = -1;
            return FALSE;
        }
        boundary = fBreaks->elementAti(fPositionInCache);
        U_ASSERT(boundary > fromPos);
        *result = boundary;
        *statusIndex = fOtherRuleStatusIndex;
        return TRUE;
    }

    // Random access: scan forward for the first boundary beyond fromPos.
    fPositionInCache = 0;
    while (fPositionInCache < fBreaks->size()) {
        boundary = fBreaks->elementAti(fPositionInCache);
        if (boundary > fromPos) {
            *result = boundary;
            *statusIndex = fOtherRuleStatusIndex;
            return TRUE;
        }
        ++fPositionInCache;
    }

    // Not expected: the range check above should guarantee a hit.
    U_ASSERT(FALSE);
    fPositionInCache = -1;
    return FALSE;
}
/**
 * Look up the cached boundary that precedes a text position.
 *
 * @param fromPos      The position to start from.
 * @param result       Receives the preceding boundary when TRUE is returned.
 * @param statusIndex  Receives fFirstRuleStatusIndex if the boundary equals
 *                     fStart, otherwise fOtherRuleStatusIndex.
 * @return TRUE if a preceding boundary was found in the cache. On FALSE the
 *         outputs are untouched and the cache position is invalidated (-1).
 */
UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
    // Only positions inside (fStart, fLimit] can be answered from the cache.
    if (fromPos <= fStart || fromPos > fLimit) {
        fPositionInCache = -1;
        return FALSE;
    }

    // Starting from the very end of the range: jump to the last entry.
    if (fromPos == fLimit) {
        fPositionInCache = fBreaks->size() - 1;
        if (fPositionInCache >= 0) {
            U_ASSERT(fBreaks->elementAti(fPositionInCache) == fromPos);
        }
    }

    int32_t boundary;

    // Sequential iteration: the cache position already sits on fromPos and
    // has a predecessor, so the answer is simply the previous entry.
    const UBool onCurrentEntry =
            fPositionInCache > 0 &&
            fPositionInCache < fBreaks->size() &&
            fBreaks->elementAti(fPositionInCache) == fromPos;
    if (onCurrentEntry) {
        --fPositionInCache;
        boundary = fBreaks->elementAti(fPositionInCache);
        U_ASSERT(boundary < fromPos);
        *result = boundary;
        if (boundary == fStart) {
            *statusIndex = fFirstRuleStatusIndex;
        } else {
            *statusIndex = fOtherRuleStatusIndex;
        }
        return TRUE;
    }

    // A cache position on the first entry is treated as "nothing precedes".
    // NOTE(review): this is taken even when entry 0 is not fromPos; confirm
    // that callers rely on the resulting fall-back.
    if (fPositionInCache == 0) {
        fPositionInCache = -1;
        return FALSE;
    }

    // Random access: scan backward for the last boundary before fromPos.
    fPositionInCache = fBreaks->size() - 1;
    while (fPositionInCache >= 0) {
        boundary = fBreaks->elementAti(fPositionInCache);
        if (boundary < fromPos) {
            *result = boundary;
            if (boundary == fStart) {
                *statusIndex = fFirstRuleStatusIndex;
            } else {
                *statusIndex = fOtherRuleStatusIndex;
            }
            return TRUE;
        }
        --fPositionInCache;
    }

    // Not expected: the range check above should guarantee a hit.
    U_ASSERT(FALSE);
    fPositionInCache = -1;
    return FALSE;
}
/**
 * Fill the cache with the boundaries that language break engines find
 * inside a range of text.
 *
 * @param startPos         Start of the text range to examine.
 * @param endPos           End of the text range to examine.
 * @param firstRuleStatus  Stored as fFirstRuleStatusIndex.
 * @param otherRuleStatus  Stored as fOtherRuleStatusIndex.
 *
 * If the range spans at most one position the function returns immediately
 * and leaves the existing cache contents untouched. Otherwise the cache is
 * reset first; if no engine reports any break, it stays empty.
 */
void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
int32_t firstRuleStatus, int32_t otherRuleStatus) {
if ((endPos - startPos) <= 1) {
return;
}
reset();
fFirstRuleStatusIndex = firstRuleStatus;
fOtherRuleStatusIndex = otherRuleStatus;
int32_t rangeStart = startPos;
int32_t rangeEnd = endPos;
uint16_t category;
int32_t current;
UErrorCode status = U_ZERO_ERROR;
int32_t foundBreakCount = 0;
UText *text = fBI->fText;
// Loop through the text, looking for ranges of dictionary characters.
// For each span, find the appropriate break engine, and ask it to find
// any breaks within the span.
utext_setNativeIndex(text, rangeStart);
UChar32 c = utext_current32(text);
category = UTRIE2_GET16(fBI->fData->fTrie, c);
// Nothing inside this loop assigns to status; the loop is left through the
// break below once the text index reaches rangeEnd.
while(U_SUCCESS(status)) {
// Skip forward while the trie value for the current character does not have
// bit 0x4000 set (presumably the "dictionary character" flag - see the
// comment after this loop).
while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
utext_next32(text); // TODO: cleaner loop structure.
c = utext_current32(text);
category = UTRIE2_GET16(fBI->fData->fTrie, c);
}
if (current >= rangeEnd) {
break;
}
// We now have a dictionary character. Get the appropriate language object
// to deal with it.
const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
// Ask the language object if there are any breaks. It will add them to the cache and
// leave the text pointer on the other side of its range, ready to search for the next one.
// NOTE(review): when lbe is NULL nothing here advances the text position - confirm
// that getLanguageBreakEngine() cannot return NULL in a way that stalls this loop.
if (lbe != NULL) {
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBI->fBreakType, *fBreaks);
}
// Reload the loop variables for the next go-round
c = utext_current32(text);
category = UTRIE2_GET16(fBI->fData->fTrie, c);
}
// If we found breaks, ensure that the first and last entries are
// the original starting and ending position. And initialize the
// cache iteration position to the first entry.
// printf("foundBreakCount = %d\n", foundBreakCount);
if (foundBreakCount > 0) {
U_ASSERT(foundBreakCount == fBreaks->size());
if (startPos < fBreaks->elementAti(0)) {
// The dictionary did not place a boundary at the start of the segment of text.
// Add one now. This should not commonly happen, but it would be easy for interactions
// of the rules for dictionary segments and the break engine implementations to
// inadvertently cause it. Cover it here, just in case.
fBreaks->insertElementAt(startPos, 0, status);
}
if (endPos > fBreaks->peeki()) {
fBreaks->push(endPos, status);
}
fPositionInCache = 0;
// Note: Dictionary matching may extend beyond the original limit.
fStart = fBreaks->elementAti(0);
fLimit = fBreaks->peeki();
} else {
// there were no language-based breaks, even though the segment contained
// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
// for this range will fail, and the calling code will fall back to the rule based boundaries.
}
}
/*
* BreakCache implementation
*/
/**
 * Construct a break cache for a break iterator and put it into its
 * initial state via reset().
 *
 * @param bi      The break iterator this cache belongs to; stored in fBI.
 * @param status  Passed to the constructor of fSideBuffer.
 */
RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status)
        : fBI(bi),
          fSideBuffer(status) {
    reset();
}
// Destructor. The body is empty; no explicit cleanup is performed here.
RuleBasedBreakIterator::BreakCache::~BreakCache() {
}
/**
 * Discard the cache contents and restart with a single boundary.
 *
 * @param pos         The one boundary the cache holds afterwards; it also
 *                    becomes the current text position.
 * @param ruleStatus  Rule status index recorded for that boundary
 *                    (narrowed to uint16_t for storage).
 */
void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
    // Slot 0 holds the only entry.
    fBoundaries[0] = pos;
    fStatuses[0] = (uint16_t)ruleStatus;

    // Start, end and current buffer index all refer to slot 0.
    fStartBufIdx = 0;
    fEndBufIdx = 0;
    fBufIdx = 0;
    fTextIdx = pos;
}
/**
 * Publish the cache's current position to the owning break iterator:
 * sets its position and rule status index and clears its "done" flag.
 *
 * @return The current text position of the cache.
 */
int32_t RuleBasedBreakIterator::BreakCache::current() {
    fBI->fDone = FALSE;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
    fBI->fPosition = fTextIdx;
    return fTextIdx;
}
/**
 * Move to the boundary following a text position.
 *
 * The cache is first positioned at startPos, trying in order: already there,
 * seek() within the cached range, populateNear() to extend or rebuild the
 * cache. If that succeeds, a next() is performed from that position.
 *
 * @param startPos  The text position to start from.
 * @param status    Nothing happens if this already indicates a failure;
 *                  otherwise it is passed on to populateNear().
 */
void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Try the cheap options first; each later step runs only if the
    // earlier ones did not position the cache.
    UBool positioned = (startPos == fTextIdx);
    if (!positioned) {
        positioned = seek(startPos);
    }
    if (!positioned) {
        positioned = populateNear(startPos, status);
    }
    if (!positioned) {
        return;
    }

    // startPos is in the cache. Do a next() from that position.
    // TODO: an awkward set of interactions with bi->fDone
    //       seek() does not clear it; it can't because of interactions with populateNear().
    //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
    //       So clear it here, for the case where seek() succeeded on an iterator
    //       that had previously run off the end.
    fBI->fDone = false;
    next();
}
/**
 * Move to the boundary preceding a text position.
 *
 * The cache is first positioned at or before startPos, trying in order:
 * already there, seek() within the cached range, populateNear() to extend or
 * rebuild the cache.
 *
 * @param startPos  The text position to start from.
 * @param status    Nothing happens if this already indicates a failure;
 *                  otherwise it is passed on to populateNear() and previous().
 */
void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    // Each later step runs only if the earlier ones did not position the cache.
    UBool positioned = (startPos == fTextIdx);
    if (!positioned) {
        positioned = seek(startPos);
    }
    if (!positioned) {
        positioned = populateNear(startPos, status);
    }
    if (!positioned) {
        return;
    }

    if (startPos != fTextIdx) {
        // seek() leaves the BreakCache positioned at the preceding boundary
        // if the requested position is between two boundaries.
        // current() pushes the BreakCache position out to the BreakIterator itself.
        U_ASSERT(startPos > fTextIdx);
        current();
    } else {
        // Sitting exactly on startPos: step back one boundary.
        previous(status);
    }
}
/*
* Out-of-line code for BreakCache::next().
* Cache does not already contain the boundary
*/
void RuleBasedBreakIterator::BreakCache::nextOL() {
fBI->fDone = !populateFollowing();
fBI->fPosition = fTextIdx;
fBI->fRuleStatusIndex = fStatuses[fBufIdx];
return;
}
/**
 * Step back to the previous boundary and publish the resulting position and
 * rule status to the owning break iterator. The iterator is marked as done
 * when the buffer index did not change.
 *
 * @param status  Nothing happens if this already indicates a failure;
 *                otherwise it is passed on to populatePreceding().
 */
void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    const int32_t bufIdxBefore = fBufIdx;
    if (fBufIdx != fStartBufIdx) {
        // The cache already holds the previous boundary; just step back.
        fBufIdx = modChunkSize(fBufIdx - 1);
        fTextIdx = fBoundaries[fBufIdx];
    } else {
        // At the start of the cache. Prepend to it.
        populatePreceding(status);
    }

    fBI->fDone = (fBufIdx == bufIdxBefore);
    fBI->fPosition = fTextIdx;
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
}
/**
 * Position the cache at a text position that lies within the cached range.
 *
 * @param pos  The requested text position.
 * @return FALSE, with the cache unchanged, if pos is outside the range of
 *         cached boundaries. Otherwise TRUE, with the cache positioned on
 *         pos if it is a cached boundary, or else on the closest cached
 *         boundary before it.
 */
UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
    const int32_t firstBoundary = fBoundaries[fStartBufIdx];
    const int32_t lastBoundary = fBoundaries[fEndBufIdx];
    if (pos < firstBoundary || pos > lastBoundary) {
        return FALSE;
    }

    // Exact hit on either end of the cache. The start is checked first;
    // it is the common case: seek(0), from BreakIterator::first().
    if (pos == firstBoundary || pos == lastBoundary) {
        fBufIdx = (pos == firstBoundary) ? fStartBufIdx : fEndBufIdx;
        fTextIdx = fBoundaries[fBufIdx];
        return TRUE;
    }

    // Binary search over the circular buffer for the first boundary > pos.
    int32_t lo = fStartBufIdx;
    int32_t hi = fEndBufIdx;
    while (lo != hi) {
        // When the occupied region wraps around the end of the buffer
        // (lo > hi), add CACHE_SIZE before halving, then wrap the midpoint.
        const int32_t wrapAdjust = (lo > hi) ? CACHE_SIZE : 0;
        const int32_t mid = modChunkSize((lo + hi + wrapAdjust) / 2);
        if (fBoundaries[mid] > pos) {
            hi = mid;
        } else {
            lo = modChunkSize(mid + 1);
        }
    }
    U_ASSERT(fBoundaries[hi] > pos);

    // The entry just before it is the boundary at or preceding pos.
    fBufIdx = modChunkSize(hi - 1);
    fTextIdx = fBoundaries[fBufIdx];
    U_ASSERT(fTextIdx <= pos);
    return TRUE;
}
/*
 * Add boundaries to the cache near the specified position, which must lie
 * outside the range currently covered by the cache.
 *
 * On success the iteration position is left on the requested position if it
 * is a boundary, otherwise on the boundary preceding it.
 *
 * @param position  Text position to populate around.
 * @param status    Error code; nothing is done if it is already a failure.
 * @return TRUE on success, FALSE if the operation failed (including when
 *         status becomes a failure while the cache is being extended).
 */
UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return FALSE;
    }
    U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);

    // Find a boundary somewhere in the vicinity of the requested position.
    // Depending on the safe rules and the text data, it could be either before, at, or after
    // the requested position.

    // If the requested position is not near already cached positions, clear the existing cache,
    // find a near-by boundary and begin new cache contents there.
    if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
        int32_t aBoundary = 0;
        int32_t ruleStatusIndex = 0;
        // TODO: check for position == length of text. Although may still need to back up to get rule status.
        if (position > 20) {
            int32_t backupPos = fBI->handlePrevious(position);
            fBI->fPosition = backupPos;
            aBoundary = fBI->handleNext();        // Ignore dictionary, just finding a rule based boundary.
            ruleStatusIndex = fBI->fRuleStatusIndex;
        }
        reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
    }

    // Fill in boundaries between existing cache content and the new requested position.

    if (fBoundaries[fEndBufIdx] < position) {
        // The last position in the cache precedes the requested position.
        // Add following position(s) to the cache.
        while (fBoundaries[fEndBufIdx] < position) {
            if (!populateFollowing()) {
                U_ASSERT(false);
                return FALSE;
            }
        }
        fBufIdx = fEndBufIdx;                     // Set iterator position to the end of the buffer.
        fTextIdx = fBoundaries[fBufIdx];          // Required because populateFollowing may add extra boundaries.
        while (fTextIdx > position) {             // Move backwards to a position at or preceding the requested pos.
            previous(status);
            if (U_FAILURE(status)) {
                // previous() is a no-op once status has failed, so continuing
                // to loop could never make progress.
                return FALSE;
            }
        }
        return TRUE;
    }

    if (fBoundaries[fStartBufIdx] > position) {
        // The first position in the cache is beyond the requested position.
        // Back up more until we get a boundary <= the requested position.
        while (fBoundaries[fStartBufIdx] > position) {
            if (!populatePreceding(status)) {
                // populatePreceding() returns FALSE only when it added nothing
                // (e.g. status is a failure). The loop condition would then
                // stay true forever, so give up instead of spinning.
                return FALSE;
            }
        }
        fBufIdx = fStartBufIdx;                   // Set iterator position to the start of the buffer.
        fTextIdx = fBoundaries[fBufIdx];          // Required because populatePreceding may add extra boundaries.
        while (fTextIdx < position) {             // Move forwards to a position at or following the requested pos.
            next();
        }
        if (fTextIdx > position) {
            // If position is not itself a boundary, the next() loop above will overshoot.
            // Back up one, leaving cache position at the boundary preceding the requested position.
            previous(status);
        }
        return TRUE;
    }

    U_ASSERT(fTextIdx == position);
    return TRUE;
}
/*
 * Extend the cache with the boundary (or boundaries) following the last
 * boundary it currently holds.
 *
 * The iteration position is moved to the first newly added boundary.
 * Returns FALSE, adding nothing, when the rule-based scan reports UBRK_DONE.
 */
UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
    const int32_t lastPos = fBoundaries[fEndBufIdx];
    const int32_t lastStatusIdx = fStatuses[fEndBufIdx];
    int32_t nextPos = 0;
    int32_t nextStatusIdx = 0;

    // First choice: the dictionary cache may already know the next boundary.
    if (fBI->fDictionaryCache->following(lastPos, &nextPos, &nextStatusIdx)) {
        addFollowing(nextPos, nextStatusIdx, UpdateCachePosition);
        return TRUE;
    }

    // Otherwise run the break rules forward from the last cached boundary.
    fBI->fPosition = lastPos;
    nextPos = fBI->handleNext();
    if (nextPos == UBRK_DONE) {
        return FALSE;
    }
    nextStatusIdx = fBI->fRuleStatusIndex;

    if (fBI->fDictionaryCharCount > 0) {
        // The text segment obtained from the rules includes dictionary characters.
        // Subdivide it, with subdivided results going into the dictionary cache.
        fBI->fDictionaryCache->populateDictionary(lastPos, nextPos, lastStatusIdx, nextStatusIdx);
        const UBool fromDictionary =
                fBI->fDictionaryCache->following(lastPos, &nextPos, &nextStatusIdx);
        if (fromDictionary) {
            addFollowing(nextPos, nextStatusIdx, UpdateCachePosition);
            // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
            //       But be careful with interactions with populateNear().
            return TRUE;
        }
    }

    // Either the rule based segment held no dictionary characters, or it did
    // but the dictionary segmenter did not handle them (so the return above
    // was not taken). Cache the segment's end point.
    addFollowing(nextPos, nextStatusIdx, UpdateCachePosition);

    // Read ahead a few more non-dictionary boundaries so that straightforward
    // forward iteration (subsequent BreakIterator::next() calls) can take the
    // fast path and be served from the cache.
    for (int remaining = 6; remaining > 0; --remaining) {
        nextPos = fBI->handleNext();
        if (nextPos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
            break;
        }
        addFollowing(nextPos, fBI->fRuleStatusIndex, RetainCachePosition);
    }

    return TRUE;
}
// Add one or more boundaries to the cache preceding the first boundary it currently holds.
//
// The iteration position is moved to the added boundary closest to the old cache start
// (it is inserted with UpdateCachePosition); any further, earlier boundaries are inserted
// with RetainCachePosition.
//
// Returns FALSE without changing the cache if status is already a failure, or if the first
// cached boundary is at text position 0. Otherwise returns TRUE if a boundary was added.
UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
if (U_FAILURE(status)) {
return FALSE;
}
int32_t fromPosition = fBoundaries[fStartBufIdx];
if (fromPosition == 0) {
return FALSE;
}
int32_t position = 0;
int32_t positionStatusIdx = 0;
// First choice: take the preceding boundary from the dictionary cache, if it has one.
if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
addPreceding(position, positionStatusIdx, UpdateCachePosition);
return TRUE;
}
int32_t backupPosition = fromPosition;
// Find a boundary somewhere preceding the first already-cached boundary.
// Each pass backs up a further 30 positions (pinned at 0) and then scans forward once;
// the loop repeats while the boundary found is not strictly before fromPosition.
do {
backupPosition = backupPosition - 30;
if (backupPosition <= 0) {
backupPosition = 0;
} else {
backupPosition = fBI->handlePrevious(backupPosition);
}
if (backupPosition == UBRK_DONE || backupPosition == 0) {
// Backed up to the start of the text: position 0 with status index 0 is the starting boundary.
position = 0;
positionStatusIdx = 0;
} else {
fBI->fPosition = backupPosition; // TODO: pass starting position in a clearer way.
position = fBI->handleNext();
positionStatusIdx = fBI->fRuleStatusIndex;
}
} while (position >= fromPosition);
// Find boundaries between the one we just located and the first already-cached boundary
// Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.
// Each boundary is pushed as a pair: position first, then its rule status index.
fSideBuffer.removeAllElements();
fSideBuffer.addElement(position, status);
fSideBuffer.addElement(positionStatusIdx, status);
do {
// handleNext() starts from fBI->fPosition, so seed it with the boundary found so far.
int32_t prevPosition = fBI->fPosition = position;
int32_t prevStatusIdx = positionStatusIdx;
position = fBI->handleNext();
positionStatusIdx = fBI->fRuleStatusIndex;
if (position == UBRK_DONE) {
break;
}
UBool segmentHandledByDictionary = FALSE;
if (fBI->fDictionaryCharCount != 0) {
// Segment from the rules includes dictionary characters.
// Subdivide it, with subdivided results going into the dictionary cache.
int32_t dictSegEndPosition = position;
fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
// Walk the dictionary boundaries of this segment in order, collecting those
// that fall before the old start of the cache.
while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
segmentHandledByDictionary = true;
U_ASSERT(position > prevPosition);
if (position >= fromPosition) {
break;
}
U_ASSERT(position <= dictSegEndPosition);
fSideBuffer.addElement(position, status);
fSideBuffer.addElement(positionStatusIdx, status);
prevPosition = position;
}
U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
}
// Plain rule-based boundary (no dictionary subdivision happened): collect it if it
// precedes the old start of the cache.
if (!segmentHandledByDictionary && position < fromPosition) {
fSideBuffer.addElement(position, status);
fSideBuffer.addElement(positionStatusIdx, status);
}
} while (position < fromPosition);
// Move boundaries from the side buffer to the main circular buffer.
// Pops come off in reverse push order: status index first, then position, and the
// boundary nearest to fromPosition first. That nearest boundary becomes the new
// iteration position.
UBool success = FALSE;
if (!fSideBuffer.isEmpty()) {
positionStatusIdx = fSideBuffer.popi();
position = fSideBuffer.popi();
addPreceding(position, positionStatusIdx, UpdateCachePosition);
success = TRUE;
}
// The remaining, earlier boundaries are added without moving the iteration position.
while (!fSideBuffer.isEmpty()) {
positionStatusIdx = fSideBuffer.popi();
position = fSideBuffer.popi();
if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
// No space in circular buffer to hold a new preceding result while
// also retaining the current cache (iteration) position.
// Bailing out is safe; the cache will refill again if needed.
break;
}
}
return success;
}
void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
U_ASSERT(position > fBoundaries[fEndBufIdx]);
U_ASSERT(ruleStatusIdx <= UINT16_MAX);
int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
if (nextIdx == fStartBufIdx) {
fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
}
fBoundaries[nextIdx] = position;
fStatuses[nextIdx] = ruleStatusIdx;
fEndBufIdx = nextIdx;
if (update == UpdateCachePosition) {
// Set current position to the newly added boundary.
fBufIdx = nextIdx;
fTextIdx = position;
} else {
// Retaining the original cache position.
// Check if the added boundary wraps around the buffer, and would over-write the original position.
// It's the responsibility of callers of this function to not add too many.
U_ASSERT(nextIdx != fBufIdx);
}
}
bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
U_ASSERT(position < fBoundaries[fStartBufIdx]);
U_ASSERT(ruleStatusIdx <= UINT16_MAX);
int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
if (nextIdx == fEndBufIdx) {
if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
// Failure. The insertion of the new boundary would claim the buffer position that is the
// current iteration position. And we also want to retain the current iteration position.
// (The buffer is already completely full of entries that precede the iteration position.)
return false;
}
fEndBufIdx = modChunkSize(fEndBufIdx - 1);
}
fBoundaries[nextIdx] = position;
fStatuses[nextIdx] = ruleStatusIdx;
fStartBufIdx = nextIdx;
if (update == UpdateCachePosition) {
fBufIdx = nextIdx;
fTextIdx = position;
}
return true;
}
/*
 * Debugging aid: print the iteration state, then every cached boundary from
 * fStartBufIdx through fEndBufIdx (inclusive) as "bufferIndex boundary".
 * Compiles to an empty function unless RBBI_DEBUG is defined.
 */
void RuleBasedBreakIterator::BreakCache::dumpCache() {
#ifdef RBBI_DEBUG
    RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
    int32_t slot = fStartBufIdx;
    for (;;) {
        RBBIDebugPrintf("%d %d\n", slot, fBoundaries[slot]);
        if (slot == fEndBufIdx) {
            break;
        }
        slot = modChunkSize(slot + 1);
    }
#endif
}
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_BREAK_ITERATION

Просмотреть файл

@ -0,0 +1,203 @@
// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// file: rbbi_cache.h
//
#ifndef RBBI_CACHE_H
#define RBBI_CACHE_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/rbbi.h"
#include "unicode/uobject.h"
#include "uvectr32.h"
U_NAMESPACE_BEGIN
/* DictionaryCache stores the boundaries obtained from a run of dictionary characters.
* Dictionary boundaries are moved first to this cache, then from here
* to the main BreakCache, where they may inter-leave with non-dictionary
* boundaries. The public BreakIterator API always fetches directly
* from the main BreakCache, not from here.
*
* In common situations, the number of boundaries in a single dictionary run
* should be quite small; it will be terminated by punctuation, spaces,
* or any other non-dictionary characters. The main BreakCache may end
* up with boundaries from multiple dictionary based runs.
*
* The boundaries are stored in a simple ArrayList (vector), with the
* assumption that they will be accessed sequentially.
*/
class RuleBasedBreakIterator::DictionaryCache: public UMemory {
public:
DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
~DictionaryCache();
// NOTE(review): presumably discards the cached boundaries; implementation not visible here - confirm.
void reset();
// Look up the dictionary boundary following / preceding fromPos.
// As used by BreakCache: a TRUE result means *pos and *statusIndex were filled in
// with the boundary and its rule status index; on FALSE the caller falls back to
// the rule-based scan.
UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
/**
* Populate the cache with the dictionary based boundaries within a region of text.
* @param startPos The start position of a range of text
* @param endPos The end position of a range of text
* @param firstRuleStatus The rule status index that applies to the break at startPos
* @param otherRuleStatus The rule status index that applies to boundaries other than startPos
* @internal
*/
void populateDictionary(int32_t startPos, int32_t endPos,
int32_t firstRuleStatus, int32_t otherRuleStatus);
RuleBasedBreakIterator *fBI; // The break iterator passed to the constructor (presumably the owner - confirm).
UVector32 *fBreaks; // A vector containing the boundaries.
int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following()
// or preceding(). Optimizes sequential access.
int32_t fStart; // Text position of first boundary in cache.
int32_t fLimit; // Last boundary in cache. Which is the limit of the
// text segment being handled by the dictionary.
int32_t fFirstRuleStatusIndex; // Rule status info for first boundary.
int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries.
};
/*
* class BreakCache
*
* Cache of break boundary positions and rule status values.
* Break iterator API functions, next(), previous(), etc., will use cached results
* when possible, and otherwise cache new results as they are obtained.
*
* Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
*
* The cache is implemented as a single circular buffer.
*/
class RuleBasedBreakIterator::BreakCache: public UMemory {
public:
BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
virtual ~BreakCache();
void reset(int32_t pos = 0, int32_t ruleStatus = 0);
// Advance to the next boundary.
// Fast path: the boundary is already cached, so step one slot forward and copy the
// position and rule status index to the break iterator.
// Slow path: the iteration position is on the last cached boundary, so defer to nextOL().
void next() { if (fBufIdx == fEndBufIdx) {
nextOL();
} else {
fBufIdx = modChunkSize(fBufIdx + 1);
fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
fBI->fRuleStatusIndex = fStatuses[fBufIdx];
}
};
// Out-of-line part of next(), used when the cache must first be extended.
void nextOL();
// Move back one boundary, extending the cache backwards when already at its start.
void previous(UErrorCode &status);
// Move the iteration state to the position following the startPosition.
// Input position must be pinned to the input length.
void following(int32_t startPosition, UErrorCode &status);
void preceding(int32_t startPosition, UErrorCode &status);
/*
* Update the state of the public BreakIterator (fBI) to reflect the
* current state of the break iterator cache (this).
*/
int32_t current();
/**
* Add boundaries to the cache near the specified position.
* The given position need not be a boundary itself.
* The input position must be within the range of the text, and
* on a code point boundary.
* If the requested position is a break boundary, leave the iteration
* position on it.
* If the requested position is not a boundary, leave the iteration
* position on the preceding boundary and include both the
* preceding and following boundaries in the cache.
* Additional boundaries, either preceding or following, may be added
* to the cache as a side effect.
*
* Return FALSE if the operation failed.
*/
UBool populateNear(int32_t position, UErrorCode &status);
/**
* Add boundary(s) to the cache following the current last boundary.
* Return FALSE if at the end of the text, and no more boundaries can be added.
* Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
*/
UBool populateFollowing();
/**
* Add one or more boundaries to the cache preceding the first currently cached boundary.
* Leave the iteration position on the first added boundary.
* Return false if no boundaries could be added (if at the start of the text.)
*/
UBool populatePreceding(UErrorCode &status);
// Selects whether addFollowing() / addPreceding() move the iteration position
// to the newly added boundary or leave it where it was.
enum UpdatePositionValues {
RetainCachePosition = 0,
UpdateCachePosition = 1
};
/*
* Add the boundary following the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
*/
void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
/*
* Add the boundary preceding the current position.
* The current position can be left as it was, or changed to the newly added boundary,
* as specified by the update parameter.
*/
bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
/**
* Set the cache position to the specified position, or, if the position
* falls between two cached boundaries, to the preceding boundary.
* Fails if the requested position is outside of the range of boundaries currently held by the cache.
* The startPosition must be on a code point boundary.
*
* Return TRUE if successful, FALSE if the specified position is after
* the last cached boundary or before the first.
*/
UBool seek(int32_t startPosition);
// Debugging aid: prints the cache contents when RBBI_DEBUG is defined.
void dumpCache();
private:
// Wrap a buffer index into [0, CACHE_SIZE). Uses a bit mask, which is why
// CACHE_SIZE must be a power of two (enforced by the static_assert below).
static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); };
/*
* size of the circular cache buffer.
*/
static constexpr int32_t CACHE_SIZE = 128;
static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
RuleBasedBreakIterator *fBI; // The break iterator whose fPosition / fRuleStatusIndex / fDone this cache updates.
int32_t fStartBufIdx; // Buffer index of the first (lowest position) cached boundary.
int32_t fEndBufIdx; // inclusive
int32_t fTextIdx; // Text position of the current iteration position; kept equal to fBoundaries[fBufIdx].
int32_t fBufIdx; // Buffer index of the current iteration position.
int32_t fBoundaries[CACHE_SIZE]; // Circular buffer of boundary text positions.
uint16_t fStatuses[CACHE_SIZE]; // Rule status index for each entry of fBoundaries (same buffer index).
UVector32 fSideBuffer; // Scratch storage used by populatePreceding() to collect boundaries before inserting them.
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_BREAK_ITERATION
#endif // RBBI_CACHE_H

Просмотреть файл

@ -14,7 +14,7 @@
#include "unicode/utypes.h"
#include "rbbidata.h"
#include "rbbirb.h"
#include "utrie.h"
#include "utrie2.h"
#include "udatamem.h"
#include "cmemory.h"
#include "cstring.h"
@ -23,23 +23,6 @@
#include "uassert.h"
//-----------------------------------------------------------------------------------
//
// Trie access folding function. Copied as-is from properties code in uchar.c
//
//-----------------------------------------------------------------------------------
U_CDECL_BEGIN
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
/* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */
if(data&0x8000) {
return (int32_t)(data&0x7fff);
} else {
return 0;
}
}
U_CDECL_END
U_NAMESPACE_BEGIN
//-----------------------------------------------------------------------------
@ -71,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk "
dh->info.dataFormat[1] == 0x72 &&
dh->info.dataFormat[2] == 0x6b &&
dh->info.dataFormat[3] == 0x20)
// Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is
// validated when checking that.
dh->info.dataFormat[3] == 0x20 &&
isDataVersionAcceptable(dh->info.formatVersion))
) {
status = U_INVALID_FORMAT_ERROR;
return;
@ -84,6 +66,11 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
fUDataMem = udm;
}
// Report whether break-rule data with the given format version can be used.
// Only the first (major) component is compared against RBBI_DATA_FORMAT_VERSION;
// the remaining components are ignored.
UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) {
return RBBI_DATA_FORMAT_VERSION[0] == version[0];
}
//-----------------------------------------------------------------------------
//
// init(). Does most of the work of construction, shared between the
@ -96,10 +83,11 @@ void RBBIDataWrapper::init0() {
fReverseTable = NULL;
fSafeFwdTable = NULL;
fSafeRevTable = NULL;
fRuleSource = NULL;
fRuleSource = NULL;
fRuleStatusTable = NULL;
fUDataMem = NULL;
fRefCount = 0;
fTrie = NULL;
fUDataMem = NULL;
fRefCount = 0;
fDontFreeData = TRUE;
}
@ -108,8 +96,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
return;
}
fHeader = data;
if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3)
{
if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
status = U_INVALID_FORMAT_ERROR;
return;
}
@ -131,16 +118,23 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable);
}
// Rule Compatibility Hacks
// If a rule set includes reverse rules but does not explicitly include safe reverse rules,
// the reverse rules are to be treated as safe reverse rules.
utrie_unserialize(&fTrie,
(uint8_t *)data + fHeader->fTrie,
fHeader->fTrieLen,
&status);
if (fSafeRevTable == NULL && fReverseTable != NULL) {
fSafeRevTable = fReverseTable;
fReverseTable = NULL;
}
fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
(uint8_t *)data + fHeader->fTrie,
fHeader->fTrieLen,
NULL, // *actual length
&status);
if (U_FAILURE(status)) {
return;
}
fTrie.getFoldingOffset=getFoldingOffset;
fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
fRuleString.setTo(TRUE, fRuleSource, -1);
@ -165,6 +159,8 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
//-----------------------------------------------------------------------------
RBBIDataWrapper::~RBBIDataWrapper() {
U_ASSERT(fRefCount == 0);
utrie2_close(fTrie);
fTrie = NULL;
if (fUDataMem) {
udata_close(fUDataMem);
} else if (!fDontFreeData) {
@ -323,7 +319,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
pInfo->dataFormat[1]==0x72 &&
pInfo->dataFormat[2]==0x6b &&
pInfo->dataFormat[3]==0x20 &&
pInfo->formatVersion[0]==3 )) {
RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
pInfo->dataFormat[0], pInfo->dataFormat[1],
pInfo->dataFormat[2], pInfo->dataFormat[3],
@ -344,17 +340,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
//
// Get the RRBI Data Header, and check that it appears to be OK.
//
// Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually
// an int32_t with a value of 1. Starting with ICU 3.4,
// RBBI's fDataFormat matches the dataFormat field from the
// UDataInfo header, four int8_t bytes. The value is {3,1,0,0}
//
const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 ||
rbbiDH->fFormatVersion[0] != 3 ||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader))
{
!RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) {
udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
*status=U_UNSUPPORTED_ERROR;
return 0;
@ -451,8 +441,8 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
}
// Trie table for character categories
utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
outBytes+ds->readUInt32(rbbiDH->fTrie), status);
utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
outBytes+ds->readUInt32(rbbiDH->fTrie), status);
// Source Rules Text. It's UChar data
ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),

Просмотреть файл

@ -51,22 +51,23 @@ ubrk_swap(const UDataSwapper *ds,
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/uversion.h"
#include "umutex.h"
#include "utrie.h"
#include "utrie2.h"
U_NAMESPACE_BEGIN
// The current RBBI data format version.
static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
/*
* The following structs map exactly onto the raw data from ICU common data file.
*/
struct RBBIDataHeader {
uint32_t fMagic; /* == 0xbla0 */
uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
/* if there is one associated with this data. */
/* (version originates in rbbi, is copied to UDataInfo) */
/* For ICU 3.2 and earlier, this field was */
/* uint32_t fVersion */
/* with a value of 1. */
uint32_t fLength; /* Total length in bytes of this RBBI Data, */
/* including all sections, not just the header. */
uint32_t fCatCount; /* Number of character categories. */
@ -152,6 +153,8 @@ public:
RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
~RBBIDataWrapper();
static UBool isDataVersionAcceptable(const UVersionInfo version);
void init0();
void init(const RBBIDataHeader *data, UErrorCode &status);
RBBIDataWrapper *addReference();
@ -181,11 +184,11 @@ public:
/* number of int32_t values in the rule status table. Used to sanity check indexing */
int32_t fStatusMaxIdx;
UTrie fTrie;
UTrie2 *fTrie;
private:
u_atomic_int32_t fRefCount;
UDataMemory *fUDataMem;
UDataMemory *fUDataMem;
UnicodeString fRuleString;
UBool fDontFreeData;

Просмотреть файл

@ -24,16 +24,16 @@
#include "unicode/uchriter.h"
#include "unicode/parsepos.h"
#include "unicode/parseerr.h"
#include "cmemory.h"
#include "cstring.h"
#include "rbbirb.h"
#include "rbbinode.h"
#include "rbbiscan.h"
#include "rbbisetb.h"
#include "rbbitblb.h"
#include "rbbidata.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
@ -164,8 +164,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
int32_t totalSize = headerSize + forwardTableSize + reverseTableSize
+ safeFwdTableSize + safeRevTableSize
(void)safeFwdTableSize;
int32_t totalSize = headerSize
+ forwardTableSize
+ /* reverseTableSize */ 0
+ /* safeFwdTableSize */ 0
+ (safeRevTableSize ? safeRevTableSize : reverseTableSize)
+ statusTableSize + trieSize + rulesSize;
RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
@ -177,23 +182,45 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fMagic = 0xb1a0;
data->fFormatVersion[0] = 3;
data->fFormatVersion[1] = 1;
data->fFormatVersion[2] = 0;
data->fFormatVersion[3] = 0;
data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories();
// Only save the forward table and the safe reverse table,
// because these are the only ones used at run-time.
//
// For the moment, we still build the other tables if they are present in the rule source files,
// for backwards compatibility. Old rule files need to work, and this is the simplest approach.
//
// Additional backwards compatibility consideration: if no safe rules are provided, consider the
// reverse rules to actually be the safe reverse rules.
data->fFTable = headerSize;
data->fFTableLen = forwardTableSize;
data->fRTable = data->fFTable + forwardTableSize;
data->fRTableLen = reverseTableSize;
data->fSFTable = data->fRTable + reverseTableSize;
data->fSFTableLen = safeFwdTableSize;
data->fSRTable = data->fSFTable + safeFwdTableSize;
data->fSRTableLen = safeRevTableSize;
data->fTrie = data->fSRTable + safeRevTableSize;
// Do not save Reverse Table.
data->fRTable = data->fFTable + forwardTableSize;
data->fRTableLen = 0;
// Do not save the Safe Forward table.
data->fSFTable = data->fRTable + 0;
data->fSFTableLen = 0;
data->fSRTable = data->fSFTable + 0;
if (safeRevTableSize > 0) {
data->fSRTableLen = safeRevTableSize;
} else if (reverseTableSize > 0) {
data->fSRTableLen = reverseTableSize;
} else {
U_ASSERT(FALSE); // Rule build should have failed for lack of a reverse table
// before reaching this point.
}
data->fTrie = data->fSRTable + data->fSRTableLen;
data->fTrieLen = fSetBuilder->getTrieSize();
data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize;
@ -203,9 +230,14 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
fForwardTables->exportTable((uint8_t *)data + data->fFTable);
fReverseTables->exportTable((uint8_t *)data + data->fRTable);
fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
// fReverseTables->exportTable((uint8_t *)data + data->fRTable);
// fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
if (safeRevTableSize > 0) {
fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
} else {
fReverseTables->exportTable((uint8_t *)data + data->fSRTable);
}
fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);

Просмотреть файл

@ -15,6 +15,9 @@
#define RBBIRB_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
@ -207,6 +210,9 @@ struct RBBISetTableEl {
#endif
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif

Просмотреть файл

@ -47,6 +47,7 @@
//
//------------------------------------------------------------------------------
static const UChar gRuleSet_rule_char_pattern[] = {
// Characters that may appear as literals in patterns without escaping or quoting.
// [ ^ [ \ p { Z } \ u 0 0 2 0
0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30,
// - \ u 0 0 7 f ] - [ \ p
@ -558,6 +559,10 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
fRB->fDefaultTree = &fRB->fSafeRevTree;
} else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) {
fRB->fLookAheadHardBreak = TRUE;
} else if (opt == UNICODE_STRING("quoted_literals_only", 20)) {
fRuleSets[kRuleSet_rule_char-128].clear();
} else if (opt == UNICODE_STRING("unquoted_literals", 17)) {
fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus);
} else {
error(U_BRK_UNRECOGNIZED_OPTION);
}

Просмотреть файл

@ -35,7 +35,7 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uniset.h"
#include "utrie.h"
#include "utrie2.h"
#include "uvector.h"
#include "uassert.h"
#include "cmemory.h"
@ -44,43 +44,6 @@
#include "rbbisetb.h"
#include "rbbinode.h"
//------------------------------------------------------------------------
//
// getFoldedRBBIValue Call-back function used during building of Trie table.
// Folding value: just store the offset (16 bits)
// if there is any non-0 entry.
// (It'd really be nice if the Trie builder would provide a
// simple default, so this function could go away from here.)
//
//------------------------------------------------------------------------
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
U_CDECL_BEGIN
static uint32_t U_CALLCONV
getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
uint32_t value;
UChar32 limit;
UBool inBlockZero;
limit=start+0x400;
while(start<limit) {
value=utrie_get32(trie, start, &inBlockZero);
if(inBlockZero) {
start+=UTRIE_DATA_BLOCK_LENGTH;
} else if(value!=0) {
return (uint32_t)(offset|0x8000);
} else {
++start;
}
}
return 0;
}
U_CDECL_END
U_NAMESPACE_BEGIN
//------------------------------------------------------------------------
@ -116,7 +79,7 @@ RBBISetBuilder::~RBBISetBuilder()
delete r;
}
utrie_close(fTrie);
utrie2_close(fTrie);
}
@ -287,33 +250,38 @@ void RBBISetBuilder::build() {
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number
//
fTrie = utrie_open(NULL, // Pre-existing trie to be filled in
NULL, // Data array (utrie will allocate one)
100000, // Max Data Length
0, // Initial value for all code points
0, // Lead surrogate unit value
TRUE); // Keep Latin 1 in separately
fTrie = utrie2_open(0, // Initial value for all code points.
0, // Error value for out-of-range input.
fStatus);
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) {
utrie2_setRange32(fTrie,
rlRange->fStartChar, // Range start
rlRange->fEndChar, // Range end (inclusive)
rlRange->fNum, // value for range
TRUE, // Overwrite previously written values
fStatus);
}
}
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
//
//-----------------------------------------------------------------------------------
int32_t RBBISetBuilder::getTrieSize() /*const*/ {
fTrieSize = utrie_serialize(fTrie,
NULL, // Buffer
0, // Capacity
getFoldedRBBIValue,
TRUE, // Reduce to 16 bits
fStatus);
int32_t RBBISetBuilder::getTrieSize() {
if (U_FAILURE(*fStatus)) {
return 0;
}
utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
fTrieSize = utrie2_serialize(fTrie,
NULL, // Buffer
0, // Capacity
fStatus);
if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
*fStatus = U_ZERO_ERROR;
}
// RBBIDebugPrintf("Trie table size is %d\n", trieSize);
return fTrieSize;
}
@ -327,12 +295,10 @@ int32_t RBBISetBuilder::getTrieSize() /*const*/ {
//
//-----------------------------------------------------------------------------------
void RBBISetBuilder::serializeTrie(uint8_t *where) {
utrie_serialize(fTrie,
where, // Buffer
fTrieSize, // Capacity
getFoldedRBBIValue,
TRUE, // Reduce to 16 bits
fStatus);
utrie2_serialize(fTrie,
where, // Buffer
fTrieSize, // Capacity
fStatus);
}
//------------------------------------------------------------------------

Просмотреть файл

@ -13,12 +13,14 @@
#define RBBISETB_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/uobject.h"
#include "rbbirb.h"
#include "utrie2.h"
#include "uvector.h"
struct UNewTrie;
U_NAMESPACE_BEGIN
//
@ -109,8 +111,8 @@ private:
RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
UNewTrie *fTrie; // The mapping TRIE that is the end result of processing
uint32_t fTrieSize; // the Unicode Sets.
UTrie2 *fTrie; // The mapping TRIE that is the end result of processing
uint32_t fTrieSize; // the Unicode Sets.
// Groups correspond to character categories -
// groups of ranges that are in the same original UnicodeSets.
@ -129,4 +131,7 @@ private:
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif

Просмотреть файл

@ -11,37 +11,37 @@
#ifdef INCLUDED_FROM_UBIDI_PROPS_C
static const UVersionInfo ubidi_props_dataVersion={9,0,0,0};
static const UVersionInfo ubidi_props_dataVersion={0xa,0,0,0};
static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6060,0x5ce8,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x5802b6};
static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6028,0x5cb0,0x1a,0x620,0x8c0,0x10ac0,0x10af0,0,0,0,0,0,0,0,0x6302b6};
static const uint16_t ubidi_props_trieIndex[11884]={
static const uint16_t ubidi_props_trieIndex[11856]={
0x36a,0x372,0x37a,0x382,0x39a,0x3a2,0x3aa,0x3b2,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x38a,0x392,
0x38a,0x392,0x38a,0x392,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,0x3dc,0x3e4,0x3ec,0x3f4,0x3ef,0x3f7,
0x38a,0x392,0x38a,0x392,0x3ff,0x407,0x38a,0x392,0x38a,0x392,0x38a,0x392,0x40d,0x415,0x41d,0x425,
0x42d,0x435,0x43d,0x445,0x44b,0x453,0x45b,0x463,0x46b,0x473,0x479,0x481,0x489,0x491,0x499,0x4a1,
0x4ad,0x4a9,0x4b5,0x4bd,0x41f,0x4cd,0x4d5,0x4c5,0x4dd,0x4df,0x4e7,0x4ef,0x4f7,0x4f8,0x500,0x508,
0x510,0x4f8,0x518,0x51d,0x510,0x4f8,0x525,0x52d,0x4f7,0x535,0x53d,0x4ef,0x542,0x38a,0x54a,0x54e,
0x556,0x557,0x55f,0x567,0x4f7,0x56f,0x577,0x4ef,0x57f,0x581,0x500,0x4ef,0x38a,0x38a,0x589,0x38a,
0x38a,0x58f,0x597,0x38a,0x38a,0x59b,0x5a3,0x38a,0x5a7,0x5ae,0x38a,0x5b6,0x5be,0x5c5,0x541,0x38a,
0x38a,0x5cd,0x5d5,0x5dd,0x5e5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5ed,0x38a,0x5f5,0x38a,0x38a,0x38a,
0x5fd,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x605,0x38a,0x38a,0x38a,0x60d,0x60d,0x504,0x504,0x38a,0x613,0x61b,0x5f5,
0x631,0x623,0x623,0x639,0x640,0x629,0x38a,0x38a,0x38a,0x648,0x650,0x38a,0x38a,0x38a,0x652,0x65a,
0x662,0x38a,0x669,0x671,0x38a,0x679,0x38a,0x38a,0x681,0x684,0x542,0x68c,0x401,0x694,0x38a,0x69b,
0x38a,0x6a0,0x38a,0x38a,0x38a,0x38a,0x6a6,0x6ae,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x6b6,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6be,0x6c6,0x6ca,
0x6e2,0x6e8,0x6d2,0x6da,0x6f0,0x6f8,0x6fc,0x5c8,0x704,0x70c,0x714,0x38a,0x71c,0x65a,0x65a,0x65a,
0x72c,0x734,0x73c,0x744,0x749,0x751,0x759,0x724,0x761,0x769,0x38a,0x76f,0x776,0x65a,0x65a,0x65a,
0x65a,0x56d,0x77c,0x65a,0x784,0x38a,0x38a,0x657,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,
0x65a,0x65a,0x65a,0x65a,0x65a,0x78c,0x65a,0x65a,0x65a,0x65a,0x65a,0x792,0x65a,0x65a,0x79a,0x7a2,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a,0x65a,0x65a,0x7b2,0x7b9,0x7c1,0x7aa,
0x7d1,0x7d9,0x7e1,0x7e8,0x7f0,0x7f8,0x7ff,0x7c9,0x65a,0x65a,0x65a,0x807,0x80d,0x813,0x81b,0x820,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x827,0x38a,0x38a,0x38a,0x82f,0x38a,0x38a,0x38a,0x3d8,
0x837,0x83f,0x76c,0x38a,0x842,0x65a,0x65a,0x65d,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x849,0x84f,
0x85f,0x857,0x38a,0x38a,0x867,0x5fd,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x82e,
0x3bf,0x38a,0x86f,0x877,0x38a,0x87f,0x820,0x38a,0x38a,0x38a,0x38a,0x887,0x38a,0x38a,0x652,0x3b0,
0x556,0x557,0x55f,0x567,0x4f7,0x56f,0x577,0x4ef,0x401,0x57b,0x500,0x4ef,0x38a,0x38a,0x583,0x38a,
0x38a,0x589,0x591,0x38a,0x38a,0x595,0x59d,0x38a,0x5a1,0x5a8,0x38a,0x5b0,0x5b8,0x5bf,0x541,0x38a,
0x38a,0x5c7,0x5cf,0x5d7,0x5df,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x5e7,0x38a,0x5ef,0x38a,0x38a,0x38a,
0x5f7,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x5ff,0x38a,0x38a,0x38a,0x607,0x607,0x504,0x504,0x38a,0x60d,0x615,0x5ef,
0x62b,0x61d,0x61d,0x633,0x63a,0x623,0x38a,0x38a,0x38a,0x642,0x64a,0x38a,0x38a,0x38a,0x64c,0x654,
0x65c,0x38a,0x663,0x66b,0x38a,0x673,0x38a,0x38a,0x534,0x67b,0x542,0x683,0x401,0x68b,0x38a,0x692,
0x38a,0x697,0x38a,0x38a,0x38a,0x38a,0x69d,0x6a5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0x6ad,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x6b5,0x6bd,0x6c1,
0x6d9,0x6df,0x6c9,0x6d1,0x6e7,0x6ef,0x6f3,0x5c2,0x6fb,0x703,0x70b,0x38a,0x713,0x654,0x654,0x654,
0x723,0x72b,0x733,0x73b,0x740,0x748,0x750,0x71b,0x758,0x760,0x38a,0x766,0x76d,0x654,0x654,0x654,
0x654,0x56d,0x773,0x654,0x77b,0x38a,0x38a,0x651,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,
0x654,0x654,0x654,0x654,0x654,0x783,0x654,0x654,0x654,0x654,0x654,0x789,0x654,0x654,0x791,0x799,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x654,0x654,0x654,0x7a9,0x7b0,0x7b8,0x7a1,
0x7c8,0x7d0,0x7d8,0x7df,0x7e7,0x7ef,0x7f6,0x7c0,0x654,0x654,0x654,0x7fe,0x804,0x80a,0x812,0x817,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x81e,0x38a,0x38a,0x38a,0x826,0x38a,0x38a,0x38a,0x3d8,
0x82e,0x836,0x763,0x38a,0x839,0x654,0x654,0x657,0x654,0x654,0x654,0x654,0x654,0x654,0x840,0x846,
0x856,0x84e,0x38a,0x38a,0x85e,0x5f7,0x38a,0x3b1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x825,
0x3bf,0x38a,0x866,0x86e,0x38a,0x876,0x817,0x38a,0x38a,0x38a,0x38a,0x87e,0x38a,0x38a,0x64c,0x3b0,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -54,7 +54,7 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,0x65a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,0x654,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -98,10 +98,10 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x86f,0x65a,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x88e,0x38a,0x38a,0x893,0x557,0x38a,0x38a,0x5a9,0x65a,0x651,0x38a,0x38a,0x89b,0x38a,0x38a,0x38a,
0x8a3,0x8aa,0x623,0x8b2,0x38a,0x38a,0x8b9,0x8c1,0x38a,0x8c8,0x8cf,0x38a,0x4dd,0x8d4,0x38a,0x4f6,
0x38a,0x8dc,0x8e4,0x4f8,0x38a,0x8e8,0x4f7,0x8f0,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8f7,
0x38a,0x38a,0x38a,0x38a,0x866,0x654,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x885,0x38a,0x38a,0x88a,0x557,0x38a,0x38a,0x5a3,0x654,0x64b,0x38a,0x38a,0x892,0x38a,0x38a,0x38a,
0x89a,0x8a1,0x61d,0x8a9,0x38a,0x38a,0x579,0x8b1,0x38a,0x8b8,0x8bf,0x38a,0x4dd,0x8c4,0x38a,0x4f6,
0x38a,0x8cc,0x8d4,0x4f8,0x38a,0x8d8,0x4f7,0x8e0,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8e7,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
@ -141,9 +141,9 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x90b,0x8ff,0x903,0x489,0x489,0x489,0x489,0x489,
0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x913,0x489,0x489,0x489,0x489,0x91b,0x91f,
0x927,0x92f,0x933,0x93b,0x489,0x489,0x489,0x93f,0x947,0x37a,0x94f,0x957,0x38a,0x38a,0x38a,0x95f,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x8fb,0x8ef,0x8f3,0x489,0x489,0x489,0x489,0x489,
0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x903,0x489,0x489,0x489,0x489,0x90b,0x90f,
0x917,0x91f,0x923,0x92b,0x489,0x489,0x489,0x92f,0x937,0x37a,0x93f,0x947,0x38a,0x38a,0x38a,0x94f,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0xe28,0xe28,0xe68,0xea8,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xee0,0xf20,0xf60,0xf70,0xfb0,0xfbc,
@ -180,61 +180,61 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17,
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd17,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x967,0x38a,0x65a,0x65a,0x96f,0x5fd,0x38a,0x4f0,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x977,0x38a,0x38a,0x38a,0x97e,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x957,0x38a,0x654,0x654,0x95f,0x5f7,0x38a,0x4f0,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x967,0x38a,0x38a,0x38a,0x96e,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x986,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x98e,0x992,0x41f,0x41f,0x41f,0x41f,0x9a2,0x99a,0x41f,0x9aa,0x41f,0x41f,0x9b2,0x9b8,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x976,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x97e,0x982,0x41f,0x41f,0x41f,0x41f,0x992,0x98a,0x41f,0x99a,0x41f,0x41f,0x9a2,0x9a8,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x9c0,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x4f7,0x8bb,0x9c8,0x9cf,0x401,0x9d2,0x38a,0x38a,0x4dd,0x9da,0x38a,0x9e0,0x401,0x9e5,0x60f,0x38a,
0x38a,0x9ed,0x38a,0x38a,0x38a,0x38a,0x82f,0x9f5,0x401,0x4f8,0x556,0x9fc,0x38a,0x38a,0x38a,0x38a,
0x38a,0x8bb,0xa04,0x38a,0x38a,0xa08,0xa10,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa14,0xa1c,0x38a,
0x38a,0xa24,0x556,0xa2c,0x38a,0xa32,0x38a,0x38a,0x5ed,0xa3a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa42,0xa46,0xa4e,0x38a,0xa55,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa5c,0x38a,0x38a,0xa64,0xa6a,
0x38a,0x38a,0x38a,0xa70,0xa78,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa7c,0x38a,0xa82,0x38a,
0x41f,0x41f,0x41f,0x9b0,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x4f7,0x9b8,0x9bf,0x9c6,0x401,0x9c9,0x38a,0x38a,0x4dd,0x9d1,0x38a,0x9d7,0x401,0x9dc,0x609,0x38a,
0x38a,0x9e4,0x38a,0x38a,0x38a,0x38a,0x826,0x9ec,0x401,0x4f8,0x556,0x9f3,0x38a,0x38a,0x38a,0x38a,
0x38a,0x9b8,0x9fb,0x38a,0x38a,0x9ff,0xa07,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa0b,0xa13,0x38a,
0x38a,0xa1b,0x556,0xa23,0x38a,0xa29,0x38a,0x38a,0x5e7,0xa31,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa39,0xa3d,0xa45,0x38a,0xa4c,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa53,0x38a,0x38a,0xa61,0xa5b,
0x38a,0x38a,0x38a,0xa69,0xa71,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xa75,0x38a,0xa7b,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0xa88,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0xa81,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x511,0xa90,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x511,0xa89,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0xa97,0xa9f,0xaa5,0x38a,0x38a,0x65a,0x65a,0xaad,0x38a,0x38a,0x38a,0x38a,0x38a,0x65a,
0x65a,0xab5,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xabb,0x38a,0xac2,
0x38a,0xabe,0x38a,0xac5,0x38a,0xacd,0xad1,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xad9,0x3d8,0xae0,0xae7,0xaef,0x38a,0x38a,0x38a,
0x38a,0x38a,0xa90,0xa98,0xa9e,0x38a,0x38a,0x654,0x654,0xaa6,0x38a,0x38a,0x38a,0x38a,0x38a,0x654,
0x654,0xaae,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xab4,0x38a,0xabb,
0x38a,0xab7,0x38a,0xabe,0x38a,0xac6,0xaca,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x3d8,0xad2,0x3d8,0xad9,0xae0,0xae8,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xaf7,0xaff,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xaf0,0xaf8,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xb07,0x41f,0xb0f,
0xb0f,0xb16,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0xb00,0x41f,0xb08,
0xb08,0xb0f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xb1e,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x65a,0xb26,0x65a,0x65a,0x65d,0xb2b,0xb2f,0x849,0xb37,
0x38a,0x38a,0xb3d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x76d,0x38a,0x38a,0x38a,0x38a,0x65a,
0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,
0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0xb45,0xb4d,0x65a,
0x65a,0x65a,0x65d,0x65a,0x65a,0xb45,0x38a,0xb26,0x65a,0xb55,0x65a,0xb5d,0x84b,0x38a,0x38a,0xb26,
0xb61,0xb69,0x65f,0x65c,0x38a,0xb71,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0xb17,0x41f,
0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x41f,0x654,0xb1f,0x654,0x654,0x657,0xb24,0xb28,0x840,0xb30,
0x38a,0x38a,0xb36,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x764,0x38a,0x38a,0x38a,0x38a,0x654,
0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,
0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0x654,0xb3e,0xb46,0x654,
0x654,0x654,0x657,0x654,0x654,0xb3e,0x38a,0xb1f,0x654,0xb4e,0x654,0xb56,0x842,0x38a,0x38a,0xb1f,
0xb5a,0xb62,0x659,0x656,0x38a,0xb6a,0x56d,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb72,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb79,0xb89,0xb81,0xb81,0xb81,0xb8a,0xb8a,0xb8a,0xb8a,0x3d8,
0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0xb92,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,
0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0xb8a,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x38a,0x38a,0x38a,0x38a,0x38a,0x38a,0xb72,0xb82,0xb7a,0xb7a,0xb7a,0xb83,0xb83,0xb83,0xb83,0x3d8,
0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0xb8b,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,
0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0xb83,0x369,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4,4,4,0xa,0xa,
0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2,2,2,2,2,
@ -319,7 +319,7 @@ static const uint16_t ubidi_props_trieIndex[11884]={
1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,
0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x81,0x41,1,1,1,0xb1,0xb1,0xb1,
1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
1,1,1,1,0x4d,0xd,0x4d,0x4d,0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,
0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0xb1,0xb1,5,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
@ -348,8 +348,8 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0x11,0x11,
0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
@ -367,215 +367,211 @@ static const uint16_t ubidi_props_trieIndex[11884]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0,0,0xa0,0,0,0,0,
0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x11,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x11,
0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,
0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,
0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,4,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0,0xb1,0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x310a,
0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,
0xb1,0xb1,0x12,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,
0xa,0,0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,
0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x11,0x11,
0x11,0x11,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,
0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,
2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,
9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,
4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
2,2,2,2,2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0,0,0,4,0,
0,0,0,0,0,0,0,0,0,0,0xa,0xa,0,0,0,0,
0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,
0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,
0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,
0x100a,0x100a,0x100a,0xa,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,
0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,
0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,
0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,
0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,
0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,
0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,
0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,
0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,
0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,
0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,
0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,
0x100a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,
0xf00a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1,0xb1,0xb1,0,0,
0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xa,0xa,0x300a,0xf00a,
0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0x310a,0xf20a,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,4,
0,0xb1,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,0xb1,0xb1,0x12,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0xb1,0xb1,0,0,0,0,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,
0,0xb1,0xb1,0xb1,0,0,0,0,0xa,0,0,0,0xa,0xa,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,
0,0,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,
0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0,
0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,
0,0,0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,
0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,0,0,0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,
0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0xb1,0xb1,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815,0x813,0x816,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2,2,2,3,3,
0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9,9,9,9,0xb2,
0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6,4,4,4,4,
4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0xa,
0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,
0,0xa,0,0xa,0,0xa,0,0,0,0,4,0,0,0,0,0,
0,0,0,0,0,0,0xa,0xa,0,0,0,0,0x100a,0xa,0xa,0xa,
0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x900a,0x900a,
0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a,0xa,0x100a,0x100a,0x100a,
0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa,0xa,0x100a,3,4,
0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,
0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0x100a,0xa,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,
0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,
0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa,0x300a,0xf00a,0xa,0x500a,
0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,
0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,
0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,
0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,
0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,
0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,
0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,
0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,
0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,
0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,
0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x100a,
0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,
0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,
0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,
0xa,0xa,0xa,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,
0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,
0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,
0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,
0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0xb1,0,
0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,
@ -642,123 +638,125 @@ static const uint16_t ubidi_props_trieIndex[11884]={
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,
0,0xb1,0xb1,0,0,0xa0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1,0,0,0,0,
0,0,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0xb1,
0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0,
0,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,
0x11,0,0,0x11,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0x11,
0,0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0,
0,0x11,0x11,0x11,0,0,0,0,0,0,0,0,0,0,0x11,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0x11,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x11,0x11,0x11,0x11,0x11,0x11,0,0,0,0x11,0,0x11,0x11,0,0x11,
0x11,0x11,0x11,0x11,0x11,0x11,0,0x11,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,
0,0xa0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb2,0xb2,0xb2,0xb2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb2,
0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,
0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,0,0,0,0,
0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,
0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,
0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,
0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,
2,2,2,2,2,2,2,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,
0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,
0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0,
0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xb2,0xb2,0xb2,0xb2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,
0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x100a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,
0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,
0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,
0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,
0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
2,2,2,2,2,2,2,2,2,2,2,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,
0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0,0,0,0
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,
0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0,0,0,0
};
static const uint32_t ubidi_props_mirrors[26]={
@ -803,7 +801,7 @@ static const uint8_t ubidi_props_jgArray[672]={
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x5d,0x5a,0x60,0x63,0x5e,0x5f,0x59,0x61,0x5b,0x5c,0x62,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@ -828,13 +826,13 @@ static const UBiDiProps ubidi_props_singleton={
ubidi_props_trieIndex+3496,
NULL,
3496,
8388,
8360,
0x1a0,
0xe28,
0x0,
0x0,
0x110000,
0x2e68,
0x2e4c,
NULL, 0, FALSE, FALSE, 0, NULL
},
{ 2,2,0,0 }

Просмотреть файл

@ -961,6 +961,7 @@ ucase_toFullLower(UChar32 c,
0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/
*pString=nullptr;
return 0; /* remove the dot (continue without output) */
} else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) {
/*
@ -1059,6 +1060,7 @@ toUpperOrTitle(UChar32 c,
0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
*/
*pString=nullptr;
return 0; /* remove the dot (continue without output) */
} else {
/* no known conditional special case mapping, use a normal mapping */

Просмотреть файл

@ -61,7 +61,7 @@ enum {
/**
* Bit mask for getting just the options from a string compare options word
* that are relevant for case-insensitive string comparison.
* See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
* See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
* @internal
*/
#define _STRCASECMP_OPTIONS_MASK 0xffff
@ -69,10 +69,16 @@ enum {
/**
* Bit mask for getting just the options from a string compare options word
* that are relevant for case folding (of a single string or code point).
* See uchar.h.
*
* Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* It is conceivable that at some point we might use one more bit for using uppercase sharp s.
* It is conceivable that at some point we might want the option to use only simple case foldings
* when operating on strings.
*
* See stringoptions.h.
* @internal
*/
#define _FOLD_CASE_OPTIONS_MASK 0xff
#define _FOLD_CASE_OPTIONS_MASK 7
/* single-code point functions */

Просмотреть файл

@ -11,36 +11,36 @@
#ifdef INCLUDED_FROM_UCASE_CPP
static const UVersionInfo ucase_props_dataVersion={9,0,0,0};
static const UVersionInfo ucase_props_dataVersion={0xa,0,0,0};
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x6c6c,0x5a10,0x79c,0x172,0,0,0,0,0,0,0,0,0,0,3};
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x6dfc,0x5ba0,0x79c,0x172,0,0,0,0,0,0,0,0,0,0,3};
static const uint16_t ucase_props_trieIndex[11520]={
static const uint16_t ucase_props_trieIndex[11720]={
0x327,0x32f,0x337,0x33f,0x34d,0x355,0x35d,0x365,0x36d,0x375,0x37c,0x384,0x38c,0x394,0x39c,0x3a4,
0x3aa,0x3b2,0x3ba,0x3c2,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3fa,0x402,0x40a,0x412,0x41a,0x422,
0x42a,0x432,0x43a,0x442,0x44a,0x452,0x45a,0x462,0x45e,0x466,0x46b,0x473,0x47a,0x482,0x48a,0x492,
0x49a,0x4a2,0x4aa,0x4b2,0x346,0x34e,0x4b7,0x4bf,0x4c4,0x4cc,0x4d4,0x4dc,0x4db,0x4e3,0x4e8,0x4f0,
0x4f7,0x4fe,0x502,0x346,0x346,0x327,0x512,0x50a,0x51a,0x51c,0x524,0x52c,0x530,0x531,0x539,0x541,
0x549,0x531,0x551,0x556,0x549,0x531,0x55e,0x541,0x530,0x562,0x56a,0x541,0x56f,0x346,0x577,0x346,
0x4a1,0x4dd,0x57f,0x541,0x530,0x562,0x586,0x541,0x530,0x346,0x539,0x541,0x346,0x346,0x58c,0x346,
0x346,0x592,0x599,0x346,0x346,0x59d,0x5a5,0x346,0x5a9,0x5b0,0x346,0x5b7,0x5bf,0x5c6,0x5ce,0x346,
0x346,0x5d3,0x5db,0x5e3,0x5eb,0x5f3,0x5fb,0x490,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x5ff,0x346,0x346,0x60f,0x617,0x607,
0x549,0x531,0x551,0x556,0x549,0x531,0x55e,0x566,0x530,0x56e,0x576,0x541,0x57b,0x346,0x583,0x346,
0x4a1,0x4dd,0x58b,0x541,0x530,0x56e,0x592,0x541,0x59a,0x59c,0x539,0x541,0x346,0x346,0x5a4,0x346,
0x346,0x5aa,0x5b1,0x346,0x346,0x5b5,0x5bd,0x346,0x5c1,0x5c8,0x346,0x5cf,0x5d7,0x5de,0x5e6,0x346,
0x346,0x5eb,0x5f3,0x5fb,0x603,0x60b,0x613,0x490,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x617,0x346,0x346,0x627,0x62f,0x61f,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x61f,0x61f,0x53d,0x53d,0x346,0x625,0x62d,0x346,
0x635,0x346,0x63d,0x346,0x548,0x643,0x346,0x346,0x346,0x64b,0x346,0x346,0x346,0x346,0x346,0x346,
0x652,0x346,0x659,0x661,0x346,0x669,0x346,0x346,0x671,0x674,0x67c,0x682,0x68a,0x692,0x346,0x699,
0x346,0x69e,0x346,0x6a4,0x6ac,0x346,0x6b0,0x6b8,0x6c0,0x6c5,0x6c8,0x6d0,0x6e0,0x6d8,0x6f0,0x6e8,
0x36d,0x6f8,0x36d,0x700,0x703,0x36d,0x70b,0x36d,0x713,0x71b,0x723,0x72b,0x733,0x73b,0x743,0x74b,
0x753,0x75a,0x346,0x762,0x76a,0x346,0x772,0x77a,0x782,0x78a,0x792,0x79a,0x7a2,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x637,0x637,0x53d,0x53d,0x346,0x63d,0x645,0x346,
0x64d,0x346,0x655,0x346,0x548,0x65b,0x346,0x346,0x346,0x663,0x346,0x346,0x346,0x346,0x346,0x346,
0x66a,0x346,0x671,0x679,0x346,0x681,0x346,0x346,0x56d,0x689,0x691,0x697,0x59a,0x69f,0x346,0x6a6,
0x346,0x6ab,0x346,0x6b1,0x6b9,0x346,0x6bd,0x6c5,0x6cd,0x6d2,0x6d5,0x6dd,0x6ed,0x6e5,0x6fd,0x6f5,
0x36d,0x705,0x36d,0x70d,0x710,0x36d,0x718,0x36d,0x720,0x728,0x730,0x738,0x740,0x748,0x750,0x758,
0x760,0x767,0x346,0x76f,0x777,0x346,0x77f,0x787,0x78f,0x797,0x79f,0x7a7,0x7af,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x7a5,0x7ab,0x7b1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x7b2,0x7b8,0x7be,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x7b9,0x7be,0x7c2,0x7ca,0x36d,0x36d,0x36d,0x7d2,0x7da,0x7e2,0x346,0x7e7,0x346,0x346,0x346,0x7ef,
0x346,0x63a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x52f,0x7f7,0x346,0x346,0x7fe,0x346,0x346,0x806,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x7c6,0x7cb,0x7cf,0x7d7,0x36d,0x36d,0x36d,0x7df,0x7e7,0x7ef,0x346,0x7f4,0x346,0x346,0x346,0x7fc,
0x346,0x652,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x52f,0x804,0x346,0x346,0x80b,0x346,0x346,0x813,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
@ -96,12 +96,12 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x80e,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x81b,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x6a4,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x814,0x346,0x81c,0x821,0x829,0x346,0x346,0x831,0x839,0x841,0x36d,0x846,0x84e,0x854,0x346,0x85a,
0x862,0x548,0x346,0x346,0x346,0x346,0x869,0x871,0x346,0x878,0x87f,0x346,0x51a,0x884,0x88c,0x548,
0x346,0x892,0x89a,0x89e,0x346,0x8a6,0x8ae,0x8b6,0x346,0x8bc,0x8c0,0x8c8,0x8d8,0x8d0,0x346,0x8e0,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x6b1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x821,0x346,0x829,0x82e,0x836,0x346,0x346,0x83e,0x846,0x84e,0x36d,0x853,0x85b,0x861,0x346,0x867,
0x86f,0x548,0x346,0x346,0x346,0x346,0x876,0x87e,0x346,0x885,0x88c,0x346,0x51a,0x891,0x899,0x548,
0x346,0x89f,0x8a7,0x8ab,0x346,0x8b3,0x8bb,0x8c3,0x346,0x8c9,0x8cd,0x8d5,0x8e5,0x8dd,0x346,0x8ed,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
@ -141,15 +141,15 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x8e8,0x346,0x346,0x346,0x346,0x8f0,0x68a,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x8f5,0x346,0x346,0x346,0x346,0x8fd,0x59a,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x8f5,0x8fd,0x901,0x346,0x346,0x346,0x346,0x329,0x32f,0x909,0x911,0x918,0x4dd,0x346,0x346,0x920,
0x902,0x90a,0x90e,0x346,0x346,0x346,0x346,0x329,0x32f,0x916,0x91e,0x925,0x4dd,0x346,0x346,0x92d,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0xd1c,0xd1c,0xd34,0xd74,0xdb4,0xdf0,0xe30,0xe70,0xea8,0xee8,0xf28,0xf68,0xfa8,0xfe8,0x1028,0x1068,
0x10a8,0x10e8,0x1128,0x1168,0x1178,0x11ac,0x11e8,0x1228,0x1268,0x12a8,0xd18,0x12dc,0x1310,0x1350,0x136c,0x13a0,
0x9e1,0xa11,0xa51,0xa8c,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xab5,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xaf5,0x188,0x188,0xb2a,0xb69,0xba9,0xbe3,0xc1a,0x188,
0x9e1,0xa11,0xa51,0xa8c,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xab7,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xaf7,0x188,0x188,0xb2c,0xb6b,0xbab,0xbe5,0xc1c,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
@ -174,50 +174,50 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
0xc5a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x63e,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x928,0x346,0x346,0x346,0x92b,0x346,0x346,0x346,
0x346,0x933,0x939,0x93d,0x346,0x346,0x941,0x945,0x94b,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0xc5c,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x656,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x935,0x346,0x346,0x346,0x938,0x346,0x346,0x346,
0x346,0x940,0x946,0x94a,0x346,0x346,0x94e,0x952,0x958,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x953,0x957,0x346,0x346,0x346,0x346,0x346,0x95f,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x967,0x96b,0x973,0x977,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x960,0x964,0x346,0x346,0x346,0x346,0x346,0x96c,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x974,0x978,0x980,0x984,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x530,0x97c,0x983,0x985,0x68a,0x98d,0x346,0x346,0x995,0x99c,0x346,0x988,0x68a,0x9a2,0x9aa,
0x346,0x346,0x9af,0x346,0x346,0x346,0x346,0x329,0x9b7,0x68a,0x531,0x9bf,0x9c6,0x346,0x346,0x346,
0x346,0x346,0x97c,0x9ce,0x346,0x346,0x9d2,0x9da,0x346,0x346,0x346,0x346,0x346,0x346,0x9de,0x9e6,
0x346,0x346,0x9ee,0x4a1,0x346,0x346,0x9f6,0x346,0x346,0x9fc,0xa04,0x346,0x346,0x346,0x346,0x346,
0x346,0xa0c,0xa14,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa1c,0x346,0x346,
0x8f0,0xa24,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa2a,0x346,0xa30,0x671,
0x346,0x530,0x989,0x990,0x59b,0x59a,0x994,0x346,0x346,0x99c,0x9a3,0x346,0x9a9,0x59a,0x9ae,0x9b6,
0x346,0x346,0x9bb,0x346,0x346,0x346,0x346,0x329,0x9c3,0x59a,0x531,0x9cb,0x9d2,0x346,0x346,0x346,
0x346,0x346,0x989,0x9da,0x346,0x346,0x9de,0x9e6,0x346,0x346,0x346,0x346,0x346,0x346,0x9ea,0x9f2,
0x346,0x346,0x9fa,0x4a1,0x346,0x346,0xa02,0x346,0x346,0xa08,0xa10,0x346,0x346,0x346,0x346,0x346,
0x346,0xa18,0xa20,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa28,0xa2c,0xa34,0x346,
0xa3b,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa42,0x346,0x346,
0x8fd,0xa4a,0x346,0x346,0x346,0xa50,0xa58,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa5c,0x346,
0xa62,0x56d,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0xa68,0x346,0x346,0x59a,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0xa36,0x346,0x346,0x4a1,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa70,0x56d,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa3e,0x671,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa78,0xa80,0xa86,0x346,0x346,0x346,0x346,0xa8e,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa96,0xa9e,0xaa3,0xaa9,
0xab1,0xab9,0xac1,0xa9a,0xac9,0xad1,0xad9,0xae0,0xa9b,0xa96,0xa9e,0xa99,0xaa9,0xa9c,0xa97,0xae8,
0xa9a,0xaf0,0xaf8,0xb00,0xb07,0xaf3,0xafb,0xb03,0xb0a,0xaf6,0xb12,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x846,0xb1a,0x846,0xb21,0xb28,
0xb30,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0xa46,0xa4e,0xa54,0x346,0x346,0x346,0x346,0xa5c,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xa64,0xa6c,0xa71,0xa77,0xa7f,0xa87,
0xa8f,0xa68,0xa97,0xa9f,0xaa7,0xaae,0xa69,0xa64,0xa6c,0xa67,0xa77,0xa6a,0xa65,0xab6,0xa68,0xabe,
0xac6,0xace,0xad5,0xac1,0xac9,0xad1,0xad8,0xac4,0xae0,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x839,0xae8,0x839,0xaef,0xaf6,0xafe,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb06,0xb0e,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb38,0xb40,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb12,0x346,0xb1a,0xb22,0xb29,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0xb44,0x346,0xb4c,0xb54,0xb5b,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0xa60,0xb31,0xb31,0xb37,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x997,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0xa92,0xb63,0xb63,0xb69,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x99e,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x530,0x839,0x839,0x839,0x346,0x346,
0x346,0x346,0x839,0x839,0x839,0x839,0x839,0x839,0x839,0xa3a,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x530,0x846,0x846,0x846,
0x346,0x346,0x346,0x346,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0xa6c,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x326,0x326,0,0,0,0,
0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0x346,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,4,0,0,0,0,0,
@ -361,6 +361,9 @@ static const uint16_t ucase_props_trieIndex[11520]={
0,0,0,0,0,0,0,0,4,4,0,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,
4,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x64,0,0,4,0,4,4,4,4,0,0,0,
0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,4,0,
@ -371,6 +374,9 @@ static const uint16_t ucase_props_trieIndex[11520]={
0,0,4,4,4,0,4,4,4,0x64,0,0,0,0,0,0,
0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,4,0,
0,0,0,0,4,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0,
0,0,4,4,4,0,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,0,0,4,4,4,4,
@ -428,211 +434,208 @@ static const uint16_t ucase_props_trieIndex[11520]={
4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,
0x44,0x64,4,0,4,4,4,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x64,0,4,4,4,4,4,0,4,0,0,0,
0,0,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,4,4,0,0,
4,4,0x60,0x64,4,4,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,0,0,
0,4,0,4,4,4,0x60,0x60,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,4,0,0,4,0x64,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,0,0,
0x5cd9,0x5d39,0x5d99,0x5df9,0x5e59,0x5ef9,0x5f99,0x5ff9,0x6059,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x64,0x64,
0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0x64,0,0,
0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,0x25,5,5,5,5,5,5,5,5,1,1,1,1,1,
1,1,1,1,1,1,1,1,5,0x60b9,1,1,1,0x60f9,1,1,
5,5,5,5,0x25,5,5,5,0x25,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0x21,1,1,1,1,5,5,5,5,5,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x44,0x64,0x64,0x44,0x64,
0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x64,
0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x613a,0x61b9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x6239,0x6339,0x6439,0x6539,0x6639,0x6739,1,1,0x679a,1,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,0x6839,0x409,0x6939,0x409,
0x6a99,0x409,0x6bf9,0x409,0,0xfc0a,0,0xfc0a,0,0xfc0a,0,0xfc0a,0x409,0x409,0x409,0x409,
0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0x2509,0x2509,0x2b09,0x2b09,
0x2b09,0x2b09,0x3209,0x3209,0x4009,0x4009,0x3809,0x3809,0x3f09,0x3f09,0,0,0x6d59,0x6e39,0x6f19,0x6ff9,
0x70d9,0x71b9,0x7299,0x7379,0x745b,0x753b,0x761b,0x76fb,0x77db,0x78bb,0x799b,0x7a7b,0x7b59,0x7c39,0x7d19,0x7df9,
0x7ed9,0x7fb9,0x8099,0x8179,0x825b,0x833b,0x841b,0x84fb,0x85db,0x86bb,0x879b,0x887b,0x8959,0x8a39,0x8b19,0x8bf9,
0x8cd9,0x8db9,0x8e99,0x8f79,0x905b,0x913b,0x921b,0x92fb,0x93db,0x94bb,0x959b,0x967b,0x409,0x409,0x9759,0x9859,
0x9939,0,0x9a39,0x9b39,0xfc0a,0xfc0a,0xdb0a,0xdb0a,0x9c9b,4,0x9d79,4,4,4,0x9e19,0x9f19,
0x9ff9,0,0xa0f9,0xa1f9,0xd50a,0xd50a,0xd50a,0xd50a,0xa35b,4,4,4,0x409,0x409,0xa439,0xa599,
0,0,0xa739,0xa839,0xfc0a,0xfc0a,0xce0a,0xce0a,0,4,4,4,0x409,0x409,0xa999,0xaaf9,
0xac99,0x389,0xad99,0xae99,0xfc0a,0xfc0a,0xc80a,0xc80a,0xfc8a,4,4,4,0,0,0xaff9,0xb0f9,
0xb1d9,0,0xb2d9,0xb3d9,0xc00a,0xc00a,0xc10a,0xc10a,0xb53b,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,
0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4,
0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,
4,4,4,4,4,4,4,4,0,0x25,0,0,0,0,0,0,
0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,
5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,
0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,4,0x64,0x64,0x44,
0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0,
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0xb61a,0,
2,0,0xb69a,0xb71a,2,2,0,1,2,2,0xe0a,2,1,0,0,0,
0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1,
0x21,0x21,0,0,0,0,0xf209,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,
0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,
0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0,0,0,0x8a,0xff89,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,
0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xf309,0xf309,0xf309,0xf309,
0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0,0x8a,0xff89,0xb79a,0xb7da,0xb81a,0xb859,0xb899,0x8a,
0xff89,0x8a,0xff89,0x8a,0xff89,0xb8da,0xb91a,0xb95a,0xb99a,1,0x8a,0xff89,1,0x8a,0xff89,1,
1,1,1,1,0x25,5,0xb9da,0xba1a,0x8a,0xff89,0x8a,0xff89,1,0,0,0,
0,0,0,0x8a,0xff89,0x8a,0xff89,0x44,0x44,0x44,0x8a,0xff89,0,0,0,0,
0,0,0,0,0,0,0,0,0xba59,0xba99,0xbad9,0xbb19,0xbb59,0xbb99,0xbbd9,0xbc19,
0xbc59,0xbc99,0xbcd9,0xbd19,0xbd59,0xbd99,0xbdd9,0xbe19,0xbe59,0xbe99,0xbed9,0xbf19,0xbf59,0xbf99,0xbfd9,0xc019,
0xc059,0xc099,0xc0d9,0xc119,0xc159,0xc199,0xc1d9,0xc219,0xc259,0xc299,0xc2d9,0xc319,0xc359,0xc399,0,0xc3d9,
0,0,0,0,0,0xc419,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,
0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x60,0x60,0,4,4,4,
4,4,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0x64,0x64,4,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc45a,0xc4d9,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0,0x44,4,4,4,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
5,5,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,5,1,1,1,1,1,1,1,
1,0x8a,0xff89,0x8a,0xff89,0xc55a,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
4,4,4,0x8a,0xff89,0xc59a,1,0,0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc5da,0xc61a,0xc65a,0xc69a,0xc6da,0,
0xc71a,0xc75a,0xc79a,0xc7da,0x8a,0xff89,0x8a,0xff89,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
5,5,1,0,0,0,0,0,0,0,4,0,0,0,0x64,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,0x64,0x64,0x64,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,0,0x60,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x64,0,0,4,4,4,4,0,0,4,0,0,0,
0x60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,4,4,4,4,4,4,0,0,4,4,0,0,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,0x64,0,0,0x44,
0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,4,0,0,0,0,0,4,4,0,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0xc819,1,1,1,1,1,1,1,4,5,5,5,5,
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0xc859,0xc8b9,0xc919,0xc979,0xc9d9,0xca39,0xca99,0xcaf9,0xcb59,0xcbb9,0xcc19,0xcc79,0xccd9,0xcd39,0xcd99,0xcdf9,
0xda59,0xdab9,0xdb19,0xdb79,0xdbd9,0xdc39,0xdc99,0xdcf9,0xdd59,0xddb9,0xde19,0xde79,0xded9,0xdf39,0xdf99,0xdff9,
0xe059,0xe0b9,0xe119,0xe179,0xe1d9,0xe239,0xe299,0xe2f9,0xe359,0xe3b9,0xe419,0xe479,0xe4d9,0xe539,0xe599,0xe5f9,
0xce59,0xceb9,0xcf19,0xcf79,0xcfd9,0xd039,0xd099,0xd0f9,0xd159,0xd1b9,0xd219,0xd279,0xd2d9,0xd339,0xd399,0xd3f9,
0xd459,0xd4b9,0xd519,0xd579,0xd5d9,0xd639,0xd699,0xd6f9,0xd759,0xd7b9,0xd819,0xd879,0xd8d9,0xd939,0xd999,0xd9f9,
0,0,0,0,0,4,0,0,4,0,0,0,0,0x64,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xe659,0xe759,0xe859,0xe959,0xeab9,0xec19,0xed59,0,0,0,0,0,0,0,0,0,
0,0,0,0xee99,0xef99,0xf099,0xf199,0xf299,0,0,0,0,0,0,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,
0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,0,0,4,0,0,0,0,0,0,
0,0,0,0,0,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0,
0,0,4,0,4,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,
0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0,0,0,0,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,4,4,4,
0,4,4,0,0,0,0,0,4,0x64,4,0x44,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,0,0,0,0,
0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0,
0,0,0,0,0,0,0,0,0,0,0,0,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0,
0x44,0x64,4,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0,4,4,4,4,4,0,
4,0,0,0,0,0,4,0,0x60,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
4,4,0,0,4,4,0x60,0x64,4,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,
4,4,0,0,0,4,0,4,4,4,0x60,0x60,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,0,0,0x5cd9,0x5d39,0x5d99,0x5df9,0x5e59,0x5ef9,0x5f99,0x5ff9,0x6059,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,0,0,0x64,0x64,0,
0,4,0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,
0x64,0x64,0x64,0x64,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,
0,0x64,0,0,0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,0x25,5,5,5,5,5,5,5,5,1,
1,1,1,1,1,1,1,1,1,1,1,1,5,0x60b9,1,1,
1,0x60f9,1,1,5,5,5,5,0x25,5,5,5,0x25,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,0x21,1,1,1,1,5,
5,5,5,5,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0,0x44,
0x64,0x64,0x44,0x64,0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,
0x44,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x613a,0x61b9,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x6239,0x6339,0x6439,0x6539,0x6639,0x6739,1,1,0x679a,1,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xffa9,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0,0,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0,0,
0x6839,0x409,0x6939,0x409,0x6a99,0x409,0x6bf9,0x409,0,0xfc0a,0,0xfc0a,0,0xfc0a,0,0xfc0a,
0x409,0x409,0x409,0x409,0x409,0x409,0x409,0x409,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,0xfc0a,
0x2509,0x2509,0x2b09,0x2b09,0x2b09,0x2b09,0x3209,0x3209,0x4009,0x4009,0x3809,0x3809,0x3f09,0x3f09,0,0,
0x6d59,0x6e39,0x6f19,0x6ff9,0x70d9,0x71b9,0x7299,0x7379,0x745b,0x753b,0x761b,0x76fb,0x77db,0x78bb,0x799b,0x7a7b,
0x7b59,0x7c39,0x7d19,0x7df9,0x7ed9,0x7fb9,0x8099,0x8179,0x825b,0x833b,0x841b,0x84fb,0x85db,0x86bb,0x879b,0x887b,
0x8959,0x8a39,0x8b19,0x8bf9,0x8cd9,0x8db9,0x8e99,0x8f79,0x905b,0x913b,0x921b,0x92fb,0x93db,0x94bb,0x959b,0x967b,
0x409,0x409,0x9759,0x9859,0x9939,0,0x9a39,0x9b39,0xfc0a,0xfc0a,0xdb0a,0xdb0a,0x9c9b,4,0x9d79,4,
4,4,0x9e19,0x9f19,0x9ff9,0,0xa0f9,0xa1f9,0xd50a,0xd50a,0xd50a,0xd50a,0xa35b,4,4,4,
0x409,0x409,0xa439,0xa599,0,0,0xa739,0xa839,0xfc0a,0xfc0a,0xce0a,0xce0a,0,4,4,4,
0x409,0x409,0xa999,0xaaf9,0xac99,0x389,0xad99,0xae99,0xfc0a,0xfc0a,0xc80a,0xc80a,0xfc8a,4,4,4,
0,0,0xaff9,0xb0f9,0xb1d9,0,0xb2d9,0xb3d9,0xc00a,0xc00a,0xc10a,0xc10a,0xb53b,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,
4,0,0,4,0,0,4,4,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,0,4,4,4,4,4,4,4,4,4,4,0,0x25,0,0,
0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,
0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,
4,0x64,0x64,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
0,0,0,2,0,0,1,2,2,2,1,1,2,2,2,1,
0,2,0,0,0,2,2,2,2,2,0,0,0,0,0,0,
2,0,0xb61a,0,2,0,0xb69a,0xb71a,2,2,0,1,2,2,0xe0a,2,
1,0,0,0,0,1,0,0,1,1,2,2,0,0,0,0,
0,2,1,1,0x21,0x21,0,0,0,0,0xf209,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x80a,0x80a,0x80a,0x80a,
0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0x80a,0xf809,0xf809,0xf809,0xf809,
0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0xf809,0,0,0,0x8a,
0xff89,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xd0a,0xd0a,
0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,0xd0a,
0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,0xf309,
0xf309,0xf309,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,
0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0x180a,0,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,
0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0xe809,0,0x8a,0xff89,0xb79a,0xb7da,
0xb81a,0xb859,0xb899,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xb8da,0xb91a,0xb95a,0xb99a,1,0x8a,0xff89,
1,0x8a,0xff89,1,1,1,1,1,0x25,5,0xb9da,0xba1a,0x8a,0xff89,0x8a,0xff89,
1,0,0,0,0,0,0,0x8a,0xff89,0x8a,0xff89,0x44,0x44,0x44,0x8a,0xff89,
0,0,0,0,0,0,0,0,0,0,0,0,0xba59,0xba99,0xbad9,0xbb19,
0xbb59,0xbb99,0xbbd9,0xbc19,0xbc59,0xbc99,0xbcd9,0xbd19,0xbd59,0xbd99,0xbdd9,0xbe19,0xbe59,0xbe99,0xbed9,0xbf19,
0xbf59,0xbf99,0xbfd9,0xc019,0xc059,0xc099,0xc0d9,0xc119,0xc159,0xc199,0xc1d9,0xc219,0xc259,0xc299,0xc2d9,0xc319,
0xc359,0xc399,0,0xc3d9,0,0,0,0,0,0xc419,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x60,0x60,
0,4,4,4,4,4,0,0,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,4,
4,4,4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0x64,0x64,4,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc45a,0xc4d9,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0,0x44,4,4,4,0,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,5,5,0x44,0x44,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,5,1,1,1,
1,1,1,1,1,0x8a,0xff89,0x8a,0xff89,0xc55a,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,
0x8a,0xff89,0x8a,0xff89,4,4,4,0x8a,0xff89,0xc59a,1,0,0x8a,0xff89,0x8a,0xff89,
1,1,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0x8a,0xff89,0xc5da,0xc61a,
0xc65a,0xc69a,0xc6da,0,0xc71a,0xc75a,0xc79a,0xc7da,0x8a,0xff89,0x8a,0xff89,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,5,5,1,0,0,0,0,0,0,0,4,0,
0,0,0x64,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x64,4,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,0x64,
0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
4,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x64,0,0,4,4,4,4,0,0,
4,0,0,0,0x60,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,4,4,4,4,4,0,0,4,4,0,
0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,4,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,
0x64,0,0,0x44,0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,4,
4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0xc819,1,1,1,1,1,1,1,4,
5,5,5,5,1,1,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,0xc859,0xc8b9,0xc919,0xc979,0xc9d9,0xca39,0xca99,0xcaf9,0xcb59,0xcbb9,0xcc19,0xcc79,
0xccd9,0xcd39,0xcd99,0xcdf9,0xda59,0xdab9,0xdb19,0xdb79,0xdbd9,0xdc39,0xdc99,0xdcf9,0xdd59,0xddb9,0xde19,0xde79,
0xded9,0xdf39,0xdf99,0xdff9,0xe059,0xe0b9,0xe119,0xe179,0xe1d9,0xe239,0xe299,0xe2f9,0xe359,0xe3b9,0xe419,0xe479,
0xe4d9,0xe539,0xe599,0xe5f9,0xce59,0xceb9,0xcf19,0xcf79,0xcfd9,0xd039,0xd099,0xd0f9,0xd159,0xd1b9,0xd219,0xd279,
0xd2d9,0xd339,0xd399,0xd3f9,0xd459,0xd4b9,0xd519,0xd579,0xd5d9,0xd639,0xd699,0xd6f9,0xd759,0xd7b9,0xd819,0xd879,
0xd8d9,0xd939,0xd999,0xd9f9,0,0,0,0,0,4,0,0,4,0,0,0,
0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0xe659,0xe759,0xe859,0xe959,0xeab9,0xec19,0xed59,0,0,0,0,0,
0,0,0,0,0,0,0,0xee99,0xef99,0xf099,0xf199,0xf299,0,0,0,0,
0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,4,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,
0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,4,0,0,4,0,0,
0,0,0,0,0,0,0,0,0,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0x100a,0x100a,0x100a,0,0,0,4,0,4,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,
0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,
0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0,0,0,0,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,
0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0xec09,0,0,0,0,
0,4,4,4,0,4,4,0,0,0,0,0,4,0x64,4,0x44,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,
0,0,0,0,0,0x44,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,0x200a,
0x200a,0x200a,0x200a,0,0,0,0,0,0,0,0,0,0,0,0,0,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,0xe009,
0xe009,0xe009,0xe009,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
4,4,4,4,4,4,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,0,0,0x64,0x64,0,0,4,0,0,
0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,0,4,4,4,4,4,4,0x64,
0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,
4,4,4,4,4,4,4,0,0x60,0,0,0,0,0,0,0,
0,0,0x64,4,4,0,0,0,0,0,0,0,0,0,0,0,
@ -663,79 +666,89 @@ static const uint16_t ucase_props_trieIndex[11520]={
0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,0xf009,
0,4,4,4,4,4,4,0,0,4,4,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,0,4,4,4,4,4,4,0,0x64,
4,4,4,4,4,4,4,4,0,0,4,4,4,4,4,4,
4,0,4,4,0,4,4,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0,
0,0,0,4,0x64,4,4,4,4,0,0,4,4,4,4,0,
0,0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,
0,4,4,4,4,4,4,0,0,4,4,4,0,0,0,0,
0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,4,
4,4,4,0,4,0x64,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,0,
4,4,4,4,4,4,0,0x64,4,4,4,4,4,4,4,4,
0,0,4,4,4,4,4,4,4,0,4,4,0,4,4,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0,4,4,4,4,4,4,0,0,0,4,0,4,4,0,4,
4,4,0x64,4,0x64,0x64,0,4,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,4,0x64,0,0,0,0,0,0,0x60,0x60,0x64,
0x64,0x64,0,0,0,0x60,0x60,0x60,0x60,0x60,0x60,4,4,4,4,4,
4,4,4,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0x44,0x44,0x44,
0x44,0x44,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,1,1,1,1,1,1,1,0,0x21,0x21,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,2,0,2,2,0,0,2,0,
0,2,2,0,0,2,2,2,2,0,2,2,2,2,2,2,
2,2,1,1,1,1,0,1,0,1,0x21,0x21,1,1,1,1,
0,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,
2,2,0,2,2,2,2,0,0,2,2,2,2,2,2,2,
2,0,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,0,2,2,2,2,0,2,2,2,2,
2,0,2,0,0,0,2,2,2,2,2,2,2,0,1,1,
1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,1,1,1,0,0,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,0,1,1,1,1,1,1,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,0,1,1,1,1,1,1,2,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,0,0,0,0,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,
0,4,0,0,0,0,0,0,0,0,0,0,4,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,4,4,4,4,4,0,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0,0x44,0x44,
0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,
0,0,0,0,0,0,0,0,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,2,2,2,2,2,2,0,0,
0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,4,0x64,0,
0,0,0,0,0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,
0x60,0x60,0x60,4,4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,
0x64,0x64,0x64,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
1,0,0x21,0x21,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,0,2,2,0,0,2,0,0,2,2,0,0,2,2,2,
2,0,2,2,2,2,2,2,2,2,1,1,1,1,0,1,
0,1,0x21,0x21,1,1,1,1,0,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,1,1,1,1,2,2,0,2,2,2,2,0,
0,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,0,2,
2,2,2,0,2,2,2,2,2,0,2,0,0,0,2,2,
2,2,2,2,2,0,1,1,1,1,1,1,1,1,0x21,0x21,
1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,0,0,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,
1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,0,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,1,1,1,0,1,1,1,1,
1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
4,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,
0x44,0x44,0,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,
0x110a,0x110a,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,0xef09,
0x44,0x44,0x44,0x44,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,0,0,0,0,0,0,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0
};
static const uint16_t ucase_props_exceptions[1948]={
@ -900,13 +913,13 @@ static const UCaseProps ucase_props_singleton={
ucase_props_trieIndex+3228,
NULL,
3228,
8292,
8492,
0x188,
0xd18,
0x0,
0x0,
0xe0800,
0x2cfc,
0x2dc4,
NULL, 0, FALSE, FALSE, 0, NULL
},
{ 3,0,0,0 }

Просмотреть файл

@ -20,8 +20,11 @@
#include "unicode/utypes.h"
#include "unicode/brkiter.h"
#include "unicode/bytestream.h"
#include "unicode/casemap.h"
#include "unicode/edits.h"
#include "unicode/stringoptions.h"
#include "unicode/stringpiece.h"
#include "unicode/ubrk.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"
@ -32,6 +35,7 @@
#include "unicode/utf.h"
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "bytesinkutil.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
@ -39,27 +43,6 @@
#include "ucasemap_imp.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
namespace {
// TODO: share with UTF-16? inline in ucasemap_imp.h?
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
Edits *edits, UErrorCode &errorCode) {
if (U_SUCCESS(errorCode)) {
if (destIndex > destCapacity) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != NULL) {
edits->copyErrorTo(errorCode);
}
}
return destIndex;
}
} // namespace
U_NAMESPACE_END
U_NAMESPACE_USE
/* UCaseMap service object -------------------------------------------------- */
@ -150,148 +133,39 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
/* TODO(markus): Move to a new, separate utf8case.cpp file. */
namespace {
/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
static inline int32_t
appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s,
int32_t cpLength, uint32_t options, icu::Edits *edits) {
UChar32 c;
int32_t length;
UErrorCode errorCode;
inline UBool
appendResult(int32_t cpLength, int32_t result, const UChar *s,
ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
U_ASSERT(U_SUCCESS(errorCode));
/* decode the result */
if(result<0) {
/* (not) original code point */
if(edits!=NULL) {
edits->addUnchanged(cpLength);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
c=~result;
if(destIndex<destCapacity && c<=0x7f) { // ASCII slightly-fastpath
dest[destIndex++]=(uint8_t)c;
return destIndex;
if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
}
length=cpLength;
} else {
if(result<=UCASE_MAX_STRING_LENGTH) {
// string: "result" is the UTF-16 length
errorCode=U_ZERO_ERROR;
if(destIndex<destCapacity) {
u_strToUTF8((char *)(dest+destIndex), destCapacity-destIndex, &length,
s, result, &errorCode);
} else {
u_strToUTF8(NULL, 0, &length, s, result, &errorCode);
}
if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
return -1;
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if(edits!=NULL) {
edits->addReplace(cpLength, length);
}
// We might have an overflow, but we know the actual length.
return destIndex+length;
} else if(destIndex<destCapacity && result<=0x7f) { // ASCII slightly-fastpath
dest[destIndex++]=(uint8_t)result;
if(edits!=NULL) {
edits->addReplace(cpLength, 1);
}
return destIndex;
return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
} else {
c=result;
length=U8_LENGTH(c);
if(edits!=NULL) {
edits->addReplace(cpLength, length);
}
ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
}
}
// c>=0 single code point
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if(destIndex<destCapacity) {
/* append the result */
UBool isError=FALSE;
U8_APPEND(dest, destIndex, destCapacity, c, isError);
if(isError) {
/* overflow, nothing written */
destIndex+=length;
}
} else {
/* preflight */
destIndex+=length;
}
return destIndex;
}
static inline int32_t
appendASCII(uint8_t *dest, int32_t destIndex, int32_t destCapacity, uint8_t c) {
if(destIndex<destCapacity) {
dest[destIndex]=c;
} else if(destIndex==INT32_MAX) {
return -1; // integer overflow
}
return destIndex+1;
return TRUE;
}
// See unicode/utf8.h U8_APPEND_UNSAFE().
static inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
static inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
static inline int32_t
appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar32 c) {
U_ASSERT(0x370 <= c && c <= 0x3ff); // 2-byte UTF-8, main Greek block
if(2>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
int32_t limit=destIndex+2;
if(limit<=destCapacity) {
dest+=destIndex;
dest[0]=getTwoByteLead(c);
dest[1]=getTwoByteTrail(c);
}
return limit;
}
static inline int32_t
appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, const char *s) {
if(2>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
int32_t limit=destIndex+2;
if(limit<=destCapacity) {
dest+=destIndex;
dest[0]=(uint8_t)s[0];
dest[1]=(uint8_t)s[1];
}
return limit;
}
static inline int32_t
appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
const uint8_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
if(length>0) {
if(edits!=NULL) {
edits->addUnchanged(length);
if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
return destIndex;
}
}
if(length>(INT32_MAX-destIndex)) {
return -1; // integer overflow
}
if((destIndex+length)<=destCapacity) {
uprv_memcpy(dest+destIndex, s, length);
}
destIndex+=length;
}
return destIndex;
}
} // namespace
static UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) {
@ -329,17 +203,15 @@ utf8_caseContextIterator(void *context, int8_t dir) {
* Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account.
*/
static int32_t
static void
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit,
icu::Edits *edits,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex=srcStart;
int32_t destIndex=0;
while(srcIndex<srcLimit) {
while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
int32_t cpStart;
csc->cpStart=cpStart=srcIndex;
UChar32 c;
@ -347,45 +219,32 @@ _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
csc->cpLimit=srcIndex;
if(c<0) {
// Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+cpStart, srcIndex-cpStart, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
continue;
}
const UChar *s;
c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
sink, options, edits, errorCode);
} else {
const UChar *s;
c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
}
}
return destIndex;
}
#if !UCONFIG_NO_BREAK_ITERATION
U_CFUNC int32_t U_CALLCONV
U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle(
int32_t caseLocale, uint32_t options, BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
icu::Edits *edits,
ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) {
return 0;
if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
return;
}
/* set up local variables */
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
int32_t destIndex=0;
int32_t prev=0;
UBool isFirstIndex=TRUE;
@ -404,45 +263,36 @@ ucasemap_internalUTF8ToTitle(
}
/*
* Unicode 4 & 5 section 3.13 Default Case Operations:
*
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* In this implementation, segment [prev..index[ into 3 parts:
* a) uncased characters (copy as-is) [prev..titleStart[
* b) first case letter (titlecase) [titleStart..titleLimit[
* Segment [prev..index[ into 3 parts:
* a) skipped characters (copy as-is) [prev..titleStart[
* b) first letter (titlecase) [titleStart..titleLimit[
* c) subsequent characters (lowercase) [titleLimit..index[
*/
if(prev<index) {
/* find and copy uncased characters [prev..titleStart[ */
/* find and copy skipped characters [prev..titleStart[ */
int32_t titleStart=prev;
int32_t titleLimit=prev;
UChar32 c;
U8_NEXT(src, titleLimit, index, c);
if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) {
/* Adjust the titlecasing index (titleStart) to the next cased character. */
for(;;) {
if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
// Adjust the titlecasing index to the next cased character,
// or to the next letter/number/symbol/private use.
// Stop with titleStart<titleLimit<=index
// if there is a character to be titlecased,
// or else stop with titleStart==titleLimit==index.
UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
titleStart=titleLimit;
if(titleLimit==index) {
/*
* only uncased characters in [prev..index[
* stop with titleStart==titleLimit==index
*/
break;
}
U8_NEXT(src, titleLimit, index, c);
if(UCASE_NONE!=ucase_getType(c)) {
break; /* cased letter at [titleStart..titleLimit[ */
}
}
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+prev, titleStart-prev, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (prev < titleStart) {
if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev,
sink, options, edits, errorCode)) {
return;
}
}
}
@ -453,16 +303,15 @@ ucasemap_internalUTF8ToTitle(
csc.cpLimit=titleLimit;
const UChar *s;
c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
destIndex=appendResult(dest, destIndex, destCapacity, c, s,
titleLimit-titleStart, options, edits);
if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
return;
}
} else {
// Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+titleStart, titleLimit-titleStart, options, edits);
}
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
sink, options, edits, errorCode)) {
return;
}
}
/* Special case Dutch IJ titlecasing */
@ -470,22 +319,13 @@ ucasemap_internalUTF8ToTitle(
caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
if (src[titleStart+1] == 0x006A) {
destIndex=appendASCII(dest, destIndex, destCapacity, 0x004A);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
if(edits!=NULL) {
edits->addReplace(1, 1);
}
ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits);
titleLimit++;
} else if (src[titleStart+1] == 0x004A) {
// Keep the capital J from getting lowercased.
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+titleStart+1, 1, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1,
sink, options, edits, errorCode)) {
return;
}
titleLimit++;
}
@ -495,26 +335,18 @@ ucasemap_internalUTF8ToTitle(
if(titleLimit<index) {
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
destIndex+=
_caseMap(
caseLocale, options, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
src, &csc,
titleLimit, index,
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
}
_caseMap(caseLocale, options, ucase_toFullLower,
src, &csc,
titleLimit, index,
sink, edits, errorCode);
if(U_FAILURE(errorCode)) {
return destIndex;
return;
}
} else {
/* Optionally just copy the rest of the word unchanged. */
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+titleLimit, index-titleLimit, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit,
sink, options, edits, errorCode)) {
return;
}
}
}
@ -523,8 +355,6 @@ ucasemap_internalUTF8ToTitle(
prev=index;
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
#endif
@ -549,12 +379,10 @@ UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
}
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
int32_t toUpper(uint32_t options,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
Edits *edits,
UErrorCode &errorCode) {
int32_t destIndex=0;
void toUpper(uint32_t options,
const uint8_t *src, int32_t srcLength,
ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
uint32_t state = 0;
for (int32_t i = 0; i < srcLength;) {
int32_t nextIndex = i;
@ -630,8 +458,10 @@ int32_t toUpper(uint32_t options,
}
}
UBool change = TRUE;
if (edits != NULL) {
UBool change;
if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
change = TRUE; // common, simple usage
} else {
// Find out first whether we are changing the text.
U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block
change = (i + 2) > nextIndex ||
@ -662,148 +492,146 @@ int32_t toUpper(uint32_t options,
edits->addUnchanged(oldLength);
}
// Write unchanged text?
change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0;
change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
}
}
if (change) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, upper);
if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0308"); // restore or add a dialytika
ByteSinkUtil::appendTwoBytes(upper, sink);
if ((data & HAS_EITHER_DIALYTIKA) != 0) {
sink.Append(u8"\u0308", 2); // restore or add a dialytika
}
if (destIndex >= 0 && addTonos) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0301");
if (addTonos) {
sink.Append(u8"\u0301", 2);
}
while (destIndex >= 0 && numYpogegrammeni > 0) {
destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0399");
while (numYpogegrammeni > 0) {
sink.Append(u8"\u0399", 2);
--numYpogegrammeni;
}
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
} else if(c>=0) {
const UChar *s;
c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
nextIndex - i, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
return;
}
} else {
// Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+i, nextIndex-i, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
sink, options, edits, errorCode)) {
return;
}
}
i = nextIndex;
state = nextState;
}
return destIndex;
}
} // namespace GreekUpper
U_NAMESPACE_END
static int32_t U_CALLCONV
static void U_CALLCONV
ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
icu::Edits *edits,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
int32_t destIndex = _caseMap(
_caseMap(
caseLocale, options, ucase_toFullLower,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, errorCode);
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
sink, edits, errorCode);
}
static int32_t U_CALLCONV
static void U_CALLCONV
ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
icu::Edits *edits,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
int32_t destIndex;
if (caseLocale == UCASE_LOC_GREEK) {
destIndex = GreekUpper::toUpper(options, dest, destCapacity,
src, srcLength, edits, errorCode);
GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
} else {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
csc.limit=srcLength;
destIndex = _caseMap(
_caseMap(
caseLocale, options, ucase_toFullUpper,
dest, destCapacity,
src, &csc, 0, srcLength,
edits, errorCode);
sink, edits, errorCode);
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
static int32_t U_CALLCONV
static void U_CALLCONV
ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
icu::Edits *edits,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* case mapping loop */
int32_t srcIndex = 0;
int32_t destIndex = 0;
while (srcIndex < srcLength) {
while (U_SUCCESS(errorCode) && srcIndex < srcLength) {
int32_t cpStart = srcIndex;
UChar32 c;
U8_NEXT(src, srcIndex, srcLength, c);
if(c<0) {
// Malformed UTF-8.
destIndex=appendUnchanged(dest, destIndex, destCapacity,
src+cpStart, srcIndex-cpStart, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
continue;
}
const UChar *s;
c = ucase_toFullFolding(c, &s, options);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
sink, options, edits, errorCode);
} else {
const UChar *s;
c = ucase_toFullFolding(c, &s, options);
appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
}
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
U_CFUNC int32_t
void
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode) {
/* check argument values */
if (U_FAILURE(errorCode)) {
return;
}
if ((src == nullptr && srcLength != 0) || srcLength < -1) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Get the string length.
if (srcLength == -1) {
srcLength = (int32_t)uprv_strlen((const char *)src);
}
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset();
}
stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
(const uint8_t *)src, srcLength, sink, edits, errorCode);
sink.Flush();
if (U_SUCCESS(errorCode)) {
if (edits != nullptr) {
edits->copyErrorTo(errorCode);
}
}
}
int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
icu::Edits *edits,
UErrorCode &errorCode) {
int32_t destLength;
/* check argument values */
if(U_FAILURE(errorCode)) {
return 0;
}
if( destCapacity<0 ||
(dest==NULL && destCapacity>0) ||
src==NULL ||
srcLength<-1
(src==NULL && srcLength!=0) || srcLength<-1
) {
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@ -823,12 +651,21 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
return 0;
}
if(edits!=NULL) {
CheckedArrayByteSink sink(dest, destCapacity);
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset();
}
destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
dest, destCapacity, src, srcLength, edits, errorCode);
return u_terminateChars((char *)dest, destCapacity, destLength, &errorCode);
stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
(const uint8_t *)src, srcLength, sink, edits, errorCode);
sink.Flush();
if (U_SUCCESS(errorCode)) {
if (sink.Overflowed()) {
errorCode = U_BUFFER_OVERFLOW_ERROR;
} else if (edits != nullptr) {
edits->copyErrorTo(errorCode);
}
}
return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
}
/* public API functions */
@ -840,8 +677,8 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(
csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
}
@ -852,8 +689,8 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(
csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
}
@ -864,13 +701,43 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(
UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8Fold, NULL, *pErrorCode);
}
U_NAMESPACE_BEGIN
void CaseMap::utf8ToLower(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
src.data(), src.length(),
ucasemap_internalUTF8ToLower, sink, edits, errorCode);
}
void CaseMap::utf8ToUpper(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
src.data(), src.length(),
ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
}
/**
 * Case-folds UTF-8 text, appending the result to a ByteSink.
 * Folding takes no locale from the caller: UCASE_LOC_ROOT is always passed
 * to the ByteSink overload of ucasemap_mapUTF8() together with the folding
 * mapper. Errors are reported through errorCode.
 */
void CaseMap::utf8Fold(
        uint32_t options,
        StringPiece src, ByteSink &sink, Edits *edits,
        UErrorCode &errorCode) {
    const char *const bytes = src.data();
    const int32_t numBytes = src.length();
    // UCASEMAP_BREAK_ITERATOR_NULL stands in for the optional break iterator
    // argument; it is deliberately not followed by a comma.
    ucasemap_mapUTF8(
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
        bytes, numBytes,
        ucasemap_internalUTF8Fold, sink, edits, errorCode);
}
int32_t CaseMap::utf8ToLower(
const char *locale, uint32_t options,
const char *src, int32_t srcLength,
@ -878,8 +745,8 @@ int32_t CaseMap::utf8ToLower(
UErrorCode &errorCode) {
return ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8ToLower, edits, errorCode);
}
@ -890,8 +757,8 @@ int32_t CaseMap::utf8ToUpper(
UErrorCode &errorCode) {
return ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8ToUpper, edits, errorCode);
}
@ -902,8 +769,8 @@ int32_t CaseMap::utf8Fold(
UErrorCode &errorCode) {
return ucasemap_mapUTF8(
UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8Fold, edits, errorCode);
}

Просмотреть файл

@ -9,16 +9,26 @@
#include "unicode/utypes.h"
#include "unicode/ucasemap.h"
#include "unicode/uchar.h"
#include "ucase.h"
#ifndef U_COMPARE_IGNORE_CASE
/* see also unorm.h */
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* Bit mask for the titlecasing iterator options bit field.
* Currently only 3 out of 8 values are used:
* 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* See stringoptions.h.
* @internal
*/
#define U_COMPARE_IGNORE_CASE 0x10000
#endif
#define U_TITLECASE_ITERATOR_MASK 0xe0
/**
* Bit mask for the titlecasing index adjustment options bit set.
* Currently two bits are defined:
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED.
* See stringoptions.h.
* @internal
*/
#define U_TITLECASE_ADJUSTMENT_MASK 0x600
/**
* Internal API, used by u_strcasecmp() etc.
@ -32,7 +42,7 @@ u_strcmpFold(const UChar *s1, int32_t length1,
UErrorCode *pErrorCode);
/**
* Interanl API, used for detecting length of
* Internal API, used for detecting length of
* shared prefix case-insensitively.
* @param s1 input string 1
* @param length1 length of string 1, or -1 (NULL terminated)
@ -61,6 +71,44 @@ uprv_haveProperties(UErrorCode *pErrorCode);
#ifdef __cplusplus
U_NAMESPACE_BEGIN
class BreakIterator; // unicode/brkiter.h
class ByteSink;
class Locale; // unicode/locid.h
/**
 * Validates the titlecasing index-adjustment bits in options.
 * Returns FALSE without touching errorCode if it already holds a failure.
 * Otherwise returns TRUE unless every bit of U_TITLECASE_ADJUSTMENT_MASK is
 * set (both adjustment options requested together), in which case errorCode
 * is set to U_ILLEGAL_ARGUMENT_ERROR and FALSE is returned.
 */
inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return FALSE;
    }
    const uint32_t adjustment = options & U_TITLECASE_ADJUSTMENT_MASK;
    if (adjustment != U_TITLECASE_ADJUSTMENT_MASK) {
        return TRUE;
    }
    // Both adjustment options were requested at once.
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return FALSE;
}
/**
 * Tells whether c counts as a letter, number or symbol (LNS).
 * Private use code points are included because they are typically used as
 * letters or numbers. Modifier letters are excluded from the category mask
 * and only count when they are cased.
 */
inline UBool ustrcase_isLNS(UChar32 c) {
    // L, N, S and Co general categories, with Lm taken out of the set.
    const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;
    int gc = u_charType(c);
    if ((U_MASK(gc) & LNS) != 0) {
        return TRUE;
    }
    // Not in the mask: accept only a modifier letter that has a case type.
    return gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE;
}
#if !UCONFIG_NO_BREAK_ITERATION
/** Returns nullptr if error. Pass in either locale or locID, not both. */
U_CFUNC
BreakIterator *ustrcase_getTitleBreakIterator(
const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);
#endif
U_NAMESPACE_END
#include "unicode/unistr.h" // for UStringCaseMapper
/*
@ -163,39 +211,43 @@ ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITE
* UTF-8 version of UStringCaseMapper.
* All error checking must be done.
* The UCaseMap must be fully initialized, with locale and/or iter set as needed.
* src and dest must not overlap.
*/
typedef int32_t U_CALLCONV
typedef void U_CALLCONV
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter,
#endif
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
icu::Edits *edits,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/** Implements UTF8CaseMapper. */
U_CFUNC int32_t U_CALLCONV
U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
icu::BreakIterator *iter,
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
icu::Edits *edits,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode);
#endif
void
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
icu::ByteSink &sink, icu::Edits *edits,
UErrorCode &errorCode);
/**
* Implements argument checking and buffer handling
* for UTF-8 string case mapping as a common function.
*/
U_CFUNC int32_t
int32_t
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
uint8_t *dest, int32_t destCapacity,
const uint8_t *src, int32_t srcLength,
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UTF8CaseMapper *stringCaseMapper,
icu::Edits *edits,
UErrorCode &errorCode);

Просмотреть файл

@ -31,6 +31,29 @@
U_NAMESPACE_BEGIN
void CaseMap::utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
UText utext = UTEXT_INITIALIZER;
utext_openUTF8(&utext, src.data(), src.length(), &errorCode);
LocalPointer<BreakIterator> ownedIter;
iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
if (iter == nullptr) {
utext_close(&utext);
return;
}
iter->setText(&utext, errorCode);
ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, iter,
src.data(), src.length(),
ucasemap_internalUTF8ToTitle, sink, edits, errorCode);
utext_close(&utext);
}
int32_t CaseMap::utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
const char *src, int32_t srcLength,
@ -42,19 +65,16 @@ int32_t CaseMap::utf8ToTitle(
UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, src, srcLength, &errorCode);
LocalPointer<BreakIterator> ownedIter;
iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode);
if(iter==NULL) {
iter=BreakIterator::createWordInstance(Locale(locale), errorCode);
ownedIter.adoptInstead(iter);
}
if(U_FAILURE(errorCode)) {
utext_close(&utext);
return 0;
}
iter->setText(&utext, errorCode);
int32_t length=ucasemap_mapUTF8(
ustrcase_getCaseLocale(locale), options, iter,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8ToTitle, edits, errorCode);
utext_close(&utext);
return length;
@ -88,17 +108,24 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
}
UText utext=UTEXT_INITIALIZER;
utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(csm->iter==NULL) {
csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
}
if (U_FAILURE(*pErrorCode)) {
return 0;
}
if(csm->iter==NULL) {
LocalPointer<BreakIterator> ownedIter;
BreakIterator *iter = ustrcase_getTitleBreakIterator(
nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode);
if (iter == nullptr) {
utext_close(&utext);
return 0;
}
csm->iter = ownedIter.orphan();
}
csm->iter->setText(&utext, *pErrorCode);
int32_t length=ucasemap_mapUTF8(
csm->caseLocale, csm->options, csm->iter,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
dest, destCapacity,
src, srcLength,
ucasemap_internalUTF8ToTitle, NULL, *pErrorCode);
utext_close(&utext);
return length;

Просмотреть файл

@ -729,8 +729,5 @@ upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
}
/* add the start code point of each same-value range of the properties vectors trie */
if(propsVectorsColumns>0) {
/* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
}
utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -287,7 +287,7 @@ UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UCha
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(units) {
hash=hash*37+ustr_hashUCharsN(units, len);
hash=hash*37u+ustr_hashUCharsN(units, len);
}
UBool

Просмотреть файл

@ -35,7 +35,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_START = -1,
UCLN_COMMON_USPREP,
UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_BREAKITERATOR_DICT,
UCLN_COMMON_RBBI,
UCLN_COMMON_SERVICE,
UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,

Просмотреть файл

@ -315,6 +315,7 @@ _CompoundTextClose(UConverter *converter) {
}
uprv_free(converter->extraInfo);
converter->extraInfo = NULL;
}
}
@ -519,7 +520,7 @@ UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args,
currentState = tmpState;
}
sourceOffset = uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength;
sourceOffset = static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength);
mySource += sourceOffset;

Просмотреть файл

@ -966,26 +966,26 @@ _LMBCSFromUnicode(UConverterFromUnicodeArgs* args,
if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START)
{
bytes_written = LMBCSConversionWorker (extraInfo,
bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
ULMBCS_GRP_L1, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried);
if(!bytes_written)
{
bytes_written = LMBCSConversionWorker (extraInfo,
bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried);
}
if(!bytes_written)
{
bytes_written = LMBCSConversionWorker (extraInfo,
bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
extraInfo->localeConverterIndex, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried);
}
}
else
{
bytes_written = LMBCSConversionWorker (extraInfo,
bytes_written = (int32_t)LMBCSConversionWorker (extraInfo,
extraInfo->localeConverterIndex, pLMBCS, &uniChar,
&lastConverterIndex, groups_tried);
}

Просмотреть файл

@ -1323,9 +1323,17 @@ _UTF16GetName(const UConverter *cnv) {
U_CDECL_END
extern const UConverterSharedData _UTF16Data;
#define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData)
#define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData)
#define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data)
/** Returns true if cnv's shared data is the _UTF16BEData object. */
static inline bool IS_UTF16BE(const UConverter *cnv) {
    return cnv->sharedData == &_UTF16BEData;
}
/** Returns true if cnv's shared data is the _UTF16LEData object. */
static inline bool IS_UTF16LE(const UConverter *cnv) {
    return cnv->sharedData == &_UTF16LEData;
}
/** Returns true if cnv's shared data is either _UTF16Data or _UTF16v2Data. */
static inline bool IS_UTF16(const UConverter *cnv) {
    if (cnv->sharedData == &_UTF16Data) {
        return true;
    }
    return cnv->sharedData == &_UTF16v2Data;
}
U_CDECL_BEGIN
static void U_CALLCONV

Просмотреть файл

@ -31,6 +31,7 @@
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "cmemory.h"
#include "ustr_imp.h"
/* Prototypes --------------------------------------------------------------- */
@ -44,51 +45,13 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args
/* UTF-8 -------------------------------------------------------------------- */
/* UTF-8 Conversion DATA
* for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9
*/
/*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/
#define MAXIMUM_UCS2 0x0000FFFF
#define MAXIMUM_UTF 0x0010FFFF
#define MAXIMUM_UCS4 0x7FFFFFFF
#define HALF_SHIFT 10
#define HALF_BASE 0x0010000
#define HALF_MASK 0x3FF
#define SURROGATE_HIGH_START 0xD800
#define SURROGATE_HIGH_END 0xDBFF
#define SURROGATE_LOW_START 0xDC00
#define SURROGATE_LOW_END 0xDFFF
/* -SURROGATE_LOW_START + HALF_BASE */
#define SURROGATE_LOW_BASE 9216
static const uint32_t offsetsFromUTF8[7] = {0,
static const uint32_t offsetsFromUTF8[5] = {0,
(uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
(uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080
(uint32_t) 0x03C82080
};
/* END OF UTF-8 Conversion DATA */
static const int8_t bytesFromUTF8[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
};
/*
* Starting with Unicode 3.0.1:
* UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
* byte sequences with more than 4 bytes are illegal in UTF-8,
* which is tested with impossible values for them
*/
static const uint32_t
utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
static UBool hasCESU8Data(const UConverter *cnv)
{
#if UCONFIG_ONLY_HTML_CONVERSION
@ -127,7 +90,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
while (mySource < sourceLimit && myTarget < targetLimit)
{
ch = *(mySource++);
if (ch < 0x80) /* Simple case */
if (U8_IS_SINGLE(ch)) /* Simple case */
{
*(myTarget++) = (UChar) ch;
}
@ -135,7 +98,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
{
/* store the first char */
toUBytes[0] = (char)ch;
inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */
inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */
i = 1;
morebytes:
@ -144,7 +107,8 @@ morebytes:
if (mySource < sourceLimit)
{
toUBytes[i] = (char) (ch2 = *mySource);
if (!U8_IS_TRAIL(ch2))
if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{
break; /* i < inBytes */
}
@ -162,24 +126,12 @@ morebytes:
}
}
/* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes];
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
*/
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
// In CESU-8, only surrogates, not supplementary code points, are encoded directly.
if (i == inBytes && (!isCESU8 || i <= 3))
{
/* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes];
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= MAXIMUM_UCS2)
{
@ -189,9 +141,8 @@ morebytes:
else
{
/* write out the surrogates */
ch -= HALF_BASE;
*(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
*(myTarget++) = U16_LEAD(ch);
ch = U16_TRAIL(ch);
if (myTarget < targetLimit)
{
*(myTarget++) = (UChar)ch;
@ -256,7 +207,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
while (mySource < sourceLimit && myTarget < targetLimit)
{
ch = *(mySource++);
if (ch < 0x80) /* Simple case */
if (U8_IS_SINGLE(ch)) /* Simple case */
{
*(myTarget++) = (UChar) ch;
*(myOffsets++) = offsetNum++;
@ -264,7 +215,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr
else
{
toUBytes[0] = (char)ch;
inBytes = bytesFromUTF8[ch];
inBytes = U8_COUNT_BYTES_NON_ASCII(ch);
i = 1;
morebytes:
@ -273,7 +224,8 @@ morebytes:
if (mySource < sourceLimit)
{
toUBytes[i] = (char) (ch2 = *mySource);
if (!U8_IS_TRAIL(ch2))
if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
{
break; /* i < inBytes */
}
@ -290,24 +242,12 @@ morebytes:
}
}
/* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes];
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
* In CESU-8, only surrogates, not supplementary code points, are encoded directly.
*/
if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
(isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
// In CESU-8, only surrogates, not supplementary code points, are encoded directly.
if (i == inBytes && (!isCESU8 || i <= 3))
{
/* Remove the accumulated high bits */
ch -= offsetsFromUTF8[inBytes];
/* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
if (ch <= MAXIMUM_UCS2)
{
@ -318,10 +258,9 @@ morebytes:
else
{
/* write out the surrogates */
ch -= HALF_BASE;
*(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
*(myTarget++) = U16_LEAD(ch);
*(myOffsets++) = offsetNum;
ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
ch = U16_TRAIL(ch);
if (myTarget < targetLimit)
{
*(myTarget++) = (UChar)ch;
@ -616,10 +555,9 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
UConverter *cnv;
const uint8_t *sourceInitial;
const uint8_t *source;
uint16_t extraBytesToWrite;
uint8_t myByte;
UChar32 ch;
int8_t i, isLegalSequence;
int8_t i;
/* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
@ -633,14 +571,14 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
}
myByte = (uint8_t)*(source++);
if (myByte < 0x80)
if (U8_IS_SINGLE(myByte))
{
args->source = (const char *)source;
return (UChar32)myByte;
}
extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte];
if (extraBytesToWrite == 0) {
uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte);
if (countTrailBytes == 0) {
cnv->toUBytes[0] = myByte;
cnv->toULength = 1;
*err = U_ILLEGAL_CHAR_FOUND;
@ -649,15 +587,17 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
}
/*The byte sequence is longer than the buffer area passed*/
if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit)
if (((const char *)source + countTrailBytes) > args->sourceLimit)
{
/* check if all of the remaining bytes are trail bytes */
uint16_t extraBytesToWrite = countTrailBytes + 1;
cnv->toUBytes[0] = myByte;
i = 1;
*err = U_TRUNCATED_CHAR_FOUND;
while(source < (const uint8_t *)args->sourceLimit) {
if(U8_IS_TRAIL(myByte = *source)) {
cnv->toUBytes[i++] = myByte;
uint8_t b = *source;
if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) {
cnv->toUBytes[i++] = b;
++source;
} else {
/* error even before we run out of input */
@ -670,81 +610,28 @@ static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
return 0xffff;
}
isLegalSequence = 1;
ch = myByte << 6;
switch(extraBytesToWrite)
{
/* note: code falls through cases! (sic)*/
case 6:
ch += (myByte = *source);
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
if(countTrailBytes == 2) {
uint8_t t1 = *source, t2;
if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) {
args->source = (const char *)(source + 1);
return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3];
}
++source;
U_FALLTHROUGH;
case 5:
ch += (myByte = *source);
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
} else if(countTrailBytes == 1) {
uint8_t t1 = *source;
if(U8_IS_TRAIL(t1)) {
args->source = (const char *)(source + 1);
return (ch + t1) - offsetsFromUTF8[2];
}
++source;
U_FALLTHROUGH;
case 4:
ch += (myByte = *source);
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
} else { // countTrailBytes == 3
uint8_t t1 = *source, t2, t3;
if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) &&
U8_IS_TRAIL(t3 = *++source)) {
args->source = (const char *)(source + 1);
return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4];
}
++source;
U_FALLTHROUGH;
case 3:
ch += (myByte = *source);
ch <<= 6;
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
}
++source;
U_FALLTHROUGH;
case 2:
ch += (myByte = *source);
if (!U8_IS_TRAIL(myByte))
{
isLegalSequence = 0;
break;
}
++source;
};
ch -= offsetsFromUTF8[extraBytesToWrite];
args->source = (const char *)source;
/*
* Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
* - use only trail bytes after a lead byte (checked above)
* - use the right number of trail bytes for a given lead byte
* - encode a code point <= U+10ffff
* - use the fewest possible number of bytes for their code points
* - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
*
* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
* There are no irregular sequences any more.
*/
if (isLegalSequence &&
(uint32_t)ch <= MAXIMUM_UTF &&
(uint32_t)ch >= utf8_minChar32[extraBytesToWrite] &&
!U_IS_SURROGATE(ch)
) {
return ch; /* return the code point */
}
args->source = (const char *)source;
for(i = 0; sourceInitial < source; ++i) {
cnv->toUBytes[i] = *sourceInitial++;
@ -757,14 +644,6 @@ U_CDECL_END
/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
static const UChar32
utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
static const UChar32
utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
U_CDECL_BEGIN
/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
static void U_CALLCONV
@ -812,39 +691,35 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
*pErrorCode=U_USING_DEFAULT_WARNING;
return;
} else {
/*
* Use a single counter for source and target, counting the minimum of
* the source length and the target capacity.
* As a result, the source length is checked only once per multi-byte
* character instead of twice.
*
* Make sure that the last byte sequence is complete, or else
* stop just before it.
* (The longest legal byte sequence has 3 trail bytes.)
* Count oldToULength (number of source bytes from a previous buffer)
* into the source length but reduce the source index by toULimit
* while going back over trail bytes in order to not go back into
* the bytes that will be read for finishing a partial
* sequence from the previous buffer.
* Let the standard converter handle edge cases.
*/
int32_t i;
// Use a single counter for source and target, counting the minimum of
// the source length and the target capacity.
// Let the standard converter handle edge cases.
if(count>targetCapacity) {
count=targetCapacity;
}
i=0;
while(i<3 && i<(count-toULimit)) {
b=source[count-oldToULength-i-1];
if(U8_IS_TRAIL(b)) {
++i;
} else {
if(i<U8_COUNT_TRAIL_BYTES(b)) {
/* stop converting before the lead byte if there are not enough trail bytes for it */
count-=i+1;
// The conversion loop checks count>0 only once per 1/2/3-byte character.
// If the buffer ends with a truncated 2- or 3-byte sequence,
// then we reduce the count to stop before that,
// and collect the remaining bytes after the conversion loop.
{
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=count-toULimit;
if(length>0) {
uint8_t b1=*(sourceLimit-1);
if(U8_IS_SINGLE(b1)) {
// common ASCII character
} else if(U8_IS_TRAIL(b1) && length>=2) {
uint8_t b2=*(sourceLimit-2);
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
// truncated 3-byte sequence
count-=2;
}
} else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--count;
}
break;
}
}
}
@ -859,17 +734,17 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
/* conversion loop */
while(count>0) {
b=*source++;
if((int8_t)b>=0) {
if(U8_IS_SINGLE(b)) {
/* convert ASCII */
*target++=b;
--count;
continue;
} else {
if(b>0xe0) {
if( /* handle U+1000..U+D7FF inline */
(t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) ||
(b==0xed && (t1 <= 0x9f))) &&
(t2=source[1]) >= 0x80 && t2 <= 0xbf
if(b>=0xe0) {
if( /* handle U+0800..U+FFFF inline */
b<0xf0 &&
U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
U8_IS_TRAIL(t2=source[1])
) {
source+=2;
*target++=b;
@ -878,10 +753,10 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
count-=3;
continue;
}
} else if(b<0xe0) {
} else {
if( /* handle U+0080..U+07FF inline */
b>=0xc2 &&
(t1=*source) >= 0x80 && t1 <= 0xbf
U8_IS_TRAIL(t1=*source)
) {
++source;
*target++=b;
@ -889,30 +764,18 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
count-=2;
continue;
}
} else if(b==0xe0) {
if( /* handle U+0800..U+0FFF inline */
(t1=source[0]) >= 0xa0 && t1 <= 0xbf &&
(t2=source[1]) >= 0x80 && t2 <= 0xbf
) {
source+=2;
*target++=b;
*target++=t1;
*target++=t2;
count-=3;
continue;
}
}
/* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0;
toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b;
moreBytes:
while(toULength<toULimit) {
if(source<sourceLimit) {
b=*source;
if(U8_IS_TRAIL(b)) {
if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source;
++toULength;
c=(c<<6)+b;
@ -934,18 +797,7 @@ moreBytes:
}
}
if( toULength==toULimit && /* consumed all trail bytes */
(toULength==3 || toULength==2) && /* BMP */
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
/* legal byte sequence for BMP code point */
} else if(
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* legal byte sequence for supplementary code point */
} else {
if(toULength!=toULimit) {
/* error handling: illegal UTF-8 byte sequence */
source-=(toULength-oldToULength);
while(oldToULength<toULength) {
@ -979,7 +831,7 @@ moreBytes:
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
} else {
b=*source;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
toULimit=U8_COUNT_BYTES(b);
if(toULimit>(sourceLimit-source)) {
/* collect a truncated byte sequence */
toULength=0;

Просмотреть файл

@ -23,6 +23,7 @@
#include "unicode/utf8.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "ustr_imp.h"
/* control optimizations according to the platform */
#define LATIN1_UNROLL_FROM_UNICODE 1
@ -374,7 +375,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) {
if(targetCapacity>0) {
b=*source++;
if((int8_t)b>=0) {
if(U8_IS_SINGLE(b)) {
/* convert ASCII */
*target++=(uint8_t)b;
--targetCapacity;
@ -409,7 +410,7 @@ ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
utf8->toULength=1;
utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1;
utf8->mode=U8_COUNT_BYTES(b);
}
/* write back the updated pointers */

Просмотреть файл

@ -59,6 +59,7 @@
#include "cmemory.h"
#include "cstring.h"
#include "umutex.h"
#include "ustr_imp.h"
/* control optimizations according to the platform */
#define MBCS_UNROLL_SINGLE_TO_BMP 1
@ -5011,13 +5012,9 @@ ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */
static const UChar32
utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
static const UChar32
utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
static void U_CALLCONV
ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
@ -5037,7 +5034,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint8_t b, t1, t2;
uint32_t asciiRoundtrips;
uint16_t value, minValue;
uint16_t value, minValue = 0;
UBool hasSupplementary;
/* set up the local pointers */
@ -5075,28 +5072,27 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
toULength=oldToULength=toULimit=0;
}
/*
* Make sure that the last byte sequence before sourceLimit is complete
* or runs into a lead byte.
* Do not go back into the bytes that will be read for finishing a partial
* sequence from the previous buffer.
* In the conversion loop compare source with sourceLimit only once
* per multi-byte character.
*/
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
// If the buffer ends with a truncated 2- or 3-byte sequence,
// then we reduce the sourceLimit to before that,
// and collect the remaining bytes after the conversion loop.
{
int32_t i, length;
length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
for(i=0; i<3 && i<length;) {
b=*(sourceLimit-i-1);
if(U8_IS_TRAIL(b)) {
++i;
} else {
if(i<U8_COUNT_TRAIL_BYTES(b)) {
/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
sourceLimit-=i+1;
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
if(length>0) {
uint8_t b1=*(sourceLimit-1);
if(U8_IS_SINGLE(b1)) {
// common ASCII character
} else if(U8_IS_TRAIL(b1) && length>=2) {
uint8_t b2=*(sourceLimit-2);
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
// truncated 3-byte sequence
sourceLimit-=2;
}
break;
} else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--sourceLimit;
}
}
}
@ -5130,7 +5126,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) {
if(targetCapacity>0) {
b=*source++;
if((int8_t)b>=0) {
if(U8_IS_SINGLE(b)) {
/* convert ASCII */
if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
*target++=(uint8_t)b;
@ -5185,7 +5181,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
/* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0;
toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b;
moreBytes:
while(toULength<toULimit) {
@ -5198,7 +5194,7 @@ moreBytes:
*/
if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source;
if(U8_IS_TRAIL(b)) {
if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source;
++toULength;
c=(c<<6)+b;
@ -5220,22 +5216,18 @@ moreBytes:
}
}
if( toULength==toULimit && /* consumed all trail bytes */
(toULength==3 || toULength==2) && /* BMP */
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
} else if(
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
value=0;
} else {
if(toULength==toULimit) {
c-=utf8_offsets[toULength];
if(toULength<=3) { /* BMP */
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
} else {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
value=0;
} else {
value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
}
}
} else {
/* error handling: illegal UTF-8 byte sequence */
@ -5310,7 +5302,7 @@ moreBytes:
source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
c=utf8->toUBytes[0]=b=*source++;
toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
toULimit=U8_COUNT_BYTES(b);
while(source<sourceLimit) {
utf8->toUBytes[toULength++]=b=*source++;
c=(c<<6)+b;
@ -5344,7 +5336,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
uint32_t stage2Entry;
uint32_t asciiRoundtrips;
uint16_t value;
uint16_t value = 0;
UBool hasSupplementary;
/* set up the local pointers */
@ -5375,28 +5367,27 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
toULength=oldToULength=toULimit=0;
}
/*
* Make sure that the last byte sequence before sourceLimit is complete
* or runs into a lead byte.
* Do not go back into the bytes that will be read for finishing a partial
* sequence from the previous buffer.
* In the conversion loop compare source with sourceLimit only once
* per multi-byte character.
*/
// The conversion loop checks source<sourceLimit only once per 1/2/3-byte character.
// If the buffer ends with a truncated 2- or 3-byte sequence,
// then we reduce the sourceLimit to before that,
// and collect the remaining bytes after the conversion loop.
{
int32_t i, length;
length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
for(i=0; i<3 && i<length;) {
b=*(sourceLimit-i-1);
if(U8_IS_TRAIL(b)) {
++i;
} else {
if(i<U8_COUNT_TRAIL_BYTES(b)) {
/* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
sourceLimit-=i+1;
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
if(length>0) {
uint8_t b1=*(sourceLimit-1);
if(U8_IS_SINGLE(b1)) {
// common ASCII character
} else if(U8_IS_TRAIL(b1) && length>=2) {
uint8_t b2=*(sourceLimit-2);
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
// truncated 3-byte sequence
sourceLimit-=2;
}
break;
} else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--sourceLimit;
}
}
}
@ -5412,7 +5403,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
while(source<sourceLimit) {
if(targetCapacity>0) {
b=*source++;
if((int8_t)b>=0) {
if(U8_IS_SINGLE(b)) {
/* convert ASCII */
if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
*target++=b;
@ -5426,13 +5417,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
}
}
} else {
if(b>0xe0) {
if( /* handle U+1000..U+D7FF inline */
(((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) ||
(b==0xed && (t1 <= 0x1f))) &&
if(b>=0xe0) {
if( /* handle U+0800..U+D7FF inline */
b<=0xed && // do not assume maxFastUChar>0xd7ff
U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) &&
(t2=(uint8_t)(source[1]-0x80)) <= 0x3f
) {
c=((b&0xf)<<6)|t1;
c=((b&0xf)<<6)|(t1&0x3f);
source+=2;
value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
if(value==0) {
@ -5442,7 +5433,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} else {
c=-1;
}
} else if(b<0xe0) {
} else {
if( /* handle U+0080..U+07FF inline */
b>=0xc2 &&
(t1=(uint8_t)(*source-0x80)) <= 0x3f
@ -5457,15 +5448,13 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
} else {
c=-1;
}
} else {
c=-1;
}
if(c<0) {
/* handle "complicated" and error cases, and continuing partial characters */
oldToULength=0;
toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
toULimit=U8_COUNT_BYTES_NON_ASCII(b);
c=b;
moreBytes:
while(toULength<toULimit) {
@ -5478,7 +5467,7 @@ moreBytes:
*/
if(source<(uint8_t *)pToUArgs->sourceLimit) {
b=*source;
if(U8_IS_TRAIL(b)) {
if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) {
++source;
++toULength;
c=(c<<6)+b;
@ -5500,22 +5489,18 @@ moreBytes:
}
}
if( toULength==toULimit && /* consumed all trail bytes */
(toULength==3 || toULength==2) && /* BMP */
(c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
(c<=0xd7ff || 0xe000<=c) /* not a surrogate */
) {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
} else if(
toULength==toULimit && toULength==4 &&
(0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
) {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
stage2Entry=0;
} else {
if(toULength==toULimit) {
c-=utf8_offsets[toULength];
if(toULength<=3) { /* BMP */
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
} else {
/* supplementary code point */
if(!hasSupplementary) {
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
stage2Entry=0;
} else {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
}
}
} else {
/* error handling: illegal UTF-8 byte sequence */
@ -5620,7 +5605,7 @@ unassigned:
source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
c=utf8->toUBytes[0]=b=*source++;
toULength=1;
toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
toULimit=U8_COUNT_BYTES(b);
while(source<sourceLimit) {
utf8->toUBytes[toULength++]=b=*source++;
c=(c<<6)+b;

Просмотреть файл

@ -25,6 +25,7 @@
#include "uenumimp.h"
#include "uhash.h"
#include "hash.h"
#include "uinvchar.h"
#include "uresimp.h"
#include "ulist.h"
#include "ureslocs.h"
@ -545,93 +546,97 @@ U_CAPI int32_t U_EXPORT2
ucurr_forLocale(const char* locale,
UChar* buff,
int32_t buffCapacity,
UErrorCode* ec)
{
int32_t resLen = 0;
const UChar* s = NULL;
if (ec != NULL && U_SUCCESS(*ec)) {
if ((buff && buffCapacity) || !buffCapacity) {
UErrorCode localStatus = U_ZERO_ERROR;
char id[ULOC_FULLNAME_CAPACITY];
if ((resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus))) {
// there is a currency keyword. Try to see if it's valid
if(buffCapacity > resLen) {
/* Normalize the currency keyword value to upper case. */
T_CString_toUpperCase(id);
u_charsToUChars(id, buff, resLen);
}
} else {
// get country or country_variant in `id'
uint32_t variantType = idForLocale(locale, id, sizeof(id), ec);
UErrorCode* ec) {
if (U_FAILURE(*ec)) { return 0; }
if (buffCapacity < 0 || (buff == nullptr && buffCapacity > 0)) {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
if (U_FAILURE(*ec)) {
return 0;
}
char currency[4]; // ISO currency codes are alpha3 codes.
UErrorCode localStatus = U_ZERO_ERROR;
int32_t resLen = uloc_getKeywordValue(locale, "currency",
currency, UPRV_LENGTHOF(currency), &localStatus);
if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) {
if (resLen < buffCapacity) {
T_CString_toUpperCase(currency);
u_charsToUChars(currency, buff, resLen);
}
return u_terminateUChars(buff, buffCapacity, resLen, ec);
}
// get country or country_variant in `id'
char id[ULOC_FULLNAME_CAPACITY];
uint32_t variantType = idForLocale(locale, id, UPRV_LENGTHOF(id), ec);
if (U_FAILURE(*ec)) {
return 0;
}
#if !UCONFIG_NO_SERVICE
const UChar* result = CReg::get(id);
if (result) {
if(buffCapacity > u_strlen(result)) {
u_strcpy(buff, result);
}
return u_strlen(result);
}
const UChar* result = CReg::get(id);
if (result) {
if(buffCapacity > u_strlen(result)) {
u_strcpy(buff, result);
}
resLen = u_strlen(result);
return u_terminateUChars(buff, buffCapacity, resLen, ec);
}
#endif
// Remove variants, which is only needed for registration.
char *idDelim = strchr(id, VAR_DELIM);
if (idDelim) {
idDelim[0] = 0;
}
// Remove variants, which is only needed for registration.
char *idDelim = uprv_strchr(id, VAR_DELIM);
if (idDelim) {
idDelim[0] = 0;
}
// Look up the CurrencyMap element in the root bundle.
UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus);
const UChar* s = NULL; // Currency code from data file.
if (id[0] == 0) {
// No point looking in the data for an empty string.
// This is what we would get.
localStatus = U_MISSING_RESOURCE_ERROR;
} else {
// Look up the CurrencyMap element in the root bundle.
localStatus = U_ZERO_ERROR;
UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
// Get the second item when PREEURO is requested, and this is a known Euro country.
// If the requested variant is PREEURO, and this isn't a Euro country,
// assume that the country changed over to the Euro in the future.
// This is probably an old version of ICU that hasn't been updated yet.
// The latest currency is probably correct.
if (U_SUCCESS(localStatus)) {
if ((variantType & VARIANT_IS_PREEURO) && u_strcmp(s, EUR_STR) == 0) {
currencyReq = ures_getByIndex(countryArray, 1, currencyReq, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
/*
Get the second item when PREEURO is requested, and this is a known Euro country.
If the requested variant is PREEURO, and this isn't a Euro country, assume
that the country changed over to the Euro in the future. This is probably
an old version of ICU that hasn't been updated yet. The latest currency is
probably correct.
*/
if (U_SUCCESS(localStatus)) {
if ((variantType & VARIANT_IS_PREEURO) && u_strcmp(s, EUR_STR) == 0) {
currencyReq = ures_getByIndex(countryArray, 1, currencyReq, &localStatus);
s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus);
}
else if ((variantType & VARIANT_IS_EURO)) {
s = EUR_STR;
}
}
ures_close(countryArray);
ures_close(currencyReq);
if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0)
{
// We don't know about it. Check to see if we support the variant.
uloc_getParent(locale, id, sizeof(id), ec);
*ec = U_USING_FALLBACK_WARNING;
return ucurr_forLocale(id, buff, buffCapacity, ec);
}
else if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
// There is nothing to fallback to. Report the failure/warning if possible.
*ec = localStatus;
}
if (U_SUCCESS(*ec)) {
if(buffCapacity > resLen) {
u_strcpy(buff, s);
}
}
} else if ((variantType & VARIANT_IS_EURO)) {
s = EUR_STR;
}
return u_terminateUChars(buff, buffCapacity, resLen, ec);
} else {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
}
ures_close(currencyReq);
ures_close(countryArray);
}
if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) {
// We don't know about it. Check to see if we support the variant.
uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec);
*ec = U_USING_FALLBACK_WARNING;
// TODO: Loop over the shortened id rather than recursing and
// looking again for a currency keyword.
return ucurr_forLocale(id, buff, buffCapacity, ec);
}
if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
// There is nothing to fallback to. Report the failure/warning if possible.
*ec = localStatus;
}
if (U_SUCCESS(*ec)) {
if(buffCapacity > resLen) {
u_strcpy(buff, s);
}
}
return resLen;
return u_terminateUChars(buff, buffCapacity, resLen, ec);
}
// end registration
@ -648,7 +653,16 @@ static UBool fallback(char *loc) {
return FALSE;
}
UErrorCode status = U_ZERO_ERROR;
uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
if (uprv_strcmp(loc, "en_GB") == 0) {
// HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en"
// in order to consume the correct data strings. This hack will be removed
// when proper data sink loading is implemented here.
// NOTE: "001" adds 1 char over "GB". However, both call sites allocate
// arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001).
uprv_strcpy(loc + 3, "001");
} else {
uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
}
/*
char *i = uprv_strrchr(loc, '_');
if (i == NULL) {
@ -2216,6 +2230,7 @@ ucurr_countCurrencies(const char* locale,
UErrorCode localStatus = U_ZERO_ERROR;
char id[ULOC_FULLNAME_CAPACITY];
uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus);
// get country or country_variant in `id'
/*uint32_t variantType =*/ idForLocale(locale, id, sizeof(id), ec);

Просмотреть файл

@ -206,6 +206,8 @@ setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to ca
return didUpdate;
}
#if U_PLATFORM_HAS_WINUWP_API == 0
static UBool
setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
UDataMemory tData;
@ -215,6 +217,8 @@ setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCod
return setCommonICUData(&tData, FALSE, pErrorCode);
}
#endif
static const char *
findBasename(const char *path) {
const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
@ -982,7 +986,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
/* init path iterator for individual files */
UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
while((pathBuffer = iter.next(pErrorCode)))
while((pathBuffer = iter.next(pErrorCode)) != NULL)
{
#ifdef UDATA_DEBUG
fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
@ -1165,7 +1169,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
altSepPath.append(path, *pErrorCode);
char *p;
while((p=uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR))) {
while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) {
*p = U_FILE_SEP_CHAR;
}
#if defined (UDATA_DEBUG)

Просмотреть файл

@ -79,14 +79,14 @@
* prime number while being less than a power of two.
*/
static const int32_t PRIMES[] = {
13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
1073741789, 2147483647 /*, 4294967291 */
};
#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES)
#define DEFAULT_PRIME_INDEX 3
#define DEFAULT_PRIME_INDEX 4
/* These ratios are tuned to the PRIMES array such that a resize
* places the table back into the zone of non-resizing. That is,
@ -231,7 +231,7 @@ _uhash_allocate(UHashtable *hash,
emptytok.pointer = NULL; /* Only one of these two is needed */
emptytok.integer = 0; /* but we don't know which one. */
limit = p + hash->length;
while (p < limit) {
p->key = emptytok;
@ -247,7 +247,7 @@ _uhash_allocate(UHashtable *hash,
static UHashtable*
_uhash_init(UHashtable *result,
UHashFunction *keyHash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t primeIndex,
@ -275,7 +275,7 @@ _uhash_init(UHashtable *result,
}
static UHashtable*
_uhash_create(UHashFunction *keyHash,
_uhash_create(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t primeIndex,
@ -415,7 +415,7 @@ _uhash_rehash(UHashtable *hash, UErrorCode *status) {
if (U_FAILURE(*status)) {
hash->elements = old;
hash->length = oldLength;
hash->length = oldLength;
return;
}
@ -536,7 +536,7 @@ _uhash_put(UHashtable *hash,
********************************************************************/
U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash,
uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status) {
@ -545,7 +545,7 @@ uhash_open(UHashFunction *keyHash,
}
U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash,
uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
@ -562,7 +562,7 @@ uhash_openSize(UHashFunction *keyHash,
U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *fillinResult,
UHashFunction *keyHash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status) {
@ -570,6 +570,22 @@ uhash_init(UHashtable *fillinResult,
return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
}
/*
 * Initialize a caller-supplied UHashtable, choosing its initial length
 * from the PRIMES table based on the requested size.
 */
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *fillinResult,
               UHashFunction *keyHash,
               UKeyComparator *keyComp,
               UValueComparator *valueComp,
               int32_t size,
               UErrorCode *status) {
    /* Select the first prime that is at least `size`; if none qualifies,
     * the scan stops on the last entry of PRIMES. */
    int32_t primeIndex;
    for (primeIndex = 0; primeIndex < (PRIMES_LENGTH-1); ++primeIndex) {
        if (PRIMES[primeIndex] >= size) {
            break;
        }
    }
    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, primeIndex, status);
}
U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash) {
if (hash == NULL) {
@ -604,7 +620,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
hash->keyComparator = fn;
return result;
}
U_CAPI UValueComparator *U_EXPORT2
U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
UValueComparator *result = hash->valueComparator;
hash->valueComparator = fn;
@ -630,7 +646,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
UErrorCode status = U_ZERO_ERROR;
_uhash_internalSetResizePolicy(hash, policy);
hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
_uhash_rehash(hash, &status);
}
@ -844,7 +860,7 @@ uhash_hashUChars(const UHashTok key) {
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key) {
const char *s = (const char *)key.pointer;
return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s));
return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, uprv_strlen(s)));
}
U_CAPI int32_t U_EXPORT2
@ -853,7 +869,7 @@ uhash_hashIChars(const UHashTok key) {
return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
}
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
int32_t count1, count2, pos, i;
@ -886,14 +902,14 @@ uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
if(count1!=count2){
return FALSE;
}
pos=UHASH_FIRST;
for(i=0; i<count1; i++){
const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
const UHashTok key1 = elem1->key;
const UHashTok val1 = elem1->value;
/* here the keys are not compared, instead the key form hash1 is used to fetch
* value from hash2. If the hashes are equal then then both hashes should
* value from hash2. If the hashes are equal then then both hashes should
* contain equal values for the same key!
*/
const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));

Просмотреть файл

@ -154,7 +154,7 @@ struct UHashtable {
* If NULL won't do anything */
/* Size parameters */
int32_t count; /* The number of key-value pairs in this table.
* 0 <= count <= length. In practice we
* never let count == length (see code). */
@ -162,12 +162,12 @@ struct UHashtable {
* and values. Must be prime. */
/* Rehashing thresholds */
int32_t highWaterMark; /* If count > highWaterMark, rehash */
int32_t lowWaterMark; /* If count < lowWaterMark, rehash */
float highWaterRatio; /* 0..1; high water as a fraction of length */
float lowWaterRatio; /* 0..1; low water as a fraction of length */
int8_t primeIndex; /* Index into our prime table for length.
* length == PRIMES[primeIndex] */
UBool allocated; /* Was this UHashtable allocated? */
@ -190,7 +190,7 @@ U_CDECL_END
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
U_CAPI UHashtable* U_EXPORT2
U_CAPI UHashtable* U_EXPORT2
uhash_open(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
@ -207,7 +207,7 @@ uhash_open(UHashFunction *keyHash,
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_open
*/
U_CAPI UHashtable* U_EXPORT2
U_CAPI UHashtable* U_EXPORT2
uhash_openSize(UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
@ -224,18 +224,37 @@ uhash_openSize(UHashFunction *keyHash,
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
U_CAPI UHashtable* U_EXPORT2
U_CAPI UHashtable* U_EXPORT2
uhash_init(UHashtable *hash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
UErrorCode *status);
/**
* Initialize an existing UHashtable, using a caller-specified initial
* capacity.
* @param hash The existing UHashtable to initialize.
* @param keyHash A pointer to the key hashing function. Must not be
* NULL.
* @param keyComp A pointer to the function that compares keys. Must
* not be NULL.
* @param valueComp A pointer to the function that compares values.
* @param size The initial capacity of this hash table.
* @param status A pointer to an UErrorCode to receive any errors.
* @return A pointer to a UHashtable, or 0 if an error occurred.
* @see uhash_openSize
*/
U_CAPI UHashtable* U_EXPORT2
uhash_initSize(UHashtable *hash,
UHashFunction *keyHash,
UKeyComparator *keyComp,
UValueComparator *valueComp,
int32_t size,
UErrorCode *status);
/**
* Close a UHashtable, releasing the memory used.
* @param hash The UHashtable to close. If hash is NULL no operation is performed.
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_close(UHashtable *hash);
@ -246,7 +265,7 @@ uhash_close(UHashtable *hash);
* @param fn the function to be used hash keys; must not be NULL
* @return the previous key hasher; non-NULL
*/
U_CAPI UHashFunction *U_EXPORT2
U_CAPI UHashFunction *U_EXPORT2
uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
/**
@ -256,7 +275,7 @@ uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn);
* @param fn the function to be used compare keys; must not be NULL
* @return the previous key comparator; non-NULL
*/
U_CAPI UKeyComparator *U_EXPORT2
U_CAPI UKeyComparator *U_EXPORT2
uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
/**
@ -266,7 +285,7 @@ uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn);
* @param fn the function to be used to compare values; must not be NULL
* @return the previous value comparator; non-NULL
*/
U_CAPI UValueComparator *U_EXPORT2
U_CAPI UValueComparator *U_EXPORT2
uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
/**
@ -279,7 +298,7 @@ uhash_setValueComparator(UHashtable *hash, UValueComparator *fn);
* @param fn the function to be used delete keys, or NULL
* @return the previous key deleter; may be NULL
*/
U_CAPI UObjectDeleter *U_EXPORT2
U_CAPI UObjectDeleter *U_EXPORT2
uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
/**
@ -292,7 +311,7 @@ uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn);
* @param fn the function to be used delete values, or NULL
* @return the previous value deleter; may be NULL
*/
U_CAPI UObjectDeleter *U_EXPORT2
U_CAPI UObjectDeleter *U_EXPORT2
uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
/**
@ -302,7 +321,7 @@ uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
* @param hash The UHashtable to set
* @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED}
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
/**
@ -310,7 +329,7 @@ uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy);
* @param hash The UHashtable to query.
* @return The number of key-value pairs stored in hash.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_count(const UHashtable *hash);
/**
@ -326,7 +345,7 @@ uhash_count(const UHashtable *hash);
* @return The previous value, or NULL if none.
* @see uhash_get
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_put(UHashtable *hash,
void *key,
void *value,
@ -344,7 +363,7 @@ uhash_put(UHashtable *hash,
* @return The previous value, or NULL if none.
* @see uhash_get
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_iput(UHashtable *hash,
int32_t key,
void* value,
@ -362,7 +381,7 @@ uhash_iput(UHashtable *hash,
* @return The previous value, or 0 if none.
* @see uhash_get
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_puti(UHashtable *hash,
void* key,
int32_t value,
@ -380,7 +399,7 @@ uhash_puti(UHashtable *hash,
* @return The previous value, or 0 if none.
* @see uhash_get
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_iputi(UHashtable *hash,
int32_t key,
int32_t value,
@ -393,8 +412,8 @@ uhash_iputi(UHashtable *hash,
* @param key A pointer key stored in a hashtable
* @return The requested item, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
uhash_get(const UHashtable *hash,
U_CAPI void* U_EXPORT2
uhash_get(const UHashtable *hash,
const void *key);
/**
@ -404,7 +423,7 @@ uhash_get(const UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The requested item, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_iget(const UHashtable *hash,
int32_t key);
@ -415,7 +434,7 @@ uhash_iget(const UHashtable *hash,
* @param key A pointer key stored in a hashtable
* @return The requested item, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_geti(const UHashtable *hash,
const void* key);
/**
@ -425,7 +444,7 @@ uhash_geti(const UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The requested item, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_igeti(const UHashtable *hash,
int32_t key);
@ -435,7 +454,7 @@ uhash_igeti(const UHashtable *hash,
* @param key A key stored in a hashtable
* @return The item removed, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_remove(UHashtable *hash,
const void *key);
@ -445,7 +464,7 @@ uhash_remove(UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The item removed, or NULL if not found.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_iremove(UHashtable *hash,
int32_t key);
@ -455,7 +474,7 @@ uhash_iremove(UHashtable *hash,
* @param key An key stored in a hashtable
* @return The item removed, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_removei(UHashtable *hash,
const void* key);
@ -465,7 +484,7 @@ uhash_removei(UHashtable *hash,
* @param key An integer key stored in a hashtable
* @return The item removed, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_iremovei(UHashtable *hash,
int32_t key);
@ -473,7 +492,7 @@ uhash_iremovei(UHashtable *hash,
* Remove all items from a UHashtable.
* @param hash The target UHashtable.
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable *hash);
/**
@ -487,7 +506,7 @@ uhash_removeAll(UHashtable *hash);
* @param key A key stored in a hashtable
* @return a hash element, or NULL if the key is not found.
*/
U_CAPI const UHashElement* U_EXPORT2
U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable *hash, const void* key);
/**
@ -510,7 +529,7 @@ uhash_find(const UHashtable *hash, const void* key);
* @return a hash element, or NULL if no further key-value pairs
* exist in the table.
*/
U_CAPI const UHashElement* U_EXPORT2
U_CAPI const UHashElement* U_EXPORT2
uhash_nextElement(const UHashtable *hash,
int32_t *pos);
@ -525,7 +544,7 @@ uhash_nextElement(const UHashtable *hash,
* modified.
* @return the value that was removed.
*/
U_CAPI void* U_EXPORT2
U_CAPI void* U_EXPORT2
uhash_removeElement(UHashtable *hash, const UHashElement* e);
/********************************************************************
@ -537,7 +556,7 @@ uhash_removeElement(UHashtable *hash, const UHashElement* e);
* @param i The given integer
* @return a UHashTok for an integer.
*/
/*U_CAPI UHashTok U_EXPORT2
/*U_CAPI UHashTok U_EXPORT2
uhash_toki(int32_t i);*/
/**
@ -545,7 +564,7 @@ uhash_toki(int32_t i);*/
* @param p The given pointer
* @return a UHashTok for a pointer.
*/
/*U_CAPI UHashTok U_EXPORT2
/*U_CAPI UHashTok U_EXPORT2
uhash_tokp(void* p);*/
/********************************************************************
@ -559,7 +578,7 @@ uhash_tokp(void* p);*/
* @param key The string (const UChar*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key);
/**
@ -569,7 +588,7 @@ uhash_hashUChars(const UHashTok key);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key);
/**
@ -589,7 +608,7 @@ uhash_hashIChars(const UHashTok key);
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareUChars(const UHashTok key1, const UHashTok key2);
/**
@ -599,7 +618,7 @@ uhash_compareUChars(const UHashTok key1, const UHashTok key2);
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareChars(const UHashTok key1, const UHashTok key2);
/**
@ -609,7 +628,7 @@ uhash_compareChars(const UHashTok key1, const UHashTok key2);
* @param key2 The string for comparison
* @return true if key1 and key2 are equal, return false otherwise.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareIChars(const UHashTok key1, const UHashTok key2);
/********************************************************************
@ -621,7 +640,7 @@ uhash_compareIChars(const UHashTok key1, const UHashTok key2);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key);
/**
@ -630,7 +649,7 @@ uhash_hashUnicodeString(const UElement key);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key);
/********************************************************************
@ -642,7 +661,7 @@ uhash_hashCaselessUnicodeString(const UElement key);
* @param key The string (const char*) to hash.
* @return A hash code for the key.
*/
U_CAPI int32_t U_EXPORT2
U_CAPI int32_t U_EXPORT2
uhash_hashLong(const UHashTok key);
/**
@ -651,7 +670,7 @@ uhash_hashLong(const UHashTok key);
* @param Key2 The integer for comparison
* @return true if key1 and key2 are equal, return false otherwise
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_compareLong(const UHashTok key1, const UHashTok key2);
/********************************************************************
@ -662,7 +681,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2);
* Deleter for Hashtable objects.
* @param obj The object to be deleted
*/
U_CAPI void U_EXPORT2
U_CAPI void U_EXPORT2
uhash_deleteHashtable(void *obj);
/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
@ -673,7 +692,7 @@ uhash_deleteHashtable(void *obj);
* @param hash2
* @return true if the hashtables are equal and false if not.
*/
U_CAPI UBool U_EXPORT2
U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2);

Просмотреть файл

@ -573,7 +573,7 @@ uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
uint8_t *orig_dst = dst;
if(n==-1) {
n = uprv_strlen((const char*)src)+1; /* copy NUL */
n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
}
/* copy non-null */
while(*src && n>0) {
@ -594,7 +594,7 @@ uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
uint8_t *orig_dst = dst;
if(n==-1) {
n = uprv_strlen((const char*)src)+1; /* copy NUL */
n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
}
/* copy non-null */
while(*src && n>0) {

Просмотреть файл

@ -252,7 +252,7 @@ U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t
s = (const char *)ulist_getNext((UList *)(en->context));
if (s != NULL && resultLength != NULL) {
*resultLength = uprv_strlen(s);
*resultLength = static_cast<int32_t>(uprv_strlen(s));
}
return s;
}

Просмотреть файл

@ -98,6 +98,7 @@ locale_getKeywords(const char *localeID,
*/
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
"aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb",
"af", "afh", "agq", "ain", "ak", "akk", "akz", "ale",
@ -109,7 +110,7 @@ static const char * const LANGUAGES[] = {
"bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
"bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
"brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
"ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg",
"ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
"ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
"chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
"cs", "csb", "cu", "cv", "cy",
@ -213,6 +214,7 @@ static const char* const REPLACEMENT_LANGUAGES[]={
*/
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
"aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
"afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
@ -224,7 +226,7 @@ static const char * const LANGUAGES_3[] = {
"bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
"bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
"brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
"cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
"cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
"cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
"chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
"ces", "csb", "chu", "chv", "cym",
@ -529,14 +531,16 @@ static const VariantMap VARIANT_MAP[] = {
/* True when id is non-NULL, contains no "@", and its shortest subtag
 * (as computed by getShortestSubtagLength) is exactly one character long.
 * The test is applied to the macro argument itself rather than to a variable
 * that happens to be named localeID at the expansion site.
 * Note: id is evaluated more than once; pass a side-effect-free expression. */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
finalID=id; \
if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
} else { \
finalID=buffer; \
}
/* Gets the size of the shortest subtag in the given localeID. */
static int32_t getShortestSubtagLength(const char *localeID) {
int32_t localeIDLength = uprv_strlen(localeID);
int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
int32_t length = localeIDLength;
int32_t tmpLength = 0;
int32_t i;
@ -2486,7 +2490,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
#if defined(ULOC_DEBUG)
fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
#endif
while((l=uenum_next(availableLocales, NULL, status))) {
while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG)
fprintf(stderr," %s\n", l);
#endif
@ -2526,7 +2530,7 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
#if defined(ULOC_DEBUG)
fprintf(stderr,"Try: [%s]", fallbackList[i]);
#endif
while((l=uenum_next(availableLocales, NULL, status))) {
while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG)
fprintf(stderr," %s\n", l);
#endif

Просмотреть файл

@ -1022,7 +1022,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
no known mapping. This implementation normalizes the
value to lower case
*/
int32_t bcpValueLen = uprv_strlen(bcpValue);
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
if (bcpValueLen < extBufCapacity) {
uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf);
@ -1288,7 +1288,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
bufIdx++;
}
len = uprv_strlen(attr->attribute);
len = static_cast<int32_t>(uprv_strlen(attr->attribute));
uprv_memcpy(buf + bufIdx, attr->attribute, len);
bufIdx += len;
@ -1841,7 +1841,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
int32_t newTagLength;
grandfatheredLen = tagLen; /* back up for output parsedLen */
newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
if (tagLen < newTagLength) {
uprv_free(tagBuf);
tagBuf = (char*)uprv_malloc(newTagLength + 1);

Просмотреть файл

@ -102,10 +102,7 @@
{
HANDLE map;
HANDLE file;
SECURITY_ATTRIBUTES mappingAttributes;
SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL;
SECURITY_DESCRIPTOR securityDesc;
UDataMemory_init(pData); /* Clear the output struct. */
/* open the input file */
@ -143,6 +140,11 @@
This is required for multiuser systems on Windows 2000 SP4 and beyond */
// TODO: UWP does not have this function and I do not think it is required?
#if U_PLATFORM_HAS_WINUWP_API == 0
SECURITY_ATTRIBUTES mappingAttributes;
SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL;
SECURITY_DESCRIPTOR securityDesc;
if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) {
/* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */
if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) {

Просмотреть файл

@ -132,7 +132,7 @@ umtx_condBroadcast(UConditionVar *condition) {
}
U_CAPI void U_EXPORT2
umtx_condSignal(UConditionVar *condition) {
umtx_condSignal(UConditionVar * /* condition */) {
// Function not implemented. There is no immediate requirement from ICU to have it.
// Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be
// changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function

Просмотреть файл

@ -250,7 +250,7 @@ public:
virtual int32_t next(void) = 0;
/**
* Return character index of the current interator position within the text.
* Return character index of the current iterator position within the text.
* @return The boundary most recently returned.
* @stable ICU 2.0
*/
@ -277,7 +277,7 @@ public:
virtual int32_t preceding(int32_t offset) = 0;
/**
* Return true if the specfied position is a boundary position.
* Return true if the specified position is a boundary position.
* As a side effect, the current position of the iterator is set
* to the first boundary position at or following the specified offset.
* @param offset the offset to check.
@ -331,7 +331,7 @@ public:
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
* normal way, without attemtping to store any values.
* normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
@ -469,7 +469,7 @@ public:
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/**
* Get name of the object for the desired Locale, in the desired langauge.
* Get name of the object for the desired Locale, in the desired language.
* @param objectLocale must be from getAvailableLocales.
* @param displayLocale specifies the desired locale for output.
* @param name the fill-in parameter of the return value
@ -482,7 +482,7 @@ public:
UnicodeString& name);
/**
* Get name of the object for the desired Locale, in the langauge of the
* Get name of the object for the desired Locale, in the language of the
* default locale.
* @param objectLocale must be from getMatchingLocales
* @param name the fill-in parameter of the return value
@ -629,10 +629,12 @@ protected:
/** @internal */
BreakIterator();
/** @internal */
BreakIterator (const BreakIterator &other) : UObject(other) {}
BreakIterator (const BreakIterator &other);
#ifndef U_HIDE_INTERNAL_API
/** @internal */
BreakIterator (const Locale& valid, const Locale& actual);
BreakIterator (const Locale& valid, const Locale &actual);
/** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
BreakIterator &operator = (const BreakIterator &other);
#endif /* U_HIDE_INTERNAL_API */
private:
@ -640,12 +642,6 @@ private:
/** @internal */
char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY];
/**
* The assignment operator has no real implementation.
* It's provided to make the compiler happy. Do not call.
*/
BreakIterator& operator=(const BreakIterator&);
};
#ifndef U_HIDE_DEPRECATED_API
@ -661,5 +657,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif // _BRKITER
#endif // BRKITER_H
//eof

Просмотреть файл

@ -126,8 +126,8 @@ public:
virtual void Flush();
private:
ByteSink(const ByteSink &); // copy constructor not implemented
ByteSink &operator=(const ByteSink &); // assignment operator not implemented
ByteSink(const ByteSink &) = delete;
ByteSink &operator=(const ByteSink &) = delete;
};
// -------------------------------------------------------------
@ -217,9 +217,10 @@ private:
int32_t size_;
int32_t appended_;
UBool overflowed_;
CheckedArrayByteSink(); ///< default constructor not implemented
CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented
CheckedArrayByteSink() = delete;
CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
};
/**
@ -236,6 +237,21 @@ class StringByteSink : public ByteSink {
* @stable ICU 4.2
*/
StringByteSink(StringClass* dest) : dest_(dest) { }
#ifndef U_HIDE_DRAFT_API
/**
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
*
* @param dest pointer to string object to append to
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
* @draft ICU 60
*/
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
// Reserve only when a positive amount was requested and it exceeds the spare
// room dest already has (capacity() - length()). The cast to uint32_t is safe
// because the value has just been checked to be > 0.
// dest is dereferenced here, so it must not be null when
// initialAppendCapacity > 0.
if (initialAppendCapacity > 0 &&
(uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
dest->reserve(dest->length() + initialAppendCapacity);
}
}
#endif // U_HIDE_DRAFT_API
/**
* Append "bytes[0,n-1]" to this.
* @param data the pointer to the bytes
@ -245,9 +261,10 @@ class StringByteSink : public ByteSink {
virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
private:
StringClass* dest_;
StringByteSink(); ///< default constructor not implemented
StringByteSink(const StringByteSink &); ///< copy constructor not implemented
StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented
StringByteSink() = delete;
StringByteSink(const StringByteSink &) = delete;
StringByteSink &operator=(const StringByteSink &) = delete;
};
U_NAMESPACE_END

Просмотреть файл

@ -8,6 +8,7 @@
#define __CASEMAP_H__
#include "unicode/utypes.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
/**
@ -20,6 +21,7 @@ U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
class BreakIterator;
class ByteSink;
class Edits;
/**
@ -36,7 +38,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -48,7 +50,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -71,7 +74,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -83,7 +86,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -112,8 +116,10 @@ public:
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* and used one or more times for iteration (first() and next()).
@ -130,7 +136,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -159,7 +166,7 @@ public:
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -172,7 +179,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -188,6 +196,129 @@ public:
char16_t *dest, int32_t destCapacity, Edits *edits,
UErrorCode &errorCode);
/**
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToLower
* @draft ICU 60
*/
static void utf8ToLower(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
/**
* Uppercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToUpper
* @draft ICU 60
*/
static void utf8ToUpper(
const char *locale, uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#if !UCONFIG_NO_BREAK_ITERATION
/**
* Titlecases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
*
* Titlecasing uses a break iterator to find the first characters of words
* that are to be titlecased. It titlecases those characters and lowercases
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setUText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8ToTitle
* @draft ICU 60
*/
static void utf8ToTitle(
const char *locale, uint32_t options, BreakIterator *iter,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
#endif // UCONFIG_NO_BREAK_ITERATION
/**
* Case-folds a UTF-8 string and optionally records edits.
*
* Case folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
* and dotless i that are marked with 'T' in CaseFolding.txt.
*
* The result may be longer or shorter than the original.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param sink A ByteSink to which the result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
*
* @see ucasemap_utf8FoldCase
* @draft ICU 60
*/
static void utf8Fold(
uint32_t options,
StringPiece src, ByteSink &sink, Edits *edits,
UErrorCode &errorCode);
/**
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
@ -195,7 +326,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -207,7 +338,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -217,7 +349,7 @@ public:
* @see ucasemap_utf8ToLower
* @draft ICU 59
*/
static int32_t utf8ToLower(
static int32_t utf8ToLower(
const char *locale, uint32_t options,
const char *src, int32_t srcLength,
char *dest, int32_t destCapacity, Edits *edits,
@ -230,7 +362,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@ -242,7 +374,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -271,10 +404,12 @@ public:
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* It is set to the source string (setUText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
@ -289,7 +424,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@ -317,7 +453,7 @@ public:
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@ -330,7 +466,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first. edits can be NULL.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.

Просмотреть файл

@ -95,45 +95,45 @@ private:
return reinterpret_cast<char16_t *>(t);
}
char16_t *p;
char16_t *p_;
#else
union {
char16_t *cp;
uint16_t *up;
wchar_t *wp;
} u;
} u_;
#endif
};
#ifdef U_ALIASING_BARRIER
Char16Ptr::Char16Ptr(char16_t *p) : p(p) {}
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {}
Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {}
Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
#endif
Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {}
Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
Char16Ptr::~Char16Ptr() {
U_ALIASING_BARRIER(p);
U_ALIASING_BARRIER(p_);
}
char16_t *Char16Ptr::get() const { return p; }
char16_t *Char16Ptr::get() const { return p_; }
#else
Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; }
Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; }
Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; }
Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
#endif
Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; }
Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
Char16Ptr::~Char16Ptr() {}
char16_t *Char16Ptr::get() const { return u.cp; }
char16_t *Char16Ptr::get() const { return u_.cp; }
#endif
@ -203,45 +203,45 @@ private:
return reinterpret_cast<const char16_t *>(t);
}
const char16_t *p;
const char16_t *p_;
#else
union {
const char16_t *cp;
const uint16_t *up;
const wchar_t *wp;
} u;
} u_;
#endif
};
#ifdef U_ALIASING_BARRIER
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {}
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {}
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {}
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
#endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {}
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
ConstChar16Ptr::~ConstChar16Ptr() {
U_ALIASING_BARRIER(p);
U_ALIASING_BARRIER(p_);
}
const char16_t *ConstChar16Ptr::get() const { return p; }
const char16_t *ConstChar16Ptr::get() const { return p_; }
#else
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; }
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; }
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; }
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
#endif
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; }
ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
ConstChar16Ptr::~ConstChar16Ptr() {}
const char16_t *ConstChar16Ptr::get() const { return u.cp; }
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif

Просмотреть файл

@ -140,7 +140,7 @@
* <tr>
* <td>Number Formatting</td>
* <td>unum.h</td>
* <td>icu::NumberFormat</td>
* <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td>
* </tr>
* <tr>
* <td>Number Spellout<br/>(Rule Based Number Formatting)</td>

Просмотреть файл

@ -36,19 +36,61 @@ public:
* @draft ICU 59
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
errorCode(U_ZERO_ERROR) {}
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
errorCode_(U_ZERO_ERROR) {}
/**
* Copy constructor.
* @param other source edits
* @draft ICU 60
*/
Edits(const Edits &other) :
// Start on this object's own stackArray/STACK_CAPACITY; the scalar
// bookkeeping fields (length, delta, numChanges, errorCode_) are taken
// from other directly.
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
delta(other.delta), numChanges(other.numChanges),
errorCode_(other.errorCode_) {
// copyArray() is defined outside this view; presumably it duplicates
// other's edit records into this object -- confirm against its definition.
copyArray(other);
}
/**
* Move constructor, might leave src empty.
* This object will have the same contents that the source object had.
* @param src source edits
* @draft ICU 60
*/
Edits(Edits &&src) U_NOEXCEPT :
// Start on this object's own stackArray/STACK_CAPACITY; the scalar
// bookkeeping fields (length, delta, numChanges, errorCode_) are taken
// from src directly.
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
delta(src.delta), numChanges(src.numChanges),
errorCode_(src.errorCode_) {
// moveArray() is defined outside this view; presumably it takes over
// src's edit records (possibly leaving src empty) -- confirm against its
// definition.
moveArray(src);
}
/**
* Destructor.
* @draft ICU 59
*/
~Edits();
/**
* Assignment operator.
* @param other source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(const Edits &other);
/**
* Move assignment operator, might leave src empty.
* This object will have the same contents that the source object had.
* The behavior is undefined if *this and src are the same object.
* @param src source edits
* @return *this
* @draft ICU 60
*/
Edits &operator=(Edits &&src) U_NOEXCEPT;
/**
* Resets the data but may not release memory.
* @draft ICU 59
*/
void reset();
void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
@ -66,6 +108,9 @@ public:
* Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code.
* @param outErrorCode Set to an error code if it does not contain one already
* and an error occurred while recording edits.
* Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59
*/
@ -81,7 +126,13 @@ public:
* @return TRUE if there are any change edits
* @draft ICU 59
*/
UBool hasChanges() const;
UBool hasChanges() const { return numChanges != 0; }
/**
* @return the number of change edits
* @draft ICU 60
*/
int32_t numberOfChanges() const { return numChanges; }
/**
* Access to the list of edits.
@ -90,6 +141,15 @@ public:
* @draft ICU 59
*/
struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
* Default constructor, empty iterator.
* @draft ICU 60
*/
Iterator() :
array(nullptr), index(0), length(0),
remaining(0), onlyChanges_(FALSE), coarse(FALSE),
dir(0), changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
/**
* Copy constructor.
* @draft ICU 59
@ -103,6 +163,9 @@ public:
/**
* Advances to the next edit.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if there is another edit
* @draft ICU 59
*/
@ -121,10 +184,86 @@ public:
* if the source index is out of bounds for the source string.
*
* @param i source index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the source index was found
* @draft ICU 59
*/
UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, TRUE, errorCode) == 0;
}
/**
* Finds the edit that contains the destination index.
* The destination index may be found in a non-change
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
* The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
* The iterator state after this search is undefined
* if the destination index is out of bounds for the destination string.
*
* @param i destination index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the destination index was found
* @draft ICU 60
*/
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
return findIndex(i, FALSE, errorCode) == 0;
}
/**
* Returns the destination index corresponding to the given source index.
* If the source index is inside a change edit (not at its start),
* then the destination index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
*
* (This means that indexes to the start and middle of an edit,
* for example around a grapheme cluster, are mapped to indexes
* encompassing the entire edit.
* The alternative, mapping an interior index to the start,
* would map such an interval to an empty one.)
*
* This operation will usually but not always modify this object.
* The iterator state after this search is undefined.
*
* @param i source index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return destination index; undefined if i is not 0..string length
* @draft ICU 60
*/
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
/**
* Returns the source index corresponding to the given destination index.
* If the destination index is inside a change edit (not at its start),
* then the source index at the end of that edit is returned,
* since there is no information about index mapping inside a change edit.
*
* (This means that indexes to the start and middle of an edit,
* for example around a grapheme cluster, are mapped to indexes
* encompassing the entire edit.
* The alternative, mapping an interior index to the start,
* would map such an interval to an empty one.)
*
* This operation will usually but not always modify this object.
* The iterator state after this search is undefined.
*
* @param i destination index
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return source index; undefined if i is not 0..string length
* @draft ICU 60
*/
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
@ -167,15 +306,22 @@ public:
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
int32_t readLength(int32_t head);
void updateIndexes();
void updateNextIndexes();
void updatePreviousIndexes();
UBool noNext();
UBool next(UBool onlyChanges, UErrorCode &errorCode);
UBool previous(UErrorCode &errorCode);
/** @return -1: error or i<0; 0: found; 1: i>=string length */
int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
const uint16_t *array;
int32_t index, length;
// 0 if we are not within compressed equal-length changes.
// Otherwise the number of remaining changes, including the current one.
int32_t remaining;
UBool onlyChanges_, coarse;
int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
UBool changed;
int32_t oldLength_, newLength_;
int32_t srcIndex, replIndex, destIndex;
@ -219,9 +365,39 @@ public:
return Iterator(array, length, FALSE, FALSE);
}
/**
* Merges the two input Edits and appends the result to this object.
*
* Consider two string transformations (for example, normalization and case mapping)
* where each records Edits in addition to writing an output string.<br>
* Edits ab reflect how substrings of input string a
* map to substrings of intermediate string b.<br>
* Edits bc reflect how substrings of intermediate string b
* map to substrings of output string c.<br>
* This function merges ab and bc such that the additional edits
* recorded in this object reflect how substrings of input string a
* map to substrings of output string c.
*
* If unrelated Edits are passed in where the output string of the first
* has a different length than the input string of the second,
* then a U_ILLEGAL_ARGUMENT_ERROR is reported.
*
* @param ab reflects how substrings of input string a
* map to substrings of intermediate string b.
* @param bc reflects how substrings of intermediate string b
* map to substrings of output string c.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return *this, with the merged edits appended
* @draft ICU 60
*/
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
private:
Edits(const Edits &) = delete;
Edits &operator=(const Edits &) = delete;
void releaseArray() U_NOEXCEPT;
Edits &copyArray(const Edits &other);
Edits &moveArray(Edits &src) U_NOEXCEPT;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
@ -234,7 +410,8 @@ private:
int32_t capacity;
int32_t length;
int32_t delta;
UErrorCode errorCode;
int32_t numChanges;
UErrorCode errorCode_;
uint16_t stackArray[STACK_CAPACITY];
};

Просмотреть файл

@ -55,14 +55,30 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/
static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of createEmptyInstance, which has
* identical behavior.
* @param status The error code.
* @return the new builder
* @deprecated ICU 60 use createEmptyInstance instead
* @see createEmptyInstance()
*/
static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
return createEmptyInstance(status);
}
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**
* Construct an empty FilteredBreakIteratorBuilder.
* In this state, it will not suppress any segment boundaries.
* @param status The error code.
* @return the new builder
* @stable ICU 56
* @draft ICU 60
*/
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
#endif /* U_HIDE_DRAFT_API */
/**
* Suppress a certain string from being the end of a segment.
@ -89,6 +105,20 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
#ifndef U_HIDE_DEPRECATED_API
/**
* This function has been deprecated in favor of wrapIteratorWithFilter().
* The behavior is identical.
* @param adoptBreakIterator the break iterator to adopt
* @param status error code
* @return the new BreakIterator, owned by the caller.
* @deprecated ICU 60 use wrapIteratorWithFilter() instead
* @see wrapIteratorWithFilter()
*/
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**
* Wrap (adopt) an existing break iterator in a new filtered instance.
* The resulting BreakIterator is owned by the caller.
@ -96,12 +126,16 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* Note that the adoptBreakIterator is adopted by the new BreakIterator
* and should no longer be used by the caller.
* The FilteredBreakIteratorBuilder may be reused.
* This function is an alias for build()
* @param adoptBreakIterator the break iterator to adopt
* @param status error code
* @return the new BreakIterator, owned by the caller.
* @stable ICU 56
* @draft ICU 60
*/
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
return build(adoptBreakIterator, status);
}
#endif /* U_HIDE_DRAFT_API */
protected:
/**

Просмотреть файл

@ -213,7 +213,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
@ -222,7 +221,6 @@ public:
LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
#endif
/**
* Destructor deletes the object it owns.
* @stable ICU 4.4
@ -230,7 +228,6 @@ public:
~LocalPointer() {
delete LocalPointerBase<T>::ptr;
}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
@ -241,7 +238,6 @@ public:
LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
return moveFrom(src);
}
#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/**
* Move assignment, leaves src with isNull().
@ -362,7 +358,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
@ -371,7 +366,6 @@ public:
LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
#endif
/**
* Destructor deletes the array it owns.
* @stable ICU 4.4
@ -379,7 +373,6 @@ public:
~LocalArray() {
delete[] LocalPointerBase<T>::ptr;
}
#if U_HAVE_RVALUE_REFERENCES
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
@ -390,7 +383,6 @@ public:
LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
return moveFrom(src);
}
#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/**
* Move assignment, leaves src with isNull().
@ -492,7 +484,6 @@ public:
* @see LocalPointer
* @stable ICU 4.4
*/
#if U_HAVE_RVALUE_REFERENCES
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \
@ -526,34 +517,6 @@ public:
ptr=p; \
} \
}
#else
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \
using LocalPointerBase<Type>::operator*; \
using LocalPointerBase<Type>::operator->; \
explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
~LocalPointerClassName() { closeFunction(ptr); } \
LocalPointerClassName &moveFrom(LocalPointerClassName &src) U_NOEXCEPT { \
if (ptr != NULL) { closeFunction(ptr); } \
LocalPointerBase<Type>::ptr=src.ptr; \
src.ptr=NULL; \
return *this; \
} \
void swap(LocalPointerClassName &other) U_NOEXCEPT { \
Type *temp=LocalPointerBase<Type>::ptr; \
LocalPointerBase<Type>::ptr=other.ptr; \
other.ptr=temp; \
} \
friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
p1.swap(p2); \
} \
void adoptInstead(Type *p) { \
if (ptr != NULL) { closeFunction(ptr); } \
ptr=p; \
} \
}
#endif
U_NAMESPACE_END

Просмотреть файл

@ -88,7 +88,7 @@ class UnicodeString;
* <P>
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific.
* For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX.
* For example, use REVISED for a language's revised script orthography, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and
* put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with

Просмотреть файл

@ -28,12 +28,15 @@
#if !UCONFIG_NO_NORMALIZATION
#include "unicode/stringpiece.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm2.h"
U_NAMESPACE_BEGIN
class ByteSink;
/**
* Unicode normalization functionality for standard Unicode normalization or
* for using custom mapping tables.
@ -215,6 +218,35 @@ public:
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const = 0;
/**
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
Edits *edits, UErrorCode &errorCode) const;
/**
* Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string.
@ -340,6 +372,30 @@ public:
*/
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
/**
* Tests if the UTF-8 string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
* (which is only possible for the two COMPOSE modes) this method
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
/**
* Tests if the string is normalized.
@ -479,7 +535,36 @@ public:
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const;
UErrorCode &errorCode) const U_OVERRIDE;
/**
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
* @param sink A ByteSink to which the normalized UTF-8 result string is written.
* sink.Flush() is called at the end.
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
* This function calls edits->reset() first unless
* options includes U_EDITS_NO_RESET. edits can be nullptr.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 60
*/
virtual void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string.
@ -497,7 +582,7 @@ public:
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const;
UErrorCode &errorCode) const U_OVERRIDE;
/**
* Appends the second string to the first string
* (merging them at the boundary) and returns the first string.
@ -515,7 +600,7 @@ public:
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const;
UErrorCode &errorCode) const U_OVERRIDE;
/**
* Gets the decomposition mapping of c.
@ -529,7 +614,7 @@ public:
* @stable ICU 4.6
*/
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const;
getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/**
* Gets the raw decomposition mapping of c.
@ -543,7 +628,7 @@ public:
* @stable ICU 49
*/
virtual UBool
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/**
* Performs pairwise composition of a & b and returns the composite if there is one.
@ -556,7 +641,7 @@ public:
* @stable ICU 49
*/
virtual UChar32
composePair(UChar32 a, UChar32 b) const;
composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
/**
* Gets the combining class of c.
@ -567,7 +652,7 @@ public:
* @stable ICU 49
*/
virtual uint8_t
getCombiningClass(UChar32 c) const;
getCombiningClass(UChar32 c) const U_OVERRIDE;
/**
* Tests if the string is normalized.
@ -581,7 +666,30 @@ public:
* @stable ICU 4.4
*/
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Tests if the UTF-8 string is normalized.
* Internally, in cases where the quickCheck() method would return "maybe"
* (which is only possible for the two COMPOSE modes) this method
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return TRUE if s is normalized
* @draft ICU 60
*/
virtual UBool
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Tests if the string is normalized.
* For details see the Normalizer2 base class documentation.
@ -594,7 +702,7 @@ public:
* @stable ICU 4.4
*/
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Returns the end of the normalized substring of the input string.
* For details see the Normalizer2 base class documentation.
@ -607,7 +715,7 @@ public:
* @stable ICU 4.4
*/
virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Tests if the character always has a normalization boundary before it,
@ -617,7 +725,7 @@ public:
* @return TRUE if c has a normalization boundary before it
* @stable ICU 4.4
*/
virtual UBool hasBoundaryBefore(UChar32 c) const;
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
/**
* Tests if the character always has a normalization boundary after it,
@ -627,7 +735,7 @@ public:
* @return TRUE if c has a normalization boundary after it
* @stable ICU 4.4
*/
virtual UBool hasBoundaryAfter(UChar32 c) const;
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
/**
* Tests if the character is normalization-inert.
@ -636,7 +744,7 @@ public:
* @return TRUE if c is normalization-inert
* @stable ICU 4.4
*/
virtual UBool isInert(UChar32 c) const;
virtual UBool isInert(UChar32 c) const U_OVERRIDE;
private:
UnicodeString &
normalize(const UnicodeString &src,
@ -644,6 +752,12 @@ private:
USetSpanCondition spanCondition,
UErrorCode &errorCode) const;
void
normalizeUTF8(uint32_t options, const char *src, int32_t length,
ByteSink &sink, Edits *edits,
USetSpanCondition spanCondition,
UErrorCode &errorCode) const;
UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,

Просмотреть файл

@ -132,6 +132,8 @@
#define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android is based on Linux. @internal */
#define U_PF_ANDROID 4050
/** Fuchsia is a POSIX-ish platform. @internal */
#define U_PF_FUCHSIA 4100
/* Maximum value for Linux-based platform is 4499 */
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
#define U_PF_OS390 9000
@ -152,6 +154,8 @@
# include <android/api-level.h>
#elif defined(__pnacl__) || defined(__native_client__)
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
#elif defined(__Fuchsia__)
# define U_PLATFORM U_PF_FUCHSIA
#elif defined(linux) || defined(__linux__) || defined(__linux)
# define U_PLATFORM U_PF_LINUX
#elif defined(__APPLE__) && defined(__MACH__)
@ -192,6 +196,20 @@
# define U_PLATFORM U_PF_UNKNOWN
#endif
/**
* \def UPRV_INCOMPLETE_CPP11_SUPPORT
* This switch turns off ICU 60 NumberFormatter code.
* By default, this switch is enabled on AIX, z/OS, and Solaris,
* which have poor C++11 support.
*
* NOTE: This switch is intended to be temporary; see #13393.
*
* @internal
*/
#ifndef UPRV_INCOMPLETE_CPP11_SUPPORT
# define UPRV_INCOMPLETE_CPP11_SUPPORT (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_SOLARIS )
#endif
/**
* \def CYGWINMSVC
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
@ -330,31 +348,6 @@
# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
#endif
/**
* \def U_IOSTREAM_SOURCE
* Defines what support for C++ streams is available.
*
* If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
* (the ISO/IEC C++ FDIS was published in November 1997), and then
* one should qualify streams using the std namespace in ICU header
* files.
* Starting with ICU 49, this is the only supported version.
*
* If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
* available instead (in June 1985 Stroustrup published
* "An Extensible I/O Facility for C++" at the summer USENIX conference).
* Starting with ICU 49, this version is not supported any more.
*
* If U_IOSTREAM_SOURCE is 0 (or any value less than 199711),
* then C++ streams are not available and
* support for them will be silently suppressed in ICU.
*
* @internal
*/
#ifndef U_IOSTREAM_SOURCE
#define U_IOSTREAM_SOURCE 199711
#endif
/*===========================================================================*/
/** @{ Compiler and environment features */
/*===========================================================================*/
@ -505,22 +498,6 @@ namespace std {
};
#endif
/**
* \def U_HAVE_RVALUE_REFERENCES
* Set to 1 if the compiler supports rvalue references.
* C++11 feature, necessary for move constructor & move assignment.
* @internal
*/
#ifdef U_HAVE_RVALUE_REFERENCES
/* Use the predefined value. */
#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_rvalue_references) \
|| defined(__GXX_EXPERIMENTAL_CXX0X__) \
|| (defined(_MSC_VER) && _MSC_VER >= 1600) /* Visual Studio 2010 */
# define U_HAVE_RVALUE_REFERENCES 1
#else
# define U_HAVE_RVALUE_REFERENCES 0
#endif
/**
* \def U_NOEXCEPT
* "noexcept" if supported, otherwise empty.
@ -871,6 +848,16 @@ namespace std {
# define U_CALLCONV U_EXPORT2
#endif
/**
* \def U_CALLCONV_FPTR
* Similar to U_CALLCONV, but only used on function pointers.
* @internal
*/
#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
# define U_CALLCONV_FPTR U_CALLCONV
#else
# define U_CALLCONV_FPTR
#endif
/* @} */
#endif

Просмотреть файл

@ -31,23 +31,14 @@
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
struct UTrie;
U_NAMESPACE_BEGIN
/** @internal */
struct RBBIDataHeader;
class RuleBasedBreakIteratorTables;
class BreakIterator;
class RBBIDataWrapper;
class UStack;
class LanguageBreakEngine;
struct RBBIDataHeader;
class RBBIDataWrapper;
class UnhandledEngine;
struct RBBIStateTable;
class UStack;
/**
*
@ -96,19 +87,36 @@ private:
*/
RBBIDataWrapper *fData;
/** Index of the Rule {tag} values for the most recent match.
/**
* The iteration state - current position, rule status for the current position,
* and whether the iterator ran off the end, yielding UBRK_DONE.
* Current position is pinned to be 0 <= position <= text.length.
* Current position is always set to a boundary.
* @internal
*/
int32_t fLastRuleStatusIndex;
/**
* The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
* Never has the value UBRK_DONE (-1).
*/
int32_t fPosition;
/**
* Rule tag value valid flag.
* Some iterator operations don't intrinsically set the correct tag value.
* This flag lets us lazily compute the value if we are ever asked for it.
* @internal
*/
UBool fLastStatusIndexValid;
* TODO:
*/
int32_t fRuleStatusIndex;
/**
* True when iteration has run off the end, and iterator functions should return UBRK_DONE.
*/
UBool fDone;
/**
* Cache of previously determined boundary positions.
*/
public: // TODO: debug, return to private.
class BreakCache;
BreakCache *fBreakCache;
private:
/**
* Counter for the number of characters encountered with the "dictionary"
* flag set.
@ -117,26 +125,11 @@ private:
uint32_t fDictionaryCharCount;
/**
* When a range of characters is divided up using the dictionary, the break
* positions that are discovered are stored here, preventing us from having
* to use either the dictionary or the state table again until the iterator
* leaves this range of text. Has the most impact for line breaking.
* @internal
* Cache of boundary positions within a region of text that has been
* sub-divided by dictionary based breaking.
*/
int32_t* fCachedBreakPositions;
/**
* The number of elements in fCachedBreakPositions
* @internal
*/
int32_t fNumCachedBreakPositions;
/**
* if fCachedBreakPositions is not null, this indicates which item in the
* cache the current iteration position refers to
* @internal
*/
int32_t fPositionInCache;
class DictionaryCache;
DictionaryCache *fDictionaryCache;
/**
*
@ -179,13 +172,11 @@ private:
*/
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
/** @internal */
friend class RBBIRuleBuilder;
/** @internal */
friend class BreakIterator;
public:
/** Default constructor. Creates an empty shell of an iterator, with no
@ -469,7 +460,10 @@ public:
virtual UBool isBoundary(int32_t offset);
/**
* Returns the current iteration position.
* Returns the current iteration position. Note that UBRK_DONE is never
* returned from this function; if iteration has run to the end of a
* string, current() will return the length of the string while
* next() will return UBRK_DONE.
* @return The current iteration position.
* @stable ICU 2.0
*/
@ -501,6 +495,7 @@ public:
* Note: this function is not thread safe. It should not have been
* declared const, and the const remains only for compatibility
* reasons. (The function is logically const, but not bit-wise const).
* TODO: check this. Probably thread safe now.
* <p>
* @return the status from the break rule that determined the most recently
* returned break position.
@ -660,46 +655,31 @@ private:
* Common initialization function, used by constructors and bufferClone.
* @internal
*/
void init();
void init(UErrorCode &status);
/**
* This method backs the iterator back up to a "safe position" in the text.
* This is a position that we know, without any context, must be a break position.
* The various calling methods then iterate forward from this safe position to
* the appropriate position to return. (For more information, see the description
* of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
* @param statetable state table used of moving backwards
* Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
* This locates a "Safe Position" from which the forward break rules
* will operate correctly. A Safe Position is not necessarily a boundary itself.
*
* @param fromPosition the position in the input text to begin the iteration.
* @internal
*/
int32_t handlePrevious(const RBBIStateTable *statetable);
int32_t handlePrevious(int32_t fromPosition);
/**
* This method is the actual implementation of the next() method. All iteration
* vectors through here. This method initializes the state machine to state 1
* and advances through the text character by character until we reach the end
* of the text or the state machine transitions to state 0. We update our return
* value every time the state machine passes through a possible end state.
* @param statetable state table used of moving forwards
* Find a rule-based boundary by running the state machine.
* Input
* fPosition, the position in the text to begin from.
* Output
* fPosition: the boundary following the starting position.
* fDictionaryCharCount the number of dictionary characters encountered.
* If > 0, the segment will be further subdivided
* fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
*
* @internal
*/
int32_t handleNext(const RBBIStateTable *statetable);
/**
* This is the function that actually implements dictionary-based
* breaking. Covering at least the range from startPos to endPos,
* it checks for dictionary characters, and if it finds them determines
* the appropriate object to deal with them. It may cache found breaks in
* fCachedBreakPositions as it goes. It may well also look at text outside
* the range startPos to endPos.
* If going forward, endPos is the normal Unicode break result, and
* if goind in reverse, startPos is the normal Unicode break result
* @param startPos The start position of a range of text
* @param endPos The end position of a range of text
* @param reverse The call is for the reverse direction
* @internal
*/
int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
int32_t handleNext();
/**
@ -710,11 +690,14 @@ private:
*/
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
public:
#ifndef U_HIDE_INTERNAL_API
/**
* @internal
* Debugging function only.
* @internal
*/
void makeRuleStatusValid();
void dumpCache();
#endif /* U_HIDE_INTERNAL_API */
};
//------------------------------------------------------------------------------

Просмотреть файл

@ -21,6 +21,13 @@
U_NAMESPACE_BEGIN
// Forward declaration:
namespace number {
namespace impl {
class SimpleModifier;
}
}
/**
* Formats simple patterns like "{1} was born in {0}".
* Minimal subset of MessageFormat; fast, simple, minimal dependencies.
@ -286,6 +293,9 @@ private:
UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
int32_t *offsets, int32_t offsetsLength,
UErrorCode &errorCode);
// Give access to internals to SimpleModifier for number formatting
friend class number::impl::SimpleModifier;
};
U_NAMESPACE_END

Просмотреть файл

@ -0,0 +1,198 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// stringoptions.h
// created: 2017jun08 Markus W. Scherer
#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__
#include "unicode/utypes.h"
/**
* \file
* \brief C API: Bit set option bit constants for various string and character processing functions.
*/
/**
* Option value for case folding: Use default mappings defined in CaseFolding.txt.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
#ifndef U_HIDE_DRAFT_API
/**
* Titlecase the string as a whole rather than each word.
* (Titlecase only the character at index 0, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
*/
#define U_TITLECASE_WHOLE_STRING 0x20
/**
* Titlecase sentences rather than words.
* (Titlecase only the first character of each sentence, possibly adjusted.)
* Option bits value for titlecasing APIs that take an options bit set.
*
* It is an error to specify multiple titlecasing iterator options together,
* including both an options bit and an explicit BreakIterator.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @draft ICU 60
*/
#define U_TITLECASE_SENTENCES 0x40
#endif // U_HIDE_DRAFT_API
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the character at each
* (possibly adjusted) BreakIterator index and
* lowercase all other characters up to the next iterator index.
* With this option, the other characters will not be modified.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing BreakIterator indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
* and titlecase that one.
*
* Other characters are lowercased.
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_ADJUST_TO_CASED
* @see U_TITLECASE_NO_LOWERCASE
* @see UnicodeString::toTitle
* @see CaseMap::toTitle
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
#ifndef U_HIDE_DRAFT_API
/**
* Adjust each titlecasing BreakIterator index to the next cased character.
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
* Option bit for titlecasing APIs that take an options bit set.
*
* This used to be the default index adjustment in ICU.
* Since ICU 60, the default index adjustment is to the next character that is
* a letter, number, symbol, or private use code point.
* (Uncased modifier letters are skipped.)
* The difference in behavior is small for word titlecasing,
* but the new adjustment is much better for whole-string and sentence titlecasing:
* It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
*
* It is an error to specify multiple titlecasing adjustment options together.
*
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
* @draft ICU 60
*/
#define U_TITLECASE_ADJUST_TO_CASED 0x400
/**
* Option for string transformation functions to not first reset the Edits object.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
*/
#define U_EDITS_NO_RESET 0x2000
/**
* Omit unchanged text when recording how source substrings
* relate to changed and unchanged result substrings.
* Used for example in some case-mapping and normalization functions.
*
* @see CaseMap
* @see Edits
* @see Normalizer2
* @draft ICU 60
*/
#define U_OMIT_UNCHANGED_TEXT 0x4000
#endif // U_HIDE_DRAFT_API
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
* @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
* @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
#endif // __STRINGOPTIONS_H__

Просмотреть файл

@ -256,7 +256,7 @@ protected:
/** @internal */
class FinalValueNode : public Node {
public:
FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
virtual UBool operator==(const Node &other) const;
virtual void write(StringTrieBuilder &builder);
protected:
@ -276,7 +276,7 @@ protected:
void setValue(int32_t v) {
hasValue=TRUE;
value=v;
hash=hash*37+v;
hash=hash*37u+v;
}
protected:
UBool hasValue;
@ -290,7 +290,7 @@ protected:
class IntermediateValueNode : public ValueNode {
public:
IntermediateValueNode(int32_t v, Node *nextNode)
: ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
: ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(StringTrieBuilder &builder);
@ -307,7 +307,7 @@ protected:
class LinearMatchNode : public ValueNode {
public:
LinearMatchNode(int32_t len, Node *nextNode)
: ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
: ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
length(len), next(nextNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@ -342,7 +342,7 @@ protected:
equal[length]=NULL;
values[length]=value;
++length;
hash=(hash*37+c)*37+value;
hash=(hash*37u+c)*37u+value;
}
// Adds a unit which leads to another match node.
void add(int32_t c, Node *node) {
@ -350,7 +350,7 @@ protected:
equal[length]=node;
values[length]=0;
++length;
hash=(hash*37+c)*37+hashCode(node);
hash=(hash*37u+c)*37u+hashCode(node);
}
protected:
Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
@ -365,8 +365,8 @@ protected:
class SplitBranchNode : public BranchNode {
public:
SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
: BranchNode(((0x555555*37+middleUnit)*37+
hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
: BranchNode(((0x555555u*37u+middleUnit)*37u+
hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@ -382,7 +382,7 @@ protected:
class BranchHeadNode : public ValueNode {
public:
BranchHeadNode(int32_t len, Node *subNode)
: ValueNode((0x666666*37+len)*37+hashCode(subNode)),
: ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
length(len), next(subNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);

Просмотреть файл

@ -23,8 +23,6 @@
#include "unicode/uchar.h"
#include "unicode/localpointer.h"
#ifndef U_HIDE_DRAFT_API
/**
* \file
* \brief Bidi Transformations
@ -60,17 +58,17 @@
* @see UBIDI_REORDER_DEFAULT
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
* @see UBIDI_REORDER_RUNS_ONLY
* @draft ICU 58
* @stable ICU 58
*/
typedef enum {
/** 0: Constant indicating a logical order.
* This is the default for input text.
* @draft ICU 58
* @stable ICU 58
*/
UBIDI_LOGICAL = 0,
/** 1: Constant indicating a visual order.
* This is a default for output text.
* @draft ICU 58
* @stable ICU 58
*/
UBIDI_VISUAL
} UBiDiOrder;
@ -83,20 +81,20 @@ typedef enum {
* @see ubidi_setReorderingOptions
* @see ubidi_writeReordered
* @see ubidi_writeReverse
* @draft ICU 58
* @stable ICU 58
*/
typedef enum {
/** 0: Constant indicating that character mirroring should not be
* performed.
* This is the default.
* @draft ICU 58
* @stable ICU 58
*/
UBIDI_MIRRORING_OFF = 0,
/** 1: Constant indicating that character mirroring should be performed.
* This corresponds to calling <code>ubidi_writeReordered</code> or
* <code>ubidi_writeReverse</code> with the
* <code>UBIDI_DO_MIRRORING</code> option bit set.
* @draft ICU 58
* @stable ICU 58
*/
UBIDI_MIRRORING_ON
} UBiDiMirroring;
@ -104,7 +102,7 @@ typedef enum {
/**
* Forward declaration of the <code>UBiDiTransform</code> structure that stores
* information used by the layout transformation engine.
* @draft ICU 58
* @stable ICU 58
*/
typedef struct UBiDiTransform UBiDiTransform;
@ -240,9 +238,9 @@ typedef struct UBiDiTransform UBiDiTransform;
* @see UBiDiMirroring
* @see ubidi_setPara
* @see u_shapeArabic
* @draft ICU 58
* @stable ICU 58
*/
U_DRAFT uint32_t U_EXPORT2
U_STABLE uint32_t U_EXPORT2
ubiditransform_transform(UBiDiTransform *pBiDiTransform,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destSize,
@ -286,16 +284,16 @@ ubiditransform_transform(UBiDiTransform *pBiDiTransform,
* <code>ubiditransform_close()</code>.
*
* @return An empty <code>UBiDiTransform</code> object.
* @draft ICU 58
* @stable ICU 58
*/
U_DRAFT UBiDiTransform* U_EXPORT2
U_STABLE UBiDiTransform* U_EXPORT2
ubiditransform_open(UErrorCode *pErrorCode);
/**
* Deallocates the given <code>UBiDiTransform</code> object.
* @draft ICU 58
* @stable ICU 58
*/
U_DRAFT void U_EXPORT2
U_STABLE void U_EXPORT2
ubiditransform_close(UBiDiTransform *pBidiTransform);
#if U_SHOW_CPLUSPLUS_API
@ -309,7 +307,7 @@ U_NAMESPACE_BEGIN
*
* @see LocalPointerBase
* @see LocalPointer
* @draft ICU 58
* @stable ICU 58
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
@ -317,5 +315,4 @@ U_NAMESPACE_END
#endif
#endif /* U_HIDE_DRAFT_API */
#endif

Просмотреть файл

@ -230,7 +230,8 @@ typedef enum USentenceBreakTag {
* @param locale The locale specifying the text-breaking conventions. Note that
* locale keys such as "lb" and "ss" may be used to modify text break behavior,
* see general discussion of BreakIterator C API.
* @param text The text to be iterated over.
* @param text The text to be iterated over. May be null, in which case ubrk_setText() is
* used to specify the text to be iterated.
* @param textLength The number of characters in text, or -1 if null-terminated.
* @param status A UErrorCode to receive any errors.
* @return A UBreakIterator for the specified locale.

Просмотреть файл

@ -23,6 +23,7 @@
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#include "unicode/stringoptions.h"
#include "unicode/ustring.h"
/**
@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
U_STABLE void U_EXPORT2
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
/**
* Do not lowercase non-initial parts of words when titlecasing.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will titlecase the first cased character
* of a word and lowercase all other characters.
* With this option, the other characters will not be modified.
*
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @see UnicodeString::toTitle
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_LOWERCASE 0x100
/**
* Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
* titlecase exactly the characters at breaks from the iterator.
* Option bit for titlecasing APIs that take an options bit set.
*
* By default, titlecasing will take each break iterator index,
* adjust it by looking for the next cased character, and titlecase that one.
* Other characters are lowercased.
*
* This follows Unicode 4 & 5 section 3.13 Default Case Operations:
*
* R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
* #29, "Text Boundaries." Between each pair of word boundaries, find the first
* cased character F. If F exists, map F to default_title(F); then map each
* subsequent character C to default_lower(C).
*
* @see ucasemap_setOptions
* @see ucasemap_toTitle
* @see ucasemap_utf8ToTitle
* @see UnicodeString::toTitle
* @see U_TITLECASE_NO_LOWERCASE
* @stable ICU 3.8
*/
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
/**
* Omit unchanged text when case-mapping with Edits.
*
* @see CaseMap
* @see Edits
* @draft ICU 59
*/
#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
#if !UCONFIG_NO_BREAK_ITERATION
/**
@ -251,7 +202,7 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
* The standard titlecase iterator for the root locale implements the
* algorithm of Unicode TR 21.
*
* This function uses only the setUText(), first(), next() and close() methods of the
* This function uses only the setText(), first() and next() methods of the
* provided break iterator.
*
* The result may be longer or shorter than the original.

Просмотреть файл

@ -26,6 +26,7 @@
#define UCHAR_H
#include "unicode/utypes.h"
#include "unicode/stringoptions.h"
U_CDECL_BEGIN
@ -41,7 +42,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
#define U_UNICODE_VERSION "9.0"
#define U_UNICODE_VERSION "10.0"
/**
* \file
@ -148,8 +149,9 @@ U_CDECL_BEGIN
*
* The properties APIs are intended to reflect Unicode properties as defined
* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
* For details about the properties see http://www.unicode.org/ucd/ .
* For names of Unicode properties see the UCD file PropertyAliases.txt.
*
* For details about the properties see
* UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
*
* Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
* then properties marked with "new in Unicode 3.2" are not or not fully available.
@ -427,12 +429,29 @@ typedef enum UProperty {
* @stable ICU 57
*/
UCHAR_EMOJI_MODIFIER_BASE=60,
/**
* Binary property Emoji_Component.
* See http://www.unicode.org/reports/tr51/#Emoji_Properties
*
* @stable ICU 60
*/
UCHAR_EMOJI_COMPONENT=61,
/**
* Binary property Regional_Indicator.
* @stable ICU 60
*/
UCHAR_REGIONAL_INDICATOR=62,
/**
* Binary property Prepended_Concatenation_Mark.
* @stable ICU 60
*/
UCHAR_PREPENDED_CONCATENATION_MARK=63,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UCHAR_BINARY_LIMIT=61,
UCHAR_BINARY_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/** Enumerated property Bidi_Class.
@ -1647,6 +1666,23 @@ enum UBlockCode {
/** @stable ICU 58 */
UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
// New blocks in Unicode 10.0
/** @stable ICU 60 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/
/** @stable ICU 60 */
UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
/** @stable ICU 60 */
UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
/** @stable ICU 60 */
UBLOCK_NUSHU = 277, /*[1B170]*/
/** @stable ICU 60 */
UBLOCK_SOYOMBO = 278, /*[11A50]*/
/** @stable ICU 60 */
UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
/** @stable ICU 60 */
UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UBlockCode value.
@ -1654,7 +1690,7 @@ enum UBlockCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
UBLOCK_COUNT = 274,
UBLOCK_COUNT = 281,
#endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */
@ -1930,6 +1966,19 @@ typedef enum UJoiningGroup {
U_JG_AFRICAN_FEH, /**< @stable ICU 58 */
U_JG_AFRICAN_NOON, /**< @stable ICU 58 */
U_JG_AFRICAN_QAF, /**< @stable ICU 58 */
U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */
U_JG_MALAYALAM_JA, /**< @stable ICU 60 */
U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */
U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */
U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */
U_JG_MALAYALAM_RA, /**< @stable ICU 60 */
U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */
U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UJoiningGroup value.
@ -3521,27 +3570,6 @@ u_toupper(UChar32 c);
U_STABLE UChar32 U_EXPORT2
u_totitle(UChar32 c);
/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
#define U_FOLD_CASE_DEFAULT 0
/**
* Option value for case folding:
*
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*
* Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
* are to be included for default mappings and
* excluded for the Turkic-specific mappings.
*
* Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
* are to be excluded for default mappings and
* included for the Turkic-specific mappings.
*
* @stable ICU 2.0
*/
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
/**
* The given character is mapped to its case folding equivalent according to
* UnicodeData.txt and CaseFolding.txt;

Просмотреть файл

@ -148,15 +148,9 @@ typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
* @stable ICU 2.8
* @system
*/
#ifndef _MSC_VER
U_STABLE void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV a, UMemReallocFn * U_CALLCONV r, UMemFreeFn * U_CALLCONV f,
u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f,
UErrorCode *status);
#else
U_STABLE void U_EXPORT2
u_setMemoryFunctions(const void *context, UMemAllocFn * a, UMemReallocFn * r, UMemFreeFn * f,
UErrorCode *status);
#endif
U_CDECL_END

Просмотреть файл

@ -76,7 +76,7 @@
#endif
/**
* Determines wheter to enable auto cleanup of libraries.
* Determines whether to enable auto cleanup of libraries.
* @internal
*/
#ifndef UCLN_NO_AUTO_CLEANUP
@ -262,7 +262,8 @@
/**
* \def UCONFIG_NO_CONVERSION
* ICU will not completely build with this switch turned on.
* ICU will not completely build (compiling the tools fails) with this
* switch turned on.
* This switch turns off all converters.
*
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
@ -320,7 +321,9 @@
*/
#ifndef UCONFIG_NO_NORMALIZATION
# define UCONFIG_NO_NORMALIZATION 0
#elif UCONFIG_NO_NORMALIZATION
#endif
#if UCONFIG_NO_NORMALIZATION
/* common library */
/* ICU 50 CJK dictionary BreakIterator uses normalization */
# define UCONFIG_NO_BREAK_ITERATION 1

Просмотреть файл

@ -44,14 +44,12 @@ enum UDisplayContextType {
* @stable ICU 54
*/
UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
#ifndef U_HIDE_DRAFT_API
/**
* Type to retrieve the substitute handling setting, e.g.
* UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
* @draft ICU 58
* @stable ICU 58
*/
UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
#endif /* U_HIDE_DRAFT_API */
};
/**
* @stable ICU 51
@ -143,7 +141,6 @@ enum UDisplayContext {
* @stable ICU 54
*/
UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
#ifndef U_HIDE_DRAFT_API
/**
* ================================
* SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
@ -154,16 +151,15 @@ enum UDisplayContext {
* A possible setting for SUBSTITUTE_HANDLING:
* Returns a fallback value (e.g., the input code) when no data is available.
* This is the default value.
* @draft ICU 58
* @stable ICU 58
*/
UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
/**
* A possible setting for SUBSTITUTE_HANDLING:
* Returns a null value when no data is available.
* @draft ICU 58
* @stable ICU 58
*/
UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
#endif /* U_HIDE_DRAFT_API */
};
/**

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше