зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1499026 - Part 2: Update in-tree ICU to release 63.1. rs=Waldo
--HG-- rename : intl/icu/source/common/ulistformatter.cpp => intl/icu/source/i18n/ulistformatter.cpp
This commit is contained in:
Родитель
7820c86808
Коммит
148e9c75e6
|
@ -10,6 +10,7 @@ SOURCES += [
|
|||
'/intl/icu/source/common/bytestriebuilder.cpp',
|
||||
'/intl/icu/source/common/bytestrieiterator.cpp',
|
||||
'/intl/icu/source/common/caniter.cpp',
|
||||
'/intl/icu/source/common/characterproperties.cpp',
|
||||
'/intl/icu/source/common/chariter.cpp',
|
||||
'/intl/icu/source/common/charstr.cpp',
|
||||
'/intl/icu/source/common/cmemory.cpp',
|
||||
|
@ -25,7 +26,6 @@ SOURCES += [
|
|||
'/intl/icu/source/common/filterednormalizer2.cpp',
|
||||
'/intl/icu/source/common/icudataver.cpp',
|
||||
'/intl/icu/source/common/icuplug.cpp',
|
||||
'/intl/icu/source/common/listformatter.cpp',
|
||||
'/intl/icu/source/common/loadednormalizer2impl.cpp',
|
||||
'/intl/icu/source/common/locavailable.cpp',
|
||||
'/intl/icu/source/common/locbased.cpp',
|
||||
|
@ -115,6 +115,7 @@ SOURCES += [
|
|||
'/intl/icu/source/common/ucnvscsu.cpp',
|
||||
'/intl/icu/source/common/ucnvsel.cpp',
|
||||
'/intl/icu/source/common/ucol_swp.cpp',
|
||||
'/intl/icu/source/common/ucptrie.cpp',
|
||||
'/intl/icu/source/common/ucurr.cpp',
|
||||
'/intl/icu/source/common/udata.cpp',
|
||||
'/intl/icu/source/common/udatamem.cpp',
|
||||
|
@ -127,12 +128,12 @@ SOURCES += [
|
|||
'/intl/icu/source/common/uinvchar.cpp',
|
||||
'/intl/icu/source/common/uiter.cpp',
|
||||
'/intl/icu/source/common/ulist.cpp',
|
||||
'/intl/icu/source/common/ulistformatter.cpp',
|
||||
'/intl/icu/source/common/uloc.cpp',
|
||||
'/intl/icu/source/common/uloc_keytype.cpp',
|
||||
'/intl/icu/source/common/uloc_tag.cpp',
|
||||
'/intl/icu/source/common/umapfile.cpp',
|
||||
'/intl/icu/source/common/umath.cpp',
|
||||
'/intl/icu/source/common/umutablecptrie.cpp',
|
||||
'/intl/icu/source/common/umutex.cpp',
|
||||
'/intl/icu/source/common/unames.cpp',
|
||||
'/intl/icu/source/common/unifiedcache.cpp',
|
||||
|
@ -181,6 +182,7 @@ SOURCES += [
|
|||
'/intl/icu/source/common/utrie.cpp',
|
||||
'/intl/icu/source/common/utrie2.cpp',
|
||||
'/intl/icu/source/common/utrie2_builder.cpp',
|
||||
'/intl/icu/source/common/utrie_swap.cpp',
|
||||
'/intl/icu/source/common/uts46.cpp',
|
||||
'/intl/icu/source/common/utypes.cpp',
|
||||
'/intl/icu/source/common/uvector.cpp',
|
||||
|
@ -209,7 +211,6 @@ EXPORTS.unicode += [
|
|||
'/intl/icu/source/common/unicode/icudataver.h',
|
||||
'/intl/icu/source/common/unicode/icuplug.h',
|
||||
'/intl/icu/source/common/unicode/idna.h',
|
||||
'/intl/icu/source/common/unicode/listformatter.h',
|
||||
'/intl/icu/source/common/unicode/localpointer.h',
|
||||
'/intl/icu/source/common/unicode/locdspnm.h',
|
||||
'/intl/icu/source/common/unicode/locid.h',
|
||||
|
@ -247,6 +248,8 @@ EXPORTS.unicode += [
|
|||
'/intl/icu/source/common/unicode/ucnv_err.h',
|
||||
'/intl/icu/source/common/unicode/ucnvsel.h',
|
||||
'/intl/icu/source/common/unicode/uconfig.h',
|
||||
'/intl/icu/source/common/unicode/ucpmap.h',
|
||||
'/intl/icu/source/common/unicode/ucptrie.h',
|
||||
'/intl/icu/source/common/unicode/ucurr.h',
|
||||
'/intl/icu/source/common/unicode/udata.h',
|
||||
'/intl/icu/source/common/unicode/udisplaycontext.h',
|
||||
|
@ -254,10 +257,10 @@ EXPORTS.unicode += [
|
|||
'/intl/icu/source/common/unicode/uidna.h',
|
||||
'/intl/icu/source/common/unicode/uiter.h',
|
||||
'/intl/icu/source/common/unicode/uldnames.h',
|
||||
'/intl/icu/source/common/unicode/ulistformatter.h',
|
||||
'/intl/icu/source/common/unicode/uloc.h',
|
||||
'/intl/icu/source/common/unicode/umachine.h',
|
||||
'/intl/icu/source/common/unicode/umisc.h',
|
||||
'/intl/icu/source/common/unicode/umutablecptrie.h',
|
||||
'/intl/icu/source/common/unicode/unifilt.h',
|
||||
'/intl/icu/source/common/unicode/unifunct.h',
|
||||
'/intl/icu/source/common/unicode/unimatch.h',
|
||||
|
|
Двоичные данные
config/external/icu/data/icudt62l.dat → config/external/icu/data/icudt63l.dat
поставляемый
Двоичные данные
config/external/icu/data/icudt62l.dat → config/external/icu/data/icudt63l.dat
поставляемый
Двоичный файл не отображается.
|
@ -67,6 +67,7 @@ SOURCES += [
|
|||
'/intl/icu/source/i18n/dtitvinf.cpp',
|
||||
'/intl/icu/source/i18n/dtptngen.cpp',
|
||||
'/intl/icu/source/i18n/dtrule.cpp',
|
||||
'/intl/icu/source/i18n/erarules.cpp',
|
||||
'/intl/icu/source/i18n/esctrn.cpp',
|
||||
'/intl/icu/source/i18n/ethpccal.cpp',
|
||||
'/intl/icu/source/i18n/fmtable.cpp',
|
||||
|
@ -83,6 +84,7 @@ SOURCES += [
|
|||
'/intl/icu/source/i18n/inputext.cpp',
|
||||
'/intl/icu/source/i18n/islamcal.cpp',
|
||||
'/intl/icu/source/i18n/japancal.cpp',
|
||||
'/intl/icu/source/i18n/listformatter.cpp',
|
||||
'/intl/icu/source/i18n/measfmt.cpp',
|
||||
'/intl/icu/source/i18n/measunit.cpp',
|
||||
'/intl/icu/source/i18n/measure.cpp',
|
||||
|
@ -129,6 +131,8 @@ SOURCES += [
|
|||
'/intl/icu/source/i18n/numparse_stringsegment.cpp',
|
||||
'/intl/icu/source/i18n/numparse_symbols.cpp',
|
||||
'/intl/icu/source/i18n/numparse_validators.cpp',
|
||||
'/intl/icu/source/i18n/numrange_fluent.cpp',
|
||||
'/intl/icu/source/i18n/numrange_impl.cpp',
|
||||
'/intl/icu/source/i18n/numsys.cpp',
|
||||
'/intl/icu/source/i18n/olsontz.cpp',
|
||||
'/intl/icu/source/i18n/persncal.cpp',
|
||||
|
@ -196,6 +200,7 @@ SOURCES += [
|
|||
'/intl/icu/source/i18n/udatpg.cpp',
|
||||
'/intl/icu/source/i18n/ufieldpositer.cpp',
|
||||
'/intl/icu/source/i18n/uitercollationiterator.cpp',
|
||||
'/intl/icu/source/i18n/ulistformatter.cpp',
|
||||
'/intl/icu/source/i18n/ulocdata.cpp',
|
||||
'/intl/icu/source/i18n/umsg.cpp',
|
||||
'/intl/icu/source/i18n/unesctrn.cpp',
|
||||
|
@ -250,12 +255,14 @@ EXPORTS.unicode += [
|
|||
'/intl/icu/source/i18n/unicode/fpositer.h',
|
||||
'/intl/icu/source/i18n/unicode/gender.h',
|
||||
'/intl/icu/source/i18n/unicode/gregocal.h',
|
||||
'/intl/icu/source/i18n/unicode/listformatter.h',
|
||||
'/intl/icu/source/i18n/unicode/measfmt.h',
|
||||
'/intl/icu/source/i18n/unicode/measunit.h',
|
||||
'/intl/icu/source/i18n/unicode/measure.h',
|
||||
'/intl/icu/source/i18n/unicode/msgfmt.h',
|
||||
'/intl/icu/source/i18n/unicode/nounit.h',
|
||||
'/intl/icu/source/i18n/unicode/numberformatter.h',
|
||||
'/intl/icu/source/i18n/unicode/numberrangeformatter.h',
|
||||
'/intl/icu/source/i18n/unicode/numfmt.h',
|
||||
'/intl/icu/source/i18n/unicode/numsys.h',
|
||||
'/intl/icu/source/i18n/unicode/plurfmt.h',
|
||||
|
@ -292,6 +299,7 @@ EXPORTS.unicode += [
|
|||
'/intl/icu/source/i18n/unicode/ufieldpositer.h',
|
||||
'/intl/icu/source/i18n/unicode/uformattable.h',
|
||||
'/intl/icu/source/i18n/unicode/ugender.h',
|
||||
'/intl/icu/source/i18n/unicode/ulistformatter.h',
|
||||
'/intl/icu/source/i18n/unicode/ulocdata.h',
|
||||
'/intl/icu/source/i18n/unicode/umsg.h',
|
||||
'/intl/icu/source/i18n/unicode/unirepl.h',
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
commit 4a3ba8eee90ea1414d4f7ee36563e6c9b28fda96
|
||||
Author: Yoshito Umaoka <y.umaoka@gmail.com>
|
||||
Date: Wed Jun 20 05:34:56 2018 +0000
|
||||
commit 6cbd62e59e30f73b444be89ea71fd74275ac53a4
|
||||
Author: Shane Carr <shane@unicode.org>
|
||||
Date: Mon Oct 29 23:52:44 2018 -0700
|
||||
|
||||
ICU-13823 Merged #13840 number parser memory overflow fix (r41541) to maint-62 for 62.1 GA.
|
||||
ICU-20246 Fixing another integer overflow in number parsing.
|
||||
|
||||
X-SVN-Rev: 41542
|
||||
(cherry picked from commit 53d8c8f3d181d87a6aa925b449b51c4a2c922a51)
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -48,6 +48,8 @@ ALL_PKGCONFIG_SUFFIX=uc i18n
|
|||
DOXYGEN = @DOXYGEN@
|
||||
DOCZIP = icu-docs.zip
|
||||
|
||||
INSTALL_ICU_CONFIG = @INSTALL_ICU_CONFIG@
|
||||
|
||||
## Files to remove for 'make clean'
|
||||
CLEANFILES = *~
|
||||
|
||||
|
@ -64,7 +66,9 @@ SUBDIRS = stubdata common i18n $(LAYOUTEX) $(ICUIO) $(TOOLS) $(DATASUBDIR) $(EX
|
|||
|
||||
SECTION = 1
|
||||
|
||||
ifeq ($(INSTALL_ICU_CONFIG),true)
|
||||
MANX_FILES = config/icu-config.$(SECTION)
|
||||
endif
|
||||
|
||||
ALL_MAN_FILES = $(MANX_FILES)
|
||||
|
||||
|
@ -191,13 +195,15 @@ install-icu: $(INSTALLED_BUILT_FILES)
|
|||
@$(MKINSTALLDIRS) $(DESTDIR)$(libdir)/pkgconfig
|
||||
$(INSTALL_DATA) $(ALL_PKGCONFIG_FILES) $(DESTDIR)$(libdir)/pkgconfig/
|
||||
$(INSTALL_DATA) $(top_srcdir)/../LICENSE $(DESTDIR)$(pkgdatadir)/LICENSE
|
||||
ifeq ($(INSTALL_ICU_CONFIG),true)
|
||||
$(INSTALL_SCRIPT) $(top_builddir)/config/icu-config $(DESTDIR)$(bindir)/icu-config
|
||||
endif
|
||||
$(INSTALL_DATA) $(top_builddir)/config/Makefile.inc $(DESTDIR)$(pkglibdir)/Makefile.inc
|
||||
$(INSTALL_DATA) $(top_builddir)/config/pkgdata.inc $(DESTDIR)$(pkglibdir)/pkgdata.inc
|
||||
# @echo icuinfo.xml is built after make check.
|
||||
# -$(INSTALL_DATA) $(top_builddir)/config/icuinfo.xml $(DESTDIR)$(pkglibdir)/icuinfo.xml
|
||||
cd $(DESTDIR)$(pkglibdir)/..; \
|
||||
$(RM) current && ln -s $(VERSION) current; \
|
||||
$(RMV) current && ln -s $(VERSION) current; \
|
||||
$(RM) Makefile.inc && ln -s current/Makefile.inc Makefile.inc; \
|
||||
$(RM) pkgdata.inc && ln -s current/pkgdata.inc pkgdata.inc
|
||||
|
||||
|
@ -354,7 +360,9 @@ config.status: $(srcdir)/configure $(srcdir)/common/unicode/uvernum.h
|
|||
|
||||
install-manx: $(MANX_FILES)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
ifneq ($(MANX_FILES),)
|
||||
$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
endif
|
||||
|
||||
config/%.$(SECTION): $(srcdir)/config/%.$(SECTION).in
|
||||
cd $(top_builddir) \
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Copyright (C) 2018 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- This file is used to set configurations that are common to *all* ICU library code (common, i18n, and io). -->
|
||||
<!-- Note: These options are for *all* configurations for *all* library projects. -->
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<!-- ICU does not use exceptions in library code. -->
|
||||
<PreprocessorDefinitions>
|
||||
_HAS_EXCEPTIONS=0;
|
||||
%(PreprocessorDefinitions)
|
||||
</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<PropertyGroup>
|
||||
<!-- Disable MSBuild warning about Linker OutputFile. -->
|
||||
<!-- Ex: MSBuild complains that the common project creates "icuuc62.dll" rather than "common.dll". However, this is intentional. -->
|
||||
<MSBuildWarningsAsMessages>MSB8012</MSBuildWarningsAsMessages>
|
||||
</PropertyGroup>
|
||||
</Project>
|
|
@ -1,129 +1,129 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- This file is used to set default configuration options for all non-UWP Visual Studio projects. -->
|
||||
<!-- These are the default project configurations for building. -->
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup>
|
||||
<!-- This is the version of the MSVC tool-set to use. -->
|
||||
<!-- v140 is the Visual Studio 2015 toolset. -->
|
||||
<!-- v141 is the Visual Studio 2017 toolset. -->
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<!-- This is the default SDK target. -->
|
||||
<!-- Note that the Windows 8.1 SDK is backwards compatible down-level to Windows 7, so
|
||||
setting this to 8.1 does not actually imply targeting Windows 8.1. -->
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<!-- We need to explicitly set the target version to Windows 7. -->
|
||||
<Win32_WinNTVersion>0x0601</Win32_WinNTVersion>
|
||||
</PropertyGroup>
|
||||
<!-- Options that are common to *all* configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup>
|
||||
<Midl>
|
||||
<MkTypLibCompatible>true</MkTypLibCompatible>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<!-- Note: These preprocessor defines are for *all* configurations for *all* projects. -->
|
||||
<!-- Note: See ticket #5750 for the macro '_CRT_SECURE_NO_DEPRECATE'. -->
|
||||
<PreprocessorDefinitions>
|
||||
WINVER=$(Win32_WinNTVersion);
|
||||
_WIN32_WINNT=$(Win32_WinNTVersion);
|
||||
_CRT_SECURE_NO_DEPRECATE;
|
||||
%(PreprocessorDefinitions)
|
||||
</PreprocessorDefinitions>
|
||||
<!-- We always want to treat wchar_t as a "real" C++ type, instead of a typedef. -->
|
||||
<TreatWChar_tAsBuiltInType>true</TreatWChar_tAsBuiltInType>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<!-- Set the source encoding and runtime encoding to UTF-8 by default. -->
|
||||
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
|
||||
<!-- Enable parallel compilation for faster builds. -->
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<Culture>0x0409</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 'Release' configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<Midl>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 'Debug' configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<Midl>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<BufferSecurityCheck>true</BufferSecurityCheck>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 32-bit configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
||||
<Midl>
|
||||
<TargetEnvironment>Win32</TargetEnvironment>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 64-bit configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>WIN64;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- This file is used to set default configuration options for all non-UWP Visual Studio projects. -->
|
||||
<!-- These are the default project configurations for building. -->
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup>
|
||||
<!-- This is the version of the MSVC tool-set to use. -->
|
||||
<!-- v140 is the Visual Studio 2015 toolset. -->
|
||||
<!-- v141 is the Visual Studio 2017 toolset. -->
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<!-- This is the default SDK target. -->
|
||||
<!-- Note that the Windows 8.1 SDK is backwards compatible down-level to Windows 7, so
|
||||
setting this to 8.1 does not actually imply targeting Windows 8.1. -->
|
||||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<!-- We need to explicitly set the target version to Windows 7. -->
|
||||
<Win32_WinNTVersion>0x0601</Win32_WinNTVersion>
|
||||
</PropertyGroup>
|
||||
<!-- Options that are common to *all* configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup>
|
||||
<Midl>
|
||||
<MkTypLibCompatible>true</MkTypLibCompatible>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<!-- Note: These preprocessor defines are for *all* configurations for *all* projects. -->
|
||||
<!-- Note: See ticket #5750 for the macro '_CRT_SECURE_NO_DEPRECATE'. -->
|
||||
<PreprocessorDefinitions>
|
||||
WINVER=$(Win32_WinNTVersion);
|
||||
_WIN32_WINNT=$(Win32_WinNTVersion);
|
||||
_CRT_SECURE_NO_DEPRECATE;
|
||||
%(PreprocessorDefinitions)
|
||||
</PreprocessorDefinitions>
|
||||
<!-- We always want to treat wchar_t as a "real" C++ type, instead of a typedef. -->
|
||||
<TreatWChar_tAsBuiltInType>true</TreatWChar_tAsBuiltInType>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
<!-- Set the source encoding and runtime encoding to UTF-8 by default. -->
|
||||
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
|
||||
<!-- Enable parallel compilation for faster builds. -->
|
||||
<MultiProcessorCompilation>true</MultiProcessorCompilation>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<Culture>0x0409</Culture>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 'Release' configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<Midl>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<StringPooling>true</StringPooling>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 'Debug' configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<Midl>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<BufferSecurityCheck>true</BufferSecurityCheck>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ResourceCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 32-bit configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
||||
<Midl>
|
||||
<TargetEnvironment>Win32</TargetEnvironment>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<!-- Options that are common to all 64-bit configurations for *all* projects. -->
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
||||
<Midl>
|
||||
<TargetEnvironment>X64</TargetEnvironment>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>WIN64;WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<TargetMachine>MachineX64</TargetMachine>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
</Project>
|
|
@ -1,41 +1,41 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- This file is used to set common configuration options for all *_uwp projects. -->
|
||||
<PropertyGroup>
|
||||
<!-- If not already set, use this version of the Win10 SDK -->
|
||||
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
|
||||
<!-- If not already set, set the minimum Win10 SDK version to TH1/RTM -->
|
||||
<WindowsTargetPlatformMinVersion>10.0.10240.0</WindowsTargetPlatformMinVersion>
|
||||
|
||||
<MinimumVisualStudioVersion>14.0</MinimumVisualStudioVersion>
|
||||
<AppContainerApplication>true</AppContainerApplication>
|
||||
<ApplicationType>Windows Store</ApplicationType>
|
||||
<ApplicationTypeRevision>10.0</ApplicationTypeRevision>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<!-- This is the version of the MSVC tool-set to use. -->
|
||||
<!-- v141 is the Visual Studio 2017 toolset. -->
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<Midl>
|
||||
<PreprocessorDefinitions>
|
||||
%(PreprocessorDefinitions)
|
||||
U_PLATFORM_HAS_WINUWP_API=1;
|
||||
</PreprocessorDefinitions>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>
|
||||
%(PreprocessorDefinitions);
|
||||
U_PLATFORM_HAS_WINUWP_API=1;
|
||||
</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>
|
||||
%(PreprocessorDefinitions)
|
||||
U_PLATFORM_HAS_WINUWP_API=1;
|
||||
</PreprocessorDefinitions>
|
||||
</ResourceCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- This file is used to set common configuration options for all *_uwp projects. -->
|
||||
<PropertyGroup>
|
||||
<!-- If not already set, use this version of the Win10 SDK -->
|
||||
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
|
||||
<!-- If not already set, set the minimum Win10 SDK version to TH1/RTM -->
|
||||
<WindowsTargetPlatformMinVersion>10.0.10240.0</WindowsTargetPlatformMinVersion>
|
||||
|
||||
<MinimumVisualStudioVersion>14.0</MinimumVisualStudioVersion>
|
||||
<AppContainerApplication>true</AppContainerApplication>
|
||||
<ApplicationType>Windows Store</ApplicationType>
|
||||
<ApplicationTypeRevision>10.0</ApplicationTypeRevision>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<!-- This is the version of the MSVC tool-set to use. -->
|
||||
<!-- v141 is the Visual Studio 2017 toolset. -->
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<Midl>
|
||||
<PreprocessorDefinitions>
|
||||
%(PreprocessorDefinitions)
|
||||
U_PLATFORM_HAS_WINUWP_API=1;
|
||||
</PreprocessorDefinitions>
|
||||
</Midl>
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>
|
||||
%(PreprocessorDefinitions);
|
||||
U_PLATFORM_HAS_WINUWP_API=1;
|
||||
</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<ResourceCompile>
|
||||
<PreprocessorDefinitions>
|
||||
%(PreprocessorDefinitions)
|
||||
U_PLATFORM_HAS_WINUWP_API=1;
|
||||
</PreprocessorDefinitions>
|
||||
</ResourceCompile>
|
||||
</ItemDefinitionGroup>
|
||||
</Project>
|
|
@ -1,27 +1,27 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
|
||||
<!--
|
||||
This file is used to copy all of the header files (*.h) from a project's "unicode" folder to a common output folder.
|
||||
-->
|
||||
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<!-- This is the location of the common output folder. -->
|
||||
<CopyDestionationPath>$(SolutionDir)\..\..\include\unicode</CopyDestionationPath>
|
||||
<BuildDependsOn>
|
||||
$(BuildDependsOn);
|
||||
CopyUnicodeHeaderFiles;
|
||||
</BuildDependsOn>
|
||||
</PropertyGroup>
|
||||
<Target Name="CopyUnicodeHeaderFiles">
|
||||
<ItemGroup>
|
||||
<!-- Generate a list of all files that end in .h from the 'unicode' folder, relative to the current project. -->
|
||||
<OutputFiles Include=".\unicode\**\*.h" />
|
||||
</ItemGroup>
|
||||
<!-- This message will be logged in the project's build output. -->
|
||||
<Message Text="Copying @(OutputFiles->Count()) header files to $(CopyDestionationPath). Files copied: @(OutputFiles)" Importance="high"/>
|
||||
<!-- Perform the copy. -->
|
||||
<Copy SourceFiles="@(OutputFiles)"
|
||||
DestinationFolder="$(CopyDestionationPath)\%(RecursiveDir)"
|
||||
SkipUnchangedFiles="false"></Copy>
|
||||
</Target>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
|
||||
<!--
|
||||
This file is used to copy all of the header files (*.h) from a project's "unicode" folder to a common output folder.
|
||||
-->
|
||||
<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<!-- This is the location of the common output folder. -->
|
||||
<CopyDestionationPath>$(SolutionDir)\..\..\include\unicode</CopyDestionationPath>
|
||||
<BuildDependsOn>
|
||||
$(BuildDependsOn);
|
||||
CopyUnicodeHeaderFiles;
|
||||
</BuildDependsOn>
|
||||
</PropertyGroup>
|
||||
<Target Name="CopyUnicodeHeaderFiles">
|
||||
<ItemGroup>
|
||||
<!-- Generate a list of all files that end in .h from the 'unicode' folder, relative to the current project. -->
|
||||
<OutputFiles Include=".\unicode\**\*.h" />
|
||||
</ItemGroup>
|
||||
<!-- This message will be logged in the project's build output. -->
|
||||
<Message Text="Copying @(OutputFiles->Count()) header files to $(CopyDestionationPath). Files copied: @(OutputFiles)" Importance="high"/>
|
||||
<!-- Perform the copy. -->
|
||||
<Copy SourceFiles="@(OutputFiles)"
|
||||
DestinationFolder="$(CopyDestionationPath)\%(RecursiveDir)"
|
||||
SkipUnchangedFiles="false"></Copy>
|
||||
</Target>
|
||||
</Project>
|
|
@ -52,7 +52,7 @@ set ICUFAILCNT=0
|
|||
@echo ==== %THT% =========================================================================
|
||||
%ICUINFO_CMD% %ICUINFO_OPTS%
|
||||
|
||||
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
|
||||
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
|
||||
@set ICUFAILED=%ICUFAILED% %THT%
|
||||
@set ICUFAILCNT=1
|
||||
:OK_icuinfo
|
||||
|
@ -63,7 +63,7 @@ set ICUFAILCNT=0
|
|||
@cd %ICU_ICUDIR%\source\test\intltest
|
||||
%INTLTEST_CMD% %INTLTEST_OPTS%
|
||||
|
||||
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
|
||||
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
|
||||
@set ICUFAILED=%ICUFAILED% %THT%
|
||||
@set ICUFAILCNT=1
|
||||
:OK_intltest
|
||||
|
@ -74,7 +74,7 @@ set ICUFAILCNT=0
|
|||
@cd %ICU_ICUDIR%\source\test\iotest
|
||||
%IOTEST_CMD% %IOTEST_OPTS%
|
||||
|
||||
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
|
||||
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
|
||||
@set ICUFAILED=%ICUFAILED% %THT%
|
||||
@set ICUFAILCNT=1
|
||||
:OK_IOTEST
|
||||
|
@ -85,7 +85,7 @@ set ICUFAILCNT=0
|
|||
@cd %ICU_ICUDIR%\source\test\cintltst
|
||||
%CINTLTST_CMD% %CINTLTST_OPTS%
|
||||
|
||||
@IF NOT ERRORLEVEL 1 GOTO OK_%THT%
|
||||
@IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
|
||||
@set ICUFAILED=%ICUFAILED% %THT%
|
||||
@set ICUFAILCNT=1
|
||||
:OK_cintltst
|
||||
|
@ -97,7 +97,7 @@ set ICUFAILCNT=0
|
|||
@REM @cd %ICU_ICUDIR%\source\test\letest
|
||||
@REM %LETST_CMD% %LETEST_OPTS%
|
||||
|
||||
@REM @IF NOT ERRORLEVEL 1 GOTO OK_%THT%
|
||||
@REM @IF %ERRORLEVEL% EQU 0 GOTO OK_%THT%
|
||||
@REM @set ICUFAILED=%ICUFAILED% %THT%
|
||||
@REM @set ICUFAILCNT=1
|
||||
@REM :OK_letest
|
||||
|
|
|
@ -81,7 +81,7 @@ LIBS = $(LIBICUDT) $(DEFAULT_LIBS)
|
|||
|
||||
OBJECTS = errorcode.o putil.o umath.o utypes.o uinvchar.o umutex.o ucln_cmn.o \
|
||||
uinit.o uobject.o cmemory.o charstr.o cstr.o \
|
||||
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o ucol_swp.o utrace.o \
|
||||
udata.o ucmndata.o udatamem.o umapfile.o udataswp.o utrie_swap.o ucol_swp.o utrace.o \
|
||||
uhash.o uhash_us.o uenum.o ustrenum.o uvector.o ustack.o uvectr32.o uvectr64.o \
|
||||
ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
|
||||
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
||||
|
@ -100,15 +100,17 @@ utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.
|
|||
unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
|
||||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o loadednormalizer2impl.o \
|
||||
chariter.o schriter.o uchriter.o uiter.o \
|
||||
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o characterproperties.o \
|
||||
ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
uscript.o uscript_props.o usc_impl.o unames.o \
|
||||
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
utrie.o utrie2.o utrie2_builder.o ucptrie.o umutablecptrie.o \
|
||||
bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o filteredbrk.o \
|
||||
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o rbbi_cache.o \
|
||||
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
|
||||
uidna.o usprep.o uts46.o punycode.o \
|
||||
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
|
||||
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ulistformatter.o \
|
||||
ulist.o uloc_tag.o icudataver.o icuplug.o \
|
||||
sharedobject.o simpleformatter.o unifiedcache.o uloc_keytype.o \
|
||||
ubiditransform.o \
|
||||
pluralmap.o \
|
||||
|
|
|
@ -241,13 +241,13 @@ void BMPSet::overrideIllegal() {
|
|||
bmpBlockBits[i]|=bits;
|
||||
}
|
||||
|
||||
mask=~(0x10001<<0xd); // Lead byte 0xED.
|
||||
mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
|
||||
bits=1<<0xd;
|
||||
for(i=32; i<64; ++i) { // Second half of 4k block.
|
||||
bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits;
|
||||
}
|
||||
} else {
|
||||
mask=~(0x10001<<0xd); // Lead byte 0xED.
|
||||
mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED.
|
||||
for(i=32; i<64; ++i) { // Second half of 4k block.
|
||||
bmpBlockBits[i]&=mask;
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "bytesinkutil.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
|
||||
|
@ -120,4 +121,41 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
// Binds the sink to *dest. Only a reference is stored, so dest must be
// non-null and must stay alive for as long as the sink is used.
CharStringByteSink::CharStringByteSink(CharString* dest)
    : dest_(*dest) {}
|
||||
|
||||
// Nothing to release: the sink only holds a reference (dest_) to a CharString it was given.
CharStringByteSink::~CharStringByteSink() = default;
|
||||
|
||||
void
|
||||
CharStringByteSink::Append(const char* bytes, int32_t n) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
dest_.append(bytes, n, status);
|
||||
// Any errors are silently ignored.
|
||||
}
|
||||
|
||||
char*
|
||||
CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch,
|
||||
int32_t scratch_capacity,
|
||||
int32_t* result_capacity) {
|
||||
if (min_capacity < 1 || scratch_capacity < min_capacity) {
|
||||
*result_capacity = 0;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
char* result = dest_.getAppendBuffer(
|
||||
min_capacity,
|
||||
desired_capacity_hint,
|
||||
*result_capacity,
|
||||
status);
|
||||
if (U_SUCCESS(status)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
*result_capacity = scratch_capacity;
|
||||
return scratch;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
class CharString;
|
||||
class Edits;
|
||||
|
||||
class U_COMMON_API ByteSinkUtil {
|
||||
|
@ -58,4 +59,25 @@ private:
|
|||
ByteSink &sink, uint32_t options, Edits *edits);
|
||||
};
|
||||
|
||||
// ByteSink implementation that writes into a caller-supplied CharString.
// The sink stores a reference to that CharString (dest_); it does not own it.
class CharStringByteSink : public ByteSink {
|
||||
public:
|
||||
// Binds the sink to *dest.
CharStringByteSink(CharString* dest);
|
||||
~CharStringByteSink() override;
|
||||
|
||||
// A sink must always have a destination, and it is neither copyable nor assignable.
CharStringByteSink() = delete;
|
||||
CharStringByteSink(const CharStringByteSink&) = delete;
|
||||
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
|
||||
|
||||
// Appends n bytes to the destination; errors from the CharString are ignored.
void Append(const char* bytes, int32_t n) override;
|
||||
|
||||
// Returns a buffer from the destination CharString, or `scratch` if the CharString reports an error.
// Returns nullptr with *result_capacity == 0 when min_capacity < 1 or scratch_capacity < min_capacity.
char* GetAppendBuffer(int32_t min_capacity,
|
||||
int32_t desired_capacity_hint,
|
||||
char* scratch,
|
||||
int32_t scratch_capacity,
|
||||
int32_t* result_capacity) override;
|
||||
|
||||
private:
|
||||
// Destination of all appended bytes; bound in the constructor.
CharString& dest_;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -339,7 +339,8 @@ BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar
|
|||
|
||||
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
|
||||
: LinearMatchNode(len, nextNode), s(bytes) {
|
||||
hash=hash*37+ustr_hashCharsN(bytes, len);
|
||||
hash=static_cast<int32_t>(
|
||||
static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
|
||||
}
|
||||
|
||||
UBool
|
||||
|
|
|
@ -0,0 +1,336 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// characterproperties.cpp
|
||||
// created: 2018sep03 Markus W. Scherer
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ucpmap.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/umutablecptrie.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "cmemory.h"
|
||||
#include "mutex.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "uassert.h"
|
||||
#include "ubidi_props.h"
|
||||
#include "ucase.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "umutex.h"
|
||||
#include "uprops.h"
|
||||
|
||||
using icu::UInitOnce;
|
||||
using icu::UnicodeSet;
|
||||
|
||||
namespace {
|
||||
|
||||
UBool U_CALLCONV characterproperties_cleanup();
|
||||
|
||||
// Cache entry for the inclusion set of one property source.
struct Inclusion {
|
||||
// Set built by CharacterProperties::initInclusion(); deleted and reset to nullptr by characterproperties_cleanup().
UnicodeSet *fSet;
|
||||
// Passed to umtx_initOnce() in getInclusionsForSource() when fSet is requested.
UInitOnce fInitOnce;
|
||||
};
|
||||
Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
|
||||
|
||||
// Binary-property sets created on demand by u_getBinaryPropertySet(), indexed by UProperty.
UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
|
||||
|
||||
// Integer-property maps created on demand by u_getIntPropertyMap(), indexed by (property - UCHAR_INT_START).
UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
|
||||
|
||||
// Held by u_getBinaryPropertySet() and u_getIntPropertyMap() while they read or fill sets[] and maps[].
UMutex cpMutex = U_MUTEX_INITIALIZER;
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Inclusions list
|
||||
//----------------------------------------------------------------
|
||||
|
||||
// USetAdder implementation
|
||||
// Does not use uset.h to reduce code dependencies
|
||||
void U_CALLCONV
|
||||
_set_add(USet *set, UChar32 c) {
|
||||
((UnicodeSet *)set)->add(c);
|
||||
}
|
||||
|
||||
void U_CALLCONV
|
||||
_set_addRange(USet *set, UChar32 start, UChar32 end) {
|
||||
((UnicodeSet *)set)->add(start, end);
|
||||
}
|
||||
|
||||
void U_CALLCONV
|
||||
_set_addString(USet *set, const UChar *str, int32_t length) {
|
||||
((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
|
||||
}
|
||||
|
||||
// Library cleanup hook (registered in CharacterProperties::initInclusion()).
// Frees every cached inclusion set, binary-property set and int-property map,
// and resets the caches so that they can be rebuilt later. Always returns TRUE.
UBool U_CALLCONV characterproperties_cleanup() {
    for (Inclusion &entry : gInclusions) {
        delete entry.fSet;
        entry.fSet = nullptr;
        entry.fInitOnce.reset();
    }
    for (UnicodeSet *&cachedSet : sets) {
        delete cachedSet;
        cachedSet = nullptr;
    }
    for (UCPMap *&cachedMap : maps) {
        // The maps are stored as UCPMap* but are closed as UCPTrie objects.
        ucptrie_close(reinterpret_cast<UCPTrie *>(cachedMap));
        cachedMap = nullptr;
    }
    return TRUE;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
Reduce excessive reallocation, and make it easier to detect initialization problems.
|
||||
Usually you don't see smaller sets than this for Unicode 5.0.
|
||||
*/
|
||||
constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072;
|
||||
|
||||
// Builds the inclusion set for one property source and stores it in
// gInclusions[src].fSet.
//
// This function is invoked only via umtx_initOnce().
// This function is a friend of class UnicodeSet.
//
// On failure errorCode is set and the cache slot is left as nullptr.
// On success the set is compacted and the cleanup hook is registered.
void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) {
    U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
    if (src == UPROPS_SRC_NONE) {
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        return;
    }

    UnicodeSet *&cached = gInclusions[src].fSet;
    U_ASSERT(cached == nullptr);
    cached = new UnicodeSet();
    if (cached == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Callback table through which the data sources below add to the new set.
    USetAdder adder = {
        reinterpret_cast<USet *>(cached),
        _set_add,
        _set_addRange,
        _set_addString,
        nullptr,  // remove() is not needed
        nullptr   // removeRange() is not needed
    };

    cached->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode);

    switch (src) {
    case UPROPS_SRC_CHAR:
        uchar_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_PROPSVEC:
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_CHAR_AND_PROPSVEC:
        uchar_addPropertyStarts(&adder, &errorCode);
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
#if !UCONFIG_NO_NORMALIZATION
    case UPROPS_SRC_CASE_AND_NORM: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addPropertyStarts(&adder, errorCode);
        }
        // The case data is consulted whether or not the NFC data loaded.
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    }
    case UPROPS_SRC_NFC: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_FAILURE(errorCode)) {
            break;
        }
        nfc->addPropertyStarts(&adder, errorCode);
        break;
    }
    case UPROPS_SRC_NFKC: {
        const Normalizer2Impl *nfkc = Normalizer2Factory::getNFKCImpl(errorCode);
        if (U_FAILURE(errorCode)) {
            break;
        }
        nfkc->addPropertyStarts(&adder, errorCode);
        break;
    }
    case UPROPS_SRC_NFKC_CF: {
        const Normalizer2Impl *nfkcCf = Normalizer2Factory::getNFKC_CFImpl(errorCode);
        if (U_FAILURE(errorCode)) {
            break;
        }
        nfkcCf->addPropertyStarts(&adder, errorCode);
        break;
    }
    case UPROPS_SRC_NFC_CANON_ITER: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_FAILURE(errorCode)) {
            break;
        }
        nfc->addCanonIterPropertyStarts(&adder, errorCode);
        break;
    }
#endif
    case UPROPS_SRC_CASE:
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_BIDI:
        ubidi_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_INPC:
    case UPROPS_SRC_INSC:
    case UPROPS_SRC_VO:
        uprops_addPropertyStarts(src, &adder, &errorCode);
        break;
    default:
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        break;
    }

    if (U_SUCCESS(errorCode)) {
        // Compact for caching
        cached->compact();
        ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
    } else {
        // Do not leave a partially filled set in the cache.
        delete cached;
        cached = nullptr;
    }
}
|
||||
|
||||
// Returns the cached inclusion set for a property source, building it on first use
// through umtx_initOnce(). Returns nullptr if errorCode already indicates a failure,
// and sets U_ILLEGAL_ARGUMENT_ERROR if src is outside [0, UPROPS_SRC_COUNT).
const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    const bool srcInRange = 0 <= src && src < UPROPS_SRC_COUNT;
    if (!srcInRange) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Inclusion &entry = gInclusions[src];
    umtx_initOnce(entry.fInitOnce, &CharacterProperties::initInclusion, src, errorCode);
    return entry.fSet;
}
|
||||
|
||||
// Returns the inclusion set of the source that uprops_getSource() reports for prop.
// Returns nullptr without doing anything if errorCode already indicates a failure.
const UnicodeSet *CharacterProperties::getInclusionsForProperty(
        UProperty prop, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    return getInclusionsForSource(uprops_getSource(prop), errorCode);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
namespace {
|
||||
|
||||
// Builds a new frozen UnicodeSet containing every code point for which
// u_hasBinaryProperty(c, property) is true.
//
// Only the code points in the property's inclusion set are tested. A run of
// "true" results is kept open from one tested code point to the next (also
// across inclusion ranges) and is closed just before the first "false" one.
//
// Returns nullptr and sets errorCode on failure; the caller owns the result.
UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    icu::LocalPointer<UnicodeSet> result(new UnicodeSet());
    if (result.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    const UnicodeSet *starts =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }

    const int32_t rangeCount = starts->getRangeCount();
    // First code point of the currently open run of "true" values, or -1 if none is open.
    UChar32 runStart = -1;

    for (int32_t r = 0; r < rangeCount; ++r) {
        const UChar32 last = starts->getRangeEnd(r);
        for (UChar32 c = starts->getRangeStart(r); c <= last; ++c) {
            const bool runIsOpen = runStart >= 0;
            // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
            if (u_hasBinaryProperty(c, property)) {
                if (!runIsOpen) {
                    // Transition from false to true.
                    runStart = c;
                }
            } else if (runIsOpen) {
                // Transition from true to false.
                result->add(runStart, c - 1);
                runStart = -1;
            }
        }
    }
    // A run that is still open extends to the end of the code point range.
    if (runStart >= 0) {
        result->add(runStart, 0x10FFFF);
    }
    result->freeze();
    return result.orphan();
}
|
||||
|
||||
// Builds an immutable code point map with the values of an integer property.
//
// The values are collected in a mutable trie: for every code point in the
// property's inclusion set, u_getIntPropertyValue() is queried, and each run of
// equal values is stored with one umutablecptrie_setRange() call. Runs whose
// value equals nullValue are never stored, because the trie is opened with
// nullValue as its filler.
//
// The trie type is FAST for UCHAR_BIDI_CLASS and UCHAR_GENERAL_CATEGORY and
// SMALL otherwise; the value width is the smallest of 8/16/32 bits that holds
// u_getIntPropertyMaxValue(property).
//
// Returns nullptr on failure (errorCode is set); the caller owns the result.
UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    // For UCHAR_SCRIPT the filler is USCRIPT_UNKNOWN rather than 0.
    uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
    icu::LocalUMutableCPTriePointer mutableTrie(
        umutablecptrie_open(nullValue, nullValue, &errorCode));
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    UChar32 start = 0;            // first code point of the current run
    uint32_t value = nullValue;   // value of the current run

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            uint32_t nextValue = u_getIntPropertyValue(c, property);
            if (value != nextValue) {
                // The run start..c-1 ends here; store it unless it is just filler.
                if (value != nullValue) {
                    umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
                }
                start = c;
                value = nextValue;
            }
        }
    }
    // Store the final run. Compare against nullValue (as in the loop above), not 0:
    // when nullValue is nonzero, a final run with value 0 still has to be written,
    // while a final run equal to nullValue is already covered by the filler.
    if (value != nullValue) {
        umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
    }

    UCPTrieType type;
    if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
        type = UCPTRIE_TYPE_FAST;
    } else {
        type = UCPTRIE_TYPE_SMALL;
    }
    UCPTrieValueWidth valueWidth;
    // TODO: UCharacterProperty.IntProperty
    int32_t max = u_getIntPropertyMaxValue(property);
    if (max <= 0xff) {
        valueWidth = UCPTRIE_VALUE_BITS_8;
    } else if (max <= 0xffff) {
        valueWidth = UCPTRIE_VALUE_BITS_16;
    } else {
        valueWidth = UCPTRIE_VALUE_BITS_32;
    }
    return reinterpret_cast<UCPMap *>(
        umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// Returns the cached set of all code points that have the given binary property,
// creating it on first request. The cache is read and filled while holding cpMutex.
// Sets U_ILLEGAL_ARGUMENT_ERROR if property is outside [0, UCHAR_BINARY_LIMIT).
// Returns nullptr whenever *pErrorCode indicates a failure.
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    if (property < 0 || UCHAR_BINARY_LIMIT <= property) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UnicodeSet *&slot = sets[property];
    if (slot == nullptr) {
        slot = makeSet(property, *pErrorCode);
    }
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    return slot->toUSet();
}
|
||||
|
||||
// Returns the cached code point map for the given integer property, creating it
// on first request. The cache is read and filled while holding cpMutex.
// Sets U_ILLEGAL_ARGUMENT_ERROR if property is outside [UCHAR_INT_START, UCHAR_INT_LIMIT).
// Returns whatever makeMap() produced, so the result is nullptr if building failed.
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }
    if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UCPMap *&slot = maps[property - UCHAR_INT_START];
    if (slot == nullptr) {
        slot = makeMap(property, *pErrorCode);
    }
    return slot;
}
|
|
@ -79,7 +79,7 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
|
|||
return *this;
|
||||
}
|
||||
if(sLength<0) {
|
||||
sLength=uprv_strlen(s);
|
||||
sLength= static_cast<int32_t>(uprv_strlen(s));
|
||||
}
|
||||
if(sLength>0) {
|
||||
if(s==(buffer.getAlias()+len)) {
|
||||
|
@ -126,15 +126,21 @@ char *CharString::getAppendBuffer(int32_t minCapacity,
|
|||
}
|
||||
|
||||
CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) {
|
||||
return appendInvariantChars(s.getBuffer(), s.length(), errorCode);
|
||||
}
|
||||
|
||||
CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return *this;
|
||||
}
|
||||
if (!uprv_isInvariantUnicodeString(s)) {
|
||||
if (!uprv_isInvariantUString(uchars, ucharsLen)) {
|
||||
errorCode = U_INVARIANT_CONVERSION_ERROR;
|
||||
return *this;
|
||||
}
|
||||
if(ensureCapacity(len+s.length()+1, 0, errorCode)) {
|
||||
len+=s.extract(0, 0x7fffffff, buffer.getAlias()+len, buffer.getCapacity()-len, US_INV);
|
||||
if(ensureCapacity(len+ucharsLen+1, 0, errorCode)) {
|
||||
u_UCharsToChars(uchars, buffer.getAlias()+len, ucharsLen);
|
||||
len += ucharsLen;
|
||||
buffer[len] = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@ -123,6 +123,7 @@ public:
|
|||
UErrorCode &errorCode);
|
||||
|
||||
CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
|
||||
CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode);
|
||||
|
||||
/**
|
||||
* Appends a filename/path part, e.g., a directory name.
|
||||
|
|
|
@ -172,7 +172,7 @@ public:
|
|||
* @return *this
|
||||
*/
|
||||
LocalMemory<T> &moveFrom(LocalMemory<T> &src) U_NOEXCEPT {
|
||||
delete[] LocalPointerBase<T>::ptr;
|
||||
uprv_free(LocalPointerBase<T>::ptr);
|
||||
LocalPointerBase<T>::ptr=src.ptr;
|
||||
src.ptr=NULL;
|
||||
return *this;
|
||||
|
@ -279,6 +279,10 @@ inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t le
|
|||
*
|
||||
* Unlike LocalMemory and LocalArray, this class never adopts
|
||||
* (takes ownership of) another array.
|
||||
*
|
||||
* WARNING: MaybeStackArray only works with primitive (plain-old data) types.
|
||||
* It does NOT know how to call a destructor! If you work with classes with
|
||||
* destructors, consider LocalArray in localpointer.h.
|
||||
*/
|
||||
template<typename T, int32_t stackCapacity>
|
||||
class MaybeStackArray {
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- The following import will include the 'default' configuration options for VS projects. -->
|
||||
<Import Project="..\allinone\Build.Windows.ProjectConfiguration.props" />
|
||||
|
||||
<!-- The following import will include the library configuration options for VS projects. -->
|
||||
<Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{73C0A65B-D1F2-4DE1-B3A6-15DAD2C23F3D}</ProjectGuid>
|
||||
</PropertyGroup>
|
||||
|
@ -85,7 +86,7 @@
|
|||
<ProgramDataBaseFileName>.\x86\Release/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin\icuuc62.dll</OutputFile>
|
||||
<OutputFile>..\..\bin\icuuc63.dll</OutputFile>
|
||||
<AdditionalLibraryDirectories>.\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<ProgramDatabaseFile>.\..\..\lib\icuuc.pdb</ProgramDatabaseFile>
|
||||
<DataExecutionPrevention>
|
||||
|
@ -105,7 +106,7 @@
|
|||
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin\icuuc62d.dll</OutputFile>
|
||||
<OutputFile>..\..\bin\icuuc63d.dll</OutputFile>
|
||||
<AdditionalLibraryDirectories>.\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<ProgramDatabaseFile>.\..\..\lib\icuucd.pdb</ProgramDatabaseFile>
|
||||
<DataExecutionPrevention>
|
||||
|
@ -124,7 +125,7 @@
|
|||
<ProgramDataBaseFileName>.\x64\Release/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin64\icuuc62.dll</OutputFile>
|
||||
<OutputFile>..\..\bin64\icuuc63.dll</OutputFile>
|
||||
<AdditionalLibraryDirectories>.\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<ProgramDatabaseFile>.\..\..\lib64\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\lib64\icuuc.lib</ImportLibrary>
|
||||
|
@ -142,7 +143,7 @@
|
|||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin64\icuuc62d.dll</OutputFile>
|
||||
<OutputFile>..\..\bin64\icuuc63d.dll</OutputFile>
|
||||
<AdditionalLibraryDirectories>.\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<ProgramDatabaseFile>.\..\..\lib64\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\lib64\icuucd.lib</ImportLibrary>
|
||||
|
@ -182,6 +183,7 @@
|
|||
<ClCompile Include="ustack.cpp" />
|
||||
<ClCompile Include="ustrenum.cpp" />
|
||||
<ClCompile Include="utrie.cpp" />
|
||||
<ClCompile Include="utrie_swap.cpp" />
|
||||
<ClCompile Include="utrie2.cpp" />
|
||||
<ClCompile Include="utrie2_builder.cpp" />
|
||||
<ClCompile Include="uvector.cpp" />
|
||||
|
@ -268,6 +270,7 @@
|
|||
<ClCompile Include="ruleiter.cpp" />
|
||||
<ClCompile Include="ucase.cpp" />
|
||||
<ClCompile Include="uchar.cpp" />
|
||||
<ClCompile Include="characterproperties.cpp" />
|
||||
<ClCompile Include="unames.cpp" />
|
||||
<ClCompile Include="unifiedcache.cpp" />
|
||||
<ClCompile Include="unifilt.cpp" />
|
||||
|
@ -315,8 +318,10 @@
|
|||
<ClCompile Include="ucharstriebuilder.cpp" />
|
||||
<ClCompile Include="ucharstrieiterator.cpp" />
|
||||
<ClCompile Include="uchriter.cpp" />
|
||||
<ClCompile Include="ucptrie.cpp" />
|
||||
<ClCompile Include="uinvchar.cpp" />
|
||||
<ClCompile Include="uiter.cpp" />
|
||||
<ClCompile Include="umutablecptrie.cpp" />
|
||||
<ClCompile Include="unistr.cpp" />
|
||||
<ClCompile Include="unistr_case.cpp" />
|
||||
<ClCompile Include="unistr_case_locale.cpp" />
|
||||
|
@ -332,8 +337,6 @@
|
|||
<ClCompile Include="ustrtrns.cpp" />
|
||||
<ClCompile Include="utext.cpp" />
|
||||
<ClCompile Include="utf_impl.cpp" />
|
||||
<ClCompile Include="listformatter.cpp" />
|
||||
<ClCompile Include="ulistformatter.cpp" />
|
||||
<ClCompile Include="static_unicode_sets.cpp" />
|
||||
<ClInclude Include="localsvc.h" />
|
||||
<ClInclude Include="msvcres.h" />
|
||||
|
|
|
@ -139,6 +139,9 @@
|
|||
<ClCompile Include="utrie.cpp">
|
||||
<Filter>collections</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="utrie_swap.cpp">
|
||||
<Filter>collections</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="utrie2.cpp">
|
||||
<Filter>collections</Filter>
|
||||
</ClCompile>
|
||||
|
@ -385,6 +388,9 @@
|
|||
<ClCompile Include="bmpset.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="characterproperties.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="propname.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
|
@ -562,12 +568,6 @@
|
|||
<ClCompile Include="bytestriebuilder.cpp">
|
||||
<Filter>collections</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="listformatter.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="ulistformatter.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="messagepattern.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
|
@ -589,6 +589,12 @@
|
|||
<ClCompile Include="ucharstrieiterator.cpp">
|
||||
<Filter>collections</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="ucptrie.cpp">
|
||||
<Filter>collections</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="umutablecptrie.cpp">
|
||||
<Filter>collections</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="patternprops.cpp">
|
||||
<Filter>properties & sets</Filter>
|
||||
</ClCompile>
|
||||
|
@ -1186,12 +1192,6 @@
|
|||
<CustomBuild Include="unicode\messagepattern.h">
|
||||
<Filter>formatting</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\listformatter.h">
|
||||
<Filter>formatting</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\ulistformatter.h">
|
||||
<Filter>formatting</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\appendable.h">
|
||||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
|
@ -1204,6 +1204,12 @@
|
|||
<CustomBuild Include="unicode\ucharstriebuilder.h">
|
||||
<Filter>collections</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\ucptrie.h">
|
||||
<Filter>collections</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\umutablecptrie.h">
|
||||
<Filter>collections</Filter>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\enumset.h">
|
||||
<Filter>data & memory</Filter>
|
||||
</CustomBuild>
|
||||
|
@ -1217,4 +1223,4 @@
|
|||
<Filter>strings</Filter>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<!-- The following import will include the UWP configuration options for VS projects. -->
|
||||
<Import Project="..\allinone\Build.Windows.UWP.ProjectConfiguration.props" />
|
||||
<!-- The following import will include the library configuration options for VS projects. -->
|
||||
<Import Project="..\allinone\Build.Windows.Library.ProjectConfiguration.props" />
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
|
@ -184,7 +187,7 @@
|
|||
<ProgramDataBaseFileName>.\x86\ReleaseUWP/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin32uwp\icuuc62.dll</OutputFile>
|
||||
<OutputFile>..\..\bin32uwp\icuuc63.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\lib32uwp\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\lib32uwp\icuuc.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -200,7 +203,7 @@
|
|||
<ProgramDataBaseFileName>.\x86\DebugUWP/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin32uwp\icuuc62d.dll</OutputFile>
|
||||
<OutputFile>..\..\bin32uwp\icuuc63d.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\lib32uwp\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\lib32uwp\icuucd.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -216,7 +219,7 @@
|
|||
<ProgramDataBaseFileName>.\x64\ReleaseUWP/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin64uwp\icuuc62.dll</OutputFile>
|
||||
<OutputFile>..\..\bin64uwp\icuuc63.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\lib64uwp\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\lib64uwp\icuuc.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -232,7 +235,7 @@
|
|||
<ProgramDataBaseFileName>.\x64\DebugUWP/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\bin64uwp\icuuc62d.dll</OutputFile>
|
||||
<OutputFile>..\..\bin64uwp\icuuc63d.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\lib64uwp\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\lib64uwp\icuucd.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -248,7 +251,7 @@
|
|||
<ProgramDataBaseFileName>.\ARM\ReleaseUWP/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\binARMuwp\icuuc62.dll</OutputFile>
|
||||
<OutputFile>..\..\binARMuwp\icuuc63.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\libARMuwp\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\libARMuwp\icuuc.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -264,7 +267,7 @@
|
|||
<ProgramDataBaseFileName>.\ARM\DebugUWP/</ProgramDataBaseFileName>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\binARMuwp\icuuc62d.dll</OutputFile>
|
||||
<OutputFile>..\..\binARMuwp\icuuc63d.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\libARMuwp\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\libARMuwp\icuucd.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -304,6 +307,7 @@
|
|||
<ClCompile Include="ustack.cpp" />
|
||||
<ClCompile Include="ustrenum.cpp" />
|
||||
<ClCompile Include="utrie.cpp" />
|
||||
<ClCompile Include="utrie_swap.cpp" />
|
||||
<ClCompile Include="utrie2.cpp" />
|
||||
<ClCompile Include="utrie2_builder.cpp" />
|
||||
<ClCompile Include="uvector.cpp" />
|
||||
|
@ -319,9 +323,7 @@
|
|||
<ClCompile Include="umutex.cpp" />
|
||||
<ClCompile Include="utrace.cpp" />
|
||||
<ClCompile Include="utypes.cpp" />
|
||||
<ClCompile Include="wintz.cpp">
|
||||
<ExcludedFromBuild>true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="wintz.cpp" />
|
||||
<ClCompile Include="ucnv.cpp" />
|
||||
<ClCompile Include="ucnv2022.cpp" />
|
||||
<ClCompile Include="ucnv_bld.cpp" />
|
||||
|
@ -394,6 +396,7 @@
|
|||
<ClCompile Include="ruleiter.cpp" />
|
||||
<ClCompile Include="ucase.cpp" />
|
||||
<ClCompile Include="uchar.cpp" />
|
||||
<ClCompile Include="characterproperties.cpp" />
|
||||
<ClCompile Include="unames.cpp" />
|
||||
<ClCompile Include="unifiedcache.cpp" />
|
||||
<ClCompile Include="unifilt.cpp" />
|
||||
|
@ -439,9 +442,11 @@
|
|||
<ClCompile Include="ucharstrie.cpp" />
|
||||
<ClCompile Include="ucharstriebuilder.cpp" />
|
||||
<ClCompile Include="ucharstrieiterator.cpp" />
|
||||
<ClCompile Include="ucptrie.cpp" />
|
||||
<ClCompile Include="uchriter.cpp" />
|
||||
<ClCompile Include="uinvchar.cpp" />
|
||||
<ClCompile Include="uiter.cpp" />
|
||||
<ClCompile Include="umutablecptrie.cpp" />
|
||||
<ClCompile Include="unistr.cpp" />
|
||||
<ClCompile Include="unistr_case.cpp" />
|
||||
<ClCompile Include="unistr_case_locale.cpp" />
|
||||
|
@ -457,8 +462,6 @@
|
|||
<ClCompile Include="ustrtrns.cpp" />
|
||||
<ClCompile Include="utext.cpp" />
|
||||
<ClCompile Include="utf_impl.cpp" />
|
||||
<ClCompile Include="listformatter.cpp" />
|
||||
<ClCompile Include="ulistformatter.cpp" />
|
||||
<ClCompile Include="static_unicode_sets.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
|
|
@ -325,9 +325,9 @@ foundBest:
|
|||
// two characters after uc were not 0x0E4C THANTHAKHAT before
|
||||
// checking the dictionary. That is just a performance filter,
|
||||
// but it's not clear it's faster than checking the trie.
|
||||
int32_t candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
utext_setNativeIndex(text, current + cuWordLength + chars);
|
||||
if (candidates > 0) {
|
||||
if (num_candidates > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -555,9 +555,9 @@ foundBest:
|
|||
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
|
||||
// Maybe. See if it's in the dictionary.
|
||||
// TODO: this looks iffy; compare with old code.
|
||||
int32_t candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
utext_setNativeIndex(text, current + cuWordLength + chars);
|
||||
if (candidates > 0) {
|
||||
if (num_candidates > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -748,9 +748,9 @@ foundBest:
|
|||
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
|
||||
// Maybe. See if it's in the dictionary.
|
||||
// TODO: this looks iffy; compare with old code.
|
||||
int32_t candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
utext_setNativeIndex(text, current + cuWordLength + chars);
|
||||
if (candidates > 0) {
|
||||
if (num_candidates > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -953,9 +953,9 @@ foundBest:
|
|||
uc = utext_current32(text);
|
||||
if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
|
||||
// Maybe. See if it's in the dictionary.
|
||||
int32_t candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
utext_setNativeIndex(text, current+cuWordLength+chars);
|
||||
if (candidates > 0) {
|
||||
if (num_candidates > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -276,7 +276,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
|
|||
// ab deletions meet bc insertions at the same intermediate-string index.
|
||||
// Some users expect the bc insertions to come first, so we fetch from bc first.
|
||||
if (bc_bLength == 0) {
|
||||
if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
|
||||
if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) {
|
||||
bc_bLength = bcIter.oldLength();
|
||||
cLength = bcIter.newLength();
|
||||
if (bc_bLength == 0) {
|
||||
|
@ -293,7 +293,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error
|
|||
// else see if the other iterator is done, too.
|
||||
}
|
||||
if (ab_bLength == 0) {
|
||||
if (abHasNext && (abHasNext = abIter.next(errorCode))) {
|
||||
if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) {
|
||||
aLength = abIter.oldLength();
|
||||
ab_bLength = abIter.newLength();
|
||||
if (ab_bLength == 0) {
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "unicode/udata.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "cstring.h"
|
||||
|
@ -42,12 +43,12 @@ private:
|
|||
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
|
||||
|
||||
UDataMemory *memory;
|
||||
UTrie2 *ownedTrie;
|
||||
UCPTrie *ownedTrie;
|
||||
};
|
||||
|
||||
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
|
||||
udata_close(memory);
|
||||
utrie2_close(ownedTrie);
|
||||
ucptrie_close(ownedTrie);
|
||||
}
|
||||
|
||||
UBool U_CALLCONV
|
||||
|
@ -62,7 +63,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
|
|||
pInfo->dataFormat[1]==0x72 &&
|
||||
pInfo->dataFormat[2]==0x6d &&
|
||||
pInfo->dataFormat[3]==0x32 &&
|
||||
pInfo->formatVersion[0]==3
|
||||
pInfo->formatVersion[0]==4
|
||||
) {
|
||||
// Normalizer2Impl *me=(Normalizer2Impl *)context;
|
||||
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
|
||||
|
@ -91,9 +92,9 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod
|
|||
|
||||
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
|
||||
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
|
||||
ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
|
||||
inBytes+offset, nextOffset-offset, NULL,
|
||||
&errorCode);
|
||||
ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
|
||||
inBytes+offset, nextOffset-offset, NULL,
|
||||
&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
@ -131,15 +132,26 @@ U_CDECL_BEGIN
|
|||
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
|
||||
U_CDECL_END
|
||||
|
||||
static Norm2AllModes *nfkcSingleton;
|
||||
static Norm2AllModes *nfkc_cfSingleton;
|
||||
static UHashtable *cache=NULL;
|
||||
#if !NORM2_HARDCODE_NFC_DATA
|
||||
static Norm2AllModes *nfcSingleton;
|
||||
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
|
||||
#endif
|
||||
|
||||
static Norm2AllModes *nfkcSingleton;
|
||||
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
static Norm2AllModes *nfkc_cfSingleton;
|
||||
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
static UHashtable *cache=NULL;
|
||||
|
||||
// UInitOnce singleton initialization function
|
||||
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
||||
#if !NORM2_HARDCODE_NFC_DATA
|
||||
if (uprv_strcmp(what, "nfc") == 0) {
|
||||
nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
|
||||
} else
|
||||
#endif
|
||||
if (uprv_strcmp(what, "nfkc") == 0) {
|
||||
nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
|
||||
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
||||
|
@ -157,19 +169,36 @@ static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
|
|||
}
|
||||
|
||||
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
|
||||
#if !NORM2_HARDCODE_NFC_DATA
|
||||
delete nfcSingleton;
|
||||
nfcSingleton = NULL;
|
||||
nfcInitOnce.reset();
|
||||
#endif
|
||||
|
||||
delete nfkcSingleton;
|
||||
nfkcSingleton = NULL;
|
||||
nfkcInitOnce.reset();
|
||||
|
||||
delete nfkc_cfSingleton;
|
||||
nfkc_cfSingleton = NULL;
|
||||
nfkc_cfInitOnce.reset();
|
||||
|
||||
uhash_close(cache);
|
||||
cache=NULL;
|
||||
nfkcInitOnce.reset();
|
||||
nfkc_cfInitOnce.reset();
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#if !NORM2_HARDCODE_NFC_DATA
|
||||
// Returns the NFC Norm2AllModes singleton, creating it on first use.
// Returns NULL without doing anything if errorCode already reports a failure.
const Norm2AllModes *
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        // One-time, guarded initialization keyed by the "nfc" data name.
        umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
        return nfcSingleton;
    }
    return NULL;
}
|
||||
#endif
|
||||
|
||||
const Norm2AllModes *
|
||||
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return NULL; }
|
||||
|
@ -184,6 +213,36 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
|
|||
return nfkc_cfSingleton;
|
||||
}
|
||||
|
||||
#if !NORM2_HARDCODE_NFC_DATA
|
||||
// Returns the composing (NFC) normalizer of the NFC Norm2AllModes instance,
// or NULL when that instance could not be obtained.
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return &modes->comp;
}
|
||||
|
||||
// Returns the decomposing (NFD) normalizer of the NFC Norm2AllModes instance,
// or NULL when that instance could not be obtained.
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return &modes->decomp;
}
|
||||
|
||||
// Returns the fcd member of the NFC Norm2AllModes instance,
// or NULL when that instance could not be obtained.
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return &modes->fcd;
}
|
||||
|
||||
// Returns the fcc member of the NFC Norm2AllModes instance,
// or NULL when that instance could not be obtained.
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return &modes->fcc;
}
|
||||
|
||||
// Returns the impl pointer held by the NFC Norm2AllModes instance,
// or NULL when that instance could not be obtained.
const Normalizer2Impl *
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *modes=Norm2AllModes::getNFCInstance(errorCode);
    if(modes==NULL) {
        return NULL;
    }
    return modes->impl;
}
|
||||
#endif
|
||||
|
||||
const Normalizer2 *
|
||||
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
|
||||
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
||||
|
@ -247,7 +306,7 @@ Normalizer2::getInstance(const char *packageName,
|
|||
}
|
||||
void *temp=uhash_get(cache, name);
|
||||
if(temp==NULL) {
|
||||
int32_t keyLength=uprv_strlen(name)+1;
|
||||
int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
|
||||
char *nameCopy=(char *)uprv_malloc(keyLength);
|
||||
if(nameCopy==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
|
|
|
@ -45,9 +45,9 @@ static int32_t ncat(char *buffer, uint32_t buflen, ...) {
|
|||
}
|
||||
|
||||
va_start(args, buflen);
|
||||
while ((str = va_arg(args, char *))) {
|
||||
while ((str = va_arg(args, char *)) != 0) {
|
||||
char c;
|
||||
while (p != e && (c = *str++)) {
|
||||
while (p != e && (c = *str++) != 0) {
|
||||
*p++ = c;
|
||||
}
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ ICUDataTable::ICUDataTable(const char* path, const Locale& locale)
|
|||
: path(NULL), locale(Locale::getRoot())
|
||||
{
|
||||
if (path) {
|
||||
int32_t len = uprv_strlen(path);
|
||||
int32_t len = static_cast<int32_t>(uprv_strlen(path));
|
||||
this->path = (const char*) uprv_malloc(len + 1);
|
||||
if (this->path) {
|
||||
uprv_strcpy((char *)this->path, path);
|
||||
|
@ -560,21 +560,21 @@ LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage,
|
|||
}
|
||||
|
||||
UnicodeString&
|
||||
LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
|
||||
LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
|
||||
UnicodeString& result) const {
|
||||
if (locale.isBogus()) {
|
||||
if (loc.isBogus()) {
|
||||
result.setToBogus();
|
||||
return result;
|
||||
}
|
||||
UnicodeString resultName;
|
||||
|
||||
const char* lang = locale.getLanguage();
|
||||
const char* lang = loc.getLanguage();
|
||||
if (uprv_strlen(lang) == 0) {
|
||||
lang = "root";
|
||||
}
|
||||
const char* script = locale.getScript();
|
||||
const char* country = locale.getCountry();
|
||||
const char* variant = locale.getVariant();
|
||||
const char* script = loc.getScript();
|
||||
const char* country = loc.getCountry();
|
||||
const char* variant = loc.getVariant();
|
||||
|
||||
UBool hasScript = uprv_strlen(script) > 0;
|
||||
UBool hasCountry = uprv_strlen(country) > 0;
|
||||
|
@ -630,14 +630,14 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
|
|||
resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen);
|
||||
resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen);
|
||||
|
||||
LocalPointer<StringEnumeration> e(locale.createKeywords(status));
|
||||
LocalPointer<StringEnumeration> e(loc.createKeywords(status));
|
||||
if (e.isValid() && U_SUCCESS(status)) {
|
||||
UnicodeString temp2;
|
||||
char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
|
||||
const char* key;
|
||||
while ((key = e->next((int32_t *)0, status)) != NULL) {
|
||||
value[0] = 0;
|
||||
locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
|
||||
loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
|
||||
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -31,9 +31,12 @@
|
|||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "putilimp.h"
|
||||
#include "mutex.h"
|
||||
|
@ -43,9 +46,11 @@
|
|||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
#include "uhash.h"
|
||||
#include "ulocimp.h"
|
||||
#include "ucln_cmn.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "charstr.h"
|
||||
#include "bytesinkutil.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV locale_cleanup(void);
|
||||
|
@ -424,49 +429,70 @@ Locale::Locale(const Locale &other)
|
|||
*this = other;
|
||||
}
|
||||
|
||||
Locale &Locale::operator=(const Locale &other)
|
||||
{
|
||||
// Move constructor. Starts with fullName and baseName pointing at the
// internal buffer, then delegates the actual transfer to move assignment.
Locale::Locale(Locale&& other) U_NOEXCEPT
    : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
    operator=(std::move(other));
}
|
||||
|
||||
Locale& Locale::operator=(const Locale& other) {
|
||||
if (this == &other) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
/* Free our current storage */
|
||||
if (baseName != fullName) {
|
||||
uprv_free(baseName);
|
||||
}
|
||||
baseName = NULL;
|
||||
if(fullName != fullNameBuffer) {
|
||||
uprv_free(fullName);
|
||||
fullName = fullNameBuffer;
|
||||
setToBogus();
|
||||
|
||||
if (other.fullName == other.fullNameBuffer) {
|
||||
uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
|
||||
} else if (other.fullName == nullptr) {
|
||||
fullName = nullptr;
|
||||
} else {
|
||||
fullName = uprv_strdup(other.fullName);
|
||||
if (fullName == nullptr) return *this;
|
||||
}
|
||||
|
||||
/* Allocate the full name if necessary */
|
||||
if(other.fullName != other.fullNameBuffer) {
|
||||
fullName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(other.fullName)+1));
|
||||
if (fullName == NULL) {
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
/* Copy the full name */
|
||||
uprv_strcpy(fullName, other.fullName);
|
||||
|
||||
/* Copy the baseName if it differs from fullName. */
|
||||
if (other.baseName == other.fullName) {
|
||||
baseName = fullName;
|
||||
} else {
|
||||
if (other.baseName) {
|
||||
baseName = uprv_strdup(other.baseName);
|
||||
}
|
||||
} else if (other.baseName != nullptr) {
|
||||
baseName = uprv_strdup(other.baseName);
|
||||
if (baseName == nullptr) return *this;
|
||||
}
|
||||
|
||||
/* Copy the language and country fields */
|
||||
uprv_strcpy(language, other.language);
|
||||
uprv_strcpy(script, other.script);
|
||||
uprv_strcpy(country, other.country);
|
||||
|
||||
/* The variantBegin is an offset, just copy it */
|
||||
variantBegin = other.variantBegin;
|
||||
fIsBogus = other.fIsBogus;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Move assignment. Takes over other's heap-allocated name storage (or copies
// the inline buffer when other uses it), copies the fixed-size subtag fields,
// and leaves other with fullName and baseName pointing at its own inline buffer.
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
    // Self-move must be a no-op: the code below would free the storage it
    // then re-adopts and would call uprv_strcpy() with identical source and
    // destination. The copy assignment operator has the same guard.
    if (this == &other) {
        return *this;
    }

    // Release our current storage before adopting other's.
    if (baseName != fullName) uprv_free(baseName);
    if (fullName != fullNameBuffer) uprv_free(fullName);

    if (other.fullName == other.fullNameBuffer) {
        // The inline buffer cannot be stolen; copy its contents instead.
        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
        fullName = fullNameBuffer;
    } else {
        fullName = other.fullName;
    }

    if (other.baseName == other.fullName) {
        // Keep the "baseName aliases fullName" relationship.
        baseName = fullName;
    } else {
        baseName = other.baseName;
    }

    uprv_strcpy(language, other.language);
    uprv_strcpy(script, other.script);
    uprv_strcpy(country, other.country);

    variantBegin = other.variantBegin;
    fIsBogus = other.fIsBogus;

    // Detach other from the storage we now own so it is not freed twice.
    other.baseName = other.fullName = other.fullNameBuffer;

    return *this;
}
|
||||
|
||||
|
@ -545,7 +571,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
|
|||
/* after uloc_getName/canonicalize() we know that only '_' are separators */
|
||||
separator = field[0] = fullName;
|
||||
fieldIdx = 1;
|
||||
while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) && fieldIdx < UPRV_LENGTHOF(field)-1) {
|
||||
while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 && fieldIdx < UPRV_LENGTHOF(field)-1) {
|
||||
field[fieldIdx] = separator + 1;
|
||||
fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
|
||||
fieldIdx++;
|
||||
|
@ -652,7 +678,7 @@ Locale::initBaseName(UErrorCode &status) {
|
|||
int32_t
|
||||
Locale::hashCode() const
|
||||
{
|
||||
return ustr_hashCharsN(fullName, uprv_strlen(fullName));
|
||||
return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -704,6 +730,276 @@ Locale::setDefault( const Locale& newLocale,
|
|||
locale_set_default_internal(localeID, status);
|
||||
}
|
||||
|
||||
void
|
||||
Locale::addLikelySubtags(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// The maximized locale ID string is often longer, but there is no good
|
||||
// heuristic to estimate just how much longer. Leave that to CharString.
|
||||
CharString maximizedLocaleID;
|
||||
int32_t maximizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName));
|
||||
|
||||
char* buffer;
|
||||
int32_t reslen;
|
||||
|
||||
for (;;) {
|
||||
buffer = maximizedLocaleID.getAppendBuffer(
|
||||
/*minCapacity=*/maximizedLocaleIDCapacity,
|
||||
/*desiredCapacityHint=*/maximizedLocaleIDCapacity,
|
||||
maximizedLocaleIDCapacity,
|
||||
status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
reslen = uloc_addLikelySubtags(
|
||||
fullName,
|
||||
buffer,
|
||||
maximizedLocaleIDCapacity,
|
||||
&status);
|
||||
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
maximizedLocaleIDCapacity = reslen;
|
||||
status = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
maximizedLocaleID.append(buffer, reslen, status);
|
||||
if (status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
status = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
|
||||
if (isBogus()) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Locale::minimizeSubtags(UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Except for a few edge cases (like the empty string, that is minimized to
|
||||
// "en__POSIX"), minimized locale ID strings will be either the same length
|
||||
// or shorter than their input.
|
||||
CharString minimizedLocaleID;
|
||||
int32_t minimizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName));
|
||||
|
||||
char* buffer;
|
||||
int32_t reslen;
|
||||
|
||||
for (;;) {
|
||||
buffer = minimizedLocaleID.getAppendBuffer(
|
||||
/*minCapacity=*/minimizedLocaleIDCapacity,
|
||||
/*desiredCapacityHint=*/minimizedLocaleIDCapacity,
|
||||
minimizedLocaleIDCapacity,
|
||||
status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
reslen = uloc_minimizeSubtags(
|
||||
fullName,
|
||||
buffer,
|
||||
minimizedLocaleIDCapacity,
|
||||
&status);
|
||||
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Because of the internal minimal buffer size of CharString, I can't
|
||||
// think of any input data for which this could possibly ever happen.
|
||||
// Maybe it would be better replaced with an assertion instead?
|
||||
minimizedLocaleIDCapacity = reslen;
|
||||
status = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
minimizedLocaleID.append(buffer, reslen, status);
|
||||
if (status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
status = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
|
||||
if (isBogus()) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a Locale from a BCP-47 language tag. Returns a bogus Locale and sets
// status on failure; the whole tag must be consumed by the parser or
// U_ILLEGAL_ARGUMENT_ERROR is set.
Locale U_EXPORT2
Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
{
    Locale result(Locale::eBOGUS);

    if (U_FAILURE(status)) {
        return result;
    }

    // If a BCP-47 language tag is passed as the language parameter to the
    // normal Locale constructor, it will actually fall back to invoking
    // uloc_forLanguageTag() to parse it if it somehow is able to detect that
    // the string actually is BCP-47. This works well for things like strings
    // using BCP-47 extensions, but it does not at all work for things like
    // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
    // interpret as ICU locale IDs and because of that won't trigger the BCP-47
    // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
    // and then Locale::init(), instead of just calling the normal constructor.

    // All simple language tags will have the exact same length as ICU locale
    // ID strings as they have as BCP-47 strings (like "en_US" for "en-US").
    CharString localeID;
    int32_t capacity = tag.size();
    char* out = nullptr;
    int32_t consumed = 0;
    int32_t length = 0;

    bool overflowed;
    do {
        // capacity is also the out-parameter receiving the granted size.
        out = localeID.getAppendBuffer(capacity, capacity, capacity, status);
        if (U_FAILURE(status)) {
            return result;
        }

        length = ulocimp_forLanguageTag(
                tag.data(), tag.length(), out, capacity, &consumed, &status);

        overflowed = (status == U_BUFFER_OVERFLOW_ERROR);
        if (overflowed) {
            // For all BCP-47 language tags that use extensions, the
            // corresponding ICU locale ID will be longer but
            // uloc_forLanguageTag() does compute the exact length needed so
            // this memory reallocation will be done at most once.
            capacity = length;
            status = U_ZERO_ERROR;
        }
    } while (overflowed);

    if (U_FAILURE(status)) {
        return result;
    }

    // Reject tags that were only partially parsed.
    if (consumed != tag.size()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return result;
    }

    localeID.append(out, length, status);
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;  // Terminators provided by CharString.
    }
    if (U_FAILURE(status)) {
        return result;
    }

    result.init(localeID.data(), /*canonicalize=*/FALSE);
    if (result.isBogus()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return result;
}
|
||||
|
||||
void
|
||||
Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (fIsBogus) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// All simple language tags will have the exact same length as BCP-47
|
||||
// strings as they have as ICU locale IDs (like "en-US" for "en_US").
|
||||
LocalMemory<char> scratch;
|
||||
int32_t scratch_capacity = static_cast<int32_t>(uprv_strlen(fullName));
|
||||
|
||||
if (scratch_capacity == 0) {
|
||||
scratch_capacity = 3; // "und"
|
||||
}
|
||||
|
||||
char* buffer;
|
||||
int32_t result_capacity, reslen;
|
||||
|
||||
for (;;) {
|
||||
if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
buffer = sink.GetAppendBuffer(
|
||||
/*min_capacity=*/scratch_capacity,
|
||||
/*desired_capacity_hint=*/scratch_capacity,
|
||||
scratch.getAlias(),
|
||||
scratch_capacity,
|
||||
&result_capacity);
|
||||
|
||||
reslen = uloc_toLanguageTag(
|
||||
fullName,
|
||||
buffer,
|
||||
result_capacity,
|
||||
/*strict=*/FALSE,
|
||||
&status);
|
||||
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
// For some very few edge cases a language tag will be longer as a
|
||||
// BCP-47 string than it is as an ICU locale ID. Most notoriously "C"
|
||||
// expands to the BCP-47 tag "en-US-u-va-posix", 16 times longer, and
|
||||
// it'll take several calls to uloc_toLanguageTag() to figure that out.
|
||||
// https://unicode-org.atlassian.net/browse/ICU-20132
|
||||
scratch_capacity = reslen;
|
||||
status = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
sink.Append(buffer, reslen);
|
||||
if (status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
status = U_ZERO_ERROR; // Terminators not used.
|
||||
}
|
||||
}
|
||||
|
||||
Locale U_EXPORT2
|
||||
Locale::createFromName (const char *name)
|
||||
{
|
||||
|
@ -1010,20 +1306,84 @@ KeywordEnumeration::~KeywordEnumeration() {
|
|||
uprv_free(keywords);
|
||||
}
|
||||
|
||||
// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
|
||||
// the next() method for each keyword before returning it.
|
||||
class UnicodeKeywordEnumeration : public KeywordEnumeration {
|
||||
public:
|
||||
using KeywordEnumeration::KeywordEnumeration;
|
||||
virtual ~UnicodeKeywordEnumeration();
|
||||
|
||||
virtual const char* next(int32_t* resultLength, UErrorCode& status) {
|
||||
const char* legacy_key = KeywordEnumeration::next(nullptr, status);
|
||||
if (U_SUCCESS(status) && legacy_key != nullptr) {
|
||||
const char* key = uloc_toUnicodeLocaleKey(legacy_key);
|
||||
if (key == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else {
|
||||
if (resultLength != nullptr) {
|
||||
*resultLength = static_cast<int32_t>(uprv_strlen(key));
|
||||
}
|
||||
return key;
|
||||
}
|
||||
}
|
||||
if (resultLength != nullptr) *resultLength = 0;
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
// Out-of-line virtual destructor to serve as the "key function".
|
||||
UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
|
||||
|
||||
StringEnumeration *
|
||||
Locale::createKeywords(UErrorCode &status) const
|
||||
{
|
||||
char keywords[256];
|
||||
int32_t keywordCapacity = 256;
|
||||
int32_t keywordCapacity = sizeof keywords;
|
||||
StringEnumeration *result = NULL;
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const char* variantStart = uprv_strchr(fullName, '@');
|
||||
const char* assignment = uprv_strchr(fullName, '=');
|
||||
if(variantStart) {
|
||||
if(assignment > variantStart) {
|
||||
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
|
||||
if(keyLen) {
|
||||
if(U_SUCCESS(status) && keyLen) {
|
||||
result = new KeywordEnumeration(keywords, keyLen, 0, status);
|
||||
if (!result) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
StringEnumeration *
|
||||
Locale::createUnicodeKeywords(UErrorCode &status) const
|
||||
{
|
||||
char keywords[256];
|
||||
int32_t keywordCapacity = sizeof keywords;
|
||||
StringEnumeration *result = NULL;
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
const char* variantStart = uprv_strchr(fullName, '@');
|
||||
const char* assignment = uprv_strchr(fullName, '=');
|
||||
if(variantStart) {
|
||||
if(assignment > variantStart) {
|
||||
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
|
||||
if(U_SUCCESS(status) && keyLen) {
|
||||
result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
|
||||
if (!result) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
|
@ -1038,6 +1398,105 @@ Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, U
|
|||
return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
|
||||
}
|
||||
|
||||
void
|
||||
Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (fIsBogus) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Remove the need for a const char* to a NUL terminated buffer.
|
||||
const CharString keywordName_nul(keywordName, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
LocalMemory<char> scratch;
|
||||
int32_t scratch_capacity = 16; // Arbitrarily chosen default size.
|
||||
|
||||
char* buffer;
|
||||
int32_t result_capacity, reslen;
|
||||
|
||||
for (;;) {
|
||||
if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
buffer = sink.GetAppendBuffer(
|
||||
/*min_capacity=*/scratch_capacity,
|
||||
/*desired_capacity_hint=*/scratch_capacity,
|
||||
scratch.getAlias(),
|
||||
scratch_capacity,
|
||||
&result_capacity);
|
||||
|
||||
reslen = uloc_getKeywordValue(
|
||||
fullName,
|
||||
keywordName_nul.data(),
|
||||
buffer,
|
||||
result_capacity,
|
||||
&status);
|
||||
|
||||
if (status != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
scratch_capacity = reslen;
|
||||
status = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
sink.Append(buffer, reslen);
|
||||
if (status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
status = U_ZERO_ERROR; // Terminators not used.
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Locale::getUnicodeKeywordValue(StringPiece keywordName,
|
||||
ByteSink& sink,
|
||||
UErrorCode& status) const {
|
||||
// TODO: Remove the need for a const char* to a NUL terminated buffer.
|
||||
const CharString keywordName_nul(keywordName, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
|
||||
|
||||
if (legacy_key == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
CharString legacy_value;
|
||||
{
|
||||
CharStringByteSink sink(&legacy_value);
|
||||
getKeywordValue(legacy_key, sink, status);
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char* unicode_value = uloc_toUnicodeLocaleType(
|
||||
keywordName_nul.data(), legacy_value.data());
|
||||
|
||||
if (unicode_value == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
|
||||
}
|
||||
|
||||
void
|
||||
Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
|
||||
{
|
||||
|
@ -1048,6 +1507,46 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
Locale::setKeywordValue(StringPiece keywordName,
|
||||
StringPiece keywordValue,
|
||||
UErrorCode& status) {
|
||||
// TODO: Remove the need for a const char* to a NUL terminated buffer.
|
||||
const CharString keywordName_nul(keywordName, status);
|
||||
const CharString keywordValue_nul(keywordValue, status);
|
||||
setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
|
||||
}
|
||||
|
||||
void
|
||||
Locale::setUnicodeKeywordValue(StringPiece keywordName,
|
||||
StringPiece keywordValue,
|
||||
UErrorCode& status) {
|
||||
// TODO: Remove the need for a const char* to a NUL terminated buffer.
|
||||
const CharString keywordName_nul(keywordName, status);
|
||||
const CharString keywordValue_nul(keywordValue, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
|
||||
|
||||
if (legacy_key == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
const char* legacy_value =
|
||||
uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
|
||||
|
||||
if (legacy_value == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
setKeywordValue(legacy_key, legacy_value, status);
|
||||
}
|
||||
|
||||
const char *
|
||||
Locale::getBaseName() const {
|
||||
return baseName;
|
||||
|
|
|
@ -34,9 +34,9 @@ U_NAMESPACE_BEGIN
|
|||
// private mutex where possible.
|
||||
|
||||
// For example:
|
||||
//
|
||||
// UMutex myMutex;
|
||||
//
|
||||
//
|
||||
// UMutex myMutex = U_MUTEX_INITIALIZER;
|
||||
//
|
||||
// void Function(int arg1, int arg2)
|
||||
// {
|
||||
// static Object* foo; // Shared read-write object
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -34,9 +34,11 @@
|
|||
|
||||
using icu::Normalizer2Impl;
|
||||
|
||||
#if NORM2_HARDCODE_NFC_DATA
|
||||
// NFC/NFD data machine-generated by gennorm2 --csource
|
||||
#define INCLUDED_FROM_NORMALIZER2_CPP
|
||||
#include "norm2_nfc_data.h"
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -176,6 +178,36 @@ FCDNormalizer2::~FCDNormalizer2() {}
|
|||
|
||||
// instance cache ---------------------------------------------------------- ***
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV uprv_normalizer2_cleanup();
|
||||
U_CDECL_END
|
||||
|
||||
static Normalizer2 *noopSingleton;
|
||||
static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
noopSingleton=new NoopNormalizer2;
|
||||
if(noopSingleton==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
|
||||
}
|
||||
|
||||
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return NULL; }
|
||||
umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
|
||||
return noopSingleton;
|
||||
}
|
||||
|
||||
const Normalizer2Impl *
|
||||
Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
|
||||
return &((Normalizer2WithImpl *)norm2)->impl;
|
||||
}
|
||||
|
||||
Norm2AllModes::~Norm2AllModes() {
|
||||
delete impl;
|
||||
}
|
||||
|
@ -195,6 +227,7 @@ Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
|
|||
return allModes;
|
||||
}
|
||||
|
||||
#if NORM2_HARDCODE_NFC_DATA
|
||||
Norm2AllModes *
|
||||
Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
|
@ -210,48 +243,15 @@ Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
|
|||
return createInstance(impl, errorCode);
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV uprv_normalizer2_cleanup();
|
||||
U_CDECL_END
|
||||
|
||||
static Norm2AllModes *nfcSingleton;
|
||||
static Normalizer2 *noopSingleton;
|
||||
|
||||
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
|
||||
static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
|
||||
|
||||
// UInitOnce singleton initialization functions
|
||||
static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) {
|
||||
nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
|
||||
ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
|
||||
}
|
||||
|
||||
static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
noopSingleton=new NoopNormalizer2;
|
||||
if(noopSingleton==NULL) {
|
||||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
|
||||
}
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static UBool U_CALLCONV uprv_normalizer2_cleanup() {
|
||||
delete nfcSingleton;
|
||||
nfcSingleton = NULL;
|
||||
delete noopSingleton;
|
||||
noopSingleton = NULL;
|
||||
nfcInitOnce.reset();
|
||||
noopInitOnce.reset();
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
const Norm2AllModes *
|
||||
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return NULL; }
|
||||
|
@ -281,23 +281,29 @@ const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
|
|||
return allModes!=NULL ? &allModes->fcc : NULL;
|
||||
}
|
||||
|
||||
const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return NULL; }
|
||||
umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
|
||||
return noopSingleton;
|
||||
}
|
||||
|
||||
const Normalizer2Impl *
|
||||
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
|
||||
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
||||
return allModes!=NULL ? allModes->impl : NULL;
|
||||
}
|
||||
#endif // NORM2_HARDCODE_NFC_DATA
|
||||
|
||||
const Normalizer2Impl *
|
||||
Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
|
||||
return &((Normalizer2WithImpl *)norm2)->impl;
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static UBool U_CALLCONV uprv_normalizer2_cleanup() {
|
||||
delete noopSingleton;
|
||||
noopSingleton = NULL;
|
||||
noopInitOnce.reset();
|
||||
#if NORM2_HARDCODE_NFC_DATA
|
||||
delete nfcSingleton;
|
||||
nfcSingleton = NULL;
|
||||
nfcInitOnce.reset();
|
||||
#endif
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
// C API ------------------------------------------------------------------- ***
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
// #define UCPTRIE_DEBUG
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
@ -24,7 +26,9 @@
|
|||
#include "unicode/edits.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/umutablecptrie.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/utf8.h"
|
||||
|
@ -34,8 +38,8 @@
|
|||
#include "normalizer2impl.h"
|
||||
#include "putilimp.h"
|
||||
#include "uassert.h"
|
||||
#include "ucptrie_impl.h"
|
||||
#include "uset_imp.h"
|
||||
#include "utrie2.h"
|
||||
#include "uvector.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -62,7 +66,7 @@ inline uint8_t leadByteForCP(UChar32 c) {
|
|||
* Returns the code point from one single well-formed UTF-8 byte sequence
|
||||
* between cpStart and cpLimit.
|
||||
*
|
||||
* UTrie2 UTF-8 macros do not assemble whole code points (for efficiency).
|
||||
* Trie UTF-8 macros do not assemble whole code points (for efficiency).
|
||||
* When we do need the code point, we call this function.
|
||||
* We should not need it for normalization-inert data (norm16==0).
|
||||
* Illegal sequences yield the error value norm16==0 just like real normalization-inert code points.
|
||||
|
@ -122,7 +126,7 @@ int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) {
|
|||
}
|
||||
} else if (src[1] == 0x87) {
|
||||
uint8_t t = src[2];
|
||||
if ((int8_t)t <= (int8_t)0x82) {
|
||||
if ((int8_t)t <= (int8_t)0x82u) {
|
||||
return t - (0xa7 - 0x40);
|
||||
}
|
||||
}
|
||||
|
@ -253,7 +257,7 @@ UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &e
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
UBool ReorderingBuffer::append(const UChar *s, int32_t length,
|
||||
UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD,
|
||||
uint8_t leadCC, uint8_t trailCC,
|
||||
UErrorCode &errorCode) {
|
||||
if(length==0) {
|
||||
|
@ -280,8 +284,11 @@ UBool ReorderingBuffer::append(const UChar *s, int32_t length,
|
|||
while(i<length) {
|
||||
U16_NEXT(s, i, length, c);
|
||||
if(i<length) {
|
||||
// s must be in NFD, otherwise we need to use getCC().
|
||||
leadCC=Normalizer2Impl::getCCFromYesOrMaybe(impl.getNorm16(c));
|
||||
if (isNFD) {
|
||||
leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c));
|
||||
} else {
|
||||
leadCC = impl.getCC(impl.getNorm16(c));
|
||||
}
|
||||
} else {
|
||||
leadCC=trailCC;
|
||||
}
|
||||
|
@ -411,7 +418,8 @@ struct CanonIterData : public UMemory {
|
|||
CanonIterData(UErrorCode &errorCode);
|
||||
~CanonIterData();
|
||||
void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode);
|
||||
UTrie2 *trie;
|
||||
UMutableCPTrie *mutableTrie;
|
||||
UCPTrie *trie;
|
||||
UVector canonStartSets; // contains UnicodeSet *
|
||||
};
|
||||
|
||||
|
@ -420,22 +428,22 @@ Normalizer2Impl::~Normalizer2Impl() {
|
|||
}
|
||||
|
||||
void
|
||||
Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
|
||||
Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie,
|
||||
const uint16_t *inExtraData, const uint8_t *inSmallFCD) {
|
||||
minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
|
||||
minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
|
||||
minLcccCP=inIndexes[IX_MIN_LCCC_CP];
|
||||
minDecompNoCP = static_cast<UChar>(inIndexes[IX_MIN_DECOMP_NO_CP]);
|
||||
minCompNoMaybeCP = static_cast<UChar>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]);
|
||||
minLcccCP = static_cast<UChar>(inIndexes[IX_MIN_LCCC_CP]);
|
||||
|
||||
minYesNo=inIndexes[IX_MIN_YES_NO];
|
||||
minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
|
||||
minNoNo=inIndexes[IX_MIN_NO_NO];
|
||||
minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
|
||||
minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
|
||||
minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
|
||||
limitNoNo=inIndexes[IX_LIMIT_NO_NO];
|
||||
minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
|
||||
U_ASSERT((minMaybeYes&7)==0); // 8-aligned for noNoDelta bit fields
|
||||
centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;
|
||||
minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]);
|
||||
minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]);
|
||||
minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]);
|
||||
minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]);
|
||||
minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]);
|
||||
minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]);
|
||||
limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]);
|
||||
minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]);
|
||||
U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields
|
||||
centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1;
|
||||
|
||||
normTrie=inTrie;
|
||||
|
||||
|
@ -445,75 +453,8 @@ Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie,
|
|||
smallFCD=inSmallFCD;
|
||||
}
|
||||
|
||||
class LcccContext {
|
||||
public:
|
||||
LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {}
|
||||
|
||||
void handleRange(UChar32 start, UChar32 end, uint16_t norm16) {
|
||||
if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
|
||||
norm16 != Normalizer2Impl::JAMO_VT) {
|
||||
set.add(start, end);
|
||||
} else if (impl.minNoNoCompNoMaybeCC <= norm16 && norm16 < impl.limitNoNo) {
|
||||
uint16_t fcd16=impl.getFCD16(start);
|
||||
if(fcd16>0xff) { set.add(start, end); }
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const Normalizer2Impl &impl;
|
||||
UnicodeSet &set;
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
struct PropertyStartsContext {
|
||||
PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder)
|
||||
: impl(ni), sa(adder) {}
|
||||
|
||||
const Normalizer2Impl &impl;
|
||||
const USetAdder *sa;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
static UBool U_CALLCONV
|
||||
enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
|
||||
((LcccContext *)context)->handleRange(start, end, (uint16_t)value);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
|
||||
/* add the start code point to the USet */
|
||||
const PropertyStartsContext *ctx=(const PropertyStartsContext *)context;
|
||||
const USetAdder *sa=ctx->sa;
|
||||
sa->add(sa->set, start);
|
||||
if (start != end && ctx->impl.isAlgorithmicNoNo((uint16_t)value) &&
|
||||
(value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
|
||||
// Range of code points with same-norm16-value algorithmic decompositions.
|
||||
// They might have different non-zero FCD16 values.
|
||||
uint16_t prevFCD16=ctx->impl.getFCD16(start);
|
||||
while(++start<=end) {
|
||||
uint16_t fcd16=ctx->impl.getFCD16(start);
|
||||
if(fcd16!=prevFCD16) {
|
||||
sa->add(sa->set, start);
|
||||
prevFCD16=fcd16;
|
||||
}
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) {
|
||||
/* add the start code point to the USet */
|
||||
const USetAdder *sa=(const USetAdder *)context;
|
||||
sa->add(sa->set, start);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static uint32_t U_CALLCONV
|
||||
segmentStarterMapper(const void * /*context*/, uint32_t value) {
|
||||
return value&CANON_NOT_SEGMENT_STARTER;
|
||||
|
@ -523,15 +464,44 @@ U_CDECL_END
|
|||
|
||||
void
|
||||
Normalizer2Impl::addLcccChars(UnicodeSet &set) const {
|
||||
LcccContext context(*this, set);
|
||||
utrie2_enum(normTrie, NULL, enumLcccRange, &context);
|
||||
UChar32 start = 0, end;
|
||||
uint32_t norm16;
|
||||
while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
|
||||
nullptr, nullptr, &norm16)) >= 0) {
|
||||
if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES &&
|
||||
norm16 != Normalizer2Impl::JAMO_VT) {
|
||||
set.add(start, end);
|
||||
} else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
|
||||
uint16_t fcd16 = getFCD16(start);
|
||||
if (fcd16 > 0xff) { set.add(start, end); }
|
||||
}
|
||||
start = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {
|
||||
/* add the start code point of each same-value range of each trie */
|
||||
PropertyStartsContext context(*this, sa);
|
||||
utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context);
|
||||
// Add the start code point of each same-value range of the trie.
|
||||
UChar32 start = 0, end;
|
||||
uint32_t value;
|
||||
while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT,
|
||||
nullptr, nullptr, &value)) >= 0) {
|
||||
sa->add(sa->set, start);
|
||||
if (start != end && isAlgorithmicNoNo((uint16_t)value) &&
|
||||
(value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) {
|
||||
// Range of code points with same-norm16-value algorithmic decompositions.
|
||||
// They might have different non-zero FCD16 values.
|
||||
uint16_t prevFCD16 = getFCD16(start);
|
||||
while (++start <= end) {
|
||||
uint16_t fcd16 = getFCD16(start);
|
||||
if (fcd16 != prevFCD16) {
|
||||
sa->add(sa->set, start);
|
||||
prevFCD16 = fcd16;
|
||||
}
|
||||
}
|
||||
}
|
||||
start = end + 1;
|
||||
}
|
||||
|
||||
/* add Hangul LV syllables and LV+1 because of skippables */
|
||||
for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) {
|
||||
|
@ -543,10 +513,15 @@ Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode
|
|||
|
||||
void
|
||||
Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const {
|
||||
/* add the start code point of each same-value range of the canonical iterator data trie */
|
||||
if(ensureCanonIterData(errorCode)) {
|
||||
// currently only used for the SEGMENT_STARTER property
|
||||
utrie2_enum(fCanonIterData->trie, segmentStarterMapper, enumPropertyStartsRange, sa);
|
||||
// Add the start code point of each same-value range of the canonical iterator data trie.
|
||||
if (!ensureCanonIterData(errorCode)) { return; }
|
||||
// Currently only used for the SEGMENT_STARTER property.
|
||||
UChar32 start = 0, end;
|
||||
uint32_t value;
|
||||
while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
segmentStarterMapper, nullptr, &value)) >= 0) {
|
||||
sa->add(sa->set, start);
|
||||
start = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -633,27 +608,23 @@ Normalizer2Impl::decompose(const UChar *src, const UChar *limit,
|
|||
// count code units below the minimum or with irrelevant data for the quick check
|
||||
for(prevSrc=src; src!=limit;) {
|
||||
if( (c=*src)<minNoCP ||
|
||||
isMostDecompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
|
||||
isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
|
||||
) {
|
||||
++src;
|
||||
} else if(!U16_IS_SURROGATE(c)) {
|
||||
} else if(!U16_IS_LEAD(c)) {
|
||||
break;
|
||||
} else {
|
||||
UChar c2;
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
|
||||
if(isMostDecompYesAndZeroCC(norm16)) {
|
||||
src+=2;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else /* trail surrogate */ {
|
||||
if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
|
||||
--src;
|
||||
c=U16_GET_SUPPLEMENTARY(c2, c);
|
||||
}
|
||||
}
|
||||
if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
|
||||
src+=U16_LENGTH(c);
|
||||
} else {
|
||||
break;
|
||||
++src; // unpaired lead surrogate: inert
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -713,7 +684,7 @@ Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit,
|
|||
const UChar *prevSrc = src;
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
|
||||
if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
|
||||
return prevSrc;
|
||||
}
|
||||
|
@ -737,7 +708,7 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
|
|||
}
|
||||
// Maps to an isCompYesAndZeroCC.
|
||||
c=mapAlgorithmic(c, norm16);
|
||||
norm16=getNorm16(c);
|
||||
norm16=getRawNorm16(c);
|
||||
}
|
||||
if (norm16 < minYesNo) {
|
||||
// c does not decompose
|
||||
|
@ -758,7 +729,7 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
|
|||
} else {
|
||||
leadCC=0;
|
||||
}
|
||||
return buffer.append((const UChar *)mapping+1, length, leadCC, trailCC, errorCode);
|
||||
return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
|
||||
}
|
||||
|
||||
const uint8_t *
|
||||
|
@ -771,7 +742,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
|||
while (src < limit) {
|
||||
const uint8_t *prevSrc = src;
|
||||
uint16_t norm16;
|
||||
UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
|
||||
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
|
||||
// Get the decomposition and the lead and trail cc's.
|
||||
UChar32 c = U_SENTINEL;
|
||||
if (norm16 >= limitNoNo) {
|
||||
|
@ -789,7 +760,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
|||
}
|
||||
c = codePointFromValidUTF8(prevSrc, src);
|
||||
c = mapAlgorithmic(c, norm16);
|
||||
norm16 = getNorm16(c);
|
||||
norm16 = getRawNorm16(c);
|
||||
} else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
|
||||
return prevSrc;
|
||||
}
|
||||
|
@ -828,7 +799,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
|||
} else {
|
||||
leadCC = 0;
|
||||
}
|
||||
if (!buffer.append((const char16_t *)mapping+1, length, leadCC, trailCC, errorCode)) {
|
||||
if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
@ -854,7 +825,7 @@ Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) c
|
|||
length=0;
|
||||
U16_APPEND_UNSAFE(buffer, length, c);
|
||||
// The mapping might decompose further.
|
||||
norm16 = getNorm16(c);
|
||||
norm16 = getRawNorm16(c);
|
||||
}
|
||||
if (norm16 < minYesNo) {
|
||||
return decomp;
|
||||
|
@ -926,19 +897,30 @@ void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit,
|
|||
return;
|
||||
}
|
||||
// Just merge the strings at the boundary.
|
||||
ForwardUTrie2StringIterator iter(normTrie, src, limit);
|
||||
uint8_t firstCC, prevCC, cc;
|
||||
firstCC=prevCC=cc=getCC(iter.next16());
|
||||
while(cc!=0) {
|
||||
prevCC=cc;
|
||||
cc=getCC(iter.next16());
|
||||
};
|
||||
bool isFirst = true;
|
||||
uint8_t firstCC = 0, prevCC = 0, cc;
|
||||
const UChar *p = src;
|
||||
while (p != limit) {
|
||||
const UChar *codePointStart = p;
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
|
||||
if ((cc = getCC(norm16)) == 0) {
|
||||
p = codePointStart;
|
||||
break;
|
||||
}
|
||||
if (isFirst) {
|
||||
firstCC = cc;
|
||||
isFirst = false;
|
||||
}
|
||||
prevCC = cc;
|
||||
}
|
||||
if(limit==NULL) { // appendZeroCC() needs limit!=NULL
|
||||
limit=u_strchr(iter.codePointStart, 0);
|
||||
limit=u_strchr(p, 0);
|
||||
}
|
||||
|
||||
if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) {
|
||||
buffer.appendZeroCC(iter.codePointStart, limit, errorCode);
|
||||
if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) {
|
||||
buffer.appendZeroCC(p, limit, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1085,7 +1067,7 @@ void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const
|
|||
}
|
||||
UChar32 composite=compositeAndFwd>>1;
|
||||
if((compositeAndFwd&1)!=0) {
|
||||
addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
|
||||
addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set);
|
||||
}
|
||||
set.add(composite);
|
||||
} while((firstUnit&COMP_1_LAST_TUPLE)==0);
|
||||
|
@ -1124,7 +1106,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
|
|||
prevCC=0;
|
||||
|
||||
for(;;) {
|
||||
UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
|
||||
cc=getCCFromYesOrMaybe(norm16);
|
||||
if( // this character combines backward and
|
||||
isMaybe(norm16) &&
|
||||
|
@ -1229,7 +1211,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
|
|||
// Is the composite a starter that combines forward?
|
||||
if(compositeAndFwd&1) {
|
||||
compositionsList=
|
||||
getCompositionsListForComposite(getNorm16(composite));
|
||||
getCompositionsListForComposite(getRawNorm16(composite));
|
||||
} else {
|
||||
compositionsList=NULL;
|
||||
}
|
||||
|
@ -1268,7 +1250,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart
|
|||
|
||||
UChar32
|
||||
Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
|
||||
uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0
|
||||
uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16
|
||||
const uint16_t *list;
|
||||
if(isInert(norm16)) {
|
||||
return U_SENTINEL;
|
||||
|
@ -1359,28 +1341,22 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
|
|||
return TRUE;
|
||||
}
|
||||
if( (c=*src)<minNoMaybeCP ||
|
||||
isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
|
||||
isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
|
||||
) {
|
||||
++src;
|
||||
} else {
|
||||
prevSrc = src++;
|
||||
if(!U16_IS_SURROGATE(c)) {
|
||||
if(!U16_IS_LEAD(c)) {
|
||||
break;
|
||||
} else {
|
||||
UChar c2;
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
|
||||
++src;
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
|
||||
++src;
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
|
||||
if(!isCompYesAndZeroCC(norm16)) {
|
||||
break;
|
||||
}
|
||||
} else /* trail surrogate */ {
|
||||
if(prevBoundary<prevSrc && U16_IS_LEAD(c2=*(prevSrc-1))) {
|
||||
--prevSrc;
|
||||
c=U16_GET_SUPPLEMENTARY(c2, c);
|
||||
}
|
||||
}
|
||||
if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1529,7 +1505,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
|
|||
}
|
||||
uint8_t prevCC = cc;
|
||||
nextSrc = src;
|
||||
UTRIE2_U16_NEXT16(normTrie, nextSrc, limit, c, n16);
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16);
|
||||
if (n16 >= MIN_YES_YES_WITH_CC) {
|
||||
cc = getCCFromNormalYesOrMaybe(n16);
|
||||
if (prevCC > cc) {
|
||||
|
@ -1559,7 +1535,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit,
|
|||
// decompose and recompose.
|
||||
if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
|
||||
const UChar *p = prevSrc;
|
||||
UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, norm16);
|
||||
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16);
|
||||
if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
|
||||
prevSrc = p;
|
||||
}
|
||||
|
@ -1626,28 +1602,22 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
|
|||
return src;
|
||||
}
|
||||
if( (c=*src)<minNoMaybeCP ||
|
||||
isCompYesAndZeroCC(norm16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(normTrie, c))
|
||||
isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c))
|
||||
) {
|
||||
++src;
|
||||
} else {
|
||||
prevSrc = src++;
|
||||
if(!U16_IS_SURROGATE(c)) {
|
||||
if(!U16_IS_LEAD(c)) {
|
||||
break;
|
||||
} else {
|
||||
UChar c2;
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
|
||||
++src;
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
if(src!=limit && U16_IS_TRAIL(c2=*src)) {
|
||||
++src;
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c);
|
||||
if(!isCompYesAndZeroCC(norm16)) {
|
||||
break;
|
||||
}
|
||||
} else /* trail surrogate */ {
|
||||
if(prevBoundary<prevSrc && U16_IS_LEAD(c2=*(prevSrc-1))) {
|
||||
--prevSrc;
|
||||
c=U16_GET_SUPPLEMENTARY(c2, c);
|
||||
}
|
||||
}
|
||||
if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1665,7 +1635,7 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
|
|||
} else {
|
||||
const UChar *p = prevSrc;
|
||||
uint16_t n16;
|
||||
UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, n16);
|
||||
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16);
|
||||
if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
|
||||
prevBoundary = prevSrc;
|
||||
} else {
|
||||
|
@ -1699,7 +1669,7 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit,
|
|||
}
|
||||
uint8_t prevCC = cc;
|
||||
nextSrc = src;
|
||||
UTRIE2_U16_NEXT16(normTrie, nextSrc, limit, c, norm16);
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16);
|
||||
if (isMaybeOrNonZeroCC(norm16)) {
|
||||
cc = getCCFromYesOrMaybe(norm16);
|
||||
if (!(prevCC <= cc || cc == 0)) {
|
||||
|
@ -1786,7 +1756,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
|
|||
++src;
|
||||
} else {
|
||||
prevSrc = src;
|
||||
UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
|
||||
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
|
||||
if (!isCompYesAndZeroCC(norm16)) {
|
||||
break;
|
||||
}
|
||||
|
@ -1945,7 +1915,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
|
|||
}
|
||||
uint8_t prevCC = cc;
|
||||
nextSrc = src;
|
||||
UTRIE2_U8_NEXT16(normTrie, nextSrc, limit, n16);
|
||||
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16);
|
||||
if (n16 >= MIN_YES_YES_WITH_CC) {
|
||||
cc = getCCFromNormalYesOrMaybe(n16);
|
||||
if (prevCC > cc) {
|
||||
|
@ -1975,7 +1945,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
|
|||
// decompose and recompose.
|
||||
if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
|
||||
const uint8_t *p = prevSrc;
|
||||
UTRIE2_U8_PREV16(normTrie, prevBoundary, p, norm16);
|
||||
UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16);
|
||||
if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
|
||||
prevSrc = p;
|
||||
}
|
||||
|
@ -2023,7 +1993,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limi
|
|||
}
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16);
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16);
|
||||
return norm16HasCompBoundaryBefore(norm16);
|
||||
}
|
||||
|
||||
|
@ -2032,7 +2002,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *
|
|||
return TRUE;
|
||||
}
|
||||
uint16_t norm16;
|
||||
UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
|
||||
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
|
||||
return norm16HasCompBoundaryBefore(norm16);
|
||||
}
|
||||
|
||||
|
@ -2043,7 +2013,7 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p,
|
|||
}
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UTRIE2_U16_PREV16(normTrie, start, p, c, norm16);
|
||||
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
|
||||
return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
|
||||
}
|
||||
|
||||
|
@ -2053,36 +2023,42 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t
|
|||
return TRUE;
|
||||
}
|
||||
uint16_t norm16;
|
||||
UTRIE2_U8_PREV16(normTrie, start, p, norm16);
|
||||
UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16);
|
||||
return norm16HasCompBoundaryAfter(norm16, onlyContiguous);
|
||||
}
|
||||
|
||||
const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p,
|
||||
UBool onlyContiguous) const {
|
||||
BackwardUTrie2StringIterator iter(normTrie, start, p);
|
||||
for(;;) {
|
||||
uint16_t norm16=iter.previous16();
|
||||
while (p != start) {
|
||||
const UChar *codePointLimit = p;
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
|
||||
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
|
||||
return iter.codePointLimit;
|
||||
return codePointLimit;
|
||||
}
|
||||
if (hasCompBoundaryBefore(iter.codePoint, norm16)) {
|
||||
return iter.codePointStart;
|
||||
if (hasCompBoundaryBefore(c, norm16)) {
|
||||
return p;
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit,
|
||||
UBool onlyContiguous) const {
|
||||
ForwardUTrie2StringIterator iter(normTrie, p, limit);
|
||||
for(;;) {
|
||||
uint16_t norm16=iter.next16();
|
||||
if (hasCompBoundaryBefore(iter.codePoint, norm16)) {
|
||||
return iter.codePointStart;
|
||||
while (p != limit) {
|
||||
const UChar *codePointStart = p;
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
|
||||
if (hasCompBoundaryBefore(c, norm16)) {
|
||||
return codePointStart;
|
||||
}
|
||||
if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
|
||||
return iter.codePointLimit;
|
||||
return p;
|
||||
}
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const {
|
||||
|
@ -2130,7 +2106,7 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
|
|||
}
|
||||
// Maps to an isCompYesAndZeroCC.
|
||||
c=mapAlgorithmic(c, norm16);
|
||||
norm16=getNorm16(c);
|
||||
norm16=getRawNorm16(c);
|
||||
}
|
||||
}
|
||||
if(norm16<=minYesNo || isHangulLVT(norm16)) {
|
||||
|
@ -2195,17 +2171,10 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
|
|||
prevFCD16=0;
|
||||
++src;
|
||||
} else {
|
||||
if(U16_IS_SURROGATE(c)) {
|
||||
if(U16_IS_LEAD(c)) {
|
||||
UChar c2;
|
||||
if(U16_IS_SURROGATE_LEAD(c)) {
|
||||
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
}
|
||||
} else /* trail surrogate */ {
|
||||
if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
|
||||
--src;
|
||||
c=U16_GET_SUPPLEMENTARY(c2, c);
|
||||
}
|
||||
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
|
||||
c=U16_GET_SUPPLEMENTARY(c, c2);
|
||||
}
|
||||
}
|
||||
if((fcd16=getFCD16FromNormData(c))<=0xff) {
|
||||
|
@ -2336,7 +2305,7 @@ const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const
|
|||
const UChar *codePointLimit = p;
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UTRIE2_U16_PREV16(normTrie, start, p, c, norm16);
|
||||
UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16);
|
||||
if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16)) {
|
||||
return codePointLimit;
|
||||
}
|
||||
|
@ -2352,7 +2321,7 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
|
|||
const UChar *codePointStart=p;
|
||||
UChar32 c;
|
||||
uint16_t norm16;
|
||||
UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16);
|
||||
UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16);
|
||||
if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16)) {
|
||||
return codePointStart;
|
||||
}
|
||||
|
@ -2366,19 +2335,20 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
|
|||
// CanonicalIterator data -------------------------------------------------- ***
|
||||
|
||||
CanonIterData::CanonIterData(UErrorCode &errorCode) :
|
||||
trie(utrie2_open(0, 0, &errorCode)),
|
||||
mutableTrie(umutablecptrie_open(0, 0, &errorCode)), trie(nullptr),
|
||||
canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
|
||||
|
||||
CanonIterData::~CanonIterData() {
|
||||
utrie2_close(trie);
|
||||
umutablecptrie_close(mutableTrie);
|
||||
ucptrie_close(trie);
|
||||
}
|
||||
|
||||
void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) {
|
||||
uint32_t canonValue=utrie2_get32(trie, decompLead);
|
||||
uint32_t canonValue = umutablecptrie_get(mutableTrie, decompLead);
|
||||
if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
|
||||
// origin is the first character whose decomposition starts with
|
||||
// the character for which we are setting the value.
|
||||
utrie2_set32(trie, decompLead, canonValue|origin, &errorCode);
|
||||
umutablecptrie_set(mutableTrie, decompLead, canonValue|origin, &errorCode);
|
||||
} else {
|
||||
// origin is not the first character, or it is U+0000.
|
||||
UnicodeSet *set;
|
||||
|
@ -2390,7 +2360,7 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
|
|||
}
|
||||
UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
|
||||
canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
|
||||
utrie2_set32(trie, decompLead, canonValue, &errorCode);
|
||||
umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
|
||||
canonStartSets.addElement(set, errorCode);
|
||||
if(firstOrigin!=0) {
|
||||
set->add(firstOrigin);
|
||||
|
@ -2406,7 +2376,6 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
|
|||
class InitCanonIterData {
|
||||
public:
|
||||
static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode);
|
||||
static void handleRange(Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode);
|
||||
};
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
@ -2417,18 +2386,6 @@ initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) {
|
|||
InitCanonIterData::doInit(impl, errorCode);
|
||||
}
|
||||
|
||||
// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
|
||||
// context: the Normalizer2Impl
|
||||
static UBool U_CALLCONV
|
||||
enumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
if (value != Normalizer2Impl::INERT) {
|
||||
Normalizer2Impl *impl = (Normalizer2Impl *)context;
|
||||
InitCanonIterData::handleRange(impl, start, end, (uint16_t)value, errorCode);
|
||||
}
|
||||
return U_SUCCESS(errorCode);
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
|
||||
|
@ -2438,8 +2395,24 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
|
|||
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
utrie2_enum(impl->normTrie, NULL, enumCIDRangeHandler, impl);
|
||||
utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode);
|
||||
UChar32 start = 0, end;
|
||||
uint32_t value;
|
||||
while ((end = ucptrie_getRange(impl->normTrie, start,
|
||||
UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT,
|
||||
nullptr, nullptr, &value)) >= 0) {
|
||||
// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters.
|
||||
if (value != Normalizer2Impl::INERT) {
|
||||
impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
|
||||
}
|
||||
start = end + 1;
|
||||
}
|
||||
#ifdef UCPTRIE_DEBUG
|
||||
umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData");
|
||||
#endif
|
||||
impl->fCanonIterData->trie = umutablecptrie_buildImmutable(
|
||||
impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode);
|
||||
umutablecptrie_close(impl->fCanonIterData->mutableTrie);
|
||||
impl->fCanonIterData->mutableTrie = nullptr;
|
||||
}
|
||||
if (U_FAILURE(errorCode)) {
|
||||
delete impl->fCanonIterData;
|
||||
|
@ -2447,11 +2420,6 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) {
|
|||
}
|
||||
}
|
||||
|
||||
void InitCanonIterData::handleRange(
|
||||
Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode) {
|
||||
impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode);
|
||||
}
|
||||
|
||||
void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16,
|
||||
CanonIterData &newData,
|
||||
UErrorCode &errorCode) const {
|
||||
|
@ -2465,7 +2433,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
|
|||
return;
|
||||
}
|
||||
for(UChar32 c=start; c<=end; ++c) {
|
||||
uint32_t oldValue=utrie2_get32(newData.trie, c);
|
||||
uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c);
|
||||
uint32_t newValue=oldValue;
|
||||
if(isMaybeOrNonZeroCC(norm16)) {
|
||||
// not a segment starter if it occurs in a decomposition or has cc!=0
|
||||
|
@ -2483,7 +2451,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
|
|||
if (isDecompNoAlgorithmic(norm16_2)) {
|
||||
// Maps to an isCompYesAndZeroCC.
|
||||
c2 = mapAlgorithmic(c2, norm16_2);
|
||||
norm16_2 = getNorm16(c2);
|
||||
norm16_2 = getRawNorm16(c2);
|
||||
// No compatibility mappings for the CanonicalIterator.
|
||||
U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
|
||||
}
|
||||
|
@ -2510,10 +2478,10 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
|
|||
if(norm16_2>=minNoNo) {
|
||||
while(i<length) {
|
||||
U16_NEXT_UNSAFE(mapping, i, c2);
|
||||
uint32_t c2Value=utrie2_get32(newData.trie, c2);
|
||||
uint32_t c2Value = umutablecptrie_get(newData.mutableTrie, c2);
|
||||
if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
|
||||
utrie2_set32(newData.trie, c2, c2Value|CANON_NOT_SEGMENT_STARTER,
|
||||
&errorCode);
|
||||
umutablecptrie_set(newData.mutableTrie, c2,
|
||||
c2Value|CANON_NOT_SEGMENT_STARTER, &errorCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2524,7 +2492,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co
|
|||
}
|
||||
}
|
||||
if(newValue!=oldValue) {
|
||||
utrie2_set32(newData.trie, c, newValue, &errorCode);
|
||||
umutablecptrie_set(newData.mutableTrie, c, newValue, &errorCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2537,7 +2505,7 @@ UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const {
|
|||
}
|
||||
|
||||
int32_t Normalizer2Impl::getCanonValue(UChar32 c) const {
|
||||
return (int32_t)utrie2_get32(fCanonIterData->trie, c);
|
||||
return (int32_t)ucptrie_get(fCanonIterData->trie, c);
|
||||
}
|
||||
|
||||
const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const {
|
||||
|
@ -2561,7 +2529,7 @@ UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const {
|
|||
set.add(value);
|
||||
}
|
||||
if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
|
||||
uint16_t norm16=getNorm16(c);
|
||||
uint16_t norm16=getRawNorm16(c);
|
||||
if(norm16==JAMO_L) {
|
||||
UChar32 syllable=
|
||||
(UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT);
|
||||
|
@ -2608,7 +2576,7 @@ unorm2_swap(const UDataSwapper *ds,
|
|||
pInfo->dataFormat[1]==0x72 &&
|
||||
pInfo->dataFormat[2]==0x6d &&
|
||||
pInfo->dataFormat[3]==0x32 &&
|
||||
(1<=formatVersion0 && formatVersion0<=3)
|
||||
(1<=formatVersion0 && formatVersion0<=4)
|
||||
)) {
|
||||
udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n",
|
||||
pInfo->dataFormat[0], pInfo->dataFormat[1],
|
||||
|
@ -2669,9 +2637,9 @@ unorm2_swap(const UDataSwapper *ds,
|
|||
ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode);
|
||||
offset=nextOffset;
|
||||
|
||||
/* swap the UTrie2 */
|
||||
/* swap the trie */
|
||||
nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET];
|
||||
utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
|
||||
utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode);
|
||||
offset=nextOffset;
|
||||
|
||||
/* swap the uint16_t extraData[] */
|
||||
|
|
|
@ -24,12 +24,20 @@
|
|||
#if !UCONFIG_NO_NORMALIZATION
|
||||
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "mutex.h"
|
||||
#include "udataswp.h"
|
||||
#include "uset_imp.h"
|
||||
#include "utrie2.h"
|
||||
|
||||
// When the nfc.nrm data is *not* hardcoded into the common library
|
||||
// (with this constant set to 0),
|
||||
// then it needs to be built into the data package:
|
||||
// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT
|
||||
#define NORM2_HARDCODE_NFC_DATA 1
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -118,7 +126,7 @@ public:
|
|||
buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT);
|
||||
buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT);
|
||||
} else {
|
||||
buffer[0]=orig-c2; // LV syllable
|
||||
buffer[0]=(UChar)(orig-c2); // LV syllable
|
||||
buffer[1]=(UChar)(JAMO_T_BASE+c2);
|
||||
}
|
||||
}
|
||||
|
@ -158,8 +166,7 @@ public:
|
|||
appendBMP((UChar)c, cc, errorCode) :
|
||||
appendSupplementary(c, cc, errorCode);
|
||||
}
|
||||
// s must be in NFD, otherwise change the implementation.
|
||||
UBool append(const UChar *s, int32_t length,
|
||||
UBool append(const UChar *s, int32_t length, UBool isNFD,
|
||||
uint8_t leadCC, uint8_t trailCC,
|
||||
UErrorCode &errorCode);
|
||||
UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) {
|
||||
|
@ -243,7 +250,7 @@ public:
|
|||
}
|
||||
virtual ~Normalizer2Impl();
|
||||
|
||||
void init(const int32_t *inIndexes, const UTrie2 *inTrie,
|
||||
void init(const int32_t *inIndexes, const UCPTrie *inTrie,
|
||||
const uint16_t *inExtraData, const uint8_t *inSmallFCD);
|
||||
|
||||
void addLcccChars(UnicodeSet &set) const;
|
||||
|
@ -254,7 +261,12 @@ public:
|
|||
|
||||
UBool ensureCanonIterData(UErrorCode &errorCode) const;
|
||||
|
||||
uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
|
||||
// The trie stores values for lead surrogate code *units*.
|
||||
// Surrogate code *points* are inert.
|
||||
uint16_t getNorm16(UChar32 c) const {
|
||||
return U_IS_LEAD(c) ? INERT : UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
|
||||
}
|
||||
uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); }
|
||||
|
||||
UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
|
||||
if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
|
||||
|
@ -704,7 +716,7 @@ private:
|
|||
uint16_t centerNoNoDelta;
|
||||
uint16_t minMaybeYes;
|
||||
|
||||
const UTrie2 *normTrie;
|
||||
const UCPTrie *normTrie;
|
||||
const uint16_t *maybeYesCompositions;
|
||||
const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
|
||||
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
|
||||
|
@ -764,7 +776,7 @@ unorm_getFCD16(UChar32 c);
|
|||
|
||||
/**
|
||||
* Format of Normalizer2 .nrm data files.
|
||||
* Format version 3.0.
|
||||
* Format version 4.0.
|
||||
*
|
||||
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
|
||||
* ICU ships with data files for standard Unicode Normalization Forms
|
||||
|
@ -818,7 +830,7 @@ unorm_getFCD16(UChar32 c);
|
|||
* minMaybeYes=indexes[IX_MIN_MAYBE_YES];
|
||||
* See the normTrie description below and the design doc for details.
|
||||
*
|
||||
* UTrie2 normTrie; -- see utrie2_impl.h and utrie2.h
|
||||
* UCPTrie normTrie; -- see ucptrie_impl.h and ucptrie.h, same as Java CodePointTrie
|
||||
*
|
||||
* The trie holds the main normalization data. Each code point is mapped to a 16-bit value.
|
||||
* Rather than using independent bits in the value (which would require more than 16 bits),
|
||||
|
@ -946,6 +958,20 @@ unorm_getFCD16(UChar32 c);
|
|||
* which is artificially assigned "worst case" values lccc=1 and tccc=255.
|
||||
*
|
||||
* - A mapping to an empty string has explicit lccc=1 and tccc=255 values.
|
||||
*
|
||||
* Changes from format version 3 to format version 4 (ICU 63) ------------------
|
||||
*
|
||||
* Switched from UTrie2 to UCPTrie/CodePointTrie.
|
||||
*
|
||||
* The new trie no longer stores different values for surrogate code *units* vs.
|
||||
* surrogate code *points*.
|
||||
* Lead surrogates still have values for optimized UTF-16 string processing.
|
||||
* When looking up code point properties, the code now checks for lead surrogates and
|
||||
* treats them as inert.
|
||||
*
|
||||
* gennorm2 now has to reject mappings for surrogate code points.
|
||||
* UTS #46 maps unpaired surrogates to U+FFFD in code rather than via its
|
||||
* custom normalization data file.
|
||||
*/
|
||||
|
||||
#endif /* !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -102,9 +102,8 @@
|
|||
# define NOMCX
|
||||
# include <windows.h>
|
||||
# include "unicode/uloc.h"
|
||||
#if U_PLATFORM_HAS_WINUWP_API == 0
|
||||
# include "wintz.h"
|
||||
#else // U_PLATFORM_HAS_WINUWP_API
|
||||
#if U_PLATFORM_HAS_WINUWP_API
|
||||
typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef
|
||||
#include <Windows.Globalization.h>
|
||||
#include <windows.system.userprofile.h>
|
||||
|
@ -1062,53 +1061,13 @@ uprv_tzname_clear_cache()
|
|||
#endif
|
||||
}
|
||||
|
||||
// With the Universal Windows Platform we can just ask Windows for the name
|
||||
#if U_PLATFORM_HAS_WINUWP_API
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uprv_getWindowsTimeZone()
|
||||
{
|
||||
// Get default Windows timezone.
|
||||
ComPtr<IInspectable> calendar;
|
||||
HRESULT hr = RoActivateInstance(
|
||||
HStringReference(RuntimeClass_Windows_Globalization_Calendar).Get(),
|
||||
&calendar);
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
ComPtr<ABI::Windows::Globalization::ITimeZoneOnCalendar> timezone;
|
||||
hr = calendar.As(&timezone);
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
HString timezoneString;
|
||||
hr = timezone->GetTimeZone(timezoneString.GetAddressOf());
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
int32_t length = static_cast<int32_t>(wcslen(timezoneString.GetRawBuffer(NULL)));
|
||||
char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char));
|
||||
if (asciiId != nullptr)
|
||||
{
|
||||
u_UCharsToChars((UChar*)timezoneString.GetRawBuffer(NULL), asciiId, length);
|
||||
return asciiId;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Failed
|
||||
return nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uprv_tzname(int n)
|
||||
{
|
||||
(void)n; // Avoid unreferenced parameter warning.
|
||||
const char *tzid = NULL;
|
||||
#if U_PLATFORM_USES_ONLY_WIN32_API
|
||||
#if U_PLATFORM_HAS_WINUWP_API > 0
|
||||
tzid = uprv_getWindowsTimeZone();
|
||||
#else
|
||||
tzid = uprv_detectWindowsTimeZone();
|
||||
#endif
|
||||
|
||||
if (tzid != NULL) {
|
||||
return tzid;
|
||||
|
@ -1366,6 +1325,43 @@ uprv_pathIsAbsolute(const char *path)
|
|||
# endif
|
||||
#endif
|
||||
|
||||
#if U_PLATFORM_HAS_WINUWP_API != 0
|
||||
// Helper function to get the ICU Data Directory under the Windows directory location.
|
||||
static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
|
||||
{
|
||||
#if defined(ICU_DATA_DIR_WINDOWS)
|
||||
wchar_t windowsPath[MAX_PATH];
|
||||
char windowsPathUtf8[MAX_PATH];
|
||||
|
||||
UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
|
||||
if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
|
||||
// Convert UTF-16 to a UTF-8 string.
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t windowsPathUtf8Len = 0;
|
||||
u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
|
||||
&windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
|
||||
|
||||
if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
|
||||
(windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
|
||||
// Ensure it always has a separator, so we can append the ICU data path.
|
||||
if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
|
||||
windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
|
||||
windowsPathUtf8[windowsPathUtf8Len] = '\0';
|
||||
}
|
||||
// Check if the concatenated string will fit.
|
||||
if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
|
||||
uprv_strcpy(directoryBuffer, windowsPathUtf8);
|
||||
uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void U_CALLCONV dataDirectoryInitFn() {
|
||||
/* If we already have the directory, then return immediately. Will happen if user called
|
||||
* u_setDataDirectory().
|
||||
|
@ -1425,24 +1421,10 @@ static void U_CALLCONV dataDirectoryInitFn() {
|
|||
}
|
||||
#endif
|
||||
|
||||
#if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0
|
||||
// Use data from the %windir%\globalization\icu directory
|
||||
// This is only available if ICU is built as a system component
|
||||
#if U_PLATFORM_HAS_WINUWP_API != 0 && defined(ICU_DATA_DIR_WINDOWS)
|
||||
char datadir_path_buffer[MAX_PATH];
|
||||
UINT length = GetWindowsDirectoryA(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer));
|
||||
if (length > 0 && length < (UPRV_LENGTHOF(datadir_path_buffer) - sizeof(ICU_DATA_DIR_WINDOWS) - 1))
|
||||
{
|
||||
if (datadir_path_buffer[length - 1] != '\\')
|
||||
{
|
||||
datadir_path_buffer[length++] = '\\';
|
||||
datadir_path_buffer[length] = '\0';
|
||||
}
|
||||
|
||||
if ((length + 1 + sizeof(ICU_DATA_DIR_WINDOWS)) < UPRV_LENGTHOF(datadir_path_buffer))
|
||||
{
|
||||
uprv_strcat(datadir_path_buffer, ICU_DATA_DIR_WINDOWS);
|
||||
path = datadir_path_buffer;
|
||||
}
|
||||
if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
|
||||
path = datadir_path_buffer;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1491,20 +1473,30 @@ static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
|
|||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
#if U_PLATFORM_HAS_WINUWP_API == 0
|
||||
const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
|
||||
#else
|
||||
// TODO: UWP does not support alternate timezone data directories at this time
|
||||
|
||||
const char *dir = "";
|
||||
|
||||
#if U_PLATFORM_HAS_WINUWP_API != 0
|
||||
// The UWP version does not support the environment variable setting, but can possibly pick them up from the Windows directory.
|
||||
char datadir_path_buffer[MAX_PATH];
|
||||
if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
|
||||
dir = datadir_path_buffer;
|
||||
}
|
||||
#else
|
||||
dir = getenv("ICU_TIMEZONE_FILES_DIR");
|
||||
#endif // U_PLATFORM_HAS_WINUWP_API
|
||||
|
||||
#if defined(U_TIMEZONE_FILES_DIR)
|
||||
if (dir == NULL) {
|
||||
// Build time configuration setting.
|
||||
dir = TO_STRING(U_TIMEZONE_FILES_DIR);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (dir == NULL) {
|
||||
dir = "";
|
||||
}
|
||||
|
||||
setTimeZoneFilesDir(dir, status);
|
||||
}
|
||||
|
||||
|
@ -1676,7 +1668,8 @@ The leftmost codepage (.xxx) wins.
|
|||
/* Note that we scan the *uncorrected* ID. */
|
||||
if ((p = uprv_strrchr(posixID, '@')) != NULL) {
|
||||
if (correctedPOSIXLocale == NULL) {
|
||||
correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
|
||||
/* new locale can be 1 char longer than old one if @ -> __ */
|
||||
correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+2));
|
||||
/* Exit on memory allocation error. */
|
||||
if (correctedPOSIXLocale == NULL) {
|
||||
return NULL;
|
||||
|
@ -1693,7 +1686,7 @@ The leftmost codepage (.xxx) wins.
|
|||
}
|
||||
|
||||
if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
|
||||
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
|
||||
uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
|
||||
}
|
||||
else {
|
||||
uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
|
||||
|
@ -1747,70 +1740,22 @@ The leftmost codepage (.xxx) wins.
|
|||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
#define POSIX_LOCALE_CAPACITY 64
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
char *correctedPOSIXLocale = 0;
|
||||
char *correctedPOSIXLocale = nullptr;
|
||||
|
||||
// If we have already figured this out just use the cached value
|
||||
if (gCorrectedPOSIXLocale != NULL) {
|
||||
if (gCorrectedPOSIXLocale != nullptr) {
|
||||
return gCorrectedPOSIXLocale;
|
||||
}
|
||||
|
||||
// No cached value, need to determine the current value
|
||||
static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH];
|
||||
#if U_PLATFORM_HAS_WINUWP_API == 0
|
||||
// If not a Universal Windows App, we'll need user default language.
|
||||
// Vista and above should use Locale Names instead of LCIDs
|
||||
int length = GetUserDefaultLocaleName(windowsLocale, UPRV_LENGTHOF(windowsLocale));
|
||||
#else
|
||||
// In a UWP app, we want the top language that the application and user agreed upon
|
||||
ComPtr<ABI::Windows::Foundation::Collections::IVectorView<HSTRING>> languageList;
|
||||
static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
|
||||
int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
|
||||
|
||||
ComPtr<ABI::Windows::Globalization::IApplicationLanguagesStatics> applicationLanguagesStatics;
|
||||
HRESULT hr = GetActivationFactory(
|
||||
HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages).Get(),
|
||||
&applicationLanguagesStatics);
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = applicationLanguagesStatics->get_Languages(&languageList);
|
||||
}
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
// If there is no application context, then use the top language from the user language profile
|
||||
ComPtr<ABI::Windows::System::UserProfile::IGlobalizationPreferencesStatics> globalizationPreferencesStatics;
|
||||
hr = GetActivationFactory(
|
||||
HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences).Get(),
|
||||
&globalizationPreferencesStatics);
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = globalizationPreferencesStatics->get_Languages(&languageList);
|
||||
}
|
||||
}
|
||||
|
||||
// We have a list of languages, ICU knows one, so use the top one for our locale
|
||||
HString topLanguage;
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = languageList->GetAt(0, topLanguage.GetAddressOf());
|
||||
}
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
// Unexpected, use en-US by default
|
||||
if (gCorrectedPOSIXLocale == NULL) {
|
||||
gCorrectedPOSIXLocale = "en_US";
|
||||
}
|
||||
|
||||
return gCorrectedPOSIXLocale;
|
||||
}
|
||||
|
||||
// ResolveLocaleName will get a likely subtags form consistent with Windows behavior.
|
||||
int length = ResolveLocaleName(topLanguage.GetRawBuffer(NULL), windowsLocale, UPRV_LENGTHOF(windowsLocale));
|
||||
#endif
|
||||
// Now we should have a Windows locale name that needs converted to the POSIX style,
|
||||
if (length > 0)
|
||||
// Now we should have a Windows locale name that needs to be converted to the POSIX style.
|
||||
if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
|
||||
{
|
||||
// First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
|
||||
char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH];
|
||||
char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
|
||||
|
||||
int32_t i;
|
||||
for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
|
||||
|
@ -1858,7 +1803,7 @@ The leftmost codepage (.xxx) wins.
|
|||
}
|
||||
|
||||
// If unable to find a locale we can agree upon, use en-US by default
|
||||
if (gCorrectedPOSIXLocale == NULL) {
|
||||
if (gCorrectedPOSIXLocale == nullptr) {
|
||||
gCorrectedPOSIXLocale = "en_US";
|
||||
}
|
||||
return gCorrectedPOSIXLocale;
|
||||
|
|
|
@ -94,7 +94,7 @@ typedef size_t uintptr_t;
|
|||
# define U_NL_LANGINFO_CODESET CODESET
|
||||
#endif
|
||||
|
||||
#ifdef U_TZSET
|
||||
#if defined(U_TZSET) || defined(U_HAVE_TZSET)
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
// UWP doesn't support tzset or environment variables for tz
|
||||
|
@ -132,7 +132,7 @@ typedef size_t uintptr_t;
|
|||
# define U_TIMEZONE timezone
|
||||
#endif
|
||||
|
||||
#ifdef U_TZNAME
|
||||
#if defined(U_TZNAME) || defined(U_HAVE_TZNAME)
|
||||
/* Use the predefined value. */
|
||||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
/* not usable on all windows platforms */
|
||||
|
@ -204,30 +204,18 @@ typedef size_t uintptr_t;
|
|||
|
||||
/**
|
||||
* \def U_HAVE_STD_ATOMICS
|
||||
* Defines whether the standard C++11 <atomic> is available.
|
||||
* ICU will use this when available,
|
||||
* otherwise will fall back to compiler or platform specific alternatives.
|
||||
* Defines whether to use the standard C++11 <atomic> functions
|
||||
* If false, ICU will fall back to compiler or platform specific alternatives.
|
||||
* Note: support for these fallback options for atomics will be removed in a future version
|
||||
* of ICU, and the use of C++ 11 atomics will be required.
|
||||
* @internal
|
||||
*/
|
||||
#ifdef U_HAVE_STD_ATOMICS
|
||||
/* Use the predefined value. */
|
||||
#elif U_CPLUSPLUS_VERSION < 11
|
||||
/* Not C++11, disable use of atomics */
|
||||
# define U_HAVE_STD_ATOMICS 0
|
||||
#elif __clang__ && __clang_major__==3 && __clang_minor__<=1
|
||||
/* Clang 3.1, has atomic variable initializer bug. */
|
||||
# define U_HAVE_STD_ATOMICS 0
|
||||
#else
|
||||
/* U_HAVE_ATOMIC is typically set by an autoconf test of #include <atomic> */
|
||||
/* Can be set manually, or left undefined, on platforms without autoconf. */
|
||||
# if defined(U_HAVE_ATOMIC) && U_HAVE_ATOMIC
|
||||
# define U_HAVE_STD_ATOMICS 1
|
||||
# else
|
||||
# define U_HAVE_STD_ATOMICS 0
|
||||
# endif
|
||||
#else
|
||||
# define U_HAVE_STD_ATOMICS 1
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* \def U_HAVE_CLANG_ATOMICS
|
||||
* Defines whether Clang c11 style built-in atomics are available.
|
||||
|
@ -586,6 +574,49 @@ U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base);
|
|||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Pin a buffer capacity such that doing pointer arithmetic
|
||||
* on the destination pointer and capacity cannot overflow.
|
||||
*
|
||||
* The pinned capacity must fulfill the following conditions (for positive capacities):
|
||||
* - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
|
||||
* - (dest + capacity) >= dest
|
||||
* - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
|
||||
*
|
||||
* @param dest the destination buffer pointer.
|
||||
* @param capacity the requested buffer capacity, in units of type T.
|
||||
* @return the pinned capacity.
|
||||
* @internal
|
||||
*/
|
||||
template <typename T>
|
||||
inline int32_t pinCapacity(T *dest, int32_t capacity) {
|
||||
if (capacity <= 0) { return capacity; }
|
||||
|
||||
uintptr_t destInt = (uintptr_t)dest;
|
||||
uintptr_t maxInt;
|
||||
|
||||
# if U_PLATFORM == U_PF_OS390 && !defined(_LP64)
|
||||
// We have 31-bit pointers.
|
||||
maxInt = 0x7fffffff;
|
||||
# elif U_PLATFORM == U_PF_OS400
|
||||
maxInt = (uintptr_t)uprv_maximumPtr((void *)dest);
|
||||
# else
|
||||
maxInt = destInt + 0x7fffffffu;
|
||||
if (maxInt < destInt) {
|
||||
// Less than 2GB to the end of the address space.
|
||||
// Pin to that to prevent address overflow.
|
||||
maxInt = (uintptr_t)-1;
|
||||
}
|
||||
# endif
|
||||
|
||||
uintptr_t maxBytes = maxInt - destInt; // max. 2GB
|
||||
int32_t maxCapacity = (int32_t)(maxBytes / sizeof(T));
|
||||
return capacity <= maxCapacity ? capacity : maxCapacity;
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
/* Dynamic Library Functions */
|
||||
|
||||
typedef void (UVoidFunction)(void);
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include <cinttypes>
|
||||
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
|
@ -628,7 +630,7 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
|
|||
// or on a trail byte if the input is UTF-8.
|
||||
|
||||
utext_setNativeIndex(&fText, offset);
|
||||
int32_t adjustedOffset = utext_getNativeIndex(&fText);
|
||||
int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fBreakCache->preceding(adjustedOffset, status);
|
||||
|
@ -655,7 +657,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
|
|||
// But we still need the side effect of leaving iteration at the following boundary.
|
||||
|
||||
utext_setNativeIndex(&fText, offset);
|
||||
int32_t adjustedOffset = utext_getNativeIndex(&fText);
|
||||
int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText));
|
||||
|
||||
bool result = false;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
@ -848,7 +850,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
|||
|
||||
#ifdef RBBI_DEBUG
|
||||
if (gTrace) {
|
||||
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText));
|
||||
RBBIDebugPrintf(" %4" PRId64 " ", utext_getNativeIndex(&fText));
|
||||
if (0x20<=c && c<0x7f) {
|
||||
RBBIDebugPrintf("\"%c\" ", c);
|
||||
} else {
|
||||
|
|
|
@ -603,7 +603,7 @@ void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t
|
|||
fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1.
|
||||
}
|
||||
fBoundaries[nextIdx] = position;
|
||||
fStatuses[nextIdx] = ruleStatusIdx;
|
||||
fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
|
||||
fEndBufIdx = nextIdx;
|
||||
if (update == UpdateCachePosition) {
|
||||
// Set current position to the newly added boundary.
|
||||
|
@ -631,7 +631,7 @@ bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t
|
|||
fEndBufIdx = modChunkSize(fEndBufIdx - 1);
|
||||
}
|
||||
fBoundaries[nextIdx] = position;
|
||||
fStatuses[nextIdx] = ruleStatusIdx;
|
||||
fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
|
||||
fStartBufIdx = nextIdx;
|
||||
if (update == UpdateCachePosition) {
|
||||
fBufIdx = nextIdx;
|
||||
|
|
|
@ -303,17 +303,24 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
|
|||
}
|
||||
|
||||
void RBBIRuleBuilder::optimizeTables() {
|
||||
bool didSomething;
|
||||
do {
|
||||
didSomething = false;
|
||||
|
||||
// Begin looking for duplicates with char class 3.
|
||||
// Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
|
||||
// and should not have other categories merged into them.
|
||||
IntPair duplPair = {3, 0};
|
||||
// Begin looking for duplicates with char class 3.
|
||||
// Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
|
||||
// and should not have other categories merged into them.
|
||||
IntPair duplPair = {3, 0};
|
||||
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
|
||||
fSetBuilder->mergeCategories(duplPair);
|
||||
fForwardTable->removeColumn(duplPair.second);
|
||||
didSomething = true;
|
||||
}
|
||||
|
||||
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
|
||||
fSetBuilder->mergeCategories(duplPair);
|
||||
fForwardTable->removeColumn(duplPair.second);
|
||||
}
|
||||
fForwardTable->removeDuplicateStates();
|
||||
while (fForwardTable->removeDuplicateStates() > 0) {
|
||||
didSomething = true;
|
||||
}
|
||||
} while (didSomething);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -380,7 +380,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
|||
// with the current rule expression (on the Node Stack)
|
||||
// with the resulting OR expression going to *destRules
|
||||
//
|
||||
RBBINode *thisRule = fNodeStack[fNodeStackPtr];
|
||||
thisRule = fNodeStack[fNodeStackPtr];
|
||||
RBBINode *prevRules = *destRules;
|
||||
RBBINode *orNode = pushNewNode(RBBINode::opOr);
|
||||
if (U_FAILURE(*fRB->fStatus)) {
|
||||
|
|
|
@ -428,8 +428,8 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) {
|
|||
addRuleRootNodes(&ruleRootNodes, tree);
|
||||
|
||||
UVector matchStartNodes(*fStatus);
|
||||
for (int i=0; i<ruleRootNodes.size(); ++i) {
|
||||
RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(i));
|
||||
for (int j=0; j<ruleRootNodes.size(); ++j) {
|
||||
RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
|
||||
if (node->fChainIn) {
|
||||
setAdd(&matchStartNodes, node->fFirstPosSet);
|
||||
}
|
||||
|
@ -1082,21 +1082,22 @@ bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
|
|||
int32_t numStates = fDStates->size();
|
||||
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
|
||||
uint16_t table_base;
|
||||
uint16_t table_dupl;
|
||||
for (; categories->first < numCols-1; categories->first++) {
|
||||
for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
|
||||
for (int32_t state=0; state<numStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
|
||||
table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
|
||||
table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
|
||||
if (table_base != table_dupl) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (table_base == table_dupl) {
|
||||
return true;
|
||||
}
|
||||
// Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
|
||||
uint16_t table_base = 0;
|
||||
uint16_t table_dupl = 1;
|
||||
for (int32_t state=0; state<numStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
|
||||
table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
|
||||
table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
|
||||
if (table_base != table_dupl) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (table_base == table_dupl) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@ -1236,7 +1237,7 @@ void RBBITableBuilder::removeSafeState(IntPair duplStates) {
|
|||
} else if (existingVal > duplState) {
|
||||
newVal = existingVal - 1;
|
||||
}
|
||||
sd->setCharAt(col, newVal);
|
||||
sd->setCharAt(col, static_cast<char16_t>(newVal));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1245,12 +1246,16 @@ void RBBITableBuilder::removeSafeState(IntPair duplStates) {
|
|||
/*
|
||||
* RemoveDuplicateStates
|
||||
*/
|
||||
void RBBITableBuilder::removeDuplicateStates() {
|
||||
int32_t RBBITableBuilder::removeDuplicateStates() {
|
||||
IntPair dupls = {3, 0};
|
||||
int32_t numStatesRemoved = 0;
|
||||
|
||||
while (findDuplicateState(&dupls)) {
|
||||
// printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
|
||||
removeState(dupls);
|
||||
++numStatesRemoved;
|
||||
}
|
||||
return numStatesRemoved;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1411,7 +1416,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
|
|||
UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
|
||||
for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
|
||||
// Note: +2 for the start & stop state.
|
||||
startState.setCharAt(charClass, charClass+2);
|
||||
startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
|
||||
}
|
||||
|
||||
// Initially make every other state table row look like the start state row,
|
||||
|
|
|
@ -15,6 +15,9 @@
|
|||
#define RBBITBLB_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "rbbirb.h"
|
||||
|
@ -66,8 +69,11 @@ public:
|
|||
*/
|
||||
void removeColumn(int32_t column);
|
||||
|
||||
/** Check for, and remove duplicate states (table rows). */
|
||||
void removeDuplicateStates();
|
||||
/**
|
||||
* Check for, and remove duplicate states (table rows).
|
||||
* @return the number of states removed.
|
||||
*/
|
||||
int32_t removeDuplicateStates();
|
||||
|
||||
/** Build the safe reverse table from the already-constructed forward table. */
|
||||
void buildSafeReverseTable(UErrorCode &status);
|
||||
|
@ -204,4 +210,7 @@ private:
|
|||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -702,9 +702,9 @@ ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const
|
|||
}
|
||||
|
||||
// fallback
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
status = U_ZERO_ERROR;
|
||||
ICUServiceKey* fallbackKey = createKey(&id, status);
|
||||
while (fallbackKey->fallback()) {
|
||||
while (fallbackKey != NULL && fallbackKey->fallback()) {
|
||||
UnicodeString us;
|
||||
fallbackKey->currentID(us);
|
||||
f = (ICUServiceFactory*)map->get(us);
|
||||
|
|
|
@ -104,7 +104,7 @@ public:
|
|||
/**
|
||||
* Deletes this object if it has no references.
|
||||
* Available for non-cached SharedObjects only. Ownership of cached objects
|
||||
* is with the UnifiedCache, which is soley responsible for eviction and deletion.
|
||||
* is with the UnifiedCache, which is solely responsible for eviction and deletion.
|
||||
*/
|
||||
void deleteIfZeroRefCount() const;
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ UnicodeSet* gUnicodeSets[COUNT] = {};
|
|||
|
||||
// Save the empty instance in static memory to have well-defined behavior if a
|
||||
// regular UnicodeSet cannot be allocated.
|
||||
alignas(UnicodeSet)
|
||||
char gEmptyUnicodeSet[sizeof(UnicodeSet)];
|
||||
|
||||
// Whether the gEmptyUnicodeSet is initialized and ready to use.
|
||||
|
|
|
@ -373,7 +373,7 @@ StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
|
|||
return newNode;
|
||||
}
|
||||
|
||||
UBool
|
||||
int32_t
|
||||
StringTrieBuilder::hashNode(const void *node) {
|
||||
return ((const Node *)node)->hashCode();
|
||||
}
|
||||
|
|
|
@ -624,7 +624,7 @@ getDirProps(UBiDi *pBiDi) {
|
|||
pBiDi->paras[pBiDi->paraCount-1].level=1;
|
||||
}
|
||||
if(isDefaultLevel) {
|
||||
pBiDi->paraLevel=pBiDi->paras[0].level;
|
||||
pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
|
||||
}
|
||||
/* The following is needed to resolve the text direction for default level
|
||||
paragraphs containing no strong character */
|
||||
|
@ -825,28 +825,28 @@ bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
|
|||
N0c1. */
|
||||
|
||||
if((direction==0 && pOpening->flags&FOUND_L) ||
|
||||
(direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
|
||||
newProp=direction;
|
||||
(direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
|
||||
newProp=static_cast<DirProp>(direction);
|
||||
}
|
||||
else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
|
||||
else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
|
||||
/* it is stable if there is no containing pair or in
|
||||
conditions too complicated and not worth checking */
|
||||
stable=(openIdx==pLastIsoRun->start);
|
||||
if(direction!=pOpening->contextDir)
|
||||
newProp=pOpening->contextDir; /* N0c1 */
|
||||
newProp= static_cast<DirProp>(pOpening->contextDir); /* N0c1 */
|
||||
else
|
||||
newProp=direction; /* N0c2 */
|
||||
newProp= static_cast<DirProp>(direction); /* N0c2 */
|
||||
} else {
|
||||
/* forget this and any brackets nested within this pair */
|
||||
pLastIsoRun->limit=openIdx;
|
||||
return ON; /* N0d */
|
||||
pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
|
||||
return ON; /* N0d */
|
||||
}
|
||||
bd->pBiDi->dirProps[pOpening->position]=newProp;
|
||||
bd->pBiDi->dirProps[position]=newProp;
|
||||
/* Update nested N0c pairs that may be affected */
|
||||
fixN0c(bd, openIdx, pOpening->position, newProp);
|
||||
if(stable) {
|
||||
pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
|
||||
pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
|
||||
/* remove lower located synonyms if any */
|
||||
while(pLastIsoRun->limit>pLastIsoRun->start &&
|
||||
bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
|
||||
|
@ -918,7 +918,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
|
|||
bracket or it is a case of N0d */
|
||||
/* Now see if it is an opening bracket */
|
||||
if(c)
|
||||
match=u_getBidiPairedBracket(c); /* get the matching char */
|
||||
match= static_cast<UChar>(u_getBidiPairedBracket(c)); /* get the matching char */
|
||||
else
|
||||
match=0;
|
||||
if(match!=c && /* has a matching char */
|
||||
|
@ -948,7 +948,7 @@ bracketProcessChar(BracketData *bd, int32_t position) {
|
|||
pLastIsoRun->contextPos=position;
|
||||
}
|
||||
else if(dirProp<=R || dirProp==AL) {
|
||||
newProp=DIR_FROM_STRONG(dirProp);
|
||||
newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
|
||||
pLastIsoRun->lastBase=dirProp;
|
||||
pLastIsoRun->lastStrong=dirProp;
|
||||
pLastIsoRun->contextDir=(UBiDiDirection)newProp;
|
||||
|
@ -1101,7 +1101,7 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
|||
else
|
||||
start=pBiDi->paras[paraIndex-1].limit;
|
||||
limit=pBiDi->paras[paraIndex].limit;
|
||||
level=pBiDi->paras[paraIndex].level;
|
||||
level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
|
||||
for(i=start; i<limit; i++)
|
||||
levels[i]=level;
|
||||
}
|
||||
|
@ -1119,7 +1119,7 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
|||
else
|
||||
start=pBiDi->paras[paraIndex-1].limit;
|
||||
limit=pBiDi->paras[paraIndex].limit;
|
||||
level=pBiDi->paras[paraIndex].level;
|
||||
level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
|
||||
for(i=start; i<limit; i++) {
|
||||
levels[i]=level;
|
||||
dirProp=dirProps[i];
|
||||
|
@ -2827,7 +2827,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
|
|||
DirProp dirProp;
|
||||
for(i=0; i<pBiDi->paraCount; i++) {
|
||||
last=(pBiDi->paras[i].limit)-1;
|
||||
level=pBiDi->paras[i].level;
|
||||
level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
|
||||
if(level==0)
|
||||
continue; /* LTR paragraph */
|
||||
start= i==0 ? 0 : pBiDi->paras[i-1].limit;
|
||||
|
|
|
@ -146,7 +146,7 @@ static UBool
|
|||
action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode)
|
||||
{
|
||||
ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize,
|
||||
pTransform->reorderingOptions, pErrorCode);
|
||||
static_cast<uint16_t>(pTransform->reorderingOptions), pErrorCode);
|
||||
|
||||
*pTransform->pDestLength = pTransform->srcLength;
|
||||
pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT;
|
||||
|
@ -393,9 +393,9 @@ resolveBaseDirection(const UChar *text, uint32_t length,
|
|||
switch (*pInLevel) {
|
||||
case UBIDI_DEFAULT_LTR:
|
||||
case UBIDI_DEFAULT_RTL: {
|
||||
UBiDiLevel level = ubidi_getBaseDirection(text, length);
|
||||
*pInLevel = level != UBIDI_NEUTRAL ? level
|
||||
: *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR;
|
||||
UBiDiLevel level = static_cast<UBiDiLevel>(ubidi_getBaseDirection(text, length));
|
||||
*pInLevel = static_cast<UBiDiLevel>(level != UBIDI_NEUTRAL) ? level
|
||||
: *pInLevel == UBIDI_DEFAULT_RTL ? static_cast<UBiDiLevel>(RTL) : static_cast<UBiDiLevel>(LTR);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
|
@ -45,6 +45,7 @@ typedef enum ECleanupCommonType {
|
|||
UCLN_COMMON_CURRENCY,
|
||||
UCLN_COMMON_LOADED_NORMALIZER2,
|
||||
UCLN_COMMON_NORMALIZER2,
|
||||
UCLN_COMMON_CHARACTERPROPERTIES,
|
||||
UCLN_COMMON_USET,
|
||||
UCLN_COMMON_UNAMES,
|
||||
UCLN_COMMON_UPROPS,
|
||||
|
@ -52,7 +53,6 @@ typedef enum ECleanupCommonType {
|
|||
UCLN_COMMON_UCNV_IO,
|
||||
UCLN_COMMON_UDATA,
|
||||
UCLN_COMMON_PUTIL,
|
||||
UCLN_COMMON_LIST_FORMATTER,
|
||||
UCLN_COMMON_UINIT,
|
||||
|
||||
/*
|
||||
|
|
|
@ -1743,13 +1743,9 @@ ucnv_fromUChars(UConverter *cnv,
|
|||
}
|
||||
if(srcLength>0) {
|
||||
srcLimit=src+srcLength;
|
||||
destCapacity=pinCapacity(dest, destCapacity);
|
||||
destLimit=dest+destCapacity;
|
||||
|
||||
/* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
|
||||
if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
|
||||
destLimit=(char *)U_MAX_PTR(dest);
|
||||
}
|
||||
|
||||
/* perform the conversion */
|
||||
ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
|
||||
destLength=(int32_t)(dest-originalDest);
|
||||
|
@ -1803,13 +1799,9 @@ ucnv_toUChars(UConverter *cnv,
|
|||
}
|
||||
if(srcLength>0) {
|
||||
srcLimit=src+srcLength;
|
||||
destCapacity=pinCapacity(dest, destCapacity);
|
||||
destLimit=dest+destCapacity;
|
||||
|
||||
/* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
|
||||
if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
|
||||
destLimit=(UChar *)U_MAX_PTR(dest);
|
||||
}
|
||||
|
||||
/* perform the conversion */
|
||||
ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
|
||||
destLength=(int32_t)(dest-originalDest);
|
||||
|
|
|
@ -2772,7 +2772,7 @@ getTrailByte:
|
|||
/* report a pair of illegal bytes if the second byte is not a DBCS starter */
|
||||
++mySource;
|
||||
/* add another bit so that the code below writes 2 bytes in case of error */
|
||||
mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
|
||||
mySourceChar = static_cast<UChar>(0x10000 | (mySourceChar << 8) | trailByte);
|
||||
}
|
||||
} else {
|
||||
args->converter->toUBytes[0] = (uint8_t)mySourceChar;
|
||||
|
@ -3304,7 +3304,7 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
|
|||
myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
|
||||
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
|
||||
args->converter->toUCallbackReason = UCNV_IRREGULAR;
|
||||
args->converter->toUBytes[0] = mySourceChar;
|
||||
args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
|
||||
args->converter->toULength = 1;
|
||||
args->target = myTarget;
|
||||
args->source = mySource;
|
||||
|
|
|
@ -180,7 +180,7 @@ _CompoundTextgetName(const UConverter* cnv);
|
|||
|
||||
|
||||
static int32_t findNextEsc(const char *source, const char *sourceLimit) {
|
||||
int32_t length = sourceLimit - source;
|
||||
int32_t length = static_cast<int32_t>(sourceLimit - source);
|
||||
int32_t i;
|
||||
for (i = 1; i < length; i++) {
|
||||
if (*(source + i) == 0x1B) {
|
||||
|
|
|
@ -71,7 +71,7 @@ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
/* write the BOM if necessary */
|
||||
if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
|
||||
static const char bom[]={ (char)0xfe, (char)0xff };
|
||||
static const char bom[]={ (char)0xfeu, (char)0xffu };
|
||||
ucnv_fromUWriteBytes(cnv,
|
||||
bom, 2,
|
||||
&pArgs->target, pArgs->targetLimit,
|
||||
|
@ -672,7 +672,7 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
|
||||
/* write the BOM if necessary */
|
||||
if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
|
||||
static const char bom[]={ (char)0xff, (char)0xfe };
|
||||
static const char bom[]={ (char)0xffu, (char)0xfeu };
|
||||
ucnv_fromUWriteBytes(cnv,
|
||||
bom, 2,
|
||||
&pArgs->target, pArgs->targetLimit,
|
||||
|
|
|
@ -228,7 +228,7 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
|
|||
|
||||
/* write the BOM if necessary */
|
||||
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
|
||||
static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
|
||||
static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
|
||||
ucnv_fromUWriteBytes(args->converter,
|
||||
bom, 4,
|
||||
&args->target, args->targetLimit,
|
||||
|
@ -331,7 +331,7 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
|||
|
||||
/* write the BOM if necessary */
|
||||
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
|
||||
static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
|
||||
static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu };
|
||||
ucnv_fromUWriteBytes(args->converter,
|
||||
bom, 4,
|
||||
&args->target, args->targetLimit,
|
||||
|
@ -706,7 +706,7 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
|
|||
|
||||
/* write the BOM if necessary */
|
||||
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
|
||||
static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
|
||||
static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
|
||||
ucnv_fromUWriteBytes(args->converter,
|
||||
bom, 4,
|
||||
&args->target, args->targetLimit,
|
||||
|
@ -817,7 +817,7 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
|
|||
|
||||
/* write the BOM if necessary */
|
||||
if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
|
||||
static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
|
||||
static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 };
|
||||
ucnv_fromUWriteBytes(args->converter,
|
||||
bom, 4,
|
||||
&args->target, args->targetLimit,
|
||||
|
@ -1043,7 +1043,7 @@ _UTF32Open(UConverter *cnv,
|
|||
_UTF32Reset(cnv, UCNV_RESET_BOTH);
|
||||
}
|
||||
|
||||
static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 };
|
||||
static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 };
|
||||
|
||||
static void U_CALLCONV
|
||||
_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
||||
|
@ -1071,7 +1071,7 @@ _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
|||
b=*source;
|
||||
if(b==0) {
|
||||
state=1; /* could be 00 00 FE FF */
|
||||
} else if(b==(char)0xff) {
|
||||
} else if(b==(char)0xffu) {
|
||||
state=5; /* could be FF FE 00 00 */
|
||||
} else {
|
||||
state=8; /* default to UTF-32BE */
|
||||
|
|
|
@ -108,7 +108,7 @@ morebytes:
|
|||
if (mySource < sourceLimit)
|
||||
{
|
||||
toUBytes[i] = (char) (ch2 = *mySource);
|
||||
if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
|
||||
if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
|
||||
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
|
||||
{
|
||||
break; /* i < inBytes */
|
||||
|
@ -225,7 +225,7 @@ morebytes:
|
|||
if (mySource < sourceLimit)
|
||||
{
|
||||
toUBytes[i] = (char) (ch2 = *mySource);
|
||||
if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) &&
|
||||
if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) &&
|
||||
!(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2)))
|
||||
{
|
||||
break; /* i < inBytes */
|
||||
|
|
|
@ -199,7 +199,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
|
|||
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
|
||||
args->converter->toUCallbackReason = UCNV_IRREGULAR;
|
||||
args->converter->toUBytes[0] = UCNV_TILDE;
|
||||
args->converter->toUBytes[1] = mySourceChar;
|
||||
args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
|
||||
args->converter->toULength = 2;
|
||||
args->target = myTarget;
|
||||
args->source = mySource;
|
||||
|
@ -229,7 +229,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
|
|||
--mySource;
|
||||
} else {
|
||||
/* Include the current byte in the illegal sequence. */
|
||||
args->converter->toUBytes[1] = mySourceChar;
|
||||
args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
|
||||
args->converter->toULength = 2;
|
||||
}
|
||||
args->target = myTarget;
|
||||
|
|
|
@ -4164,8 +4164,8 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
|||
nextSourceIndex=0;
|
||||
|
||||
/* Get the SI/SO character for the converter */
|
||||
siLength = getSISOBytes(SI, cnv->options, siBytes);
|
||||
soLength = getSISOBytes(SO, cnv->options, soBytes);
|
||||
siLength = static_cast<uint8_t>(getSISOBytes(SI, cnv->options, siBytes));
|
||||
soLength = static_cast<uint8_t>(getSISOBytes(SO, cnv->options, soBytes));
|
||||
|
||||
/* conversion loop */
|
||||
/*
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
#include "propsvec.h"
|
||||
#include "uassert.h"
|
||||
#include "ucmndata.h"
|
||||
#include "udataswp.h"
|
||||
#include "uenumimp.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
@ -72,7 +73,7 @@ static void generateSelectorData(UConverterSelector* result,
|
|||
// set errorValue to all-ones
|
||||
for (int32_t col = 0; col < columns; col++) {
|
||||
upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
|
||||
col, ~0, ~0, status);
|
||||
col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status);
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < result->encodingsCount; ++i) {
|
||||
|
@ -109,7 +110,7 @@ static void generateSelectorData(UConverterSelector* result,
|
|||
// this will be reached for the converters that fill the set with
|
||||
// strings. Those should be ignored by our system
|
||||
} else {
|
||||
upvec_setValue(upvec, start_char, end_char, column, ~0, mask,
|
||||
upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask,
|
||||
status);
|
||||
}
|
||||
}
|
||||
|
@ -130,7 +131,7 @@ static void generateSelectorData(UConverterSelector* result,
|
|||
uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
|
||||
status);
|
||||
for (int32_t col = 0; col < columns; col++) {
|
||||
upvec_setValue(upvec, start_char, end_char, col, ~0, ~0,
|
||||
upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0),
|
||||
status);
|
||||
}
|
||||
}
|
||||
|
@ -684,7 +685,7 @@ static int16_t countOnes(uint32_t* mask, int32_t len) {
|
|||
ent &= ent - 1; // clear the least significant bit set
|
||||
}
|
||||
}
|
||||
return totalOnes;
|
||||
return static_cast<int16_t>(totalOnes);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -28,81 +28,6 @@
|
|||
|
||||
/* swapping ----------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* This performs data swapping for a folded trie (see utrie.c for details).
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UTrieHeader *inTrie;
|
||||
UTrieHeader trie;
|
||||
int32_t size;
|
||||
UBool dataIs32;
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* setup and swapping */
|
||||
if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
inTrie=(const UTrieHeader *)inData;
|
||||
trie.signature=ds->readUInt32(inTrie->signature);
|
||||
trie.options=ds->readUInt32(inTrie->options);
|
||||
trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
|
||||
trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
|
||||
|
||||
if( trie.signature!=0x54726965 ||
|
||||
(trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
|
||||
((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
|
||||
trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
|
||||
(trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
|
||||
trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
|
||||
(trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
|
||||
((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
|
||||
) {
|
||||
*pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
|
||||
return 0;
|
||||
}
|
||||
|
||||
dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
|
||||
size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
|
||||
|
||||
if(length>=0) {
|
||||
UTrieHeader *outTrie;
|
||||
|
||||
if(length<size) {
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
outTrie=(UTrieHeader *)outData;
|
||||
|
||||
/* swap the header */
|
||||
ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
|
||||
|
||||
/* swap the index and the data */
|
||||
if(dataIs32) {
|
||||
ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
|
||||
ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
|
||||
(uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
|
||||
} else {
|
||||
ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_COLLATION
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
|
|
@ -0,0 +1,590 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucptrie.cpp (modified from utrie2.cpp)
|
||||
// created: 2017dec29 Markus W. Scherer
|
||||
|
||||
// #define UCPTRIE_DEBUG
|
||||
#ifdef UCPTRIE_DEBUG
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucptrie.h"
|
||||
#include "unicode/utf.h"
|
||||
#include "unicode/utf8.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "cmemory.h"
|
||||
#include "uassert.h"
|
||||
#include "ucptrie_impl.h"
|
||||
|
||||
/**
 * Creates a UCPTrie that reads directly from a serialized trie image.
 *
 * The returned struct is allocated with uprv_malloc(), but its index and data
 * pointers point into the caller's `data` buffer; the buffer is not copied,
 * so it must stay valid for as long as the trie is used.
 *
 * @param type          requested trie type; a negative value accepts whatever
 *                      type the header declares
 * @param valueWidth    requested value width; a negative value accepts whatever
 *                      width the header declares
 * @param data          serialized trie; its address must have no bits set under
 *                      mask 3 (presumably 4-byte alignment -- confirm against
 *                      U_POINTER_MASK_LSB)
 * @param length        number of bytes available at data; must be positive
 * @param pActualLength if not nullptr, receives the number of bytes the
 *                      serialized trie actually occupies
 * @param pErrorCode    in/out error code; nothing is done if it already
 *                      indicates a failure
 * @return the new trie, or nullptr with *pErrorCode set to
 *         U_ILLEGAL_ARGUMENT_ERROR (bad arguments), U_INVALID_FORMAT_ERROR
 *         (bad or truncated image, or type/width mismatch) or
 *         U_MEMORY_ALLOCATION_ERROR
 */
U_CAPI UCPTrie * U_EXPORT2
ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
                       const void *data, int32_t length, int32_t *pActualLength,
                       UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return nullptr;
    }

    if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) ||
            type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type ||
            valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    // Enough data for a trie header?
    if (length < (int32_t)sizeof(UCPTrieHeader)) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    // Check the signature.
    const UCPTrieHeader *header = (const UCPTrieHeader *)data;
    if (header->signature != UCPTRIE_SIG) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    // Decode and validate the type and value width stored in the options.
    int32_t options = header->options;
    int32_t typeInt = (options >> 6) & 3;
    int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK;
    if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 ||
            (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }
    UCPTrieType actualType = (UCPTrieType)typeInt;
    UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt;
    // A negative request means "any": adopt what the header declares.
    if (type < 0) {
        type = actualType;
    }
    if (valueWidth < 0) {
        valueWidth = actualValueWidth;
    }
    if (type != actualType || valueWidth != actualValueWidth) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    // Get the length values and offsets.
    UCPTrie tempTrie;
    uprv_memset(&tempTrie, 0, sizeof(tempTrie));
    tempTrie.indexLength = header->indexLength;
    tempTrie.dataLength =
        ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength;
    tempTrie.index3NullOffset = header->index3NullOffset;
    tempTrie.dataNullOffset =
        ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset;

    tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2;
    tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12;
    tempTrie.type = type;
    tempTrie.valueWidth = valueWidth;

    // Calculate the actual length.
    int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2;
    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
        actualLength += tempTrie.dataLength * 2;
    } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
        actualLength += tempTrie.dataLength * 4;
    } else {
        actualLength += tempTrie.dataLength;
    }
    if (length < actualLength) {
        *pErrorCode = U_INVALID_FORMAT_ERROR;  // Not enough bytes.
        return nullptr;
    }

    // Determine where the null value is read from. When the header has no
    // usable null data offset, fall back to the high-value slot near the end
    // of the data array. Reject images whose data array is too short for that
    // slot: the offset would be negative and index before the data array.
    // This is checked before allocating so that no cleanup is needed.
    int32_t nullValueOffset = tempTrie.dataNullOffset;
    if (nullValueOffset >= tempTrie.dataLength) {
        nullValueOffset = tempTrie.dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
        if (nullValueOffset < 0) {
            *pErrorCode = U_INVALID_FORMAT_ERROR;
            return nullptr;
        }
    }

    // Allocate the trie.
    UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie));
    if (trie == nullptr) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
#ifdef UCPTRIE_DEBUG
    trie->name = "fromSerialized";
#endif

    // Set the pointers to its index and data arrays.
    // The index starts right after the header; the data follows the index.
    const uint16_t *p16 = (const uint16_t *)(header + 1);
    trie->index = p16;
    p16 += trie->indexLength;

    // Get the data.
    switch (valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        trie->data.ptr16 = p16;
        trie->nullValue = trie->data.ptr16[nullValueOffset];
        break;
    case UCPTRIE_VALUE_BITS_32:
        trie->data.ptr32 = (const uint32_t *)p16;
        trie->nullValue = trie->data.ptr32[nullValueOffset];
        break;
    case UCPTRIE_VALUE_BITS_8:
        trie->data.ptr8 = (const uint8_t *)p16;
        trie->nullValue = trie->data.ptr8[nullValueOffset];
        break;
    default:
        // Unreachable because valueWidth was checked above.
        // Release the allocation anyway so this path cannot leak.
        uprv_free(trie);
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return nullptr;
    }

    if (pActualLength != nullptr) {
        *pActualLength = actualLength;
    }
    return trie;
}
|
||||
|
||||
// Closes a trie by handing the UCPTrie struct pointer to uprv_free().
// Only the struct itself is released here; this function does not free
// the arrays referenced by trie->index and trie->data.
U_CAPI void U_EXPORT2
|
||||
ucptrie_close(UCPTrie *trie) {
|
||||
uprv_free(trie);
|
||||
}
|
||||
|
||||
/**
 * Returns the trie type recorded in the trie struct.
 *
 * @param trie the trie
 * @return the stored type field, converted to UCPTrieType
 */
U_CAPI UCPTrieType U_EXPORT2
ucptrie_getType(const UCPTrie *trie) {
    return static_cast<UCPTrieType>(trie->type);
}
|
||||
|
||||
/**
 * Returns the data value width recorded in the trie struct.
 *
 * @param trie the trie
 * @return the stored valueWidth field, converted to UCPTrieValueWidth
 */
U_CAPI UCPTrieValueWidth U_EXPORT2
ucptrie_getValueWidth(const UCPTrie *trie) {
    return static_cast<UCPTrieValueWidth>(trie->valueWidth);
}
|
||||
|
||||
/**
 * Computes the data array index for code point c via the multi-stage
 * (index-1 -> index-2 -> index-3 -> data block) lookup.
 *
 * The caller must pass a code point that is served by the multi-stage index
 * (see the assertions below); no range checking is done in release builds.
 *
 * @param trie the trie
 * @param c the code point
 * @return the index into the trie data array for c
 */
U_CAPI int32_t U_EXPORT2
ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) {
    const uint16_t *const table = trie->index;

    // The index-1 table sits at a type-specific offset in the index array.
    int32_t index1Offset;
    if (trie->type != UCPTRIE_TYPE_FAST) {
        U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
        index1Offset = UCPTRIE_SMALL_INDEX_LENGTH;
    } else {
        U_ASSERT(0xffff < c && c < trie->highStart);
        index1Offset = UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
    }

    const int32_t index1Pos = (c >> UCPTRIE_SHIFT_1) + index1Offset;
    const int32_t index2Pos =
        (int32_t)table[index1Pos] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK);
    const int32_t index3Block = table[index2Pos];
    const int32_t index3Pos = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
    const int32_t offsetInBlock = c & UCPTRIE_SMALL_DATA_MASK;

    if ((index3Block & 0x8000) == 0) {
        // 16-bit indexes: the index-3 entry is the data block offset itself.
        return table[index3Block + index3Pos] + offsetInBlock;
    }

    // 18-bit indexes stored in groups of 9 entries per 8 indexes:
    // the first word of a group carries the top 2 bits of each of the 8 offsets.
    const int32_t groupStart = (index3Block & 0x7fff) + (index3Pos & ~7) + (index3Pos >> 3);
    const int32_t slot = index3Pos & 7;
    int32_t dataBlock = ((int32_t)table[groupStart] << (2 + (2 * slot))) & 0x30000;
    dataBlock |= table[groupStart + 1 + slot];
    return dataBlock + offsetInBlock;
}
|
||||
|
||||
/**
 * Computes the data array index for a code point that is given as
 * pre-split UTF-8 pieces: c = (lt1 << 12) | (t2 << 6) | t3.
 *
 * @param trie the trie
 * @param lt1 the upper bits of the code point (everything above bit 11)
 * @param t2 bits 11..6 of the code point
 * @param t3 bits 5..0 of the code point
 * @return the index into the trie data array
 */
U_CAPI int32_t U_EXPORT2
ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) {
    const UChar32 codePoint = (lt1 << 12) | (t2 << 6) | t3;
    if (codePoint < trie->highStart) {
        return ucptrie_internalSmallIndex(trie, codePoint);
    }
    // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher.
    // Everything from highStart up shares the single high value.
    return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
}
|
||||
|
||||
/**
 * Decodes the UTF-8 sequence that ends just before src (reading backward,
 * never before start) and returns the trie data index for the result,
 * combined with the number of bytes that were read.
 *
 * @param trie the trie
 * @param c the byte already read by the caller, passed on to utf8_prevCharSafeBody()
 * @param start the start of the text; reading never goes before this pointer
 * @param src the current position (just after the bytes to be read backward)
 * @return (data index << 3) | number of bytes read backward from src
 */
U_CAPI int32_t U_EXPORT2
ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
                            const uint8_t *start, const uint8_t *src) {
    // Look back at most 7 bytes.
    // Support 64-bit pointers by avoiding cast of arbitrary difference.
    int32_t window = 7;
    if ((src - start) <= 7) {
        window = (int32_t)(src - start);
    } else {
        start = src - 7;
    }

    int32_t pos = window;
    c = utf8_prevCharSafeBody(start, 0, &pos, c, -1);
    const int32_t bytesRead = window - pos;  // Number of bytes read backward from src.

    const int32_t dataIndex = _UCPTRIE_CP_INDEX(trie, 0xffff, c);
    return (dataIndex << 3) | bytesRead;
}
|
||||
|
||||
namespace {
|
||||
|
||||
inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) {
|
||||
switch (valueWidth) {
|
||||
case UCPTRIE_VALUE_BITS_16:
|
||||
return data.ptr16[dataIndex];
|
||||
case UCPTRIE_VALUE_BITS_32:
|
||||
return data.ptr32[dataIndex];
|
||||
case UCPTRIE_VALUE_BITS_8:
|
||||
return data.ptr8[dataIndex];
|
||||
default:
|
||||
// Unreachable if the trie is properly initialized.
|
||||
return 0xffffffff;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * Returns the trie value for code point c.
 *
 * ASCII code points index the data array directly; all other inputs go
 * through _UCPTRIE_CP_INDEX() with the fast-range limit of the trie type.
 *
 * @param trie the trie
 * @param c the code point
 * @return the value stored for c, widened to uint32_t
 */
U_CAPI uint32_t U_EXPORT2
ucptrie_get(const UCPTrie *trie, UChar32 c) {
    const UCPTrieValueWidth width = (UCPTrieValueWidth)trie->valueWidth;
    if ((uint32_t)c <= 0x7f) {
        // linear ASCII
        return getValue(trie->data, width, c);
    }

    UChar32 fastMax;
    if (trie->type == UCPTRIE_TYPE_FAST) {
        fastMax = 0xffff;
    } else {
        fastMax = UCPTRIE_SMALL_MAX;
    }
    const int32_t dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c);
    return getValue(trie->data, width, dataIndex);
}
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int32_t MAX_UNICODE = 0x10ffff;
|
||||
|
||||
inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue,
|
||||
UCPMapValueFilter *filter, const void *context) {
|
||||
if (value == trieNullValue) {
|
||||
value = nullValue;
|
||||
} else if (filter != nullptr) {
|
||||
value = filter(context, value);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
UChar32 getRange(const void *t, UChar32 start,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
|
||||
if ((uint32_t)start > MAX_UNICODE) {
|
||||
return U_SENTINEL;
|
||||
}
|
||||
const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t);
|
||||
UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
|
||||
if (start >= trie->highStart) {
|
||||
if (pValue != nullptr) {
|
||||
int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
|
||||
uint32_t value = getValue(trie->data, valueWidth, di);
|
||||
if (filter != nullptr) { value = filter(context, value); }
|
||||
*pValue = value;
|
||||
}
|
||||
return MAX_UNICODE;
|
||||
}
|
||||
|
||||
uint32_t nullValue = trie->nullValue;
|
||||
if (filter != nullptr) { nullValue = filter(context, nullValue); }
|
||||
const uint16_t *index = trie->index;
|
||||
|
||||
int32_t prevI3Block = -1;
|
||||
int32_t prevBlock = -1;
|
||||
UChar32 c = start;
|
||||
uint32_t value;
|
||||
bool haveValue = false;
|
||||
do {
|
||||
int32_t i3Block;
|
||||
int32_t i3;
|
||||
int32_t i3BlockLength;
|
||||
int32_t dataBlockLength;
|
||||
if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) {
|
||||
i3Block = 0;
|
||||
i3 = c >> UCPTRIE_FAST_SHIFT;
|
||||
i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ?
|
||||
UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
|
||||
dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
|
||||
} else {
|
||||
// Use the multi-stage index.
|
||||
int32_t i1 = c >> UCPTRIE_SHIFT_1;
|
||||
if (trie->type == UCPTRIE_TYPE_FAST) {
|
||||
U_ASSERT(0xffff < c && c < trie->highStart);
|
||||
i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH;
|
||||
} else {
|
||||
U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT);
|
||||
i1 += UCPTRIE_SMALL_INDEX_LENGTH;
|
||||
}
|
||||
i3Block = trie->index[
|
||||
(int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)];
|
||||
if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) {
|
||||
// The index-3 block is the same as the previous one, and filled with value.
|
||||
U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
|
||||
c += UCPTRIE_CP_PER_INDEX_2_ENTRY;
|
||||
continue;
|
||||
}
|
||||
prevI3Block = i3Block;
|
||||
if (i3Block == trie->index3NullOffset) {
|
||||
// This is the index-3 null block.
|
||||
if (haveValue) {
|
||||
if (nullValue != value) {
|
||||
return c - 1;
|
||||
}
|
||||
} else {
|
||||
value = nullValue;
|
||||
if (pValue != nullptr) { *pValue = nullValue; }
|
||||
haveValue = true;
|
||||
}
|
||||
prevBlock = trie->dataNullOffset;
|
||||
c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
|
||||
continue;
|
||||
}
|
||||
i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK;
|
||||
i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH;
|
||||
dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
|
||||
}
|
||||
// Enumerate data blocks for one index-3 block.
|
||||
do {
|
||||
int32_t block;
|
||||
if ((i3Block & 0x8000) == 0) {
|
||||
block = index[i3Block + i3];
|
||||
} else {
|
||||
// 18-bit indexes stored in groups of 9 entries per 8 indexes.
|
||||
int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3);
|
||||
int32_t gi = i3 & 7;
|
||||
block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000;
|
||||
block |= index[group + gi];
|
||||
}
|
||||
if (block == prevBlock && (c - start) >= dataBlockLength) {
|
||||
// The block is the same as the previous one, and filled with value.
|
||||
U_ASSERT((c & (dataBlockLength - 1)) == 0);
|
||||
c += dataBlockLength;
|
||||
} else {
|
||||
int32_t dataMask = dataBlockLength - 1;
|
||||
prevBlock = block;
|
||||
if (block == trie->dataNullOffset) {
|
||||
// This is the data null block.
|
||||
if (haveValue) {
|
||||
if (nullValue != value) {
|
||||
return c - 1;
|
||||
}
|
||||
} else {
|
||||
value = nullValue;
|
||||
if (pValue != nullptr) { *pValue = nullValue; }
|
||||
haveValue = true;
|
||||
}
|
||||
c = (c + dataBlockLength) & ~dataMask;
|
||||
} else {
|
||||
int32_t di = block + (c & dataMask);
|
||||
uint32_t value2 = getValue(trie->data, valueWidth, di);
|
||||
value2 = maybeFilterValue(value2, trie->nullValue, nullValue,
|
||||
filter, context);
|
||||
if (haveValue) {
|
||||
if (value2 != value) {
|
||||
return c - 1;
|
||||
}
|
||||
} else {
|
||||
value = value2;
|
||||
if (pValue != nullptr) { *pValue = value; }
|
||||
haveValue = true;
|
||||
}
|
||||
while ((++c & dataMask) != 0) {
|
||||
if (maybeFilterValue(getValue(trie->data, valueWidth, ++di),
|
||||
trie->nullValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (++i3 < i3BlockLength);
|
||||
} while (c < trie->highStart);
|
||||
U_ASSERT(haveValue);
|
||||
int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET;
|
||||
uint32_t highValue = getValue(trie->data, valueWidth, di);
|
||||
if (maybeFilterValue(highValue, trie->nullValue, nullValue,
|
||||
filter, context) != value) {
|
||||
return c - 1;
|
||||
} else {
|
||||
return MAX_UNICODE;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * Range lookup with special handling of surrogate code points, implemented on
 * top of a basic getRange callback.
 *
 * With UCPMAP_RANGE_NORMAL the callback result is returned unchanged.
 * Otherwise U+D800..U+DFFF (UCPMAP_RANGE_FIXED_ALL_SURROGATES) or
 * U+D800..U+DBFF (any other option) are treated as having surrogateValue,
 * and ranges are split or merged around them accordingly.
 *
 * @param getRange callback that returns the end of the same-value range at a start code point
 * @param trie opaque object passed through to the callback
 * @param start the first code point of the range
 * @param option selects normal behavior or which surrogates get the fixed value
 * @param surrogateValue the value reported for the affected surrogates
 * @param filter optional value filter passed through to the callback, may be nullptr
 * @param context opaque pointer passed through to the callback
 * @param pValue if not nullptr, receives the value of the range
 * @return the last code point of the range, or whatever the callback returns on failure
 */
U_CFUNC UChar32
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
                         const void *trie, UChar32 start,
                         UCPMapRangeOption option, uint32_t surrogateValue,
                         UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
    if (option == UCPMAP_RANGE_NORMAL) {
        return getRange(trie, start, filter, context, pValue);
    }

    // We need to examine the range value even if the caller does not want it.
    uint32_t scratchValue;
    if (pValue == nullptr) {
        pValue = &scratchValue;
    }

    UChar32 lastSurrogate;
    if (option == UCPMAP_RANGE_FIXED_ALL_SURROGATES) {
        lastSurrogate = 0xdfff;
    } else {
        lastSurrogate = 0xdbff;
    }

    const UChar32 rangeEnd = getRange(trie, start, filter, context, pValue);
    if (rangeEnd < 0xd7ff || start > lastSurrogate) {
        return rangeEnd;
    }

    // The range overlaps with surrogates, or ends just before the first one.
    if (*pValue != surrogateValue) {
        if (start <= 0xd7ff) {
            return 0xd7ff;  // Non-surrogateValue range ends before surrogateValue surrogates.
        }
        // Start is a surrogate with a non-surrogateValue code *unit* value.
        // Return a surrogateValue code *point* range.
        *pValue = surrogateValue;
        if (rangeEnd > lastSurrogate) {
            return lastSurrogate;  // Surrogate range ends before non-surrogateValue rest of range.
        }
    } else if (rangeEnd >= lastSurrogate) {
        // Surrogates followed by a non-surrogateValue range,
        // or surrogates are part of a larger surrogateValue range.
        return rangeEnd;
    }

    // See if the surrogateValue surrogate range can be merged with
    // an immediately following range.
    uint32_t followingValue;
    const UChar32 followingEnd =
        getRange(trie, lastSurrogate + 1, filter, context, &followingValue);
    return (followingValue == surrogateValue) ? followingEnd : lastSurrogate;
}
|
||||
|
||||
/**
 * Returns the last code point of the same-value range that starts at start,
 * applying the surrogate handling selected by option.
 *
 * This forwards to ucptrie_internalGetRange() with this file's getRange()
 * as the basic range function; see there for the parameter semantics.
 */
U_CAPI UChar32 U_EXPORT2
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
                 UCPMapRangeOption option, uint32_t surrogateValue,
                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
    UCPTrieGetRange *const basicRange = getRange;
    return ucptrie_internalGetRange(basicRange, trie, start,
                                    option, surrogateValue,
                                    filter, context, pValue);
}
|
||||
|
||||
/**
 * Serializes the trie into the caller's buffer: a UCPTrieHeader followed by
 * the index array and then the data array.
 *
 * @param trie the trie to serialize
 * @param data destination buffer; must be 4-byte-aligned when capacity > 0
 * @param capacity the size of the buffer in bytes
 * @param pErrorCode in/out ICU error code; the function does nothing if it
 *        already indicates a failure. Set to U_ILLEGAL_ARGUMENT_ERROR for a bad
 *        trie type/value width or bad buffer arguments, and to
 *        U_BUFFER_OVERFLOW_ERROR if capacity is too small.
 * @return the number of bytes written, or the number of bytes needed in the
 *         overflow case, or 0 for other errors
 */
U_CAPI int32_t U_EXPORT2
ucptrie_toBinary(const UCPTrie *trie,
                 void *data, int32_t capacity,
                 UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) {
        return 0;
    }

    // Validate the trie fields and the output buffer.
    const UCPTrieType type = (UCPTrieType)trie->type;
    const UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth;
    const bool badType = type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type;
    const bool badWidth =
        valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth;
    const bool badBuffer =
        capacity < 0 ||
        (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)));
    if (badType || badWidth || badBuffer) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Compute the serialized size.
    const int32_t indexBytes = trie->indexLength * 2;
    int32_t dataBytes = 0;
    switch (valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        dataBytes = trie->dataLength * 2;
        break;
    case UCPTRIE_VALUE_BITS_32:
        dataBytes = trie->dataLength * 4;
        break;
    case UCPTRIE_VALUE_BITS_8:
        dataBytes = trie->dataLength;
        break;
    default:
        // unreachable
        break;
    }
    const int32_t length = (int32_t)sizeof(UCPTrieHeader) + indexBytes + dataBytes;
    if (capacity < length) {
        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
        return length;
    }

    // Write the header. The top 4 bits of the 20-bit dataLength and
    // dataNullOffset go into the options field.
    char *out = (char *)data;
    UCPTrieHeader *header = (UCPTrieHeader *)out;
    header->signature = UCPTRIE_SIG;  // "Tri3"
    header->options = (uint16_t)(
        ((trie->dataLength & 0xf0000) >> 4) |
        ((trie->dataNullOffset & 0xf0000) >> 8) |
        (trie->type << 6) |
        valueWidth);
    header->indexLength = (uint16_t)trie->indexLength;
    header->dataLength = (uint16_t)trie->dataLength;
    header->index3NullOffset = trie->index3NullOffset;
    header->dataNullOffset = (uint16_t)trie->dataNullOffset;
    header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2;
    out += sizeof(UCPTrieHeader);

    // Write the index array.
    uprv_memcpy(out, trie->index, indexBytes);
    out += indexBytes;

    // Write the data array.
    switch (valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        uprv_memcpy(out, trie->data.ptr16, dataBytes);
        break;
    case UCPTRIE_VALUE_BITS_32:
        uprv_memcpy(out, trie->data.ptr32, dataBytes);
        break;
    case UCPTRIE_VALUE_BITS_8:
        uprv_memcpy(out, trie->data.ptr8, dataBytes);
        break;
    default:
        // unreachable
        break;
    }
    return length;
}
|
||||
|
||||
namespace {

#ifdef UCPTRIE_DEBUG
/**
 * Debug helper: counts how many entries of the trie data array
 * are equal to the trie's null value.
 */
long countNull(const UCPTrie *trie) {
    const uint32_t nullValue = trie->nullValue;
    const int32_t dataLength = trie->dataLength;
    long nullCount = 0;
    switch (trie->valueWidth) {
    case UCPTRIE_VALUE_BITS_16:
        for (int32_t pos = 0; pos < dataLength; ++pos) {
            nullCount += (trie->data.ptr16[pos] == nullValue) ? 1 : 0;
        }
        break;
    case UCPTRIE_VALUE_BITS_32:
        for (int32_t pos = 0; pos < dataLength; ++pos) {
            nullCount += (trie->data.ptr32[pos] == nullValue) ? 1 : 0;
        }
        break;
    case UCPTRIE_VALUE_BITS_8:
        for (int32_t pos = 0; pos < dataLength; ++pos) {
            nullCount += (trie->data.ptr8[pos] == nullValue) ? 1 : 0;
        }
        break;
    default:
        // unreachable
        break;
    }
    return nullCount;
}

/**
 * Debug helper: prints the index length, data length, number of null-value
 * data entries and the serialized size of the trie to stdout.
 *
 * @param trie the trie
 * @param which a label printed in front of the trie name
 */
U_CFUNC void
ucptrie_printLengths(const UCPTrie *trie, const char *which) {
    long bytesPerValue = 1;
    if (trie->valueWidth == UCPTRIE_VALUE_BITS_16) {
        bytesPerValue = 2;
    } else if (trie->valueWidth == UCPTRIE_VALUE_BITS_32) {
        bytesPerValue = 4;
    }
    const long indexLength = trie->indexLength;
    const long dataLength = (long)trie->dataLength;
    const long totalLength =
        (long)sizeof(UCPTrieHeader) + indexLength * 2 + dataLength * bytesPerValue;
    printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n",
           which, trie->name, indexLength, dataLength, countNull(trie), totalLength);
}
#endif

}  // namespace
|
||||
|
||||
// UCPMap ----
|
||||
// Initially, this is the same as UCPTrie. This may well change.
|
||||
|
||||
/**
 * Returns the value for code point c from a UCPMap.
 * The map pointer is reinterpreted as a UCPTrie and the lookup is
 * delegated to ucptrie_get().
 */
U_CAPI uint32_t U_EXPORT2
ucpmap_get(const UCPMap *map, UChar32 c) {
    const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(map);
    return ucptrie_get(trie, c);
}
|
||||
|
||||
/**
 * Returns the last code point of the same-value range starting at start
 * in a UCPMap.
 * The map pointer is reinterpreted as a UCPTrie and all arguments are
 * passed through to ucptrie_getRange().
 */
U_CAPI UChar32 U_EXPORT2
ucpmap_getRange(const UCPMap *map, UChar32 start,
                UCPMapRangeOption option, uint32_t surrogateValue,
                UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
    const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(map);
    return ucptrie_getRange(trie, start,
                            option, surrogateValue,
                            filter, context, pValue);
}
|
|
@ -0,0 +1,289 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucptrie_impl.h (modified from utrie2_impl.h)
|
||||
// created: 2017dec29 Markus W. Scherer
|
||||
|
||||
#ifndef __UCPTRIE_IMPL_H__
|
||||
#define __UCPTRIE_IMPL_H__
|
||||
|
||||
#include "unicode/ucptrie.h"
|
||||
#ifdef UCPTRIE_DEBUG
|
||||
#include "unicode/umutablecptrie.h"
|
||||
#endif
|
||||
|
||||
// UCPTrie signature values, in platform endianness and opposite endianness.
|
||||
// The UCPTrie signature ASCII byte values spell "Tri3".
|
||||
#define UCPTRIE_SIG 0x54726933
|
||||
#define UCPTRIE_OE_SIG 0x33697254
|
||||
|
||||
/**
|
||||
* Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
* The fields add up to 16 bytes: one uint32_t followed by six uint16_t.
* In the serialized form the header is followed by the index array and then the data array.
|
||||
* @internal
|
||||
*/
|
||||
struct UCPTrieHeader {
|
||||
/** "Tri3" in big-endian US-ASCII (0x54726933) */
|
||||
uint32_t signature;
|
||||
|
||||
/**
|
||||
* Options bit field:
|
||||
* Bits 15..12: Data length bits 19..16.
|
||||
* Bits 11..8: Data null block offset bits 19..16.
|
||||
* Bits 7..6: UCPTrieType
|
||||
* Bits 5..3: Reserved (0).
|
||||
* Bits 2..0: UCPTrieValueWidth
|
||||
*/
|
||||
uint16_t options;
|
||||
|
||||
/** Total length of the index tables, counted in uint16_t entries. */
|
||||
uint16_t indexLength;
|
||||
|
||||
/** Data length bits 15..0, counted in data values. Bits 19..16 are stored in options. */
|
||||
uint16_t dataLength;
|
||||
|
||||
/** Index-3 null block offset, 0x7fff or 0xffff if none. */
|
||||
uint16_t index3NullOffset;
|
||||
|
||||
/**
 * Data null block offset bits 15..0. Bits 19..16 are stored in options.
 * The full 20-bit offset is 0xfffff if there is no data null block.
 */
|
||||
uint16_t dataNullOffset;
|
||||
|
||||
/**
|
||||
* First code point of the single-value range ending with U+10ffff,
|
||||
* rounded up and then shifted right by UCPTRIE_SHIFT_2.
|
||||
*/
|
||||
uint16_t shiftedHighStart;
|
||||
};
|
||||
|
||||
/**
|
||||
* Constants for use with UCPTrieHeader.options.
* Also defines the "no null block" marker values for the two null offsets.
|
||||
* @internal
|
||||
*/
|
||||
enum {
|
||||
/** Options bits 15..12: bits 19..16 of the data length. */
UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000,
|
||||
/** Options bits 11..8: bits 19..16 of the data null block offset. */
UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00,
|
||||
/** Options bits 5..3: reserved, 0 in this format. */
UCPTRIE_OPTIONS_RESERVED_MASK = 0x38,
|
||||
/** Options bits 2..0: the UCPTrieValueWidth. */
UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7,
|
||||
/**
|
||||
* Value for index3NullOffset which indicates that there is no index-3 null block.
|
||||
* Bit 15 is unused for this value because this bit is used if the index-3 contains
|
||||
* 18-bit indexes.
|
||||
*/
|
||||
UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff,
|
||||
/** Value for the full 20-bit data null offset which indicates that there is no data null block. */
UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff
|
||||
};
|
||||
|
||||
// Internal constants.
// Lengths, shift widths and masks that describe the layout of the index tables
// and of the small data blocks.
|
||||
enum {
|
||||
/** The length of the BMP index table. 1024=0x400 */
|
||||
UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT,
|
||||
|
||||
/** Limit (exclusive) of the fast-indexed code point range of a "small" trie: U+1000. */
UCPTRIE_SMALL_LIMIT = 0x1000,
|
||||
/** The length of the fast index table of a "small" trie. */
UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT,
|
||||
|
||||
/** Shift size for getting the index-3 table offset. */
|
||||
UCPTRIE_SHIFT_3 = 4,
|
||||
|
||||
/** Shift size for getting the index-2 table offset. 9=5+4 */
|
||||
UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3,
|
||||
|
||||
/** Shift size for getting the index-1 table offset. 14=5+9 */
|
||||
UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2,
|
||||
|
||||
/**
|
||||
* Difference between two shift sizes,
|
||||
* for getting an index-2 offset from an index-3 offset. 5=9-4
|
||||
*/
|
||||
UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3,
|
||||
|
||||
/**
|
||||
* Difference between two shift sizes,
|
||||
* for getting an index-1 offset from an index-2 offset. 5=14-9
|
||||
*/
|
||||
UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2,
|
||||
|
||||
/**
|
||||
* Number of index-1 entries for the BMP. (4)
|
||||
* This part of the index-1 table is omitted from the serialized form.
|
||||
*/
|
||||
UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1,
|
||||
|
||||
/** Number of entries in an index-2 block. 32=0x20 */
|
||||
UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2,
|
||||
|
||||
/** Mask for getting the lower bits for the in-index-2-block offset. */
|
||||
UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1,
|
||||
|
||||
/** Number of code points per index-2 table entry. 512=0x200 */
|
||||
UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2,
|
||||
|
||||
/** Number of entries in an index-3 block. 32=0x20 */
|
||||
UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3,
|
||||
|
||||
/** Mask for getting the lower bits for the in-index-3-block offset. */
|
||||
UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1,
|
||||
|
||||
/** Number of entries in a small data block. 16=0x10 */
|
||||
UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3,
|
||||
|
||||
/** Mask for getting the lower bits for the in-small-data-block offset. */
|
||||
UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1
|
||||
};
|
||||
|
||||
/**
 * Function type for a basic same-value range lookup, without special
 * treatment of surrogates. The trie is passed as an opaque pointer.
 * Used as the callback of ucptrie_internalGetRange().
 */
typedef UChar32
|
||||
UCPTrieGetRange(const void *trie, UChar32 start,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
/**
 * Range lookup with the surrogate handling selected by option,
 * built on top of the basic getRange callback.
 * The trie pointer is passed through to the callback unchanged.
 */
U_CFUNC UChar32
|
||||
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
|
||||
const void *trie, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
// Declarations that exist only in debug builds of the trie code.
#ifdef UCPTRIE_DEBUG
|
||||
/** Prints index/data lengths and the serialized size of the trie; which is a label for the output. */
U_CFUNC void
|
||||
ucptrie_printLengths(const UCPTrie *trie, const char *which);
|
||||
|
||||
U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
|
||||
* For overview information see http://site.icu-project.org/design/struct/utrie
|
||||
*
|
||||
* The binary trie data should be 32-bit-aligned.
|
||||
* The overall layout is:
|
||||
*
|
||||
* UCPTrieHeader header; -- 16 bytes, see struct definition above
|
||||
* uint16_t index[header.indexLength];
|
||||
* uintXY_t data[header.dataLength];
|
||||
*
|
||||
* The trie data array is an array of uint16_t, uint32_t, or uint8_t,
|
||||
* specified via the UCPTrieValueWidth when building the trie.
|
||||
* The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
|
||||
* The overall length of the trie data is a multiple of 4 bytes.
|
||||
* (Padding is added at the end of the index array and/or near the end of the data array as needed.)
|
||||
*
|
||||
* The length of the data array (dataLength) is stored as an integer split across two fields
|
||||
* of the header struct (high bits in header.options).
|
||||
*
|
||||
* The trie type can be "fast" or "small" which determines the index structure,
|
||||
* specified via the UCPTrieType when building the trie.
|
||||
*
|
||||
* The type and valueWidth are stored in the header.options.
|
||||
* There are reserved type and valueWidth values, and reserved header.options bits.
|
||||
* They could be used in future format extensions.
|
||||
* Code reading the trie structure must fail with an error when unknown values or options are set.
|
||||
*
|
||||
* Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array.
|
||||
*
|
||||
* Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
|
||||
* For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
|
||||
* For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
|
||||
*
|
||||
* All code points in the range highStart..U+10FFFF map to a single highValue
|
||||
* which is stored at the second-to-last position of the data array.
|
||||
* (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
|
||||
* The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
|
||||
* (UCPTRIE_SHIFT_2=9)
|
||||
*
|
||||
* Values for code points fast_limit..highStart-1 are found via four-stage lookup.
|
||||
* The data block size is smaller for this range than for the fast range.
|
||||
* This together with more index stages with small blocks makes this range
|
||||
* more easily compactable.
|
||||
*
|
||||
* There is also a trie error value stored at the last position of the data array.
|
||||
* (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
|
||||
* It is intended to be returned for inputs that are not Unicode code points
|
||||
* (outside U+0000..U+10FFFF), or in string processing for ill-formed input
|
||||
* (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
|
||||
*
|
||||
* For a "fast" trie:
|
||||
*
|
||||
* The index array starts with the BMP index table for BMP code point lookup.
|
||||
* Its length is 1024=0x400.
|
||||
*
|
||||
* The supplementary index-1 table follows the BMP index table.
|
||||
* Variable length, for code points up to highStart-1.
|
||||
* Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
|
||||
* (For 0x100000 supplementary code points U+10000..U+10ffff.)
|
||||
*
|
||||
* After this index-1 table follow the variable-length index-3 and index-2 tables.
|
||||
*
|
||||
* The supplementary index tables are omitted completely
|
||||
* if there is only BMP data (highStart<=U+10000).
|
||||
*
|
||||
* For a "small" trie:
|
||||
*
|
||||
* The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
|
||||
*
|
||||
* The "supplementary" index tables are always stored.
|
||||
* The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
|
||||
*
|
||||
* For both trie types:
|
||||
*
|
||||
* The last index-2 block may be a partial block, storing indexes only for code points
|
||||
* below highStart.
|
||||
*
|
||||
* Lookup for ASCII code point c:
|
||||
*
|
||||
* Linear access from the start of the data array.
|
||||
*
|
||||
* value = data[c];
|
||||
*
|
||||
* Lookup for fast-range code point c:
|
||||
*
|
||||
* Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
|
||||
* fetch the index array value at that offset,
|
||||
* add the lower code point bits, index into the data array.
|
||||
*
|
||||
* value = data[index[c>>6] + (c&0x3f)];
|
||||
*
|
||||
* (This works for ASCII as well.)
|
||||
*
|
||||
* Lookup for small-range code point c below highStart:
|
||||
*
|
||||
* Split the code point into four bit fields using several sets of shifts & masks
|
||||
* to read consecutive values from the index-1, index-2, index-3 and data tables.
|
||||
*
|
||||
* If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
|
||||
* then the data block offsets are stored directly as uint16_t.
|
||||
*
|
||||
* Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
|
||||
* has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
|
||||
* an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
|
||||
* in the additional word.
|
||||
*
|
||||
* See ucptrie_internalSmallIndex() for details.
|
||||
*
|
||||
* (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
|
||||
*
|
||||
* Compaction:
|
||||
*
|
||||
* Multiple code point ranges ("blocks") that are aligned on certain boundaries
|
||||
* (determined by the shifting/bit fields of code points) and
|
||||
* map to the same data values normally share a single subsequence of the data array.
|
||||
* Data blocks can also overlap partially.
|
||||
* (Depending on the builder code finding duplicate and overlapping blocks.)
|
||||
*
|
||||
* Iteration over same-value ranges:
|
||||
*
|
||||
* Range iteration (ucptrie_getRange()) walks the structure from a start code point
|
||||
* until some code point is found that maps to a different value;
|
||||
* the end of the returned range is just before that.
|
||||
*
|
||||
* The header.dataNullOffset (split across two header fields, high bits in header.options)
|
||||
* is the offset of a widely shared data block filled with one single value.
|
||||
* It helps quickly skip over large ranges of data with that value.
|
||||
* The builder must ensure that if the start of any data block (fast or small)
|
||||
* matches the dataNullOffset, then the whole block must be filled with the null value.
|
||||
* Special care must be taken if there is no fast null data block
|
||||
* but a small one, which is shorter, and it matches the *start* of some fast data block.
|
||||
*
|
||||
* Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
|
||||
* where all index entries point to the dataNullOffset.
|
||||
* If there is no such data or index-3 block, then these offsets are set to
|
||||
* values that cannot be reached (data offset out of range/reserved index offset),
|
||||
* normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
|
||||
*/
|
||||
|
||||
#endif
|
|
@ -1077,11 +1077,11 @@ collectCurrencyNames(const char* locale,
|
|||
}
|
||||
|
||||
// currency plurals
|
||||
UErrorCode ec3 = U_ZERO_ERROR;
|
||||
UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec3);
|
||||
UErrorCode ec5 = U_ZERO_ERROR;
|
||||
UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec5);
|
||||
n = ures_getSize(curr_p);
|
||||
for (int32_t i=0; i<n; ++i) {
|
||||
UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec3);
|
||||
UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec5);
|
||||
iso = (char*)ures_getKey(names);
|
||||
// Using hash to remove duplicated ISO codes in fallback chain.
|
||||
if (localeLevel == 0) {
|
||||
|
@ -1099,7 +1099,7 @@ collectCurrencyNames(const char* locale,
|
|||
for (int32_t j = 0; j < num; ++j) {
|
||||
// TODO: remove duplicates between singular name and
|
||||
// currency long name?
|
||||
s = ures_getStringByIndex(names, j, &len, &ec3);
|
||||
s = ures_getStringByIndex(names, j, &len, &ec5);
|
||||
(*currencyNames)[*total_currency_name_count].IsoCode = iso;
|
||||
UChar* upperName = toUpperCase(s, len, locale);
|
||||
(*currencyNames)[*total_currency_name_count].currencyName = upperName;
|
||||
|
@ -1449,7 +1449,7 @@ getCacheEntry(const char* locale, UErrorCode& ec) {
|
|||
umtx_lock(&gCurrencyCacheMutex);
|
||||
// in order to handle racing correctly,
|
||||
// not putting 'search' in a separate function.
|
||||
int8_t found = -1;
|
||||
int8_t found = -1;
|
||||
for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
|
||||
if (currCache[i]!= NULL &&
|
||||
uprv_strcmp(locale, currCache[i]->locale) == 0) {
|
||||
|
@ -1469,7 +1469,6 @@ getCacheEntry(const char* locale, UErrorCode& ec) {
|
|||
}
|
||||
umtx_lock(&gCurrencyCacheMutex);
|
||||
// check again.
|
||||
int8_t found = -1;
|
||||
for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
|
||||
if (currCache[i]!= NULL &&
|
||||
uprv_strcmp(locale, currCache[i]->locale) == 0) {
|
||||
|
|
|
@ -418,7 +418,8 @@ private:
|
|||
const char *path; /* working path (u_icudata_Dir) */
|
||||
const char *nextPath; /* path following this one */
|
||||
const char *basename; /* item's basename (icudt22e_mt.res)*/
|
||||
const char *suffix; /* item suffix (can be null) */
|
||||
|
||||
StringPiece suffix; /* item suffix (can be null) */
|
||||
|
||||
uint32_t basenameLen; /* length of basename */
|
||||
|
||||
|
@ -432,13 +433,15 @@ private:
|
|||
};
|
||||
|
||||
/**
|
||||
* @param iter The iterator to be initialized. Its current state does not matter.
|
||||
* @param path The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
|
||||
* @param pkg Package which is being searched for, ex "icudt28l". Will ignore leave directories such as /icudt28l
|
||||
* @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
|
||||
* @param suffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
|
||||
* Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
|
||||
* '/blarg/stuff.dat' would also be found.
|
||||
* @param iter The iterator to be initialized. Its current state does not matter.
|
||||
* @param inPath The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME
|
||||
* @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l
|
||||
* @param item Item to be searched for. Can include full path, such as /a/b/foo.dat
|
||||
* @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
|
||||
* Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
|
||||
* '/blarg/stuff.dat' would also be found.
|
||||
* Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
|
||||
* the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
|
||||
*/
|
||||
UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
|
||||
const char *item, const char *inSuffix, UBool doCheckLastFour,
|
||||
|
@ -566,7 +569,7 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode)
|
|||
|
||||
if(checkLastFour == TRUE &&
|
||||
(pathLen>=4) &&
|
||||
uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix, 4)==0 && /* suffix matches */
|
||||
uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
|
||||
uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */
|
||||
uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
|
||||
|
||||
|
@ -602,8 +605,13 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode)
|
|||
/* + basename */
|
||||
pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
|
||||
|
||||
if(*suffix) /* tack on suffix */
|
||||
if (!suffix.empty()) /* tack on suffix */
|
||||
{
|
||||
if (suffix.length() > 4) {
|
||||
// If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
|
||||
// then we need to ensure that the path ends with a separator.
|
||||
pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
|
||||
}
|
||||
pathBuffer.append(suffix, *pErrorCode);
|
||||
}
|
||||
}
|
||||
|
@ -751,16 +759,19 @@ openCommonData(const char *path, /* Path from OpenChoice? */
|
|||
|
||||
UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
|
||||
|
||||
while((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
|
||||
while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
|
||||
{
|
||||
#ifdef UDATA_DEBUG
|
||||
fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
|
||||
#endif
|
||||
uprv_mapFile(&tData, pathBuffer);
|
||||
uprv_mapFile(&tData, pathBuffer, pErrorCode);
|
||||
#ifdef UDATA_DEBUG
|
||||
fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
|
||||
#endif
|
||||
}
|
||||
if (U_FAILURE(*pErrorCode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(OS390_STUBDATA) && defined(OS390BATCH)
|
||||
if (!UDataMemory_isLoaded(&tData)) {
|
||||
|
@ -769,7 +780,7 @@ openCommonData(const char *path, /* Path from OpenChoice? */
|
|||
uprv_strncpy(ourPathBuffer, path, 1019);
|
||||
ourPathBuffer[1019]=0;
|
||||
uprv_strcat(ourPathBuffer, ".dat");
|
||||
uprv_mapFile(&tData, ourPathBuffer);
|
||||
uprv_mapFile(&tData, ourPathBuffer, pErrorCode);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -860,7 +871,7 @@ static UBool extendICUData(UErrorCode *pErr)
|
|||
umtx_unlock(&extendICUDataMutex);
|
||||
#endif
|
||||
return didUpdate; /* Return true if ICUData pointer was updated. */
|
||||
/* (Could potentialy have been done by another thread racing */
|
||||
/* (Could potentially have been done by another thread racing */
|
||||
/* us through here, but that's fine, we still return true */
|
||||
/* so that current thread will also examine extended data. */
|
||||
}
|
||||
|
@ -986,12 +997,12 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
|
|||
/* init path iterator for individual files */
|
||||
UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
|
||||
|
||||
while((pathBuffer = iter.next(pErrorCode)) != NULL)
|
||||
while ((pathBuffer = iter.next(pErrorCode)) != NULL)
|
||||
{
|
||||
#ifdef UDATA_DEBUG
|
||||
fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
|
||||
#endif
|
||||
if(uprv_mapFile(&dataMemory, pathBuffer))
|
||||
if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
|
||||
{
|
||||
pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
|
||||
if (pEntryData != NULL) {
|
||||
|
@ -1007,7 +1018,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
|
|||
return pEntryData;
|
||||
}
|
||||
|
||||
/* the data is not acceptable, or some error occured. Either way, unmap the memory */
|
||||
/* the data is not acceptable, or some error occurred. Either way, unmap the memory */
|
||||
udata_close(&dataMemory);
|
||||
|
||||
/* If we had a nasty error, bail out completely. */
|
||||
|
@ -1076,6 +1087,11 @@ static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName
|
|||
}
|
||||
}
|
||||
}
|
||||
// If we failed due to being out-of-memory, then stop early and report the error.
|
||||
if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
|
||||
*pErrorCode = *subErrorCode;
|
||||
return NULL;
|
||||
}
|
||||
/* Data wasn't found. If we were looking for an ICUData item and there is
|
||||
* more data available, load it and try again,
|
||||
* otherwise break out of this loop. */
|
||||
|
@ -1252,7 +1268,8 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
|
||||
tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
|
||||
}
|
||||
tocEntryPathSuffix = tocEntryPath.data()+tocEntrySuffixIndex; /* suffix starts here */
|
||||
// The +1 is for the U_FILE_SEP_CHAR that is always appended above.
|
||||
tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
|
||||
|
||||
#ifdef UDATA_DEBUG
|
||||
fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
|
||||
|
|
|
@ -333,6 +333,43 @@ uprv_compareInvEbcdic(const UDataSwapper *ds,
|
|||
# error Unknown charset family!
|
||||
#endif
|
||||
|
||||
// utrie_swap.cpp -----------------------------------------------------------***
|
||||
|
||||
/**
|
||||
* Swaps a serialized UTrie.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Swaps a serialized UTrie2.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie2_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Swaps a serialized UCPTrie.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucptrie_swap(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Swaps a serialized UTrie, UTrie2, or UCPTrie.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
utrie_swapAnyVersion(const UDataSwapper *ds,
|
||||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/* material... -------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -218,7 +218,7 @@ _uhash_allocate(UHashtable *hash,
|
|||
|
||||
U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
|
||||
|
||||
hash->primeIndex = primeIndex;
|
||||
hash->primeIndex = static_cast<int8_t>(primeIndex);
|
||||
hash->length = PRIMES[primeIndex];
|
||||
|
||||
p = hash->elements = (UHashElement*)
|
||||
|
@ -860,13 +860,13 @@ uhash_hashUChars(const UHashTok key) {
|
|||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_hashChars(const UHashTok key) {
|
||||
const char *s = (const char *)key.pointer;
|
||||
return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, uprv_strlen(s)));
|
||||
return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))));
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_hashIChars(const UHashTok key) {
|
||||
const char *s = (const char *)key.pointer;
|
||||
return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
|
||||
return s == NULL ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s)));
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
|
|
@ -53,22 +53,6 @@ uprv_isInvariantString(const char *s, int32_t length);
|
|||
U_INTERNAL UBool U_EXPORT2
|
||||
uprv_isInvariantUString(const UChar *s, int32_t length);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
/**
|
||||
* Check if a UnicodeString only contains invariant characters.
|
||||
* See utypes.h for details.
|
||||
*
|
||||
* @param s Input string.
|
||||
* @return TRUE if s contains only invariant characters.
|
||||
*/
|
||||
U_INTERNAL inline UBool U_EXPORT2
|
||||
uprv_isInvariantUnicodeString(const icu::UnicodeString &s) {
|
||||
return uprv_isInvariantUString(icu::toUCharPtr(s.getBuffer()), s.length());
|
||||
}
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/**
|
||||
* \def U_UPPER_ORDINAL
|
||||
* Get the ordinal number of an uppercase invariant character
|
||||
|
|
|
@ -0,0 +1,722 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
//
|
||||
// file name: ulayout_props_data.h
|
||||
//
|
||||
// machine-generated by: icu/tools/unicode/c/genprops/layoutpropsbuilder.cpp
|
||||
|
||||
|
||||
#ifdef INCLUDED_FROM_UPROPS_CPP
|
||||
|
||||
static const int32_t maxInPCValue = 14;
|
||||
|
||||
static const uint16_t inpc_trieIndex[765]={
|
||||
0,0x40,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0x80,0xc0,0xff,0x13f,0x17e,0x1be,0x17e,0x1fe,0x23e,0x27e,0x2bc,0x2fc,
|
||||
0x33c,0x37b,0x23e,0x3bb,0x3fb,0x439,0x477,0x4ad,0x4e1,0x521,0x531,0x571,0x599,0x5d9,0x619,0x656,
|
||||
0x2b7,0x2c6,0x2d2,0x2c6,0x2ed,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0,0x10,0x20,
|
||||
0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,
|
||||
0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0x80,0x90,0xa0,
|
||||
0xb0,0xc0,0xd0,0xe0,0xf0,0xff,0x10f,0x11f,0x12f,0x13f,0x14f,0x15f,0x16f,0x17e,0x18e,0x19e,
|
||||
0x1ae,0x1be,0x1ce,0x1de,0x1ee,0x17e,0x18e,0x19e,0x1ae,0x1fe,0x20e,0x21e,0x22e,0x23e,0x24e,0x25e,
|
||||
0x26e,0x27e,0x28e,0x29e,0x2ae,0x2bc,0x2cc,0x2dc,0x2ec,0x2fc,0x30c,0x31c,0x32c,0x33c,0x34c,0x35c,
|
||||
0x36c,0x37b,0x38b,0x39b,0x3ab,0x23e,0x24e,0x25e,0x26e,0x3bb,0x3cb,0x3db,0x3eb,0x3fb,0x40b,0x41b,
|
||||
0x42b,0x439,0x449,0x459,0x469,0x477,0x487,0x497,0x4a7,0x4ad,0x4bd,0x4cd,0x4dd,0x4e1,0x4f1,0x501,
|
||||
0x511,0x521,0x531,0x541,0x551,0x531,0x541,0x551,0x561,0x571,0x581,0x591,0x5a1,0x599,0x5a9,0x5b9,
|
||||
0x5c9,0x5d9,0x5e9,0x5f9,0x609,0x619,0x629,0x639,0x649,0x656,0x666,0x676,0x686,0,0,0x68b,
|
||||
0x69a,0,0x6a9,0x6b8,0x6c7,0x6d5,0x6e5,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0x6f3,0,0x6f3,
|
||||
0,0x701,0,0x701,0,0,0,0x70b,0x71b,0x729,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0x739,0x749,0,0,
|
||||
0,0,0,0,0,0x759,0x768,0,0,0,0x772,0,0,0,0x77e,0x78d,
|
||||
0x79b,0,0,0,0,0,0,0,0,0x7ab,0,0,0x7b7,0x7c7,0,0x7cc,
|
||||
0x52c,0x81,0,0x7dc,0,0,0,0x7ea,0x3fb,0,0,0x7fa,0x807,0,0,0,
|
||||
0,0,0,0,0,0,0x817,0x827,0x835,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0x2b3,0x83f,0,0x84c,0,0,0,0,
|
||||
0,0x101,0,0,0x858,0x864,0,0x874,0x882,0,0,0x892,0,0x8a0,0x3fb,0,
|
||||
0,0x80,0,0,0x8b0,0x8c0,0,0x2b9,0,0,0x8c7,0x8d6,0x8e3,0,0,0x8f1,
|
||||
0,0,0,0x901,0x2bd,0,0x911,0x151,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0x921,0,0x930,0,0,0x940,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0x950,0,0,0x958,0x966,0,0,0,
|
||||
0x81,0,0,0x976,0,0,0,0,0x52d,0,0x981,0x991,0x3cb,0,0,0x659,
|
||||
0x81,0,0,0x99e,0x9ae,0,0,0,0x9bb,0x9cb,0,0,0,0,0,0,
|
||||
0,0,0,0x71,0x9db,0,0xff,0,0,0x9e6,0x9f6,0x14f,0xa04,0x52b,0,0,
|
||||
0,0,0,0,0,0,0x99c,0xa14,0x16f,0,0,0,0,0,0xa24,0xa33,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb,0xa43,0xe3,
|
||||
0x214,0,0,0,0xa53,0x2be,0,0,0,0,0,0xa63,0xa73,0,0,0,
|
||||
0,0,0xa7b,0xa8b,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0xa97,0xaa6,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xab5,
|
||||
0,0,0xac2,0,0xad1,0,0,0xadd,0xae7,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb,
|
||||
0xaf7,0,0,0,0,0,0xb07,0xb0f,0xb1e,0,0,0,0,0,0,0,
|
||||
0xb2d,0xb3c,0,0,0,0xb44,0xb54,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0xb61,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0x45,0x4d,0x4d,0x4d,0x5d,0x7d,0x9d,0xbd,0xdd,
|
||||
2,2,0xec,0x10a,0x129,0x149,2,2,2,2,2,2,2,2,2,2,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
2,2,2,2,2,2,0x169,0x188,2,2,2,2,2,2,2,2,
|
||||
2,2,0x1a8,2,2,0x1c8,0x1e6,0x203,0x221,0x23f,0x25f,0x27d,0x297
|
||||
};
|
||||
|
||||
static const uint8_t inpc_trieData[2930]={
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
8,8,8,7,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,8,7,1,0,7,4,
|
||||
7,1,1,1,1,8,8,8,8,7,7,7,7,1,4,7,
|
||||
0,8,1,8,8,8,1,1,0,0,0,0,0,0,0,0,
|
||||
0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
8,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,1,0,7,4,7,
|
||||
1,1,1,1,0,0,4,4,0,0,5,5,1,0,0,0,
|
||||
0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,8,
|
||||
8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,7,4,7,1,
|
||||
1,0,0,0,0,8,8,0,0,8,8,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,8,
|
||||
0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,1,
|
||||
1,1,1,8,0,8,8,0xd,0,7,7,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,8,8,8,8,8,8,0,8,
|
||||
7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,0,7,8,7,1,
|
||||
1,1,1,0,0,4,0xb,0,0,5,0xc,1,0,0,0,0,
|
||||
0,0,0,0,8,0xd,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,7,7,8,7,7,0,
|
||||
0,0,4,4,4,0,5,5,5,8,0,0,0,0,0,0,
|
||||
0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,8,7,7,7,
|
||||
8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,8,8,7,7,7,7,
|
||||
0,8,8,9,0,8,8,8,8,0,0,0,0,0,0,0,
|
||||
8,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0xd,7,7,7,7,
|
||||
0,8,0xd,0xd,0,0xd,0xd,8,8,0,0,0,0,0,0,0,
|
||||
7,7,0,0,0,0,0,0,0,0,0,0,0,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,8,8,7,7,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,8,8,0,7,7,7,1,1,0,4,
|
||||
4,4,0,5,5,5,8,0,0,0,0,0,0,0,0,0,
|
||||
7,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,7,7,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,8,0,0,0,0,7,7,7,8,
|
||||
8,1,0,1,0,7,4,0xb,4,5,0xc,5,7,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,
|
||||
7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,7,8,7,7,8,8,8,8,1,1,1,0,0,0,0,
|
||||
0,0xe,0xe,0xe,0xe,0xe,7,0,8,8,8,8,8,8,8,8,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,7,8,7,7,8,8,8,8,1,1,0,8,1,0,0,
|
||||
0,0xe,0xe,0xe,0xe,0xe,0,0,0,8,8,8,8,8,8,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
|
||||
1,0,8,0,0,0,0,7,4,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,1,8,9,1,1,9,
|
||||
9,9,9,8,8,8,8,8,7,8,9,8,8,1,0,8,
|
||||
8,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,
|
||||
1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,7,7,8,8,1,4,8,8,8,8,
|
||||
8,1,7,0,8,7,0,1,1,0,0,0,0,0,0,7,
|
||||
7,1,1,0,0,0,0,1,1,0,7,7,7,0,0,7,
|
||||
7,7,7,7,7,7,0,0,8,8,8,8,0,0,0,0,
|
||||
0,0,0,0,0,0,0,1,7,4,8,8,7,7,7,7,
|
||||
7,7,1,0,7,0,0,0,0,0,0,0,0,0,0,7,
|
||||
7,7,8,0,0,8,1,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,8,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,7,8,8,8,8,1,1,1,0xb,0xc,5,4,4,4,5,
|
||||
5,8,7,7,8,8,8,8,8,8,8,0,8,0,0,0,
|
||||
0,0,0,0,0,0,8,0,0,8,8,1,7,7,0xd,0xd,
|
||||
8,8,7,7,7,0,0,0,0,7,7,1,7,7,7,7,
|
||||
7,7,1,8,1,0,0,0,0,7,7,7,7,7,0xe,0xe,
|
||||
0xe,7,7,0xe,7,7,7,7,7,0,0,0,0,0,0,0,
|
||||
7,7,0,0,0,0,0,0,0,8,1,4,7,8,0,0,
|
||||
0,0,0,4,1,7,8,8,8,1,1,1,1,0,7,8,
|
||||
7,7,8,8,8,8,1,1,8,1,7,4,4,4,8,8,
|
||||
8,8,8,8,8,8,8,8,0,0,1,8,8,8,8,7,
|
||||
0,0,0,0,0,0,0,0,0,0,0,8,7,8,8,1,
|
||||
1,1,3,9,0xa,4,4,5,5,8,0xd,7,0,0,0,0,
|
||||
0,0,0,0,0,0,0,8,1,8,8,8,0,7,1,1,
|
||||
8,1,4,7,8,8,7,0,1,1,0,0,0,0,0,0,
|
||||
8,7,8,8,7,7,7,8,7,8,0,0,0,0,7,7,
|
||||
7,4,4,0xb,7,7,1,8,8,8,8,4,4,8,1,0,
|
||||
0,0,0,0,0,0,0,8,8,8,0,6,1,1,1,1,
|
||||
1,8,8,1,1,1,1,8,7,6,6,6,6,6,6,6,
|
||||
0,0,0,0,1,0,0,0,0,8,0,0,7,0,0,0,
|
||||
0,0,0,0,0,8,0,0,0,0,8,0,0,0,0,7,
|
||||
7,1,8,7,0,0,0,0,0,0,0,0,7,7,7,7,
|
||||
7,7,7,7,7,7,7,7,1,8,0,0,0,0,0,0,
|
||||
0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8,
|
||||
8,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,8,0,0,0,0,0,0,0,0,0,0,0,1,1,1,
|
||||
0,0,0,0,0,0,0,1,1,1,8,1,1,1,1,8,
|
||||
0,0,0,8,7,7,8,8,1,1,4,4,8,7,7,2,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
8,8,8,8,1,8,4,8,1,7,4,1,1,0,0,0,
|
||||
0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,8,
|
||||
7,0,0,0,0,0,0,0,0,0,0,0,7,8,7,0,
|
||||
0,8,7,8,8,1,0xe,0xe,8,8,0xe,7,0xe,0xe,7,8,
|
||||
8,0,0,0,0,0,0,0,0,0,0,0,4,1,8,4,
|
||||
7,0,0,0,7,7,8,7,7,1,7,7,0,7,1,0,
|
||||
0,6,1,1,0,8,6,0,0,0,0,0,1,1,1,8,
|
||||
0,0,0,0,0,0,0,0,8,1,1,0,0,0,0,0,
|
||||
7,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
8,8,8,8,1,1,1,1,8,8,8,8,8,0,0,0,
|
||||
0,0,0,0,0,0,7,4,7,1,1,8,8,7,7,1,
|
||||
1,0,0,0,0,0,0,0,8,8,8,1,1,4,8,9,
|
||||
9,8,1,1,0,8,0,0,0,0,0,0,0,0,0,0,
|
||||
0,7,4,7,1,1,1,1,1,1,8,8,8,0xd,7,0,
|
||||
0,0,0,0,0,0,0,1,0,8,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,7,7,7,1,8,8,0xd,0xd,8,
|
||||
7,8,8,0,0,0,0,0,0,8,0,7,4,7,1,1,
|
||||
8,8,8,8,1,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,0,7,7,8,7,7,7,7,0,0,4,4,0,
|
||||
0,5,5,7,0,0,7,7,0,0,8,8,8,8,8,8,
|
||||
8,0,0,0,7,7,1,8,8,7,1,0,0,0,0,0,
|
||||
0,0,0,0,7,4,7,1,1,1,1,1,1,4,8,0xb,
|
||||
5,7,5,8,7,1,1,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,4,7,1,1,1,1,0,0,4,0xb,5,0xc,8,
|
||||
8,7,1,7,7,7,1,1,1,1,1,1,8,8,7,7,
|
||||
8,7,1,0,0,0,0,0,0,0,0,0,0,0,8,7,
|
||||
8,4,7,1,1,8,8,8,8,7,1,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,0,8,7,7,8,8,1,
|
||||
1,4,8,1,8,8,8,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,7,4,7,1,1,1,8,8,8,8,8,7,1,
|
||||
1,0,0,0,0,0,8,1,1,8,8,8,8,8,8,1,
|
||||
0,0,0,0,0,1,1,8,8,8,8,7,0,1,1,1,
|
||||
1,0,8,1,1,8,8,8,7,7,1,1,1,0,0,0,
|
||||
0,0,0,0,0,0,0,1,1,1,1,1,1,8,7,8,
|
||||
0,0,0,0,0,0,0,8,8,1,1,1,1,1,0,8,
|
||||
8,8,8,8,8,7,1,0,0,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,0,7,1,1,1,1,1,1,4,
|
||||
1,8,7,8,8,0,0,0,0,0,0,0,0,0,8,8,
|
||||
8,8,8,1,0,0,0,8,0,8,8,0,8,8,1,8,
|
||||
1,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,
|
||||
7,7,7,0,8,8,0,7,7,8,7,0,0,0,0,0,
|
||||
0,0,0,0,8,1,4,7,0,0,0,0,0,0,0,0,
|
||||
0,0
|
||||
};
|
||||
|
||||
static const UCPTrie inpc_trie={
|
||||
inpc_trieIndex,
|
||||
{ inpc_trieData },
|
||||
765, 2930,
|
||||
0x12000, 0x12,
|
||||
1, 2,
|
||||
0, 0,
|
||||
0x2, 0x0,
|
||||
0x0,
|
||||
};
|
||||
|
||||
static const int32_t maxInSCValue = 35;
|
||||
|
||||
static const uint16_t insc_trieIndex[834]={
|
||||
0,0x40,0x60,0x94,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0xd4,0x112,0x152,0x190,0x1cf,0x20d,0x24c,0x28a,0x2ca,0x308,0x346,0x384,
|
||||
0x3c4,0x402,0x441,0x47f,0x4bf,0x4fd,0x53d,0x57d,0x5bc,0x5fc,0x63b,0x67b,0x69b,0x6db,0x71b,0x758,
|
||||
0x2f8,0x30b,0x317,0x30b,0x332,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x60,0x70,0x80,
|
||||
0x90,0x94,0xa4,0xb4,0xc4,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,
|
||||
0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,
|
||||
0x70,0x40,0x50,0x60,0x70,0xd4,0xe4,0xf4,0x104,0x112,0x122,0x132,0x142,0x152,0x162,0x172,
|
||||
0x182,0x190,0x1a0,0x1b0,0x1c0,0x1cf,0x1df,0x1ef,0x1ff,0x20d,0x21d,0x22d,0x23d,0x24c,0x25c,0x26c,
|
||||
0x27c,0x28a,0x29a,0x2aa,0x2ba,0x2ca,0x2da,0x2ea,0x2fa,0x308,0x318,0x328,0x338,0x346,0x356,0x366,
|
||||
0x376,0x384,0x394,0x3a4,0x3b4,0x3c4,0x3d4,0x3e4,0x3f4,0x402,0x412,0x422,0x432,0x441,0x451,0x461,
|
||||
0x471,0x47f,0x48f,0x49f,0x4af,0x4bf,0x4cf,0x4df,0x4ef,0x4fd,0x50d,0x51d,0x52d,0x53d,0x54d,0x55d,
|
||||
0x56d,0x57d,0x58d,0x59d,0x5ad,0x5bc,0x5cc,0x5dc,0x5ec,0x5fc,0x60c,0x61c,0x62c,0x63b,0x64b,0x65b,
|
||||
0x66b,0x67b,0x68b,0x69b,0x6ab,0x69b,0x6ab,0x6bb,0x6cb,0x6db,0x6eb,0x6fb,0x70b,0x71b,0x72b,0x73b,
|
||||
0x74b,0x758,0x768,0x778,0x788,0xe9,0xe9,0x798,0x7a3,0x7b3,0x7c3,0x7d2,0x7e1,0x7ef,0x7ff,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x80f,0x81d,0xe6,0x81d,0xe6,0x82d,0x80f,0x83d,0xe9,0xe9,0x84d,
|
||||
0x859,0x863,0x872,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x882,0x16c,0x892,0x8a2,0x22d,0xe9,0x8b2,0x8c2,0xe9,0xe9,0x374,0x8d2,
|
||||
0x8e1,0x30,0x40,0x40,0xe9,0x8f1,0xe9,0xe9,0x901,0x90e,0x91e,0x92a,0x30,0x30,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x93a,0xe6,0xe9,0x94a,0x956,0x30,0x40,0x40,0x966,0xe9,0x975,0x985,
|
||||
0xe9,0xe9,0x995,0x9a5,0xe9,0xe9,0x9b5,0x9c2,0x9d2,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x9e2,0x9f0,0x9fe,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0xa08,0xa14,0xa24,0x40,0x40,0x40,0x40,0x40,0x75a,0xa32,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x74,0x40,0x40,0x40,0xa42,0xe9,0xa4f,
|
||||
0x40,0xe9,0xa5f,0xa6d,0xa7c,0xd6,0xe7,0xe9,0xa8c,0xa98,0x30,0xaa8,0xab6,0xac6,0xe9,0xad4,
|
||||
0xe9,0xae4,0xaf3,0x40,0x40,0xb03,0xe9,0xe9,0xb12,0x297,0x30,0xb22,0xb32,0xe3,0xe9,0x889,
|
||||
0xb42,0xb52,0x30,0xe9,0xb61,0xe9,0xe9,0xe9,0xb71,0xb81,0x40,0xb91,0xba1,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xbb1,0xbc1,0xbce,0x30,0xbde,0xbee,0xe9,
|
||||
0xbf8,0x31,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xc08,0xe6,0xe9,
|
||||
0x88a,0xc18,0xc26,0xc30,0xc40,0xc50,0xe9,0xe9,0xc60,0x40,0x40,0x40,0x40,0xc70,0xe9,0x88b,
|
||||
0xc80,0xc90,0xca0,0xe9,0xcad,0xd5,0xe8,0xe9,0xcbd,0xccd,0x30,0x6ba,0x35,0xe1,0x3eb,0x886,
|
||||
0xcdd,0x40,0x40,0x40,0x40,0xced,0x16d,0xcfc,0xdf,0xe9,0xd0c,0xd1c,0x30,0xd2c,0x162,0x172,
|
||||
0xd3c,0x308,0xd4c,0xd5c,0x9ed,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xe9,
|
||||
0xd6c,0xd7a,0xd8a,0x40,0x40,0xd99,0xe9,0xe9,0x91f,0xda9,0x30,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xff,0xdb9,0xdc9,0xdd1,0x40,0x40,0xdb,0xe9,0xe9,
|
||||
0xde1,0xdf1,0x30,0x40,0x40,0xdf,0xe9,0xe01,0xe0e,0x30,0x40,0x40,0x40,0xe9,0xe1e,0xe2e,
|
||||
0xe3e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdf,0xe9,0x886,
|
||||
0xe4e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xe5e,0xe9,0xe9,
|
||||
0xe6b,0xe7b,0xe8b,0xe9,0xe9,0xe97,0xea1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xeb1,0xe9,0xff,
|
||||
0xec1,0xed1,0x6bb,0xee1,0x555,0xe9,0xeef,0x72b,0xeff,0x40,0x40,0x40,0x40,0xf0f,0xe9,0xe9,
|
||||
0xf1e,0xf2e,0x30,0xf3e,0xe9,0xf4a,0xf57,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0xe9,0xf67,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
|
||||
0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x45,0x55,0x55,0x55,0x65,0x85,0xa5,0xc5,
|
||||
0xe5,4,4,0xf5,0x114,0x134,0x154,4,0x174,4,0x17d,4,4,4,4,4,
|
||||
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
|
||||
4,4,4,4,4,4,4,4,4,4,4,0x19d,0x1bd,4,4,4,
|
||||
4,4,4,4,4,4,4,0x1dd,4,4,0x1fd,0x21d,0x23d,0x25d,0x27d,0x29d,
|
||||
0x2bd,0x2d8
|
||||
};
|
||||
|
||||
static const uint8_t insc_trieData[3960]={
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0xc,0,0,
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0xc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0x1c,0x1c,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0xc,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
|
||||
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x22,0x22,
|
||||
0x17,1,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,
|
||||
0x22,0x22,0,4,4,0,0,0x22,0x22,0x22,5,5,5,5,5,5,
|
||||
5,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
0x18,0x18,0,0,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5,
|
||||
5,5,0xc,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,
|
||||
0,0x23,0x23,0,0,0x23,0x23,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,
|
||||
5,5,5,0,5,0,0,0,5,5,5,5,0,0,0x17,1,
|
||||
0x22,0x22,0x22,0x22,0x22,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,6,0,
|
||||
0,0,0,0,0,0,0,0x22,0,0,0,0,5,5,0,5,
|
||||
0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
5,5,0,0,0,0,0,0,0,0,0,0,2,0,0x1c,0,
|
||||
2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0,0,0x23,0x23,
|
||||
0,0,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,
|
||||
0,5,5,0,5,5,0,5,5,0,0,0x17,0,0x22,0x22,0x22,
|
||||
0,0,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,0,0,0,4,0,
|
||||
0,0,0,0,0,0,5,5,5,5,0,5,0,0,0,0,
|
||||
0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,2,0x12,0xc,
|
||||
0xc,0,0xb,0,0,0,0,0,0,0,0,0,0,2,2,0x20,
|
||||
0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0,0x23,
|
||||
0x23,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5,
|
||||
0,5,5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0x22,
|
||||
0,0x22,0x22,0x22,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0x23,0x23,0x22,0x22,0,0,
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,
|
||||
0,0,0,5,4,4,4,0x17,0x17,0x17,0,2,2,0x20,0,0x23,
|
||||
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,0x23,0,0,0x23,0x23,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,0,5,5,5,5,5,5,5,0,5,5,0,5,
|
||||
5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0,0,0x22,
|
||||
0x22,0,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0,0x22,0x22,
|
||||
0,0,0,0,5,5,0,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18,
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,5,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0x15,0,0x23,0x23,0x23,0x23,0x23,
|
||||
0x23,0,0,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,0,0,0,5,
|
||||
5,0,5,0,5,5,0,0,0,5,5,0,0,0,5,5,
|
||||
5,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
0,0,0,0,0x22,0x22,0x22,0,0,0,0x22,0x22,0x22,0,0x22,0x22,
|
||||
0x22,0x1f,0,0,0,0,0,0,0,0,0,0x22,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,2,2,0x20,2,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
|
||||
0x23,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,
|
||||
0,1,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,
|
||||
0,0,0,0,0,0,0,0x22,0x22,0,5,5,5,0,0,0,
|
||||
0,0,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,
|
||||
0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,
|
||||
5,5,5,5,5,0,5,5,5,5,5,0,0,0x17,1,0x22,
|
||||
0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0,0,0,
|
||||
0,0,0,0,0x22,0x22,0,0,0,0,0,0,0,5,0,0x23,
|
||||
0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,
|
||||
0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||
2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,
|
||||
0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,0x1a,0x1a,1,0x22,0x22,0x22,
|
||||
0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0xd,0,0,0,0,
|
||||
0,6,6,6,0x22,0,0,0,0,0,0,0,0x23,0x23,0x23,0x22,
|
||||
0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,
|
||||
0,0,0,0,0,0,0,6,6,6,6,6,6,0,0,2,
|
||||
0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
|
||||
0x23,0x23,0x23,0x23,0,0,0,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,
|
||||
5,5,5,5,5,5,5,5,5,0,5,0,0,5,5,5,
|
||||
5,5,5,5,0,0,0,0x1f,0,0,0,0,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0,0x22,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0,0,
|
||||
0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0x22,
|
||||
0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,0,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0,0x22,0x1e,0x1e,0x1e,0x1e,0xa,2,0x1a,0,0x18,0x18,0x18,0x18,
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,5,5,0,5,
|
||||
0,0,5,5,0,5,0,0,5,0,0,0,0,0,0,5,
|
||||
5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,0,
|
||||
5,0,5,0,0,5,5,0,5,5,0,0x22,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0,0x22,0xb,0xb,0,0,0x22,0x22,0x22,0x22,0x22,
|
||||
0,0,0,0x1e,0x1e,0x1e,0x1e,0,2,0,0,0x18,0x18,0x18,0x18,0x18,
|
||||
0x18,0x18,0x18,0x18,0x18,0,0,5,5,5,5,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,
|
||||
0x1c,0,0x1c,0,0x17,0,0,0,0,0,0,5,5,5,5,5,
|
||||
5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,0,0,0,0,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x22,2,2,0x1a,
|
||||
1,0,0,8,8,8,8,8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
|
||||
0xf,0xf,0xf,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
|
||||
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
|
||||
0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0,0,0,0,0,0,0x1c,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,5,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
|
||||
0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e,0x20,0x13,0x1a,0xb,0xb,
|
||||
0xb,0xb,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0xc,0,
|
||||
0,0xc,0,5,5,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,5,5,5,
|
||||
5,0xb,0xb,5,0x22,0x1e,0x1e,5,5,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e,
|
||||
5,5,0x22,0x22,0x22,0x22,5,5,5,5,5,5,5,5,5,5,
|
||||
5,0xb,0x22,0x22,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,5,0x1e,0x18,
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x1e,0x1e,0x22,0x22,0,0,0x23,
|
||||
0x23,0x23,5,5,5,5,5,5,5,5,5,5,0,5,5,0x22,
|
||||
0x22,0x1a,0,0,0,0,0,0,0,0,0,0,0,5,5,0x22,
|
||||
0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0x22,
|
||||
0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
|
||||
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x1b,0x1b,0x1c,0x10,
|
||||
0xa,0x1c,0x1c,0x1a,0x13,0x1c,0,0,0,0,0,0,0,0,1,0x1c,
|
||||
0,0,0xc,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||
5,5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf,0xf,0xf,0,0,
|
||||
0,0,7,7,2,7,7,7,7,7,7,7,0x22,0x1c,0,0,
|
||||
0,0,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,
|
||||
0,0,0x1d,0x1d,0x1d,0x1d,0x1d,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,7,7,7,7,7,7,7,0x1e,0x1e,0,0,0,0,0,
|
||||
0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0,0,
|
||||
0,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23,0x23,
|
||||
0x23,5,5,0xb,0xb,0xf,7,7,9,0xf,0xf,0xf,0xf,0,0x13,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e,
|
||||
0x1e,0x1e,0x1e,0x1e,0x1a,0x1c,0x1c,0,0,0x1c,2,2,2,0x10,0x20,0x23,
|
||||
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,0x17,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,5,5,5,5,5,
|
||||
5,5,0,0,0,0,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
|
||||
5,5,5,5,5,5,0xf,0xf,0xf,0x22,0x22,0x22,0x22,0x22,0x22,0x1a,
|
||||
0x13,0xf,0xf,5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,1,
|
||||
5,5,5,7,7,5,5,5,5,0x23,0x23,0x17,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,7,7,0x1a,0x1a,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,5,5,5,5,0xf,0xf,0x22,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,7,7,7,7,2,2,0x1c,0x17,0,0,0,0,0,0,
|
||||
0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,5,
|
||||
5,5,4,4,4,0,4,4,4,4,4,4,4,4,4,4,
|
||||
4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0x20,0x20,4,0x11,0x11,4,4,4,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0x1c,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0x16,0x14,0,0,0xc,0xc,0xc,0xc,0xc,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0x1c,0x1c,0x1c,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0x23,0x23,0,0x23,0x23,0x23,0x1a,5,5,5,5,2,5,5,
|
||||
5,5,0x22,0x22,0x22,0x22,0x22,0,0,0,0,0,0,0,0,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,5,5,0x21,0x21,5,
|
||||
5,5,5,0x21,0xf,0xf,5,5,5,5,5,5,5,0xf,5,2,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
|
||||
0xb,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,0,0,
|
||||
0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
|
||||
4,4,4,4,4,4,4,4,2,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0x23,0x22,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
5,5,5,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x1e,
|
||||
0x1e,0x1e,0,0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0x22,7,7,7,0x1a,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,5,5,5,0x23,
|
||||
0x23,0x23,5,5,5,0x17,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf,
|
||||
0xb,0xb,5,5,5,5,5,0x22,0,5,5,5,5,5,5,5,
|
||||
5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,5,5,5,5,
|
||||
5,0,0x22,0x22,0x22,0xb,0xb,0xb,0xb,0,0,0,0,0,0,0,
|
||||
0,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
||||
0,0,5,5,5,0xc,0xc,0xc,0,0,0,5,0x1e,0x1e,0x1e,5,
|
||||
5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,
|
||||
0x1e,0x1d,0x1e,0x1d,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0x23,0x23,5,5,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,
|
||||
0x22,0,0,0,0,0,0x20,0x13,0,0,0,0,0,0,0,0,
|
||||
0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23,
|
||||
0x23,5,0x23,5,5,5,5,5,5,5,5,5,7,7,7,7,
|
||||
7,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x1e,0x1a,0,0,5,0x22,
|
||||
0x22,0x22,0,0x22,0x22,0,0,0,0,0,0x22,0x22,2,0x20,5,5,
|
||||
5,5,0,5,5,5,0,5,5,5,5,5,5,5,0,0,
|
||||
0x17,0x17,0x17,0,0,0,0,0x13,2,2,0x20,0x11,0x11,0x23,0x23,0x23,
|
||||
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0,
|
||||
0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x19,
|
||||
2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0,
|
||||
2,2,0x20,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,
|
||||
0x22,0x22,0x22,0x13,0x1a,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
0,0,0,0,5,0x22,0x22,0,0,0,0,0,0,0,0,0,
|
||||
0x21,0x21,0x21,0x21,0x21,5,5,5,5,5,5,5,5,5,5,5,
|
||||
0x17,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,1,0xe,
|
||||
0xe,0,0,0,0,0,0x1c,0x17,0x22,0x22,0,0,0,0x22,0x22,0x22,
|
||||
0x22,2,0x1f,0x17,0x12,0,0,0,0,0,0,4,0,0x23,0x23,0x23,
|
||||
0x23,5,5,5,0,5,0,5,5,5,5,0,5,5,5,5,
|
||||
5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5,
|
||||
5,5,5,5,5,5,5,5,5,5,5,2,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x17,0x1a,0,0,0,0,0,2,2,2,0x20,
|
||||
0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,5,0,5,5,
|
||||
0,5,5,5,5,5,0,0x17,0x17,1,0x22,0x22,0,0,0,0,
|
||||
0,0,0,0x22,0,0,0,0,0,0,2,2,0x23,0x23,0x22,0x22,
|
||||
0,0,4,4,4,4,4,4,4,0,0,0,5,5,5,5,
|
||||
5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,2,0x20,
|
||||
0x17,1,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,
|
||||
0x18,0x18,0x18,0x18,0,0,0,0,0x1c,0,0x23,0x23,0x23,0x23,0x23,0x23,
|
||||
0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,2,0x20,0x1f,0x17,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0x22,0x22,0x22,0x22,0x22,0x22,0,
|
||||
0,0x22,0x22,0x22,0x22,2,2,0x20,0x1f,0x17,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0x23,0x23,0x23,0x23,0x22,0x22,0,
|
||||
0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,
|
||||
0x1f,0x22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,5,5,5,5,5,5,5,5,5,5,5,2,0x20,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0,0,0,0,5,5,
|
||||
5,5,5,5,5,5,5,5,5,0,0,0xb,0xb,0xb,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0x18,0x18,
|
||||
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,2,0x20,0x1f,0x17,0,0,0,0,0,0x23,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,5,0x1c,0x1a,
|
||||
2,2,2,2,0x20,0xe,0xb,0xb,0xb,0xb,0xc,0,0,0,0,0,
|
||||
0xc,0,0x13,0,0,0,0,0,0,0,0,0x23,0x22,0x22,0x22,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,0,0,0xe,0xe,0xe,
|
||||
0xe,7,7,7,7,7,7,2,0x20,0x12,0x13,0,0,0,1,0,
|
||||
0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0x23,5,
|
||||
5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0x22,2,2,0x20,
|
||||
0x1f,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,
|
||||
0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x22,
|
||||
0x22,0x22,0x22,0x22,2,2,0,0,0,0,0,0,0,0,0,0x23,
|
||||
0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,5,5,5,5,0x22,
|
||||
0x22,0x22,0x22,0x22,0x22,0,0,0,0x22,0,0x22,0x22,0,0x22,2,0x20,
|
||||
0x17,0x22,0x1a,0x13,0xd,0xb,0,0,0,0,0,0,0,0,0x23,0x23,
|
||||
0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,0x23,5,5,5,5,5,5,
|
||||
5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,2,0x20,0x13,0,
|
||||
0,0,0,0,0,0,0,5,5,0xc,0x22,0x22,0x22,0x22,0,0,
|
||||
0,0,0,0,0,0,0,0
|
||||
};
|
||||
|
||||
// Trie descriptor that ties the insc_trieIndex and insc_trieData tables together.
// NOTE(review): the initializers are positional, so their meaning depends on the
// UCPTrie field order declared outside this file - confirm against ucptrie.h.
static const UCPTrie insc_trie={
|
||||
insc_trieIndex,
|
||||
{ insc_trieData },
|
||||
// presumably the element counts of the index and data arrays - TODO confirm
834, 3960,
|
||||
0x12000, 0x12,
|
||||
1, 2,
|
||||
0, 0,
|
||||
0x4, 0x40,
|
||||
0x0,
|
||||
};
|
||||
|
||||
static const int32_t maxVoValue = 3;
|
||||
|
||||
static const uint16_t vo_trieIndex[1100]={
|
||||
0,0x40,0x59,0x98,0,0,0,0,0,0,0,0xd0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0x33b,0x355,0x363,0x379,0x399,0x3b7,0x3d2,0x3ec,0x355,0x355,0x355,0x40c,0x355,0x355,0x355,0x40c,
|
||||
0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,
|
||||
0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,
|
||||
0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x355,0x355,0x355,0x40c,
|
||||
0x355,0x355,0x355,0x40c,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x59,0x69,0x79,0x89,
|
||||
0x98,0xa8,0xb8,0xc8,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,
|
||||
0,0x10,0x20,0x30,0xd0,0xe0,0xf0,0x100,0,0x10,0x20,0x30,0,0x10,0x20,0x30,
|
||||
0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,
|
||||
0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x10f,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0xa9,0x96,0x11e,0x12c,0xae,0xaa,0,0,0,0,0,
|
||||
0,0x103,0x13c,0,0x14c,0x158,0x166,0x10b,0x175,0x110,0x110,0x110,0x184,0,0,0,
|
||||
0,0,0,0,0x72,0,0xf6,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0x190,0x110,0x198,0,0,0,0,0x103,0x110,0x115,0,0xec,0x1a8,
|
||||
0x1b6,0x10e,0x110,0x110,0x1c6,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x116,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x118,0x10a,0x110,0x1d2,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0x10e,0x110,0,0,
|
||||
0x116,0,0,0,0,0,0x108,0x110,0x1e2,0x114,0x110,0,0,0,0,0,
|
||||
0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x1f1,0x1ff,0x110,0x20e,0x21d,
|
||||
0x110,0x22a,0x110,0x237,0x246,0x256,0x110,0x22a,0x110,0x237,0x261,0x110,0x110,0x26e,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x27e,0x27e,0x27e,0x27e,
|
||||
0x286,0x110,0x28e,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0,0,
|
||||
0,0,0,0x110,0,0x110,0x117,0x29b,0x2aa,0,0,0,0,0,0,0,
|
||||
0,0,0x2ba,0x2c9,0x110,0x2d9,0x110,0x2e9,0x2f8,0,0,0,0,0,0,0,
|
||||
0x308,0x318,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,
|
||||
0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0,0,0,0,0,0,0,0,0x328,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,
|
||||
0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x112,0x84,0x98,0xa8,0xa8,0xa8,
|
||||
0xa8,0xa8,0xa8,0xc8,0xc,0xe8,0x100,0x115,0xc,0xc,0xc,0x134,0x153,0x172,0x191,0xc,
|
||||
0x1ab,0xc,0x1cb,0x1eb,0x20b,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
|
||||
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
|
||||
0x223,0x223,0x223,0x223,0x223,0xfb,0xc,0x243,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
|
||||
0x223,0x223,0x223,0x223,0x223,0xc,0xc,0xc,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
|
||||
0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0x262,0xc,0xc,0xc,0xc,0x282,0xc,0xc,
|
||||
0xc,0xc,0xc,0x29c,0xc,0xc,0xfd,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,
|
||||
0xc,0x223,0x223,0x2b9,0xc,0xc,0xc,0xc,0xc,0x223,0x100,0xc,0xc,0xc,0xc,0xc,
|
||||
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2bc,0x223,
|
||||
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,
|
||||
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2da,0xf8,0xc,0xc,0xc,0xc,
|
||||
0xc,0xc,0xc,0xc,0x223,0x2fa,0xc,0xc,0x223,0xfd,0xc,0xc,0xc,0xc,0xc,0xc,
|
||||
0xc,0xc,0xc,0xc,0x223,0x31a,0x223,0x223,0xc8,0x2b5,0xc,0xc,0x223,0x223,0x223,0x223,
|
||||
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,
|
||||
0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x31b,0xc,0xc,0xc,0xc,
|
||||
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,
|
||||
0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc
|
||||
};
|
||||
|
||||
static const uint8_t vo_trieData[828]={
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,0,0,0,0,3,0,0,3,0,0,0,0,0,
|
||||
0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
||||
0,0,0,0,0,0,0,0,0,3,3,0,0,0,3,0,
|
||||
0,0,0,3,3,3,0,0,0,0,0,0,3,0,3,3,
|
||||
3,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3,
|
||||
3,3,3,3,3,3,0,0,0,0,0,3,3,0,3,3,
|
||||
0,0,0,0,0,0,3,3,3,3,0,3,0,3,0,3,
|
||||
0,0,0,0,3,0,0,0,0,0,3,3,3,3,3,3,
|
||||
0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0,
|
||||
3,3,3,3,3,3,3,3,0,0,0,0,3,3,3,3,
|
||||
3,1,1,3,0,0,0,0,3,3,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,
|
||||
3,3,0,0,0,0,3,3,3,0,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,3,0,3,3,3,3,3,3,3,3,3,3,3,
|
||||
3,3,2,2,3,3,3,3,3,1,1,1,1,1,1,1,
|
||||
1,3,3,1,1,1,1,1,1,1,1,1,1,1,1,3,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,
|
||||
2,3,2,3,2,3,2,3,3,3,3,3,3,2,3,3,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,
|
||||
3,3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,
|
||||
3,2,2,3,3,3,1,2,3,2,3,2,3,2,3,2,
|
||||
3,3,3,3,3,3,2,2,3,3,3,3,3,1,3,3,
|
||||
3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,
|
||||
3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,
|
||||
3,3,3,0,1,1,1,1,1,1,3,3,3,0,0,0,
|
||||
0,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,
|
||||
3,3,1,1,3,3,2,0,2,3,3,3,3,3,3,3,
|
||||
3,3,3,1,1,0,0,0,2,3,3,3,3,3,3,3,
|
||||
3,3,3,3,1,3,1,3,1,3,3,3,3,3,3,3,
|
||||
3,3,3,3,1,1,1,1,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,3,3,3,1,3,3,3,3,
|
||||
0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,
|
||||
3,0,0,0,3,3,0,0,2,2,3,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,0,0,0,0
|
||||
};
|
||||
|
||||
// Trie descriptor that ties the vo_trieIndex and vo_trieData tables together.
// NOTE(review): the initializers are positional, so their meaning depends on the
// UCPTrie field order declared outside this file - confirm against ucptrie.h.
static const UCPTrie vo_trie={
|
||||
vo_trieIndex,
|
||||
{ vo_trieData },
|
||||
// These equal the declared sizes of vo_trieIndex[1100] and vo_trieData[828].
1100, 828,
|
||||
0x110000, 0x110,
|
||||
1, 2,
|
||||
0, 0,
|
||||
0xc, 0x0,
|
||||
0x0,
|
||||
};
|
||||
|
||||
#endif // INCLUDED_FROM_UPROPS_CPP
|
|
@ -798,7 +798,7 @@ _getKeywords(const char *localeID,
|
|||
}
|
||||
keywordsLen += keywordList[i].keywordLen + 1;
|
||||
if(valuesToo) {
|
||||
if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
|
||||
if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
|
||||
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
|
||||
}
|
||||
keywordsLen += keywordList[i].valueLen;
|
||||
|
@ -1133,7 +1133,7 @@ uloc_setKeywordValue(const char* keywordName,
|
|||
keyValuePrefix = ';'; /* for any subsequent key-value pair */
|
||||
updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
|
||||
updatedKeysAndValues.append('=', *status);
|
||||
updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status);
|
||||
updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
|
||||
}
|
||||
if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
|
||||
/* append new entry at the end, it sorts later than existing entries */
|
||||
|
@ -1500,7 +1500,7 @@ _deleteVariant(char* variants, int32_t variantsLen,
|
|||
}
|
||||
if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
|
||||
(variantsLen == toDeleteLen ||
|
||||
(flag=(variants[toDeleteLen] == '_'))))
|
||||
(flag=(variants[toDeleteLen] == '_')) != 0))
|
||||
{
|
||||
int32_t d = toDeleteLen + (flag?1:0);
|
||||
variantsLen -= d;
|
||||
|
@ -2412,7 +2412,7 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult
|
|||
/* eat spaces prior to semi */
|
||||
for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
|
||||
;
|
||||
int32_t slen = ((t+1)-s);
|
||||
int32_t slen = static_cast<int32_t>(((t+1)-s));
|
||||
if(slen > ULOC_FULLNAME_CAPACITY) {
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
return -1; // too big
|
||||
|
|
|
@ -228,7 +228,7 @@ initFromResourceBundle(UErrorCode& sts) {
|
|||
// a timezone key uses a colon instead of a slash in the resource.
|
||||
// e.g. America:Los_Angeles
|
||||
if (uprv_strchr(legacyTypeId, ':') != NULL) {
|
||||
int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);
|
||||
int32_t legacyTypeIdLen = static_cast<int32_t>(uprv_strlen(legacyTypeId));
|
||||
char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);
|
||||
if (legacyTypeIdBuf == NULL) {
|
||||
sts = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -320,7 +320,7 @@ initFromResourceBundle(UErrorCode& sts) {
|
|||
if (isTZ) {
|
||||
// replace colon with slash if necessary
|
||||
if (uprv_strchr(from, ':') != NULL) {
|
||||
int32_t fromLen = uprv_strlen(from);
|
||||
int32_t fromLen = static_cast<int32_t>(uprv_strlen(from));
|
||||
char* fromBuf = (char*)uprv_malloc(fromLen + 1);
|
||||
if (fromBuf == NULL) {
|
||||
sts = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -472,7 +472,6 @@ isSpecialTypeRgKeyValue(const char* val) {
|
|||
p++;
|
||||
}
|
||||
return (subtagLen == 6);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
U_CFUNC const char*
|
||||
|
|
|
@ -12,11 +12,13 @@
|
|||
#include "unicode/putil.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "putilimp.h"
|
||||
#include "uinvchar.h"
|
||||
#include "ulocimp.h"
|
||||
#include "uvector.h"
|
||||
#include "uassert.h"
|
||||
|
||||
|
||||
|
@ -77,19 +79,34 @@ static const char LOCALE_TYPE_YES[] = "yes";
|
|||
|
||||
#define LANG_UND_LEN 3
|
||||
|
||||
/*
|
||||
Updated on 2018-09-12 from
|
||||
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
|
||||
|
||||
This table has 2 parts. The part for grandfathered tags is generated by the
|
||||
following script from the IANA language tag registry.
|
||||
|
||||
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
|
||||
egrep -A 7 'Type: grandfathered' | \
|
||||
egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
|
||||
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
|
||||
tr 'A-Z' 'a-z'
|
||||
|
||||
|
||||
The 2nd part is made of five ICU-specific entries. They're kept for
|
||||
the backward compatibility for now, even though there are no preferred
|
||||
values. They may have to be removed for the strict BCP 47 compliance.
|
||||
|
||||
*/
|
||||
static const char* const GRANDFATHERED[] = {
|
||||
/* grandfathered preferred */
|
||||
"art-lojban", "jbo",
|
||||
"cel-gaulish", "xtg-x-cel-gaulish",
|
||||
"en-GB-oed", "en-GB-x-oed",
|
||||
"en-gb-oed", "en-gb-oxendict",
|
||||
"i-ami", "ami",
|
||||
"i-bnn", "bnn",
|
||||
"i-default", "en-x-i-default",
|
||||
"i-enochian", "und-x-i-enochian",
|
||||
"i-hak", "hak",
|
||||
"i-klingon", "tlh",
|
||||
"i-lux", "lb",
|
||||
"i-mingo", "see-x-i-mingo",
|
||||
"i-navajo", "nv",
|
||||
"i-pwn", "pwn",
|
||||
"i-tao", "tao",
|
||||
|
@ -102,17 +119,175 @@ static const char* const GRANDFATHERED[] = {
|
|||
"sgn-ch-de", "sgg",
|
||||
"zh-guoyu", "cmn",
|
||||
"zh-hakka", "hak",
|
||||
"zh-min", "nan-x-zh-min",
|
||||
"zh-min-nan", "nan",
|
||||
"zh-xiang", "hsn",
|
||||
NULL, NULL
|
||||
|
||||
// Grandfathered tags with no preferred value in the IANA
|
||||
// registry. Kept for now for the backward compatibility
|
||||
// because ICU has mapped them this way.
|
||||
"cel-gaulish", "xtg-x-cel-gaulish",
|
||||
"i-default", "en-x-i-default",
|
||||
"i-enochian", "und-x-i-enochian",
|
||||
"i-mingo", "see-x-i-mingo",
|
||||
"zh-min", "nan-x-zh-min",
|
||||
};
|
||||
|
||||
/*
|
||||
Updated on 2018-09-12 from
|
||||
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
|
||||
|
||||
The table lists redundant tags with preferred value in the IANA language tag registry.
|
||||
It's generated with the following command:
|
||||
|
||||
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
|
||||
grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
|
||||
awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
|
||||
tr 'A-Z' 'a-z'
|
||||
|
||||
In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
|
||||
a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
|
||||
*/
|
||||
|
||||
static const char* const REDUNDANT[] = {
|
||||
// redundant preferred
|
||||
"sgn-br", "bzs",
|
||||
"sgn-co", "csn",
|
||||
"sgn-de", "gsg",
|
||||
"sgn-dk", "dsl",
|
||||
"sgn-es", "ssp",
|
||||
"sgn-fr", "fsl",
|
||||
"sgn-gb", "bfi",
|
||||
"sgn-gr", "gss",
|
||||
"sgn-ie", "isg",
|
||||
"sgn-it", "ise",
|
||||
"sgn-jp", "jsl",
|
||||
"sgn-mx", "mfs",
|
||||
"sgn-ni", "ncs",
|
||||
"sgn-nl", "dse",
|
||||
"sgn-no", "nsl",
|
||||
"sgn-pt", "psr",
|
||||
"sgn-se", "swl",
|
||||
"sgn-us", "ase",
|
||||
"sgn-za", "sfs",
|
||||
"zh-cmn", "cmn",
|
||||
"zh-cmn-hans", "cmn-hans",
|
||||
"zh-cmn-hant", "cmn-hant",
|
||||
"zh-gan", "gan",
|
||||
"zh-wuu", "wuu",
|
||||
"zh-yue", "yue",
|
||||
|
||||
// variant tag with preferred value
|
||||
"ja-latn-hepburn-heploc", "ja-latn-alalc97",
|
||||
};
|
||||
|
||||
/*
|
||||
Updated on 2018-09-12 from
|
||||
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
|
||||
|
||||
grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
|
||||
grep -B1 'Preferred' | grep -v '^--' | \
|
||||
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
|
||||
|
||||
Make sure that 2-letter language subtags come before 3-letter subtags.
|
||||
*/
|
||||
static const char DEPRECATEDLANGS[][4] = {
|
||||
/* deprecated new */
|
||||
"in", "id",
|
||||
"iw", "he",
|
||||
"ji", "yi",
|
||||
"in", "id"
|
||||
"jw", "jv",
|
||||
"mo", "ro",
|
||||
"aam", "aas",
|
||||
"adp", "dz",
|
||||
"aue", "ktz",
|
||||
"ayx", "nun",
|
||||
"bgm", "bcg",
|
||||
"bjd", "drl",
|
||||
"ccq", "rki",
|
||||
"cjr", "mom",
|
||||
"cka", "cmr",
|
||||
"cmk", "xch",
|
||||
"coy", "pij",
|
||||
"cqu", "quh",
|
||||
"drh", "khk",
|
||||
"drw", "prs",
|
||||
"gav", "dev",
|
||||
"gfx", "vaj",
|
||||
"ggn", "gvr",
|
||||
"gti", "nyc",
|
||||
"guv", "duz",
|
||||
"hrr", "jal",
|
||||
"ibi", "opa",
|
||||
"ilw", "gal",
|
||||
"jeg", "oyb",
|
||||
"kgc", "tdf",
|
||||
"kgh", "kml",
|
||||
"koj", "kwv",
|
||||
"krm", "bmf",
|
||||
"ktr", "dtp",
|
||||
"kvs", "gdj",
|
||||
"kwq", "yam",
|
||||
"kxe", "tvd",
|
||||
"kzj", "dtp",
|
||||
"kzt", "dtp",
|
||||
"lii", "raq",
|
||||
"lmm", "rmx",
|
||||
"meg", "cir",
|
||||
"mst", "mry",
|
||||
"mwj", "vaj",
|
||||
"myt", "mry",
|
||||
"nad", "xny",
|
||||
"ncp", "kdz",
|
||||
"nnx", "ngv",
|
||||
"nts", "pij",
|
||||
"oun", "vaj",
|
||||
"pcr", "adx",
|
||||
"pmc", "huw",
|
||||
"pmu", "phr",
|
||||
"ppa", "bfy",
|
||||
"ppr", "lcq",
|
||||
"pry", "prt",
|
||||
"puz", "pub",
|
||||
"sca", "hle",
|
||||
"skk", "oyb",
|
||||
"tdu", "dtp",
|
||||
"thc", "tpo",
|
||||
"thx", "oyb",
|
||||
"tie", "ras",
|
||||
"tkk", "twm",
|
||||
"tlw", "weo",
|
||||
"tmp", "tyj",
|
||||
"tne", "kak",
|
||||
"tnf", "prs",
|
||||
"tsf", "taj",
|
||||
"uok", "ema",
|
||||
"xba", "cax",
|
||||
"xia", "acn",
|
||||
"xkh", "waw",
|
||||
"xsj", "suj",
|
||||
"ybd", "rki",
|
||||
"yma", "lrr",
|
||||
"ymt", "mtm",
|
||||
"yos", "zom",
|
||||
"yuu", "yug",
|
||||
};
|
||||
|
||||
/*
|
||||
Updated on 2018-04-24 from
|
||||
|
||||
curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
|
||||
grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
|
||||
grep -B1 'Preferred' | \
|
||||
awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
|
||||
*/
|
||||
static const char DEPRECATEDREGIONS[][3] = {
|
||||
/* deprecated new */
|
||||
"BU", "MM",
|
||||
"DD", "DE",
|
||||
"FX", "FR",
|
||||
"TP", "TL",
|
||||
"YD", "YE",
|
||||
"ZR", "CD",
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -172,6 +347,46 @@ static const char*
|
|||
ultag_getGrandfathered(const ULanguageTag* langtag);
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
// Helper class to memory manage CharString objects.
|
||||
// Only ever stack-allocated, does not need to inherit UMemory.
|
||||
// Hands out heap-allocated icu::CharString objects and records each one in a
// UVector member. Copy construction and copy assignment are deleted.
class CharStringPool {
|
||||
public:
|
||||
// status is declared before pool, so it is initialized to U_ZERO_ERROR before
// being passed to the UVector constructor together with deleter().
// NOTE(review): deleter() is presumably installed as the vector's per-element
// deleter, so pooled objects are freed with the pool - confirm against uvector.h.
CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
|
||||
~CharStringPool() = default;
|
||||
|
||||
CharStringPool(const CharStringPool&) = delete;
|
||||
CharStringPool& operator=(const CharStringPool&) = delete;
|
||||
|
||||
// Creates a default-constructed CharString and adds it to the pool.
// Returns the new object, or nullptr when:
//   - status already holds a failure from an earlier call (the error is sticky),
//   - the allocation yields nullptr (status becomes U_MEMORY_ALLOCATION_ERROR), or
//   - addElement() reports a failure (the new object is deleted before returning).
icu::CharString* create() {
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
icu::CharString* const obj = new icu::CharString;
|
||||
if (obj == nullptr) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
pool.addElement(obj, status);
|
||||
if (U_FAILURE(status)) {
|
||||
// The vector did not accept the element, so free it here.
delete obj;
|
||||
return nullptr;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
private:
|
||||
// Callback taking an untyped pointer; casts it back to CharString and deletes it.
static void U_CALLCONV deleter(void* obj) {
|
||||
delete static_cast<icu::CharString*>(obj);
|
||||
}
|
||||
|
||||
// First failure seen by the constructor or by create(); checked on every create().
UErrorCode status;
|
||||
// Holds the pointers returned by create().
icu::UVector pool;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/*
|
||||
* -------------------------------------------------
|
||||
*
|
||||
|
@ -675,6 +890,11 @@ _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
} else {
|
||||
/* resolve deprecated */
|
||||
for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
|
||||
// 2-letter deprecated subtags are listed before 3-letter
|
||||
// ones in DEPRECATEDLANGS[]. Get out of loop on coming
|
||||
// across the 1st 3-letter subtag, if the input is a 2-letter code.
|
||||
// to avoid continuing to try when there's no match.
|
||||
if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
|
||||
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
|
||||
uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
|
||||
len = (int32_t)uprv_strlen(buf);
|
||||
|
@ -721,7 +941,6 @@ _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
|
|||
*(appendAt + reslen) = SEP;
|
||||
}
|
||||
reslen++;
|
||||
|
||||
if (reslen < capacity) {
|
||||
uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
|
||||
}
|
||||
|
@ -763,6 +982,14 @@ _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
|
|||
*(appendAt + reslen) = SEP;
|
||||
}
|
||||
reslen++;
|
||||
/* resolve deprecated */
|
||||
for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
|
||||
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
|
||||
uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
|
||||
len = (int32_t)uprv_strlen(buf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (reslen < capacity) {
|
||||
uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
|
||||
|
@ -900,7 +1127,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
|
||||
static int32_t
|
||||
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
|
||||
char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
|
||||
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
|
||||
int32_t attrBufLength = 0;
|
||||
UEnumeration *keywordEnum = NULL;
|
||||
|
@ -920,22 +1146,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
AttributeListEntry *firstAttr = NULL;
|
||||
AttributeListEntry *attr;
|
||||
char *attrValue;
|
||||
char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
|
||||
char *pExtBuf = extBuf;
|
||||
int32_t extBufCapacity = sizeof(extBuf);
|
||||
CharStringPool extBufPool;
|
||||
const char *bcpKey=nullptr, *bcpValue=nullptr;
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
int32_t keylen;
|
||||
UBool isBcpUExt;
|
||||
|
||||
while (TRUE) {
|
||||
icu::CharString buf;
|
||||
key = uenum_next(keywordEnum, NULL, status);
|
||||
if (key == NULL) {
|
||||
break;
|
||||
}
|
||||
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
|
||||
/* buf must be null-terminated */
|
||||
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
char* buffer;
|
||||
int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
|
||||
|
||||
for (;;) {
|
||||
buffer = buf.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
break;
|
||||
}
|
||||
|
||||
len = uloc_getKeywordValue(
|
||||
localeID, key, buffer, resultCapacity, &tmpStatus);
|
||||
|
||||
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
resultCapacity = len;
|
||||
tmpStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
|
@ -945,6 +1197,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
continue;
|
||||
}
|
||||
|
||||
buf.append(buffer, len, tmpStatus);
|
||||
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
keylen = (int32_t)uprv_strlen(key);
|
||||
isBcpUExt = (keylen > 1);
|
||||
|
||||
|
@ -1007,7 +1264,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
}
|
||||
|
||||
/* we've checked buf is null-terminated above */
|
||||
bcpValue = uloc_toUnicodeLocaleType(key, buf);
|
||||
bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
|
||||
if (bcpValue == NULL) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
@ -1015,33 +1272,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
}
|
||||
continue;
|
||||
}
|
||||
if (bcpValue == buf) {
|
||||
/*
|
||||
if (bcpValue == buf.data()) {
|
||||
/*
|
||||
When uloc_toUnicodeLocaleType(key, buf) returns the
|
||||
input value as is, the value is well-formed, but has
|
||||
no known mapping. This implementation normalizes the
|
||||
the value to lower case
|
||||
value to lower case
|
||||
*/
|
||||
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
|
||||
if (bcpValueLen < extBufCapacity) {
|
||||
uprv_strcpy(pExtBuf, bcpValue);
|
||||
T_CString_toLowerCase(pExtBuf);
|
||||
|
||||
bcpValue = pExtBuf;
|
||||
|
||||
pExtBuf += (bcpValueLen + 1);
|
||||
extBufCapacity -= (bcpValueLen + 1);
|
||||
} else {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
icu::CharString* extBuf = extBufPool.create();
|
||||
if (extBuf == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
|
||||
int32_t resultCapacity;
|
||||
char* pExtBuf = extBuf->getAppendBuffer(
|
||||
/*minCapacity=*/bcpValueLen,
|
||||
/*desiredCapacityHint=*/bcpValueLen,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
|
||||
uprv_strcpy(pExtBuf, bcpValue);
|
||||
T_CString_toLowerCase(pExtBuf);
|
||||
|
||||
extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
|
||||
bcpValue = extBuf->data();
|
||||
}
|
||||
} else {
|
||||
if (*key == PRIVATEUSE) {
|
||||
if (!_isPrivateuseValueSubtags(buf, len)) {
|
||||
if (!_isPrivateuseValueSubtags(buf.data(), len)) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
|
@ -1049,7 +1317,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
continue;
|
||||
}
|
||||
} else {
|
||||
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
|
||||
if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
|
||||
if (strict) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
|
@ -1058,20 +1326,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
|
|||
}
|
||||
}
|
||||
bcpKey = key;
|
||||
if ((len + 1) < extBufCapacity) {
|
||||
uprv_memcpy(pExtBuf, buf, len);
|
||||
bcpValue = pExtBuf;
|
||||
|
||||
pExtBuf += len;
|
||||
|
||||
*pExtBuf = 0;
|
||||
pExtBuf++;
|
||||
|
||||
extBufCapacity -= (len + 1);
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
icu::CharString* extBuf = extBufPool.create();
|
||||
if (extBuf == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
break;
|
||||
}
|
||||
extBuf->append(buf.data(), len, tmpStatus);
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
break;
|
||||
}
|
||||
bcpValue = extBuf->data();
|
||||
}
|
||||
|
||||
/* create ExtensionListEntry */
|
||||
|
@ -1242,6 +1507,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
|
|||
attrBufIdx += (len + 1);
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
uprv_free(attr);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
|
@ -1460,9 +1726,9 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
|
|||
kwd->value = pType;
|
||||
|
||||
if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
// A duplicate keyword is allowed; only the first
|
||||
// is honored.
|
||||
uprv_free(kwd);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1836,7 +2102,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
}
|
||||
|
||||
/* check if the tag is grandfathered */
|
||||
for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
|
||||
for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
|
||||
if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
|
||||
int32_t newTagLength;
|
||||
|
||||
|
@ -1858,6 +2124,37 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
}
|
||||
}
|
||||
|
||||
size_t parsedLenDelta = 0;
|
||||
if (grandfatheredLen == 0) {
|
||||
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
|
||||
const char* redundantTag = REDUNDANT[i];
|
||||
size_t redundantTagLen = uprv_strlen(redundantTag);
|
||||
// The preferred tag for a redundant tag is always shorter than the redundant
|
||||
// tag. A redundant tag may or may not be followed by other subtags.
|
||||
// (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
|
||||
if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
|
||||
const char* redundantTagEnd = tagBuf + redundantTagLen;
|
||||
if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
|
||||
const char* preferredTag = REDUNDANT[i + 1];
|
||||
size_t preferredTagLen = uprv_strlen(preferredTag);
|
||||
uprv_strncpy(t->buf, preferredTag, preferredTagLen);
|
||||
if (*redundantTagEnd == SEP) {
|
||||
uprv_memmove(tagBuf + preferredTagLen,
|
||||
redundantTagEnd,
|
||||
tagLen - redundantTagLen + 1);
|
||||
} else {
|
||||
tagBuf[preferredTagLen] = '\0';
|
||||
}
|
||||
// parsedLen should be the length of the input
|
||||
// before redundantTag is replaced by preferredTag.
|
||||
// Save the delta to add it back later.
|
||||
parsedLenDelta = redundantTagLen - preferredTagLen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* langtag = language
|
||||
* ["-" script]
|
||||
|
@ -1898,10 +2195,13 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
if (next & LANG) {
|
||||
if (_isLanguageSubtag(pSubtag, subtagLen)) {
|
||||
*pSep = 0; /* terminate */
|
||||
// TODO: move deprecated language code handling here.
|
||||
t->language = T_CString_toLowerCase(pSubtag);
|
||||
|
||||
pLastGoodPosition = pSep;
|
||||
next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
|
||||
next = SCRT | REGN | VART | EXTS | PRIV;
|
||||
if (subtagLen <= 3)
|
||||
next |= EXTL;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -1942,6 +2242,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
if (next & REGN) {
|
||||
if (_isRegionSubtag(pSubtag, subtagLen)) {
|
||||
*pSep = 0;
|
||||
// TODO: move deprecated region code handling here.
|
||||
t->region = T_CString_toUpperCase(pSubtag);
|
||||
|
||||
pLastGoodPosition = pSep;
|
||||
|
@ -2035,7 +2336,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
}
|
||||
}
|
||||
if (next & PRIV) {
|
||||
if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
|
||||
if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
|
||||
char *pPrivuseVal;
|
||||
|
||||
if (pExtension != NULL) {
|
||||
|
@ -2138,7 +2439,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
}
|
||||
|
||||
if (parsedLen != NULL) {
|
||||
*parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
|
||||
*parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
|
||||
(int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
|
||||
}
|
||||
|
||||
return t;
|
||||
|
@ -2335,31 +2637,66 @@ uloc_toLanguageTag(const char* localeID,
|
|||
int32_t langtagCapacity,
|
||||
UBool strict,
|
||||
UErrorCode* status) {
|
||||
/* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
|
||||
char canonical[256];
|
||||
int32_t reslen = 0;
|
||||
icu::CharString canonical;
|
||||
int32_t reslen;
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
UBool hadPosix = FALSE;
|
||||
const char* pKeywordStart;
|
||||
|
||||
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
|
||||
canonical[0] = 0;
|
||||
if (uprv_strlen(localeID) > 0) {
|
||||
uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
|
||||
if (tmpStatus != U_ZERO_ERROR) {
|
||||
int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
|
||||
if (resultCapacity > 0) {
|
||||
char* buffer;
|
||||
|
||||
for (;;) {
|
||||
buffer = canonical.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return 0;
|
||||
}
|
||||
|
||||
reslen =
|
||||
uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
|
||||
|
||||
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
resultCapacity = reslen;
|
||||
tmpStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
canonical.append(buffer, reslen, tmpStatus);
|
||||
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
reslen = 0;
|
||||
|
||||
/* For handling special case - private use only tag */
|
||||
pKeywordStart = locale_getKeywordsStart(canonical);
|
||||
if (pKeywordStart == canonical) {
|
||||
pKeywordStart = locale_getKeywordsStart(canonical.data());
|
||||
if (pKeywordStart == canonical.data()) {
|
||||
UEnumeration *kwdEnum;
|
||||
int kwdCnt = 0;
|
||||
UBool done = FALSE;
|
||||
|
||||
kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
|
||||
kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
|
||||
if (kwdEnum != NULL) {
|
||||
kwdCnt = uenum_count(kwdEnum, &tmpStatus);
|
||||
if (kwdCnt == 1) {
|
||||
|
@ -2397,12 +2734,12 @@ uloc_toLanguageTag(const char* localeID,
|
|||
}
|
||||
}
|
||||
|
||||
reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
|
||||
reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
|
||||
reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
|
||||
reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
|
||||
reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
|
||||
reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
|
||||
|
||||
return reslen;
|
||||
}
|
||||
|
@ -2414,6 +2751,23 @@ uloc_forLanguageTag(const char* langtag,
|
|||
int32_t localeIDCapacity,
|
||||
int32_t* parsedLength,
|
||||
UErrorCode* status) {
|
||||
return ulocimp_forLanguageTag(
|
||||
langtag,
|
||||
-1,
|
||||
localeID,
|
||||
localeIDCapacity,
|
||||
parsedLength,
|
||||
status);
|
||||
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocimp_forLanguageTag(const char* langtag,
|
||||
int32_t tagLen,
|
||||
char* localeID,
|
||||
int32_t localeIDCapacity,
|
||||
int32_t* parsedLength,
|
||||
UErrorCode* status) {
|
||||
ULanguageTag *lt;
|
||||
int32_t reslen = 0;
|
||||
const char *subtag, *p;
|
||||
|
@ -2421,7 +2775,7 @@ uloc_forLanguageTag(const char* langtag,
|
|||
int32_t i, n;
|
||||
UBool noRegion = TRUE;
|
||||
|
||||
lt = ultag_parse(langtag, -1, parsedLength, status);
|
||||
lt = ultag_parse(langtag, tagLen, parsedLength, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -61,6 +61,38 @@ ulocimp_getCountry(const char *localeID,
|
|||
char *country, int32_t countryCapacity,
|
||||
const char **pEnd);
|
||||
|
||||
/**
|
||||
* Returns a locale ID for the specified BCP47 language tag string.
|
||||
* If the specified language tag contains any ill-formed subtags,
|
||||
* the first such subtag and all following subtags are ignored.
|
||||
* <p>
|
||||
* This implements the 'Language-Tag' production of BCP47, and so
|
||||
* supports grandfathered (regular and irregular) as well as private
|
||||
* use language tags. Private use tags are represented as 'x-whatever',
|
||||
* and grandfathered tags are converted to their canonical replacements
|
||||
* where they exist. Note that a few grandfathered tags have no modern
|
||||
* replacement, these will be converted using the fallback described in
|
||||
* the first paragraph, so some information might be lost.
|
||||
* @param langtag the input BCP47 language tag.
|
||||
* @param tagLen the length of langtag, or -1 to call uprv_strlen().
|
||||
* @param localeID the output buffer receiving a locale ID for the
|
||||
* specified BCP47 language tag.
|
||||
* @param localeIDCapacity the size of the locale ID output buffer.
|
||||
* @param parsedLength if not NULL, successfully parsed length
|
||||
* for the input language tag is set.
|
||||
* @param err error information if receiving the locale ID
|
||||
* failed.
|
||||
* @return the length of the locale ID.
|
||||
* @internal ICU 63
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocimp_forLanguageTag(const char* langtag,
|
||||
int32_t tagLen,
|
||||
char* localeID,
|
||||
int32_t localeIDCapacity,
|
||||
int32_t* parsedLength,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Get the region to use for supplemental data lookup. Uses
|
||||
* (1) any region specified by locale tag "rg"; if none then
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "uposixdefs.h"
|
||||
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "udatamem.h"
|
||||
#include "umapfile.h"
|
||||
|
||||
|
@ -64,7 +65,7 @@
|
|||
# include "unicode/udata.h"
|
||||
# define LIB_PREFIX "lib"
|
||||
# define LIB_SUFFIX ".dll"
|
||||
/* This is inconvienient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
|
||||
/* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
|
||||
# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat"
|
||||
# endif
|
||||
#elif MAP_IMPLEMENTATION==MAP_STDIO
|
||||
|
@ -84,7 +85,10 @@
|
|||
*----------------------------------------------------------------------------*/
|
||||
#if MAP_IMPLEMENTATION==MAP_NONE
|
||||
U_CFUNC UBool
|
||||
uprv_mapFile(UDataMemory *pData, const char *path) {
|
||||
uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
|
||||
if (U_FAILURE(*status)) {
|
||||
return FALSE;
|
||||
}
|
||||
UDataMemory_init(pData); /* Clear the output struct. */
|
||||
return FALSE; /* no file access */
|
||||
}
|
||||
|
@ -97,12 +101,17 @@
|
|||
uprv_mapFile(
|
||||
UDataMemory *pData, /* Fill in with info on the result doing the mapping. */
|
||||
/* Output only; any original contents are cleared. */
|
||||
const char *path /* File path to be opened/mapped */
|
||||
const char *path, /* File path to be opened/mapped. */
|
||||
UErrorCode *status /* Error status, used to report out-of-memory errors. */
|
||||
)
|
||||
{
|
||||
HANDLE map;
|
||||
HANDLE file;
|
||||
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
UDataMemory_init(pData); /* Clear the output struct. */
|
||||
|
||||
/* open the input file */
|
||||
|
@ -111,28 +120,29 @@
|
|||
OPEN_EXISTING,
|
||||
FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, NULL);
|
||||
#else
|
||||
// First we need to go from char to UTF-16
|
||||
// u_UCharsToChars could work but it requires length.
|
||||
WCHAR utf16Path[MAX_PATH];
|
||||
int32_t i;
|
||||
for (i = 0; i < UPRV_LENGTHOF(utf16Path); i++)
|
||||
{
|
||||
utf16Path[i] = path[i];
|
||||
if (path[i] == '\0')
|
||||
{
|
||||
break;
|
||||
}
|
||||
// Convert from UTF-8 string to UTF-16 string.
|
||||
wchar_t utf16Path[MAX_PATH];
|
||||
int32_t pathUtf16Len = 0;
|
||||
u_strFromUTF8(reinterpret_cast<UChar*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status);
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return FALSE;
|
||||
}
|
||||
if (i >= UPRV_LENGTHOF(utf16Path))
|
||||
{
|
||||
// Ran out of room, unlikely but be safe
|
||||
utf16Path[UPRV_LENGTHOF(utf16Path) - 1] = '\0';
|
||||
if (*status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
// Report back an error instead of a warning.
|
||||
*status = U_BUFFER_OVERFLOW_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
// TODO: Is it worth setting extended parameters to specify random access?
|
||||
file = CreateFile2(utf16Path, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, NULL);
|
||||
#endif
|
||||
if(file==INVALID_HANDLE_VALUE) {
|
||||
if (file == INVALID_HANDLE_VALUE) {
|
||||
// If we failed to open the file due to an out-of-memory error, then we want
|
||||
// to report that error back to the caller.
|
||||
if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -165,7 +175,12 @@
|
|||
map = CreateFileMappingFromApp(file, NULL, PAGE_READONLY, 0, NULL);
|
||||
#endif
|
||||
CloseHandle(file);
|
||||
if(map==NULL) {
|
||||
if (map == NULL) {
|
||||
// If we failed to create the mapping due to an out-of-memory error, then
|
||||
// we want to report that error back to the caller.
|
||||
if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -193,12 +208,16 @@
|
|||
|
||||
#elif MAP_IMPLEMENTATION==MAP_POSIX
|
||||
U_CFUNC UBool
|
||||
uprv_mapFile(UDataMemory *pData, const char *path) {
|
||||
uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
|
||||
int fd;
|
||||
int length;
|
||||
struct stat mystat;
|
||||
void *data;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
UDataMemory_init(pData); /* Clear the output struct. */
|
||||
|
||||
/* determine the length of the file */
|
||||
|
@ -221,6 +240,7 @@
|
|||
#endif
|
||||
close(fd); /* no longer needed */
|
||||
if(data==MAP_FAILED) {
|
||||
// Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -263,11 +283,15 @@
|
|||
}
|
||||
|
||||
U_CFUNC UBool
|
||||
uprv_mapFile(UDataMemory *pData, const char *path) {
|
||||
uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
|
||||
FILE *file;
|
||||
int32_t fileLength;
|
||||
void *p;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
UDataMemory_init(pData); /* Clear the output struct. */
|
||||
/* open the input file */
|
||||
file=fopen(path, "rb");
|
||||
|
@ -286,6 +310,7 @@
|
|||
p=uprv_malloc(fileLength);
|
||||
if(p==NULL) {
|
||||
fclose(file);
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -351,7 +376,7 @@
|
|||
*
|
||||
* TODO: This works the way ICU historically has, but the
|
||||
* whole data fallback search path is so complicated that
|
||||
* proabably almost no one will ever really understand it,
|
||||
* probably almost no one will ever really understand it,
|
||||
* the potential for confusion is large. (It's not just
|
||||
* this one function, but the whole scheme.)
|
||||
*
|
||||
|
@ -391,7 +416,7 @@
|
|||
|
||||
# define DATA_TYPE "dat"
|
||||
|
||||
U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path) {
|
||||
U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
|
||||
const char *inBasename;
|
||||
char *basename;
|
||||
char pathBuffer[1024];
|
||||
|
@ -399,6 +424,10 @@
|
|||
dllhandle *handle;
|
||||
void *val=0;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR);
|
||||
if(inBasename==NULL) {
|
||||
inBasename = path;
|
||||
|
@ -430,6 +459,7 @@
|
|||
data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
close(fd); /* no longer needed */
|
||||
if(data==MAP_FAILED) {
|
||||
// Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
|
||||
return FALSE;
|
||||
}
|
||||
pData->map = (char *)data + length;
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
#include "unicode/udata.h"
|
||||
#include "putilimp.h"
|
||||
|
||||
U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path);
|
||||
U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status);
|
||||
U_CFUNC void uprv_unmapFile(UDataMemory *pData);
|
||||
|
||||
/* MAP_NONE: no memory mapping, no file access at all */
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -54,6 +54,21 @@ U_NAMESPACE_END
|
|||
|
||||
#include <atomic>
|
||||
|
||||
// Export an explicit template instantiation of std::atomic<int32_t>.
|
||||
// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
|
||||
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
|
||||
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
|
||||
#if defined(__clang__)
|
||||
// Suppress the warning that the explicit instantiation after explicit specialization has no effect.
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Winstantiation-after-specialization"
|
||||
#endif
|
||||
template struct U_COMMON_API std::atomic<int32_t>;
|
||||
#if defined(__clang__)
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
typedef std::atomic<int32_t> u_atomic_int32_t;
|
||||
|
@ -205,7 +220,7 @@ umtx_atomic_dec(u_atomic_int32_t *p);
|
|||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* Low Level Atomic Ops Platfrom Chain */
|
||||
#endif /* Low Level Atomic Ops Platform Chain */
|
||||
|
||||
|
||||
|
||||
|
@ -319,7 +334,7 @@ U_NAMESPACE_END
|
|||
*************************************************************************************************/
|
||||
|
||||
#if defined(U_USER_MUTEX_H)
|
||||
// #inlcude "U_USER_MUTEX_H"
|
||||
// #include "U_USER_MUTEX_H"
|
||||
#include U_MUTEX_XSTR(U_USER_MUTEX_H)
|
||||
|
||||
#elif U_PLATFORM_USES_ONLY_WIN32_API
|
||||
|
@ -389,7 +404,7 @@ struct UConditionVar {
|
|||
#else
|
||||
|
||||
/*
|
||||
* Unknow platform type.
|
||||
* Unknown platform type.
|
||||
* This is an error condition. ICU requires mutexes.
|
||||
*/
|
||||
|
||||
|
@ -401,7 +416,7 @@ struct UConditionVar {
|
|||
|
||||
/**************************************************************************************
|
||||
*
|
||||
* Mutex Implementation function declaratations.
|
||||
* Mutex Implementation function declarations.
|
||||
* Declarations are platform neutral.
|
||||
* Implementations, in umutex.cpp, are platform specific.
|
||||
*
|
||||
|
|
|
@ -466,7 +466,7 @@ static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
|
|||
buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
|
||||
}
|
||||
buffer += ndigits;
|
||||
length += ndigits;
|
||||
length += static_cast<uint16_t>(ndigits);
|
||||
WRITE_CHAR(buffer, bufferLength, length, '>');
|
||||
|
||||
return length;
|
||||
|
|
|
@ -237,13 +237,12 @@ class StringByteSink : public ByteSink {
|
|||
* @stable ICU 4.2
|
||||
*/
|
||||
StringByteSink(StringClass* dest) : dest_(dest) { }
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
|
||||
*
|
||||
* @param dest pointer to string object to append to
|
||||
* @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
|
||||
if (initialAppendCapacity > 0 &&
|
||||
|
@ -251,7 +250,6 @@ class StringByteSink : public ByteSink {
|
|||
dest->reserve(dest->length() + initialAppendCapacity);
|
||||
}
|
||||
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Append "bytes[0,n-1]" to this.
|
||||
* @param data the pointer to the bytes
|
||||
|
|
|
@ -194,7 +194,6 @@ public:
|
|||
char16_t *dest, int32_t destCapacity, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Lowercases a UTF-8 string and optionally records edits.
|
||||
* Casing is locale-dependent and context-sensitive.
|
||||
|
@ -214,7 +213,7 @@ public:
|
|||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8ToLower
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8ToLower(
|
||||
const char *locale, uint32_t options,
|
||||
|
@ -240,7 +239,7 @@ public:
|
|||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8ToUpper
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8ToUpper(
|
||||
const char *locale, uint32_t options,
|
||||
|
@ -280,7 +279,7 @@ public:
|
|||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8ToTitle
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8ToTitle(
|
||||
const char *locale, uint32_t options, BreakIterator *iter,
|
||||
|
@ -311,13 +310,12 @@ public:
|
|||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ucasemap_utf8FoldCase
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static void utf8Fold(
|
||||
uint32_t options,
|
||||
StringPiece src, ByteSink &sink, Edits *edits,
|
||||
UErrorCode &errorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Lowercases a UTF-8 string and optionally records edits.
|
||||
|
|
|
@ -28,6 +28,8 @@ U_NAMESPACE_BEGIN
|
|||
// Use the predefined value.
|
||||
#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
|
||||
# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
|
||||
#elif defined(U_IN_DOXYGEN)
|
||||
# define U_ALIASING_BARRIER(ptr)
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
@ -103,6 +105,7 @@ private:
|
|||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
|
||||
|
@ -134,6 +137,7 @@ Char16Ptr::~Char16Ptr() {}
|
|||
char16_t *Char16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
|
||||
|
@ -209,6 +213,7 @@ private:
|
|||
#endif
|
||||
};
|
||||
|
||||
/// \cond
|
||||
#ifdef U_ALIASING_BARRIER
|
||||
|
||||
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
|
||||
|
@ -240,6 +245,7 @@ ConstChar16Ptr::~ConstChar16Ptr() {}
|
|||
const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
|
||||
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* Converts from const char16_t * to const UChar *.
|
||||
|
|
|
@ -88,6 +88,11 @@
|
|||
* <td>icu::UnicodeSet</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Maps from Unicode Code Points to Integer Values</td>
|
||||
* <td>ucptrie.h, umutablecptrie.h</td>
|
||||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Maps from Strings to Integer Values</td>
|
||||
* <td>(no C API)</td>
|
||||
* <td>icu::BytesTrie, icu::UCharsTrie</td>
|
||||
|
@ -208,9 +213,9 @@
|
|||
* <td>C API</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Layout Engine/Complex Text Layout</td>
|
||||
* <td>loengine.h</td>
|
||||
* <td>icu::LayoutEngine,icu::ParagraphLayout</td>
|
||||
* <td>Paragraph Layout / Complex Text Layout</td>
|
||||
* <td>playout.h</td>
|
||||
* <td>icu::ParagraphLayout</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>ICU I/O</td>
|
||||
|
|
|
@ -24,8 +24,8 @@ class UnicodeString;
|
|||
* in linear progression. Does not support moving/reordering of text.
|
||||
*
|
||||
* There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
|
||||
* instances of this class using {@link #addReplace(int, int)} (for change edits) and
|
||||
* {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity,
|
||||
* instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
|
||||
* {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
|
||||
* whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
|
||||
* mapping between code points in the source and destination strings.
|
||||
*
|
||||
|
@ -62,11 +62,11 @@ class UnicodeString;
|
|||
* </ul>
|
||||
*
|
||||
* The "fine changes" and "coarse changes" iterators will step through only the change edits when their
|
||||
* {@link Edits::Iterator#next()} methods are called. They are identical to the non-change iterators when
|
||||
* their {@link Edits::Iterator#findSourceIndex(int)} or {@link Edits::Iterator#findDestinationIndex(int)}
|
||||
* `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
|
||||
* their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
|
||||
* methods are used to walk through the string.
|
||||
*
|
||||
* For examples of how to use this class, see the test <code>TestCaseMapEditsIteratorDocs</code> in
|
||||
* For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
|
||||
* UCharacterCaseTest.java.
|
||||
*
|
||||
* An Edits object tracks a separate UErrorCode, but ICU string transformation functions
|
||||
|
@ -86,7 +86,7 @@ public:
|
|||
/**
|
||||
* Copy constructor.
|
||||
* @param other source edits
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits(const Edits &other) :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(other.length),
|
||||
|
@ -98,7 +98,7 @@ public:
|
|||
* Move constructor, might leave src empty.
|
||||
* This object will have the same contents that the source object had.
|
||||
* @param src source edits
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits(Edits &&src) U_NOEXCEPT :
|
||||
array(stackArray), capacity(STACK_CAPACITY), length(src.length),
|
||||
|
@ -117,7 +117,7 @@ public:
|
|||
* Assignment operator.
|
||||
* @param other source edits
|
||||
* @return *this
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits &operator=(const Edits &other);
|
||||
|
||||
|
@ -127,7 +127,7 @@ public:
|
|||
* The behavior is undefined if *this and src are the same object.
|
||||
* @param src source edits
|
||||
* @return *this
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits &operator=(Edits &&src) U_NOEXCEPT;
|
||||
|
||||
|
@ -173,13 +173,11 @@ public:
|
|||
*/
|
||||
UBool hasChanges() const { return numChanges != 0; }
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* @return the number of change edits
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
int32_t numberOfChanges() const { return numChanges; }
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Access to the list of edits.
|
||||
|
@ -189,9 +187,9 @@ public:
|
|||
* starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
|
||||
* span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
|
||||
*
|
||||
* The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)},
|
||||
* and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator
|
||||
* to make it point to the corresponding edit.
|
||||
* The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
|
||||
* and `findDestinationIndex(int32_t, UErrorCode &)` methods.
|
||||
* Calling any of these methods mutates the iterator to make it point to the corresponding edit.
|
||||
*
|
||||
* For more information, see the documentation for {@link Edits}.
|
||||
*
|
||||
|
@ -202,7 +200,7 @@ public:
|
|||
struct U_COMMON_API Iterator U_FINAL : public UMemory {
|
||||
/**
|
||||
* Default constructor, empty iterator.
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Iterator() :
|
||||
array(nullptr), index(0), length(0),
|
||||
|
@ -253,7 +251,6 @@ public:
|
|||
return findIndex(i, TRUE, errorCode) == 0;
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Moves the iterator to the edit that contains the destination index.
|
||||
* The destination index may be found in a no-change edit
|
||||
|
@ -271,7 +268,7 @@ public:
|
|||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return TRUE if the edit for the destination index was found
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
|
||||
return findIndex(i, FALSE, errorCode) == 0;
|
||||
|
@ -297,7 +294,7 @@ public:
|
|||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return destination index; undefined if i is not 0..string length
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
|
||||
|
||||
|
@ -321,10 +318,9 @@ public:
|
|||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return source index; undefined if i is not 0..string length
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns whether the edit currently represented by the iterator is a change edit.
|
||||
|
@ -366,13 +362,13 @@ public:
|
|||
/**
|
||||
* The start index of the current span in the replacement string; the span has length
|
||||
* {@link #newLength}. Well-defined only if the current edit is a change edit.
|
||||
* <p>
|
||||
* The <em>replacement string</em> is the concatenation of all substrings of the destination
|
||||
*
|
||||
* The *replacement string* is the concatenation of all substrings of the destination
|
||||
* string corresponding to change edits.
|
||||
* <p>
|
||||
*
|
||||
* This method is intended to be used together with operations that write only replacement
|
||||
* characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified
|
||||
* in-place.
|
||||
* characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
|
||||
* The source string can then be modified in-place.
|
||||
*
|
||||
* @return the current index into the replacement-characters-only string,
|
||||
* not counting unchanged spans
|
||||
|
@ -475,7 +471,6 @@ public:
|
|||
return Iterator(array, length, FALSE, FALSE);
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Merges the two input Edits and appends the result to this object.
|
||||
*
|
||||
|
@ -501,10 +496,9 @@ public:
|
|||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return *this, with the merged edits appended
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
private:
|
||||
void releaseArray() U_NOEXCEPT;
|
||||
|
|
|
@ -28,6 +28,7 @@ U_NAMESPACE_BEGIN
|
|||
* enum bitset for boolean fields. Similar to Java EnumSet<>.
|
||||
* Needs to range check. Used for private instance variables.
|
||||
* @internal
|
||||
* \cond
|
||||
*/
|
||||
template<typename T, uint32_t minValue, uint32_t limitValue>
|
||||
class EnumSet {
|
||||
|
@ -60,6 +61,8 @@ private:
|
|||
uint32_t fBools;
|
||||
};
|
||||
|
||||
/** \endcond */
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
|
|
@ -67,16 +67,14 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
|||
static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Construct an empty FilteredBreakIteratorBuilder.
|
||||
* In this state, it will not suppress any segment boundaries.
|
||||
* @param status The error code.
|
||||
* @return the new builder
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* Suppress a certain string from being the end of a segment.
|
||||
|
@ -95,7 +93,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
|||
* This function does not create any new segment boundaries, but only serves to un-do
|
||||
* the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
|
||||
* locale data which may be suppressing certain strings.
|
||||
* @param exception the exception to remove
|
||||
* @param string the exception to remove
|
||||
* @param status error code
|
||||
* @return returns TRUE if the string was present and now removed,
|
||||
* FALSE if the call was a no-op because the string was not being suppressed.
|
||||
|
@ -114,7 +112,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
|||
*/
|
||||
virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Wrap (adopt) an existing break iterator in a new filtered instance.
|
||||
* The resulting BreakIterator is owned by the caller.
|
||||
|
@ -126,12 +123,11 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
|
|||
* @param adoptBreakIterator the break iterator to adopt
|
||||
* @param status error code
|
||||
* @return the new BreakIterator, owned by the caller.
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
    // Convenience spelling of build(): forwards both arguments unchanged and
    // hands back whatever build() produced.
    BreakIterator *filtered = build(adoptBreakIterator, status);
    return filtered;
}
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
protected:
|
||||
/**
|
||||
|
|
|
@ -110,7 +110,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
#if UCONFIG_ENABLE_PLUGINS
|
||||
#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -31,6 +31,10 @@
|
|||
#ifndef LOCID_H
|
||||
#define LOCID_H
|
||||
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/strenum.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/putil.h"
|
||||
|
@ -280,6 +284,16 @@ public:
|
|||
*/
|
||||
Locale(const Locale& other);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Move constructor; might leave source in bogus state.
|
||||
* This locale will have the same contents that the source locale had.
|
||||
*
|
||||
* @param other The Locale object being moved in.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
Locale(Locale&& other) U_NOEXCEPT;
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
|
@ -296,6 +310,19 @@ public:
|
|||
*/
|
||||
Locale& operator=(const Locale& other);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Move assignment operator; might leave source in bogus state.
|
||||
* This locale will have the same contents that the source locale had.
|
||||
* The behavior is undefined if *this and the source are the same object.
|
||||
*
|
||||
* @param other The Locale object being moved in.
|
||||
* @return *this
|
||||
* @draft ICU 63
|
||||
*/
|
||||
Locale& operator=(Locale&& other) U_NOEXCEPT;
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Checks if two locale keys are the same.
|
||||
*
|
||||
|
@ -362,6 +389,55 @@ public:
|
|||
UErrorCode& success);
|
||||
#endif /* U_HIDE_SYSTEM_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Returns a Locale for the specified BCP47 language tag string.
|
||||
* If the specified language tag contains any ill-formed subtags,
|
||||
* the first such subtag and all following subtags are ignored.
|
||||
* <p>
|
||||
* This implements the 'Language-Tag' production of BCP47, and so
|
||||
* supports grandfathered (regular and irregular) as well as private
|
||||
* use language tags. Private use tags are represented as 'x-whatever',
|
||||
* and grandfathered tags are converted to their canonical replacements
|
||||
* where they exist. Note that a few grandfathered tags have no modern
|
||||
* replacement; these will be converted using the fallback described in
|
||||
* the first paragraph, so some information might be lost.
|
||||
* @param tag the input BCP47 language tag.
|
||||
* @param status error information if creating the Locale failed.
|
||||
* @return the Locale for the specified BCP47 language tag.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns a well-formed language tag for this Locale.
|
||||
* <p>
|
||||
* <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
|
||||
* requirement will be silently omitted from the result.
|
||||
*
|
||||
* If this function fails, partial output may have been written to the sink.
|
||||
*
|
||||
* @param sink the output sink receiving the BCP47 language
|
||||
* tag for this Locale.
|
||||
* @param status error information if creating the language tag failed.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Returns a well-formed language tag for this Locale.
|
||||
* <p>
|
||||
* <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
|
||||
* requirement will be silently omitted from the result.
|
||||
*
|
||||
* @param status error information if creating the language tag failed.
|
||||
* @return the BCP47 language tag for this Locale.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
template<typename StringClass>
|
||||
inline StringClass toLanguageTag(UErrorCode& status) const;
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Creates a locale which has had minimal canonicalization
|
||||
* as per uloc_getName().
|
||||
|
@ -432,6 +508,69 @@ public:
|
|||
*/
|
||||
const char * getBaseName() const;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Add the likely subtags for this Locale, per the algorithm described
|
||||
* in the following CLDR technical report:
|
||||
*
|
||||
* http://www.unicode.org/reports/tr35/#Likely_Subtags
|
||||
*
|
||||
* If this Locale is already in the maximal form, or not valid, or there is
|
||||
* no data available for maximization, the Locale will be unchanged.
|
||||
*
|
||||
* For example, "und-Zzzz" cannot be maximized, since there is no
|
||||
* reasonable maximization.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* "en" maximizes to "en_Latn_US"
|
||||
*
|
||||
* "de" maximizes to "de_Latn_DE"
|
||||
*
|
||||
* "sr" maximizes to "sr_Cyrl_RS"
|
||||
*
|
||||
* "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
|
||||
*
|
||||
* "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
|
||||
*
|
||||
* @param status error information if maximizing this Locale failed.
|
||||
* If this Locale is not well-formed, the error code is
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void addLikelySubtags(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Minimize the subtags for this Locale, per the algorithm described
|
||||
* in the following CLDR technical report:
|
||||
*
|
||||
* http://www.unicode.org/reports/tr35/#Likely_Subtags
|
||||
*
|
||||
* If this Locale is already in the minimal form, or not valid, or there is
|
||||
* no data available for minimization, the Locale will be unchanged.
|
||||
*
|
||||
* Since the minimization algorithm relies on proper maximization, see the
|
||||
* comments for addLikelySubtags for reasons why there might not be any
|
||||
* data.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* "en_Latn_US" minimizes to "en"
|
||||
*
|
||||
* "de_Latn_DE" minimizes to "de"
|
||||
*
|
||||
* "sr_Cyrl_RS" minimizes to "sr"
|
||||
*
|
||||
* "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
|
||||
* script, and minimizing to "zh" would imply "zh_Hans_CN".)
|
||||
*
|
||||
* @param status error information if minimizing this Locale failed.
|
||||
* If this Locale is not well-formed, the error code is
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void minimizeSubtags(UErrorCode& status);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Gets the list of keywords for the specified locale.
|
||||
|
@ -439,13 +578,62 @@ public:
|
|||
* @param status the status code
|
||||
* @return pointer to StringEnumeration class, or NULL if there are no keywords.
|
||||
* Client must dispose of it by calling delete.
|
||||
* @see getKeywords
|
||||
* @stable ICU 2.8
|
||||
*/
|
||||
StringEnumeration * createKeywords(UErrorCode &status) const;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Gets the list of Unicode keywords for the specified locale.
|
||||
*
|
||||
* @param status the status code
|
||||
* @return pointer to StringEnumeration class, or NULL if there are no keywords.
|
||||
* Client must dispose of it by calling delete.
|
||||
* @see getUnicodeKeywords
|
||||
* @draft ICU 63
|
||||
*/
|
||||
StringEnumeration * createUnicodeKeywords(UErrorCode &status) const;
|
||||
|
||||
/**
|
||||
* Gets the set of keywords for this Locale.
|
||||
*
|
||||
* A wrapper to call createKeywords() and write the resulting
|
||||
* keywords as standard strings (or compatible objects) into any kind of
|
||||
* container that can be written to by an STL style output iterator.
|
||||
*
|
||||
* @param iterator an STL style output iterator to write the keywords to.
|
||||
* @param status error information if creating set of keywords failed.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
template<typename StringClass, typename OutputIterator>
|
||||
inline void getKeywords(OutputIterator iterator, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Gets the set of Unicode keywords for this Locale.
|
||||
*
|
||||
* A wrapper to call createUnicodeKeywords() and write the resulting
|
||||
* keywords as standard strings (or compatible objects) into any kind of
|
||||
* container that can be written to by an STL style output iterator.
|
||||
*
|
||||
* @param iterator an STL style output iterator to write the keywords to.
|
||||
* @param status error information if creating set of keywords failed.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
template<typename StringClass, typename OutputIterator>
|
||||
inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const;
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Gets the value for a keyword.
|
||||
*
|
||||
* This uses legacy keyword=value pairs, like "collation=phonebook".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword for which we want the value. Case insensitive.
|
||||
* @param buffer The buffer to receive the keyword value.
|
||||
* @param bufferCapacity The capacity of receiving buffer
|
||||
|
@ -456,12 +644,81 @@ public:
|
|||
*/
|
||||
int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Gets the value for a keyword.
|
||||
*
|
||||
* This uses legacy keyword=value pairs, like "collation=phonebook".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword for which we want the value.
|
||||
* @param sink the sink to receive the keyword value.
|
||||
* @param status error information if getting the value failed.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Gets the value for a keyword.
|
||||
*
|
||||
* This uses legacy keyword=value pairs, like "collation=phonebook".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword for which we want the value.
|
||||
* @param status error information if getting the value failed.
|
||||
* @return the keyword value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
template<typename StringClass>
|
||||
inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Gets the Unicode value for a Unicode keyword.
|
||||
*
|
||||
* This uses Unicode key-value pairs, like "co-phonebk".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword for which we want the value.
|
||||
* @param sink the sink to receive the keyword value.
|
||||
* @param status error information if getting the value failed.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Gets the Unicode value for a Unicode keyword.
|
||||
*
|
||||
* This uses Unicode key-value pairs, like "co-phonebk".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword for which we want the value.
|
||||
* @param status error information if getting the value failed.
|
||||
* @return the keyword value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
template<typename StringClass>
|
||||
inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const;
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Sets or removes the value for a keyword.
|
||||
*
|
||||
* For removing all keywords, use getBaseName(),
|
||||
* and construct a new Locale if it differs from getName().
|
||||
*
|
||||
* This uses legacy keyword=value pairs, like "collation=phonebook".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword to be set. Case insensitive.
|
||||
* @param keywordValue value of the keyword to be set. If 0-length or
|
||||
* NULL, will result in the keyword being removed. No error is given if
|
||||
|
@ -472,6 +729,48 @@ public:
|
|||
*/
|
||||
void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Sets or removes the value for a keyword.
|
||||
*
|
||||
* For removing all keywords, use getBaseName(),
|
||||
* and construct a new Locale if it differs from getName().
|
||||
*
|
||||
* This uses legacy keyword=value pairs, like "collation=phonebook".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword to be set.
|
||||
* @param keywordValue value of the keyword to be set. If 0-length or
|
||||
* NULL, will result in the keyword being removed. No error is given if
|
||||
* that keyword does not exist.
|
||||
* @param status Returns any error information while performing this operation.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Sets or removes the Unicode value for a Unicode keyword.
|
||||
*
|
||||
* For removing all keywords, use getBaseName(),
|
||||
* and construct a new Locale if it differs from getName().
|
||||
*
|
||||
* This uses Unicode key-value pairs, like "co-phonebk".
|
||||
*
|
||||
* ICU4C doesn't do automatic conversion between legacy and Unicode
|
||||
* keywords and values in getters and setters (as opposed to ICU4J).
|
||||
*
|
||||
* @param keywordName name of the keyword to be set.
|
||||
* @param keywordValue value of the keyword to be set. If 0-length or
|
||||
* NULL, will result in the keyword being removed. No error is given if
|
||||
* that keyword does not exist.
|
||||
* @param status Returns any error information while performing this operation.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* returns the locale's three-letter language code, as specified
|
||||
* in ISO draft standard ISO-639-2.
|
||||
|
@ -759,12 +1058,12 @@ private:
|
|||
|
||||
/**
|
||||
* A friend to allow the default locale to be set by either the C or C++ API.
|
||||
* @internal
|
||||
* @internal (private)
|
||||
*/
|
||||
friend Locale *locale_set_default_internal(const char *, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @internal (private)
|
||||
*/
|
||||
friend void U_CALLCONV locale_available_init();
|
||||
};
|
||||
|
@ -775,6 +1074,17 @@ Locale::operator!=(const Locale& other) const
|
|||
return !operator==(other);
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
template<typename StringClass> inline StringClass
|
||||
Locale::toLanguageTag(UErrorCode& status) const
|
||||
{
|
||||
StringClass result;
|
||||
StringByteSink<StringClass> sink(&result);
|
||||
toLanguageTag(sink, status);
|
||||
return result;
|
||||
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
inline const char *
|
||||
Locale::getCountry() const
|
||||
{
|
||||
|
@ -805,6 +1115,62 @@ Locale::getName() const
|
|||
return fullName;
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
template<typename StringClass, typename OutputIterator> inline void
|
||||
Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
|
||||
{
|
||||
LocalPointer<StringEnumeration> keys(createKeywords(status));
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
for (;;) {
|
||||
int32_t resultLength;
|
||||
const char* buffer = keys->next(&resultLength, status);
|
||||
if (U_FAILURE(status) || buffer == nullptr) {
|
||||
return;
|
||||
}
|
||||
*iterator++ = StringClass(buffer, resultLength);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename StringClass, typename OutputIterator> inline void
|
||||
Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const
|
||||
{
|
||||
LocalPointer<StringEnumeration> keys(createUnicodeKeywords(status));
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
for (;;) {
|
||||
int32_t resultLength;
|
||||
const char* buffer = keys->next(&resultLength, status);
|
||||
if (U_FAILURE(status) || buffer == nullptr) {
|
||||
return;
|
||||
}
|
||||
*iterator++ = StringClass(buffer, resultLength);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename StringClass> inline StringClass
|
||||
Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const
|
||||
{
|
||||
StringClass result;
|
||||
StringByteSink<StringClass> sink(&result);
|
||||
getKeywordValue(keywordName, sink, status);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename StringClass> inline StringClass
|
||||
Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const
|
||||
{
|
||||
StringClass result;
|
||||
StringByteSink<StringClass> sink(&result);
|
||||
getUnicodeKeywordValue(keywordName, sink, status);
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
inline UBool
|
||||
Locale::isBogus(void) const {
|
||||
return fIsBogus;
|
||||
|
|
|
@ -771,8 +771,8 @@ public:
|
|||
* @stable ICU 4.8
|
||||
*/
|
||||
UMessagePatternArgType getArgType() const {
|
||||
UMessagePatternPartType type=getType();
|
||||
if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
|
||||
UMessagePatternPartType msgType=getType();
|
||||
if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
|
||||
return (UMessagePatternArgType)value;
|
||||
} else {
|
||||
return UMSGPAT_ARG_TYPE_NONE;
|
||||
|
|
|
@ -241,7 +241,7 @@ public:
|
|||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual void
|
||||
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
||||
|
@ -391,7 +391,7 @@ public:
|
|||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return TRUE if s is normalized
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual UBool
|
||||
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
|
||||
|
@ -559,7 +559,7 @@ public:
|
|||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual void
|
||||
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
||||
|
@ -686,7 +686,7 @@ public:
|
|||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return TRUE if s is normalized
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
virtual UBool
|
||||
isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
* and/or from other macros that are predefined by the compiler
|
||||
* or defined in standard (POSIX or platform or compiler) headers.
|
||||
*
|
||||
* As a temporary workaround, you can add an explicit <code>#define</code> for some macros
|
||||
* As a temporary workaround, you can add an explicit \#define for some macros
|
||||
* before it is first tested, or add an equivalent -D macro definition
|
||||
* to the compiler's command line.
|
||||
*
|
||||
|
@ -207,6 +207,9 @@
|
|||
# define CYGWINMSVC
|
||||
#endif
|
||||
*/
|
||||
#ifdef U_IN_DOXYGEN
|
||||
# define CYGWINMSVC
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def U_PLATFORM_USES_ONLY_WIN32_API
|
||||
|
@ -417,6 +420,9 @@
|
|||
#ifndef __has_cpp_attribute
|
||||
# define __has_cpp_attribute(x) 0
|
||||
#endif
|
||||
#ifndef __has_declspec_attribute
|
||||
# define __has_declspec_attribute(x) 0
|
||||
#endif
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
@ -493,13 +499,8 @@ namespace std {
|
|||
*/
|
||||
#ifdef U_NOEXCEPT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS /* Visual Studio */
|
||||
# define U_NOEXCEPT
|
||||
#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_noexcept) || __has_extension(cxx_noexcept) \
|
||||
|| (defined(_MSC_VER) && _MSC_VER >= 1900) /* Visual Studio 2015 */
|
||||
# define U_NOEXCEPT noexcept
|
||||
#else
|
||||
# define U_NOEXCEPT
|
||||
# define U_NOEXCEPT noexcept
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
@ -519,6 +520,8 @@ namespace std {
|
|||
(__has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough"))
|
||||
# define U_FALLTHROUGH [[clang::fallthrough]]
|
||||
# endif
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 7)
|
||||
# define U_FALLTHROUGH __attribute__((fallthrough))
|
||||
#endif
|
||||
|
||||
#ifndef U_FALLTHROUGH
|
||||
|
@ -763,7 +766,8 @@ namespace std {
|
|||
#elif U_HAVE_CHAR16_T \
|
||||
|| (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
|
||||
|| (defined(__HP_aCC) && __HP_aCC >= 035000) \
|
||||
|| (defined(__HP_cc) && __HP_cc >= 111106)
|
||||
|| (defined(__HP_cc) && __HP_cc >= 111106) \
|
||||
|| (defined(U_IN_DOXYGEN))
|
||||
# define U_DECLARE_UTF16(string) u ## string
|
||||
#elif U_SIZEOF_WCHAR_T == 2 \
|
||||
&& (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
|
||||
|
@ -782,6 +786,8 @@ namespace std {
|
|||
/* Use the predefined value. */
|
||||
#elif defined(U_STATIC_IMPLEMENTATION)
|
||||
# define U_EXPORT
|
||||
#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport))
|
||||
# define U_EXPORT __declspec(dllexport)
|
||||
#elif defined(__GNUC__)
|
||||
# define U_EXPORT __attribute__((visibility("default")))
|
||||
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
|
||||
|
@ -789,8 +795,6 @@ namespace std {
|
|||
# define U_EXPORT __global
|
||||
/*#elif defined(__HP_aCC) || defined(__HP_cc)
|
||||
# define U_EXPORT __declspec(dllexport)*/
|
||||
#elif defined(_MSC_VER)
|
||||
# define U_EXPORT __declspec(dllexport)
|
||||
#else
|
||||
# define U_EXPORT
|
||||
#endif
|
||||
|
@ -806,7 +810,7 @@ namespace std {
|
|||
|
||||
#ifdef U_IMPORT
|
||||
/* Use the predefined value. */
|
||||
#elif defined(_MSC_VER)
|
||||
#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport))
|
||||
/* Windows needs to export/import data. */
|
||||
# define U_IMPORT __declspec(dllimport)
|
||||
#else
|
||||
|
|
|
@ -83,6 +83,7 @@ typedef unsigned char uint8_t;
|
|||
|
||||
#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
|
||||
|
||||
/// \cond
|
||||
#if ! U_HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
@ -122,6 +123,7 @@ typedef unsigned int uint32_t;
|
|||
typedef unsigned long long uint64_t;
|
||||
#endif
|
||||
#endif
|
||||
/// \endcond
|
||||
|
||||
#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
|
||||
|
||||
|
|
|
@ -99,7 +99,7 @@ private:
|
|||
* If present, UStack of LanguageBreakEngine objects that might handle
|
||||
* dictionary characters. Searched from top to bottom to find an object to
|
||||
* handle a given character.
|
||||
* @internal
|
||||
* @internal (private)
|
||||
*/
|
||||
UStack *fLanguageBreakEngines;
|
||||
|
||||
|
@ -108,14 +108,14 @@ private:
|
|||
* If present, the special LanguageBreakEngine used for handling
|
||||
* characters that are in the dictionary set, but not handled by any
|
||||
* LanguageBreakEngine.
|
||||
* @internal
|
||||
* @internal (private)
|
||||
*/
|
||||
UnhandledEngine *fUnhandledBreakEngine;
|
||||
|
||||
/**
|
||||
* Counter for the number of characters encountered with the "dictionary"
|
||||
* flag set.
|
||||
* @internal
|
||||
* @internal (private)
|
||||
*/
|
||||
uint32_t fDictionaryCharCount;
|
||||
|
||||
|
@ -150,7 +150,7 @@ private:
|
|||
*
|
||||
* The break iterator adopts the memory, and will
|
||||
* free it when done.
|
||||
* @internal
|
||||
* @internal (private)
|
||||
*/
|
||||
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
|
||||
|
||||
|
|
|
@ -39,8 +39,6 @@
|
|||
*/
|
||||
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Titlecase the string as a whole rather than each word.
|
||||
* (Titlecase only the character at index 0, possibly adjusted.)
|
||||
|
@ -50,7 +48,7 @@
|
|||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_WHOLE_STRING 0x20
|
||||
|
||||
|
@ -63,12 +61,10 @@
|
|||
* including both an options bit and an explicit BreakIterator.
|
||||
*
|
||||
* @see U_TITLECASE_ADJUST_TO_CASED
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_SENTENCES 0x40
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Do not lowercase non-initial parts of words when titlecasing.
|
||||
* Option bit for titlecasing APIs that take an options bit set.
|
||||
|
@ -112,8 +108,6 @@
|
|||
*/
|
||||
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Adjust each titlecasing BreakIterator index to the next cased character.
|
||||
* (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
|
||||
|
@ -130,7 +124,7 @@
|
|||
* It is an error to specify multiple titlecasing adjustment options together.
|
||||
*
|
||||
* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_TITLECASE_ADJUST_TO_CASED 0x400
|
||||
|
||||
|
@ -141,7 +135,7 @@
|
|||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_EDITS_NO_RESET 0x2000
|
||||
|
||||
|
@ -153,12 +147,10 @@
|
|||
* @see CaseMap
|
||||
* @see Edits
|
||||
* @see Normalizer2
|
||||
* @draft ICU 60
|
||||
* @stable ICU 60
|
||||
*/
|
||||
#define U_OMIT_UNCHANGED_TEXT 0x4000
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
|
||||
* Compare strings in code point order instead of code unit order.
|
||||
|
|
|
@ -26,8 +26,10 @@
|
|||
*/
|
||||
|
||||
// Forward declaration.
|
||||
/// \cond
|
||||
struct UHashtable;
|
||||
typedef struct UHashtable UHashtable;
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* Build options for BytesTrieBuilder and CharsTrieBuilder.
|
||||
|
@ -64,7 +66,7 @@ class U_COMMON_API StringTrieBuilder : public UObject {
|
|||
public:
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** @internal */
|
||||
static UBool hashNode(const void *node);
|
||||
static int32_t hashNode(const void *node);
|
||||
/** @internal */
|
||||
static UBool equalNodes(const void *left, const void *right);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
@ -188,7 +190,10 @@ protected:
|
|||
|
||||
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
|
||||
// it is needed for layout of other objects.
|
||||
/** @internal */
|
||||
/**
|
||||
* @internal
|
||||
* \cond
|
||||
*/
|
||||
class Node : public UObject {
|
||||
public:
|
||||
Node(int32_t initialHash) : hash(initialHash), offset(0) {}
|
||||
|
@ -391,7 +396,9 @@ protected:
|
|||
int32_t length;
|
||||
Node *next; // A branch sub-node.
|
||||
};
|
||||
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
/// \endcond
|
||||
|
||||
/** @internal */
|
||||
virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
|
||||
|
|
|
@ -323,6 +323,10 @@
|
|||
* these special values are designed that way. Also, the implementation
|
||||
* assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
|
||||
*
|
||||
* Note: The numeric values of the related constants will not change:
|
||||
* They are tied to the use of 7-bit byte values (plus the override bit)
|
||||
* and of the UBiDiLevel=uint8_t data type in this API.
|
||||
*
|
||||
* @see UBIDI_DEFAULT_LTR
|
||||
* @see UBIDI_DEFAULT_RTL
|
||||
* @see UBIDI_LEVEL_OVERRIDE
|
||||
|
@ -386,6 +390,8 @@ typedef uint8_t UBiDiLevel;
|
|||
|
||||
/**
|
||||
* Maximum explicit embedding level.
|
||||
* Same as the max_depth value in the
|
||||
* <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>.
|
||||
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -1996,7 +2002,7 @@ U_CDECL_BEGIN
|
|||
*
|
||||
* @return The directional property / Bidi class for the given code point
|
||||
* <code>c</code> if the default class has been overridden, or
|
||||
* <code>#U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
|
||||
* <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
|
||||
* if the standard Bidi class value for <code>c</code> is to be used.
|
||||
* @see ubidi_setClassCallback
|
||||
* @see ubidi_getClassCallback
|
||||
|
@ -2010,7 +2016,7 @@ U_CDECL_END
|
|||
/**
|
||||
* Retrieve the Bidi class for a given code point.
|
||||
* <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
|
||||
* value other than <code>#U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
|
||||
* value other than <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
|
||||
* that value is used; otherwise the default class determination mechanism is invoked.</p>
|
||||
*
|
||||
* @param pBiDi is the paragraph <code>UBiDi</code> object.
|
||||
|
|
|
@ -26,33 +26,38 @@
|
|||
/**
|
||||
* \file
|
||||
* \brief Bidi Transformations
|
||||
*/
|
||||
|
||||
/**
|
||||
* `UBiDiOrder` indicates the order of text.
|
||||
*
|
||||
* <code>UBiDiOrder</code> indicates the order of text.<p>
|
||||
* This bidi transformation engine supports all possible combinations (4 in
|
||||
* total) of input and output text order:
|
||||
* <ul>
|
||||
* <li><logical input, visual output>: unless the output direction is RTL, this
|
||||
* corresponds to a normal operation of the Bidi algorithm as described in the
|
||||
* Unicode Technical Report and implemented by <code>UBiDi</code> when the
|
||||
* reordering mode is set to <code>UBIDI_REORDER_DEFAULT</code>. Visual RTL
|
||||
* mode is not supported by <code>UBiDi</code> and is accomplished through
|
||||
* reversing a visual LTR string,</li>
|
||||
* <li><visual input, logical output>: unless the input direction is RTL, this
|
||||
* corresponds to an "inverse bidi algorithm" in <code>UBiDi</code> with the
|
||||
* reordering mode set to <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>.
|
||||
* Visual RTL mode is not supported by <code>UBiDi</code> and is
|
||||
* accomplished through reversing a visual LTR string,</li>
|
||||
* <li><logical input, logical output>: if the input and output base directions
|
||||
* mismatch, this corresponds to the <code>UBiDi</code> implementation with the
|
||||
* reordering mode set to <code>UBIDI_REORDER_RUNS_ONLY</code>; and if the
|
||||
* input and output base directions are identical, the transformation engine
|
||||
* will only handle character mirroring and Arabic shaping operations without
|
||||
* reordering,</li>
|
||||
* <li><visual input, visual output>: this reordering mode is not supported by
|
||||
* the <code>UBiDi</code> engine; it implies character mirroring, Arabic
|
||||
* shaping, and - if the input/output base directions mismatch - string
|
||||
* reverse operations.</li>
|
||||
* </ul>
|
||||
*
|
||||
* - <logical input, visual output>: unless the output direction is RTL, this
|
||||
* corresponds to a normal operation of the Bidi algorithm as described in the
|
||||
* Unicode Technical Report and implemented by `UBiDi` when the
|
||||
* reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
|
||||
* mode is not supported by `UBiDi` and is accomplished through
|
||||
* reversing a visual LTR string,
|
||||
*
|
||||
* - <visual input, logical output>: unless the input direction is RTL, this
|
||||
* corresponds to an "inverse bidi algorithm" in `UBiDi` with the
|
||||
* reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
|
||||
* Visual RTL mode is not supported by `UBiDi` and is
|
||||
* accomplished through reversing a visual LTR string,
|
||||
*
|
||||
* - <logical input, logical output>: if the input and output base directions
|
||||
* mismatch, this corresponds to the `UBiDi` implementation with the
|
||||
* reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
|
||||
* input and output base directions are identical, the transformation engine
|
||||
* will only handle character mirroring and Arabic shaping operations without
|
||||
* reordering,
|
||||
*
|
||||
* - <visual input, visual output>: this reordering mode is not supported by
|
||||
* the `UBiDi` engine; it implies character mirroring, Arabic
|
||||
* shaping, and - if the input/output base directions mismatch - string
|
||||
* reverse operations.
|
||||
* @see ubidi_setInverse
|
||||
* @see ubidi_setReorderingMode
|
||||
* @see UBIDI_REORDER_DEFAULT
|
||||
|
|
|
@ -27,6 +27,24 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/stringoptions.h"
|
||||
#include "unicode/ucpmap.h"
|
||||
|
||||
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
|
||||
|
||||
#define USET_DEFINED
|
||||
|
||||
/**
|
||||
* USet is the C API type corresponding to C++ class UnicodeSet.
|
||||
* It is forward-declared here to avoid including unicode/uset.h file if related
|
||||
* APIs are not used.
|
||||
*
|
||||
* @see ucnv_getUnicodeSet
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
typedef struct USet USet;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
|
@ -61,6 +79,18 @@ U_CDECL_BEGIN
|
|||
* "About the Unicode Character Database" (http://www.unicode.org/ucd/)
|
||||
* and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html).
|
||||
*
|
||||
* Many properties are accessible via generic functions that take a UProperty selector.
|
||||
* - u_hasBinaryProperty() returns a binary value (TRUE/FALSE) per property and code point.
|
||||
* - u_getIntPropertyValue() returns an integer value per property and code point.
|
||||
* For each supported enumerated or catalog property, there is
|
||||
* an enum type for all of the property's values, and
|
||||
* u_getIntPropertyValue() returns the numeric values of those constants.
|
||||
* - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
|
||||
* all code points for which the property is true.
|
||||
* - u_getIntPropertyMap() returns a map for each
|
||||
* ICU-supported enumerated/catalog/int-valued property which
|
||||
* maps all Unicode code points to their values for that property.
|
||||
*
|
||||
* Many functions are designed to match java.lang.Character functions.
|
||||
* See the individual function documentation,
|
||||
* and see the JDK 1.4 java.lang.Character documentation
|
||||
|
@ -546,12 +576,34 @@ typedef enum UProperty {
|
|||
(http://www.unicode.org/reports/tr9/)
|
||||
Returns UBidiPairedBracketType values. @stable ICU 52 */
|
||||
UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
|
||||
/**
|
||||
* Enumerated property Indic_Positional_Category.
|
||||
* New in Unicode 6.0 as provisional property Indic_Matra_Category;
|
||||
* renamed and changed to informative in Unicode 8.0.
|
||||
* See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
|
||||
/**
|
||||
* Enumerated property Indic_Syllabic_Category.
|
||||
* New in Unicode 6.0 as provisional; informative since Unicode 8.0.
|
||||
* See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
|
||||
/**
|
||||
* Enumerated property Vertical_Orientation.
|
||||
* Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
|
||||
* New as a UCD property in Unicode 10.0.
|
||||
* @stable ICU 63
|
||||
*/
|
||||
UCHAR_VERTICAL_ORIENTATION=0x1018,
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the last constant for enumerated/integer Unicode properties.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UCHAR_INT_LIMIT=0x1016,
|
||||
UCHAR_INT_LIMIT=0x1019,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/** Bitmask property General_Category_Mask.
|
||||
|
@ -2320,6 +2372,161 @@ typedef enum UHangulSyllableType {
|
|||
#endif // U_HIDE_DEPRECATED_API
|
||||
} UHangulSyllableType;
|
||||
|
||||
/**
|
||||
* Indic Positional Category constants.
|
||||
*
|
||||
* @see UCHAR_INDIC_POSITIONAL_CATEGORY
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef enum UIndicPositionalCategory {
|
||||
/*
|
||||
* Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
|
||||
* It matches lines like
|
||||
* U_INPC_<Unicode Indic_Positional_Category value name>
|
||||
*/
|
||||
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_NA,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_BOTTOM,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_BOTTOM_AND_LEFT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_BOTTOM_AND_RIGHT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_LEFT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_LEFT_AND_RIGHT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_OVERSTRUCK,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_RIGHT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_TOP,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_TOP_AND_BOTTOM,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_TOP_AND_LEFT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_TOP_AND_LEFT_AND_RIGHT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_TOP_AND_RIGHT,
|
||||
/** @stable ICU 63 */
|
||||
U_INPC_VISUAL_ORDER_LEFT,
|
||||
} UIndicPositionalCategory;
|
||||
|
||||
/**
|
||||
* Indic Syllabic Category constants.
|
||||
*
|
||||
* @see UCHAR_INDIC_SYLLABIC_CATEGORY
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef enum UIndicSyllabicCategory {
|
||||
/*
|
||||
* Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
|
||||
* It matches lines like
|
||||
* U_INSC_<Unicode Indic_Syllabic_Category value name>
|
||||
*/
|
||||
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_OTHER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_AVAGRAHA,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_BINDU,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_BRAHMI_JOINING_NUMBER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CANTILLATION_MARK,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_DEAD,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_FINAL,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_HEAD_LETTER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_INITIAL_POSTFIXED,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_KILLER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_MEDIAL,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_PLACEHOLDER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_PRECEDING_REPHA,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_PREFIXED,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_SUBJOINED,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_SUCCEEDING_REPHA,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_CONSONANT_WITH_STACKER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_GEMINATION_MARK,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_INVISIBLE_STACKER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_JOINER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_MODIFYING_LETTER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_NON_JOINER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_NUKTA,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_NUMBER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_NUMBER_JOINER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_PURE_KILLER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_REGISTER_SHIFTER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_SYLLABLE_MODIFIER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_TONE_LETTER,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_TONE_MARK,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_VIRAMA,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_VISARGA,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_VOWEL,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_VOWEL_DEPENDENT,
|
||||
/** @stable ICU 63 */
|
||||
U_INSC_VOWEL_INDEPENDENT,
|
||||
} UIndicSyllabicCategory;
|
||||
|
||||
/**
|
||||
* Vertical Orientation constants.
|
||||
*
|
||||
* @see UCHAR_VERTICAL_ORIENTATION
|
||||
* @stable ICU 63
|
||||
*/
|
||||
typedef enum UVerticalOrientation {
|
||||
/*
|
||||
* Note: UVerticalOrientation constants are parsed by preparseucd.py.
|
||||
* It matches lines like
|
||||
* U_VO_<Unicode Vertical_Orientation value name>
|
||||
*/
|
||||
|
||||
/** @stable ICU 63 */
|
||||
U_VO_ROTATED,
|
||||
/** @stable ICU 63 */
|
||||
U_VO_TRANSFORMED_ROTATED,
|
||||
/** @stable ICU 63 */
|
||||
U_VO_TRANSFORMED_UPRIGHT,
|
||||
/** @stable ICU 63 */
|
||||
U_VO_UPRIGHT,
|
||||
} UVerticalOrientation;
|
||||
|
||||
/**
|
||||
* Check a binary Unicode property for a code point.
|
||||
*
|
||||
|
@ -2342,6 +2549,7 @@ typedef enum UHangulSyllableType {
|
|||
* does not have data for the property at all, or not for this code point.
|
||||
*
|
||||
* @see UProperty
|
||||
* @see u_getBinaryPropertySet
|
||||
* @see u_getIntPropertyValue
|
||||
* @see u_getUnicodeVersion
|
||||
* @stable ICU 2.1
|
||||
|
@ -2349,6 +2557,28 @@ typedef enum UHangulSyllableType {
|
|||
U_STABLE UBool U_EXPORT2
|
||||
u_hasBinaryProperty(UChar32 c, UProperty which);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns a frozen USet for a binary property.
|
||||
* The library retains ownership over the returned object.
|
||||
* Sets an error code if the property number is not one for a binary property.
|
||||
*
|
||||
* The returned set contains all code points for which the property is true.
|
||||
*
|
||||
* @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the property as a set
|
||||
* @see UProperty
|
||||
* @see u_hasBinaryProperty
|
||||
* @see UnicodeSet::fromUSet
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI const USet * U_EXPORT2
|
||||
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Check if a code point has the Alphabetic Unicode property.
|
||||
* Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
|
||||
|
@ -2449,6 +2679,7 @@ u_isUWhiteSpace(UChar32 c);
|
|||
* @see u_hasBinaryProperty
|
||||
* @see u_getIntPropertyMinValue
|
||||
* @see u_getIntPropertyMaxValue
|
||||
* @see u_getIntPropertyMap
|
||||
* @see u_getUnicodeVersion
|
||||
* @stable ICU 2.2
|
||||
*/
|
||||
|
@ -2505,6 +2736,28 @@ u_getIntPropertyMinValue(UProperty which);
|
|||
U_STABLE int32_t U_EXPORT2
|
||||
u_getIntPropertyMaxValue(UProperty which);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
|
||||
* The library retains ownership over the returned object.
|
||||
* Sets an error code if the property number is not one for an "int property".
|
||||
*
|
||||
* The returned object maps all Unicode code points to their values for that property.
|
||||
* For documentation of the integer values see u_getIntPropertyValue().
|
||||
*
|
||||
* @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the property as a map
|
||||
* @see UProperty
|
||||
* @see u_getIntPropertyValue
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI const UCPMap * U_EXPORT2
|
||||
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Get the numeric value for a Unicode code point as defined in the
|
||||
* Unicode Character Database.
|
||||
|
|
|
@ -53,19 +53,18 @@
|
|||
#include "unicode/uenum.h"
|
||||
#include "unicode/localpointer.h"
|
||||
|
||||
#ifndef __USET_H__
|
||||
#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
|
||||
|
||||
#define USET_DEFINED
|
||||
|
||||
/**
|
||||
* USet is the C API type for Unicode sets.
|
||||
* It is forward-declared here to avoid including the header file if related
|
||||
* USet is the C API type corresponding to C++ class UnicodeSet.
|
||||
* It is forward-declared here to avoid including unicode/uset.h file if related
|
||||
* conversion APIs are not used.
|
||||
* See unicode/uset.h
|
||||
*
|
||||
* @see ucnv_getUnicodeSet
|
||||
* @stable ICU 2.6
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
struct USet;
|
||||
/** @stable ICU 2.6 */
|
||||
typedef struct USet USet;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -183,7 +183,7 @@
|
|||
*/
|
||||
#ifdef U_HAVE_LIB_SUFFIX
|
||||
/* Use the predefined value. */
|
||||
#elif defined(U_LIB_SUFFIX_C_NAME)
|
||||
#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
|
||||
# define U_HAVE_LIB_SUFFIX 1
|
||||
#endif
|
||||
|
||||
|
@ -431,17 +431,6 @@
|
|||
# define UCONFIG_HAVE_PARSEALLINPUT 1
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* \def UCONFIG_FORMAT_FASTPATHS_49
|
||||
* This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
#ifndef UCONFIG_FORMAT_FASTPATHS_49
|
||||
# define UCONFIG_FORMAT_FASTPATHS_49 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_NO_FILTERED_BREAK_ITERATION
|
||||
* This switch turns off filtered break iteration code.
|
||||
|
|
|
@ -0,0 +1,162 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucpmap.h
|
||||
// created: 2018sep03 Markus W. Scherer
|
||||
|
||||
#ifndef __UCPMAP_H__
|
||||
#define __UCPMAP_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* This file defines an abstract map from Unicode code points to integer values.
|
||||
*
|
||||
* @see UCPMap
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
*/
|
||||
|
||||
/**
|
||||
* Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
* @draft ICU 63
|
||||
*/
|
||||
typedef struct UCPMap UCPMap;
|
||||
|
||||
/**
|
||||
* Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
|
||||
* Most users should use UCPMAP_RANGE_NORMAL.
|
||||
*
|
||||
* @see ucpmap_getRange
|
||||
* @see ucptrie_getRange
|
||||
* @see umutablecptrie_getRange
|
||||
* @draft ICU 63
|
||||
*/
|
||||
enum UCPMapRangeOption {
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map.
|
||||
* Most users should use this option.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_NORMAL,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that lead surrogates (U+D800..U+DBFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_LEAD(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the lead surrogate code *points*.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
|
||||
/**
|
||||
* ucpmap_getRange() enumerates all same-value ranges as stored in the map,
|
||||
* except that all surrogates (U+D800..U+DFFF) are treated as having the
|
||||
* surrogateValue, which is passed to getRange() as a separate parameter.
|
||||
* The surrogateValue is not transformed via filter().
|
||||
* See U_IS_SURROGATE(c).
|
||||
*
|
||||
* Most users should use UCPMAP_RANGE_NORMAL instead.
|
||||
*
|
||||
* This option is useful for maps that map surrogate code *units* to
|
||||
* special values optimized for UTF-16 string processing
|
||||
* or for special error behavior for unpaired surrogates,
|
||||
* but those values are not to be associated with the surrogate code *points*.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPMAP_RANGE_FIXED_ALL_SURROGATES
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPMapRangeOption UCPMapRangeOption;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the map, with range checking.
|
||||
* Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* @param map the map
|
||||
* @param c the code point
|
||||
* @return the map value,
|
||||
* or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucpmap_get(const UCPMap *map, UChar32 c);
|
||||
|
||||
/**
|
||||
* Callback function type: Modifies a map value.
|
||||
* Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
|
||||
* The modified value will be returned by the getRange function.
|
||||
*
|
||||
* Can be used to ignore some of the value bits,
|
||||
* make a filter for one of several values,
|
||||
* return a value index computed from the map value, etc.
|
||||
*
|
||||
* @param context an opaque pointer, as passed into the getRange function
|
||||
* @param value a value from the map
|
||||
* @return the modified value
|
||||
* @draft ICU 63
|
||||
*/
|
||||
typedef uint32_t U_CALLCONV
|
||||
UCPMapValueFilter(const void *context, uint32_t value);
|
||||
|
||||
/**
|
||||
* Returns the last code point such that all those from start to there have the same value.
|
||||
* Can be used to efficiently iterate over all same-value ranges in a map.
|
||||
* (This is normally faster than iterating over code points and get()ting each value,
|
||||
* but much slower than a data structure that stores ranges directly.)
|
||||
*
|
||||
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||
* the value to be delivered is passed through that function, and the return value is the end
|
||||
* of the range where all values are modified to the same actual value.
|
||||
* The value is unchanged if that function pointer is NULL.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* UChar32 start = 0, end;
|
||||
* uint32_t value;
|
||||
* while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
* NULL, NULL, &value)) >= 0) {
|
||||
* // Work with the range start..end and its value.
|
||||
* start = end + 1;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param map the map
|
||||
* @param start range start
|
||||
* @param option defines whether surrogates are treated normally,
|
||||
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||
* @param filter a pointer to a function that may modify the map data value,
|
||||
* or NULL if the values from the map are to be used unmodified
|
||||
* @param context an opaque pointer that is passed on to the filter function
|
||||
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||
* may have been modified by filter(context, map value)
|
||||
* if that function pointer is not NULL
|
||||
* @return the range end code point, or -1 if start is not a valid code point
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucpmap_getRange(const UCPMap *map, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
#endif
|
|
@ -0,0 +1,646 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// ucptrie.h (modified from utrie2.h)
|
||||
// created: 2017dec29 Markus W. Scherer
|
||||
|
||||
#ifndef __UCPTRIE_H__
|
||||
#define __UCPTRIE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/ucpmap.h"
|
||||
#include "unicode/utf8.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* This file defines an immutable Unicode code point trie.
|
||||
*
|
||||
* @see UCPTrie
|
||||
* @see UMutableCPTrie
|
||||
*/
|
||||
|
||||
#ifndef U_IN_DOXYGEN
|
||||
/** @internal */
|
||||
typedef union UCPTrieData {
|
||||
/** @internal */
|
||||
const void *ptr0;
|
||||
/** @internal */
|
||||
const uint16_t *ptr16;
|
||||
/** @internal */
|
||||
const uint32_t *ptr32;
|
||||
/** @internal */
|
||||
const uint8_t *ptr8;
|
||||
} UCPTrieData;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Immutable Unicode code point trie structure.
|
||||
* Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
|
||||
* For details see http://site.icu-project.org/design/struct/utrie
|
||||
*
|
||||
* Do not access UCPTrie fields directly; use public functions and macros.
|
||||
* Functions are easy to use: They support all trie types and value widths.
|
||||
*
|
||||
* When performance is really important, macros provide faster access.
|
||||
* Most macros are specific to either "fast" or "small" tries, see UCPTrieType.
|
||||
* There are "fast" macros for special optimized use cases.
|
||||
*
|
||||
* The macros will return bogus values, or may crash, if used on the wrong type or value width.
|
||||
*
|
||||
* @see UMutableCPTrie
|
||||
* @draft ICU 63
|
||||
*/
|
||||
struct UCPTrie {
|
||||
#ifndef U_IN_DOXYGEN
|
||||
/** Index array; the lookup macros read index[c >> UCPTRIE_FAST_SHIFT] to find the start of a data block. @internal */
|
||||
const uint16_t *index;
|
||||
/** Data array; read through its ptr16, ptr32 or ptr8 member by the UCPTRIE_16/32/8 macros. @internal */
|
||||
UCPTrieData data;
|
||||
|
||||
/** Length of the index array (presumably counted in 16-bit entries; confirm against the implementation). @internal */
|
||||
int32_t indexLength;
|
||||
/** Number of data array entries; the entries at dataLength-2 and dataLength-1 hold the high value and the error value. @internal */
|
||||
int32_t dataLength;
|
||||
/** Start of the last range which ends at U+10FFFF. @internal */
|
||||
UChar32 highStart;
|
||||
/** highStart>>12 @internal */
|
||||
uint16_t shifted12HighStart;
|
||||
|
||||
/** Trie type, stored as a UCPTrieType value. @internal */
|
||||
int8_t type; // UCPTrieType
|
||||
/** Data value width, stored as a UCPTrieValueWidth value. @internal */
|
||||
int8_t valueWidth; // UCPTrieValueWidth
|
||||
|
||||
/** padding/reserved @internal */
|
||||
uint32_t reserved32;
|
||||
/** padding/reserved @internal */
|
||||
uint16_t reserved16;
|
||||
|
||||
/**
|
||||
* Internal index-3 null block offset.
|
||||
* Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
|
||||
* @internal
|
||||
*/
|
||||
uint16_t index3NullOffset;
|
||||
/**
|
||||
* Internal data null block offset, not shifted.
|
||||
* Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
|
||||
* @internal
|
||||
*/
|
||||
int32_t dataNullOffset;
|
||||
/** @internal */
|
||||
uint32_t nullValue;
|
||||
|
||||
#ifdef UCPTRIE_DEBUG
|
||||
/** Debug-only string; this field exists only when UCPTRIE_DEBUG is defined. @internal */
|
||||
const char *name;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef struct UCPTrie UCPTrie;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Selectors for the type of a UCPTrie.
|
||||
* Different trade-offs for size vs. speed.
|
||||
*
|
||||
* @see umutablecptrie_buildImmutable
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see ucptrie_getType
|
||||
* @draft ICU 63
|
||||
*/
|
||||
enum UCPTrieType {
|
||||
/**
|
||||
* For ucptrie_openFromBinary() to accept any type.
|
||||
* ucptrie_getType() will return the actual type.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPTRIE_TYPE_ANY = -1,
|
||||
/**
|
||||
* Fast/simple/larger BMP data structure. Use functions and "fast" macros.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPTRIE_TYPE_FAST,
|
||||
/**
|
||||
* Small/slower BMP data structure. Use functions and "small" macros.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPTRIE_TYPE_SMALL
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPTrieType UCPTrieType;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Selectors for the number of bits in a UCPTrie data value.
|
||||
*
|
||||
* @see umutablecptrie_buildImmutable
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see ucptrie_getValueWidth
|
||||
* @draft ICU 63
|
||||
*/
|
||||
enum UCPTrieValueWidth {
|
||||
/**
|
||||
* For ucptrie_openFromBinary() to accept any data value width.
|
||||
* ucptrie_getValueWidth() will return the actual data value width.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_ANY = -1,
|
||||
/**
|
||||
* The trie stores 16 bits per data value.
|
||||
* It returns them as unsigned values 0..0xffff=65535.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_16,
|
||||
/**
|
||||
* The trie stores 32 bits per data value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_32,
|
||||
/**
|
||||
* The trie stores 8 bits per data value.
|
||||
* It returns them as unsigned values 0..0xff=255.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
UCPTRIE_VALUE_BITS_8
|
||||
};
|
||||
#ifndef U_IN_DOXYGEN
|
||||
typedef enum UCPTrieValueWidth UCPTrieValueWidth;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Opens a trie from its binary form, stored in 32-bit-aligned memory.
|
||||
* Inverse of ucptrie_toBinary().
|
||||
*
|
||||
* The memory must remain valid and unchanged as long as the trie is used.
|
||||
* You must ucptrie_close() the trie once you are done using it.
|
||||
*
|
||||
* @param type selects the trie type; results in an
|
||||
* U_INVALID_FORMAT_ERROR if it does not match the binary data;
|
||||
* use UCPTRIE_TYPE_ANY to accept any type
|
||||
* @param valueWidth selects the number of bits in a data value; results in an
|
||||
* U_INVALID_FORMAT_ERROR if it does not match the binary data;
|
||||
* use UCPTRIE_VALUE_BITS_ANY to accept any data value width
|
||||
* @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie
|
||||
* @param length the number of bytes available at data;
|
||||
* can be more than necessary
|
||||
* @param pActualLength receives the actual number of bytes at data taken up by the trie data;
|
||||
* can be NULL
|
||||
* @param pErrorCode an in/out ICU UErrorCode
|
||||
* @return the trie
|
||||
*
|
||||
* @see umutablecptrie_open
|
||||
* @see umutablecptrie_buildImmutable
|
||||
* @see ucptrie_toBinary
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI UCPTrie * U_EXPORT2
|
||||
ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
|
||||
const void *data, int32_t length, int32_t *pActualLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Closes a trie and releases associated memory.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ucptrie_close(UCPTrie *trie);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalUCPTriePointer
|
||||
* "Smart pointer" class, closes a UCPTrie via ucptrie_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Returns the trie type.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @return the trie type
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see UCPTRIE_TYPE_ANY
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI UCPTrieType U_EXPORT2
|
||||
ucptrie_getType(const UCPTrie *trie);
|
||||
|
||||
/**
|
||||
* Returns the number of bits in a trie data value.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @return the number of bits in a trie data value
|
||||
* @see ucptrie_openFromBinary
|
||||
* @see UCPTRIE_VALUE_BITS_ANY
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI UCPTrieValueWidth U_EXPORT2
|
||||
ucptrie_getValueWidth(const UCPTrie *trie);
|
||||
|
||||
/**
|
||||
* Returns the value for a code point as stored in the trie, with range checking.
|
||||
* Returns the trie error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* Easier to use than UCPTRIE_FAST_GET() and similar macros but slower.
|
||||
* Easier to use because, unlike the macros, this function works on all UCPTrie
|
||||
* objects, for all types and value widths.
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param c the code point
|
||||
* @return the trie value,
|
||||
* or the trie error value if the code point is not in the range 0..U+10FFFF
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
ucptrie_get(const UCPTrie *trie, UChar32 c);
|
||||
|
||||
/**
|
||||
* Returns the last code point such that all those from start to there have the same value.
|
||||
* Can be used to efficiently iterate over all same-value ranges in a trie.
|
||||
* (This is normally faster than iterating over code points and get()ting each value,
|
||||
* but much slower than a data structure that stores ranges directly.)
|
||||
*
|
||||
* If the UCPMapValueFilter function pointer is not NULL, then
|
||||
* the value to be delivered is passed through that function, and the return value is the end
|
||||
* of the range where all values are modified to the same actual value.
|
||||
* The value is unchanged if that function pointer is NULL.
|
||||
*
|
||||
* Example:
|
||||
* \code
|
||||
* UChar32 start = 0, end;
|
||||
* uint32_t value;
|
||||
* while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
|
||||
* NULL, NULL, &value)) >= 0) {
|
||||
* // Work with the range start..end and its value.
|
||||
* start = end + 1;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param start range start
|
||||
* @param option defines whether surrogates are treated normally,
|
||||
* or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
|
||||
* @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
|
||||
* @param filter a pointer to a function that may modify the trie data value,
|
||||
* or NULL if the values from the trie are to be used unmodified
|
||||
* @param context an opaque pointer that is passed on to the filter function
|
||||
* @param pValue if not NULL, receives the value that every code point start..end has;
|
||||
* may have been modified by filter(context, trie value)
|
||||
* if that function pointer is not NULL
|
||||
* @return the range end code point, or -1 if start is not a valid code point
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI UChar32 U_EXPORT2
|
||||
ucptrie_getRange(const UCPTrie *trie, UChar32 start,
|
||||
UCPMapRangeOption option, uint32_t surrogateValue,
|
||||
UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
|
||||
|
||||
/**
|
||||
* Writes a memory-mappable form of the trie into 32-bit aligned memory.
|
||||
* Inverse of ucptrie_openFromBinary().
|
||||
*
|
||||
* @param trie the trie
|
||||
* @param data a pointer to 32-bit-aligned memory to be filled with the trie data;
|
||||
* can be NULL if capacity==0
|
||||
* @param capacity the number of bytes available at data, or 0 for pure preflighting
|
||||
* @param pErrorCode an in/out ICU UErrorCode;
|
||||
* U_BUFFER_OVERFLOW_ERROR if the capacity is too small
|
||||
* @return the number of bytes written or (if buffer overflow) needed for the trie
|
||||
*
|
||||
* @see ucptrie_openFromBinary()
|
||||
* @draft ICU 63
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Macro parameter value for a trie with 16-bit data values.
|
||||
* Use the name of this macro as a "dataAccess" parameter in other macros.
|
||||
* Do not use this macro in any other way.
|
||||
*
|
||||
* @see UCPTRIE_VALUE_BITS_16
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i])
|
||||
|
||||
/**
|
||||
* Macro parameter value for a trie with 32-bit data values.
|
||||
* Use the name of this macro as a "dataAccess" parameter in other macros.
|
||||
* Do not use this macro in any other way.
|
||||
*
|
||||
* @see UCPTRIE_VALUE_BITS_32
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i])
|
||||
|
||||
/**
|
||||
* Macro parameter value for a trie with 8-bit data values.
|
||||
* Use the name of this macro as a "dataAccess" parameter in other macros.
|
||||
* Do not use this macro in any other way.
|
||||
*
|
||||
* @see UCPTRIE_VALUE_BITS_8
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i])
|
||||
|
||||
/**
|
||||
* Returns a trie value for a code point, with range checking.
|
||||
* Returns the trie error value if c is not in the range 0..U+10FFFF.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point
|
||||
* @return The code point's trie value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c))
|
||||
|
||||
/**
|
||||
* Returns a trie value for a code point, with range checking.
|
||||
* Returns the trie error value if c is not in the range U+0000..U+10FFFF.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point
|
||||
* @return The code point's trie value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \
|
||||
dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c))
|
||||
|
||||
/**
|
||||
* UTF-16: Reads the next code point (UChar32 c, out), post-increments src,
|
||||
* and gets a value from the trie.
|
||||
* Sets the trie error value if c is an unpaired surrogate.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param src (const UChar *, in/out) the source text pointer
|
||||
* @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
|
||||
* @param c (UChar32, out) variable for the code point
|
||||
* @param result (out) variable for the trie lookup result
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) { \
|
||||
(c) = *(src)++; \
|
||||
int32_t __index; \
|
||||
if (!U16_IS_SURROGATE(c)) { \
|
||||
__index = _UCPTRIE_FAST_INDEX(trie, c); /* not a surrogate: fast index lookup */ \
|
||||
} else { \
|
||||
uint16_t __c2; \
|
||||
if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \
|
||||
++(src); \
|
||||
(c) = U16_GET_SUPPLEMENTARY((c), __c2); \
|
||||
__index = _UCPTRIE_SMALL_INDEX(trie, c); /* lead+trail pair combined into one code point */ \
|
||||
} else { \
|
||||
__index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* unpaired surrogate: index of the error value */ \
|
||||
} \
|
||||
} \
|
||||
(result) = dataAccess(trie, __index); \
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src,
|
||||
* and gets a value from the trie.
|
||||
* Sets the trie error value if c is an unpaired surrogate.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param start (const UChar *, in) the start pointer for the text
|
||||
* @param src (const UChar *, in/out) the source text pointer
|
||||
* @param c (UChar32, out) variable for the code point
|
||||
* @param result (out) variable for the trie lookup result
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) { \
|
||||
(c) = *--(src); \
|
||||
int32_t __index; \
|
||||
if (!U16_IS_SURROGATE(c)) { \
|
||||
__index = _UCPTRIE_FAST_INDEX(trie, c); /* not a surrogate: fast index lookup */ \
|
||||
} else { \
|
||||
uint16_t __c2; \
|
||||
if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \
|
||||
--(src); \
|
||||
(c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
|
||||
__index = _UCPTRIE_SMALL_INDEX(trie, c); /* lead+trail pair combined into one code point */ \
|
||||
} else { \
|
||||
__index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* unpaired surrogate: index of the error value */ \
|
||||
} \
|
||||
} \
|
||||
(result) = dataAccess(trie, __index); \
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF-8: Post-increments src and gets a value from the trie.
|
||||
* Sets the trie error value for an ill-formed byte sequence.
|
||||
*
|
||||
* Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point
|
||||
* because it would be more work to do so and is often not needed.
|
||||
* If the trie value differs from the error value, then the byte sequence is well-formed,
|
||||
* and the code point can be assembled without revalidation.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param src (const char *, in/out) the source text pointer
|
||||
* @param limit (const char *, in) the limit pointer for the text (must not be NULL)
|
||||
* @param result (out) variable for the trie lookup result
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) { \
|
||||
int32_t __lead = (uint8_t)*(src)++; /* lead byte; __lead is reused below to hold the data index */ \
|
||||
if (!U8_IS_SINGLE(__lead)) { \
|
||||
uint8_t __t1, __t2, __t3; \
|
||||
if ((src) != (limit) && \
|
||||
(__lead >= 0xe0 ? \
|
||||
__lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \
|
||||
U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \
|
||||
++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \
|
||||
(__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \
|
||||
: /* U+10000..U+10FFFF */ \
|
||||
(__lead -= 0xf0) <= 4 && \
|
||||
U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \
|
||||
(__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \
|
||||
(__t2 = *(src) - 0x80) <= 0x3f && \
|
||||
++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \
|
||||
(__lead = __lead >= (trie)->shifted12HighStart ? \
|
||||
(trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
|
||||
ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \
|
||||
: /* U+0080..U+07FF */ \
|
||||
__lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \
|
||||
(__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \
|
||||
++(src); /* step past the last trail byte, which was checked above but not yet consumed */ \
|
||||
} else { \
|
||||
__lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed: index of the error value */ \
|
||||
} \
|
||||
} \
|
||||
(result) = dataAccess(trie, __lead); \
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF-8: Pre-decrements src and gets a value from the trie.
|
||||
* Sets the trie error value for an ill-formed byte sequence.
|
||||
*
|
||||
* Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point
|
||||
* because it would be more work to do so and is often not needed.
|
||||
* If the trie value differs from the error value, then the byte sequence is well-formed,
|
||||
* and the code point can be assembled without revalidation.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param start (const char *, in) the start pointer for the text
|
||||
* @param src (const char *, in/out) the source text pointer
|
||||
* @param result (out) variable for the trie lookup result
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) { \
|
||||
int32_t __index = (uint8_t)*--(src); /* last byte; a single byte is used directly as the data index */ \
|
||||
if (!U8_IS_SINGLE(__index)) { \
|
||||
__index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \
|
||||
(const uint8_t *)(src)); \
|
||||
(src) -= __index & 7; /* low 3 bits of the helper result: how many more bytes to step back */ \
|
||||
__index >>= 3; /* remaining bits: the data index */ \
|
||||
} \
|
||||
(result) = dataAccess(trie, __index); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a trie value for an ASCII code point, without range checking.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie (of either fast or small type)
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point; must be U+0000..U+007F
|
||||
* @return The ASCII code point's trie value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c)
|
||||
|
||||
/**
|
||||
* Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
|
||||
* Can be used to look up a value for a UTF-16 code unit if other parts of
|
||||
* the string processing check for surrogates.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point, must be U+0000..U+FFFF
|
||||
* @return The BMP code point's trie value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c))
|
||||
|
||||
/**
|
||||
* Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
|
||||
* without range checking.
|
||||
*
|
||||
* @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
|
||||
* @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
|
||||
* @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF
|
||||
* @return The supplementary code point's trie value.
|
||||
* @draft ICU 63
|
||||
*/
|
||||
#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c))
|
||||
|
||||
/* Internal definitions ----------------------------------------------------- */
|
||||
|
||||
#ifndef U_IN_DOXYGEN
|
||||
|
||||
/**
|
||||
* Internal implementation constants.
|
||||
* These are needed for the API macros, but users should not use these directly.
|
||||
* @internal
|
||||
*/
|
||||
enum {
|
||||
/** Number of low code point bits that select an entry within a fast data block; c >> UCPTRIE_FAST_SHIFT selects the index entry. @internal */
|
||||
UCPTRIE_FAST_SHIFT = 6,
|
||||
|
||||
/** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
|
||||
UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT,
|
||||
|
||||
/** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
|
||||
UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1,
|
||||
|
||||
/** Highest code point that UCPTRIE_SMALL_GET() looks up via the fast index path. @internal */
|
||||
UCPTRIE_SMALL_MAX = 0xfff,
|
||||
|
||||
/**
|
||||
* Offset from dataLength (to be subtracted) for fetching the
|
||||
* value returned for out-of-range code points and ill-formed UTF-8/16.
|
||||
* @internal
|
||||
*/
|
||||
UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1,
|
||||
/**
|
||||
* Offset from dataLength (to be subtracted) for fetching the
|
||||
* value returned for code points highStart..U+10FFFF.
|
||||
* @internal
|
||||
*/
|
||||
UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2
|
||||
};
|
||||
|
||||
/* Internal functions and macros -------------------------------------------- */
|
||||
// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API
|
||||
|
||||
/** @internal */
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c);
|
||||
|
||||
/** @internal */
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3);
|
||||
|
||||
/**
|
||||
* Internal function for part of the UCPTRIE_FAST_U8_PREV() macro implementation.
|
||||
* Do not call directly.
|
||||
* @internal
|
||||
*/
|
||||
U_INTERNAL int32_t U_EXPORT2
|
||||
ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
|
||||
const uint8_t *start, const uint8_t *src);
|
||||
|
||||
/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */
|
||||
#define _UCPTRIE_FAST_INDEX(trie, c) \
|
||||
((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK))
|
||||
|
||||
/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */
|
||||
#define _UCPTRIE_SMALL_INDEX(trie, c) \
|
||||
((c) >= (trie)->highStart ? \
|
||||
(trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
|
||||
ucptrie_internalSmallIndex(trie, c))
|
||||
|
||||
/**
|
||||
* Internal trie getter for a code point, with checking that c is in U+0000..10FFFF.
|
||||
* Returns the data index.
|
||||
* @internal
|
||||
*/
|
||||
#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \
|
||||
((uint32_t)(c) <= (uint32_t)(fastMax) ? \
|
||||
_UCPTRIE_FAST_INDEX(trie, c) : \
|
||||
(uint32_t)(c) <= 0x10ffff ? \
|
||||
_UCPTRIE_SMALL_INDEX(trie, c) : \
|
||||
(trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET)
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif // U_IN_DOXYGEN
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
#endif
|
|
@ -60,6 +60,7 @@ enum UCurrencyUsage {
|
|||
UCURR_USAGE_COUNT=2
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
};
|
||||
/** Currency Usage used for Decimal Format */
|
||||
typedef enum UCurrencyUsage UCurrencyUsage;
|
||||
|
||||
/**
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче